From e559a28cc19f87c2b1933bd2f7c9800ba290d5ab Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 26 Apr 2024 17:12:05 +0900 Subject: [PATCH 001/261] GH-41327: [Ruby] Show type name in Arrow::Table#to_s (#41328) ### Rationale for this change It's useful to detect type difference. ### What changes are included in this PR? Add `:show_column_type` option to `Arrow::Table#to_s` and enables it by default. This is a backward incompatible change but this'll help users. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. **This PR includes breaking changes to public APIs.** * GitHub Issue: #41327 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ruby/red-arrow/lib/arrow/field-containable.rb | 2 +- ruby/red-arrow/lib/arrow/table-formatter.rb | 40 +- .../lib/arrow/table-list-formatter.rb | 6 +- .../lib/arrow/table-table-formatter.rb | 7 + ruby/red-arrow/test/test-csv-loader.rb | 72 +-- ruby/red-arrow/test/test-group.rb | 13 + ruby/red-arrow/test/test-schema.rb | 2 +- ruby/red-arrow/test/test-slicer.rb | 345 ++++++----- ruby/red-arrow/test/test-struct-data-type.rb | 2 +- ruby/red-arrow/test/test-table.rb | 535 ++++++++++-------- 10 files changed, 595 insertions(+), 429 deletions(-) diff --git a/ruby/red-arrow/lib/arrow/field-containable.rb b/ruby/red-arrow/lib/arrow/field-containable.rb index e4dbf4ec26cae..103e901f5d2de 100644 --- a/ruby/red-arrow/lib/arrow/field-containable.rb +++ b/ruby/red-arrow/lib/arrow/field-containable.rb @@ -29,7 +29,7 @@ def find_field(name_or_index) return nil if index < 0 or index >= n_fields get_field(index) else - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{name_or_index.inspect}>" raise ArgumentError, message end diff --git a/ruby/red-arrow/lib/arrow/table-formatter.rb b/ruby/red-arrow/lib/arrow/table-formatter.rb index d039679f9a03a..b93faf09cbd02 100644 --- a/ruby/red-arrow/lib/arrow/table-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-formatter.rb @@ -24,7 +24,8 @@ class ColumnFormatter attr_reader :head_values attr_reader :tail_values attr_reader :sample_values - def initialize(column, head_values, tail_values) + def initialize(table_formatter, column, head_values, tail_values) + @table_formatter = table_formatter @column = column @head_values = head_values @tail_values = tail_values @@ -36,6 +37,15 @@ def data_type @data_type ||= @column.data_type end + def formatted_data_type_name + @formatted_data_type_name ||= "(#{data_type.name})" + end + + def aligned_data_type_name + @aligned_data_type_name ||= + "%*s" % [aligned_name.size, formatted_data_type_name] + end + def name @name ||= @column.name end @@ -63,7 +73,7 @@ def format_value(value, width=0) formatted_value = format_value(value[field_name], field_value_width) "#{formatted_name}: #{formatted_value}" end - formatted = "{" + formatted = +"{" formatted << formatted_values.join(", ") formatted << "}" "%-*s" % [width, formatted] @@ -90,9 +100,16 @@ def compute_field_value_width(field, sample_values) end def format_aligned_name(name, data_type, sample_values) + if @table_formatter.show_column_type? 
+ min_width = formatted_data_type_name.size + else + min_width = 0 + end case data_type when TimestampDataType - "%*s" % [::Time.now.iso8601.size, name] + width = ::Time.now.iso8601.size + width = min_width if width < min_width + "%*s" % [width, name] when IntegerDataType have_null = false have_negative = false @@ -118,9 +135,12 @@ def format_aligned_name(name, data_type, sample_values) end width += 1 if have_negative # Need "-" width = [width, FORMATTED_NULL.size].max if have_null + width = min_width if width < min_width "%*s" % [width, name] when FloatDataType, DoubleDataType - "%*s" % [FLOAT_N_DIGITS, name] + width = FLOAT_N_DIGITS + width = min_width if width < min_width + "%*s" % [width, name] when StructDataType field_widths = data_type.fields.collect do |field| field_value_width = compute_field_value_width(field, sample_values) @@ -130,9 +150,11 @@ def format_aligned_name(name, data_type, sample_values) if field_widths.size > 0 width += (", ".size * (field_widths.size - 1)) end + width = min_width if width < min_width "%*s" % [width, name] else - name + width = min_width + "%*s" % [width, name] end end end @@ -143,7 +165,7 @@ def initialize(table, options={}) end def format - text = "" + text = +"" n_rows = @table.n_rows border = @options[:border] || 10 @@ -159,7 +181,7 @@ def format else tail_values = [] end - ColumnFormatter.new(column, head_values, tail_values) + ColumnFormatter.new(self, column, head_values, tail_values) end format_header(text, column_formatters) @@ -186,5 +208,9 @@ def format text end + + def show_column_type? + @options.fetch(:show_column_type, true) + end end end diff --git a/ruby/red-arrow/lib/arrow/table-list-formatter.rb b/ruby/red-arrow/lib/arrow/table-list-formatter.rb index 4fe2934160a69..3e4d410ffbee8 100644 --- a/ruby/red-arrow/lib/arrow/table-list-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-list-formatter.rb @@ -27,9 +27,9 @@ def format_rows(text, column_formatters, rows, n_digits, start_offset) text << ("=" * 20 + " #{start_offset + nth_row} " + "=" * 20 + "\n") row.each_with_index do |column_value, nth_column| column_formatter = column_formatters[nth_column] - formatted_name = column_formatter.name - formatted_value = column_formatter.format_value(column_value) - text << "#{formatted_name}: #{formatted_value}\n" + text << column_formatter.name + text << "(#{column_formatter.data_type.name})" if show_column_type? + text << ": #{column_formatter.format_value(column_value)}\n" end end end diff --git a/ruby/red-arrow/lib/arrow/table-table-formatter.rb b/ruby/red-arrow/lib/arrow/table-table-formatter.rb index 36121e1b6f0e4..acf4aca8bb6d1 100644 --- a/ruby/red-arrow/lib/arrow/table-table-formatter.rb +++ b/ruby/red-arrow/lib/arrow/table-table-formatter.rb @@ -26,6 +26,13 @@ def format_header(text, column_formatters) text << "\t" text << column_formatter.aligned_name end + if show_column_type? 
+ text << "\n" + column_formatters.each do |column_formatter| + text << "\t" + text << column_formatter.aligned_data_type_name + end + end text << "\n" end diff --git a/ruby/red-arrow/test/test-csv-loader.rb b/ruby/red-arrow/test/test-csv-loader.rb index 0b21f6f9b71f9..1e0445db06ef9 100644 --- a/ruby/red-arrow/test/test-csv-loader.rb +++ b/ruby/red-arrow/test/test-csv-loader.rb @@ -27,80 +27,88 @@ def load_csv(input) test("String: data: with header") do data = fixture_path("with-header-float.csv").read assert_equal(<<-TABLE, load_csv(data).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: data: without header") do data = fixture_path("without-header-float.csv").read assert_equal(<<-TABLE, load_csv(data).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: path: with header") do path = fixture_path("with-header-float.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("String: path: without header") do path = fixture_path("without-header-float.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("Pathname: with header") do path = fixture_path("with-header-float.csv") assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + name score + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("Pathname: without header") do path = fixture_path("without-header-float.csv") assert_equal(<<-TABLE, load_csv(path).to_s) - 0 1 -0 alice 10.100000 -1 bob 29.200000 -2 chris -1.300000 + 0 1 + (utf8) (double) +0 alice 10.100000 +1 bob 29.200000 +2 chris -1.300000 TABLE end test("null: with double quote") do path = fixture_path("null-with-double-quote.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10 -1 bob (null) -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob (null) +2 chris -1 TABLE end test("null: without double quote") do path = fixture_path("null-without-double-quote.csv").to_s assert_equal(<<-TABLE, load_csv(path).to_s) - name score -0 alice 10 -1 bob (null) -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob (null) +2 chris -1 TABLE end diff --git a/ruby/red-arrow/test/test-group.rb b/ruby/red-arrow/test/test-group.rb index 68e927df69bc8..f4831289eda48 100644 --- a/ruby/red-arrow/test/test-group.rb +++ b/ruby/red-arrow/test/test-group.rb @@ -43,6 +43,7 @@ def setup table = Arrow::Table.new(raw_table) assert_equal(<<-TABLE, table.group(:time).count.to_s) time count(int) + (timestamp) (int64) 0 #{time_values[0].iso8601} 1 1 #{time_values[1].iso8601} 1 TABLE @@ -53,6 +54,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).count.to_s) group_key1 count(group_key2) count(int) count(uint) count(float) count(string) + (uint8) (int64) (int64) (int64) (int64) (int64) 0 1 2 2 1 1 2 1 2 1 0 1 1 1 2 3 3 3 3 3 2 @@ -62,6 +64,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).count.to_s) group_key1 group_key2 count(int) count(uint) 
count(float) count(string) + (uint8) (uint8) (int64) (int64) (int64) (int64) 0 1 1 2 1 1 2 1 2 1 0 1 1 1 2 3 1 1 1 1 0 @@ -73,6 +76,7 @@ def setup group = @table.group(:group_key1, :group_key2) assert_equal(<<-TABLE, group.count(:int, :uint).to_s) group_key1 group_key2 count(int) count(uint) + (uint8) (uint8) (int64) (int64) 0 1 1 2 1 1 2 1 0 1 2 3 1 1 1 @@ -85,6 +89,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).sum.to_s) group_key1 sum(group_key2) sum(int) sum(uint) sum(float) + (uint8) (uint64) (int64) (uint64) (double) 0 1 2 -3 1 2.200000 1 2 1 (null) 3 3.300000 2 3 5 -15 15 16.500000 @@ -94,6 +99,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).sum.to_s) group_key1 group_key2 sum(int) sum(uint) sum(float) + (uint8) (uint8) (int64) (uint64) (double) 0 1 1 -3 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -106,6 +112,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).mean.to_s) group_key1 mean(group_key2) mean(int) mean(uint) mean(float) + (uint8) (double) (double) (double) (double) 0 1 1.000000 -1.500000 1.000000 2.200000 1 2 1.000000 (null) 3.000000 3.300000 2 3 1.666667 -5.000000 5.000000 5.500000 @@ -115,6 +122,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).mean.to_s) group_key1 group_key2 mean(int) mean(uint) mean(float) + (uint8) (uint8) (double) (double) (double) 0 1 1 -1.500000 1.000000 2.200000 1 2 1 (null) 3.000000 3.300000 2 3 1 -4.000000 4.000000 4.400000 @@ -127,6 +135,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).min.to_s) group_key1 min(group_key2) min(int) min(uint) min(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -2 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -6 4 4.400000 @@ -136,6 +145,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).min.to_s) group_key1 group_key2 min(int) min(uint) min(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -2 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -148,6 +158,7 @@ def setup test("single") do assert_equal(<<-TABLE, @table.group(:group_key1).max.to_s) group_key1 max(group_key2) max(int) max(uint) max(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -1 1 2.200000 1 2 1 (null) 3 3.300000 2 3 2 -4 6 6.600000 @@ -157,6 +168,7 @@ def setup test("multiple") do assert_equal(<<-TABLE, @table.group(:group_key1, :group_key2).max.to_s) group_key1 group_key2 max(int) max(uint) max(float) + (uint8) (uint8) (int32) (uint32) (float) 0 1 1 -1 1 2.200000 1 2 1 (null) 3 3.300000 2 3 1 -4 4 4.400000 @@ -170,6 +182,7 @@ def setup group = @table.group(:group_key1, :group_key2) assert_equal(<<-TABLE, group.aggregate("count(int)", "sum(uint)").to_s) group_key1 group_key2 count(int) sum(uint) + (uint8) (uint8) (int64) (uint64) 0 1 1 2 1 1 2 1 0 3 2 3 1 1 4 diff --git a/ruby/red-arrow/test/test-schema.rb b/ruby/red-arrow/test/test-schema.rb index 20d73b2726d6b..c4164d83903f2 100644 --- a/ruby/red-arrow/test/test-schema.rb +++ b/ruby/red-arrow/test/test-schema.rb @@ -95,7 +95,7 @@ def setup test("[invalid]") do invalid = [] - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{invalid.inspect}>" assert_raise(ArgumentError.new(message)) do @schema[invalid] diff --git a/ruby/red-arrow/test/test-slicer.rb b/ruby/red-arrow/test/test-slicer.rb index d33748a387c8f..89cf34b0d13f7 
100644 --- a/ruby/red-arrow/test/test-slicer.rb +++ b/ruby/red-arrow/test/test-slicer.rb @@ -45,11 +45,12 @@ def setup slicer.visible end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 256 true TABLE end @@ -58,15 +59,16 @@ def setup slicer.count end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true -5 32 false -6 64 (null) -7 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true +5 32 false +6 64 (null) +7 256 true TABLE end end @@ -77,9 +79,10 @@ def setup !slicer.visible end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -88,8 +91,9 @@ def setup !slicer.count end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) + count visible + (uint32) (bool) +0 0 (null) TABLE end end @@ -99,11 +103,12 @@ def setup slicer.visible.null? end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 4 (null) -2 64 (null) -3 (null) (null) + count visible + (uint32) (bool) +0 0 (null) +1 4 (null) +2 64 (null) +3 (null) (null) TABLE end @@ -112,13 +117,14 @@ def setup slicer.visible.valid? end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -128,11 +134,12 @@ def setup slicer.visible == nil end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 4 (null) -2 64 (null) -3 (null) (null) + count visible + (uint32) (bool) +0 0 (null) +1 4 (null) +2 64 (null) +3 (null) (null) TABLE end @@ -141,11 +148,12 @@ def setup slicer.visible == true end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 256 true TABLE end end @@ -156,13 +164,14 @@ def setup !(slicer.visible == nil) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -171,9 +180,10 @@ def setup !(slicer.visible == true) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end end @@ -184,13 +194,14 @@ def setup slicer.visible != nil end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 2 false -2 8 true -3 16 true -4 32 false -5 256 true + count visible + (uint32) (bool) +0 1 true +1 2 false +2 8 true +3 16 true +4 32 false +5 256 true TABLE end @@ -199,9 +210,10 @@ def setup slicer.visible != true end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end end @@ -211,12 +223,13 @@ def setup slicer.count < 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true TABLE end @@ -225,11 +238,12 @@ def setup !(slicer.count < 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 256 true + 
count visible + (uint32) (bool) +0 16 true +1 32 false +2 64 (null) +3 256 true TABLE end @@ -238,13 +252,14 @@ def setup slicer.count <= 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true -5 16 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true +5 16 true TABLE end @@ -253,10 +268,11 @@ def setup !(slicer.count <= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 32 false -1 64 (null) -2 256 true + count visible + (uint32) (bool) +0 32 false +1 64 (null) +2 256 true TABLE end @@ -265,10 +281,11 @@ def setup slicer.count > 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 32 false -1 64 (null) -2 256 true + count visible + (uint32) (bool) +0 32 false +1 64 (null) +2 256 true TABLE end @@ -277,13 +294,14 @@ def setup !(slicer.count > 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true -5 16 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true +5 16 true TABLE end @@ -292,11 +310,12 @@ def setup slicer.count >= 16 end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 256 true + count visible + (uint32) (bool) +0 16 true +1 32 false +2 64 (null) +3 256 true TABLE end @@ -305,12 +324,13 @@ def setup !(slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 2 false -3 4 (null) -4 8 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 2 false +3 4 (null) +4 8 true TABLE end @@ -319,11 +339,12 @@ def setup slicer.count.in?([1, 4, 16, 64]) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 4 (null) -2 16 true -3 64 (null) + count visible + (uint32) (bool) +0 1 true +1 4 (null) +2 16 true +3 64 (null) TABLE end @@ -332,13 +353,14 @@ def setup !slicer.count.in?([1, 4, 16, 64]) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 2 false -2 8 true -3 32 false -4 (null) (null) -5 256 true + count visible + (uint32) (bool) +0 0 (null) +1 2 false +2 8 true +3 32 false +4 (null) (null) +5 256 true TABLE end @@ -347,9 +369,10 @@ def setup slicer.visible & (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 16 true -1 256 true + count visible + (uint32) (bool) +0 16 true +1 256 true TABLE end @@ -358,12 +381,13 @@ def setup slicer.visible | (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 16 true -3 32 false -4 256 true + count visible + (uint32) (bool) +0 1 true +1 8 true +2 16 true +3 32 false +4 256 true TABLE end @@ -372,10 +396,11 @@ def setup slicer.visible ^ (slicer.count >= 16) end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 1 true -1 8 true -2 32 false + count visible + (uint32) (bool) +0 1 true +1 8 true +2 32 false TABLE end @@ -386,15 +411,16 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 4 (null) -3 8 true -4 16 true -5 64 (null) -6 (null) (null) -7 256 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 4 (null) +3 8 true +4 16 true +5 64 (null) +6 (null) (null) +7 256 true TABLE end @@ -405,9 +431,10 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -418,9 +445,10 @@ def setup end end 
assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 2 false -1 32 false + count visible + (uint32) (bool) +0 2 false +1 32 false TABLE end @@ -431,15 +459,16 @@ def setup end end assert_equal(<<-TABLE, sliced_table.to_s) - count visible -0 0 (null) -1 1 true -2 4 (null) -3 8 true -4 16 true -5 64 (null) -6 (null) (null) -7 256 true + count visible + (uint32) (bool) +0 0 (null) +1 1 true +2 4 (null) +3 8 true +4 16 true +5 64 (null) +6 (null) (null) +7 256 true TABLE end @@ -456,6 +485,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -467,6 +497,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow TABLE @@ -478,6 +509,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 carrot TABLE @@ -489,6 +521,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow 2 carrot @@ -501,6 +534,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -512,6 +546,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 Arrow 1 window TABLE @@ -523,6 +558,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 array 1 Arrow 2 carrot @@ -545,6 +581,7 @@ def setup end assert_equal(<<~TABLE, sliced_table.to_s) string + (utf8) 0 carrot TABLE end diff --git a/ruby/red-arrow/test/test-struct-data-type.rb b/ruby/red-arrow/test/test-struct-data-type.rb index d106e38b1d841..9bf9a17dd645a 100644 --- a/ruby/red-arrow/test/test-struct-data-type.rb +++ b/ruby/red-arrow/test/test-struct-data-type.rb @@ -101,7 +101,7 @@ def setup test("[invalid]") do invalid = [] - message = "field name or index must be String, Symbol or Integer" + message = +"field name or index must be String, Symbol or Integer" message << ": <#{invalid.inspect}>" assert_raise(ArgumentError.new(message)) do @data_type[invalid] diff --git a/ruby/red-arrow/test/test-table.rb b/ruby/red-arrow/test/test-table.rb index 883cf70c269bb..a69e926156809 100644 --- a/ruby/red-arrow/test/test-table.rb +++ b/ruby/red-arrow/test/test-table.rb @@ -87,24 +87,26 @@ def array_like.to_ary target_rows_raw = [nil, true, true, false, true, false, true, true] target_rows = Arrow::BooleanArray.new(target_rows_raw) assert_equal(<<-TABLE, @table.slice(target_rows).to_s) - count visible -0 2 false -1 4 (null) -2 16 true -3 64 (null) -4 128 (null) + count visible + (uint8) (bool) +0 2 false +1 4 (null) +2 16 true +3 64 (null) +4 128 (null) TABLE end test("Array: boolean") do target_rows_raw = [nil, true, true, false, true, false, true, true] assert_equal(<<-TABLE, @table.slice(target_rows_raw).to_s) - count visible -0 2 false -1 4 (null) -2 16 true -3 64 (null) -4 128 (null) + count visible + (uint8) (bool) +0 2 false +1 4 (null) +2 16 true +3 64 (null) +4 128 (null) TABLE end @@ -131,83 +133,93 @@ def array_like.to_ary test("Range: positive: include end") do assert_equal(<<-TABLE, @table.slice(2..4).to_s) - count visible -0 4 (null) -1 8 true -2 16 true + count visible + (uint8) (bool) +0 4 (null) +1 8 true +2 16 true TABLE end test("Range: positive: exclude end") do assert_equal(<<-TABLE, @table.slice(2...4).to_s) - count visible -0 4 (null) -1 8 true + count visible + (uint8) (bool) +0 4 (null) +1 8 true TABLE end test("Range: negative: include end") do assert_equal(<<-TABLE, @table.slice(-4..-2).to_s) - count visible -0 16 true -1 32 false -2 64 (null) + count visible + (uint8) (bool) +0 16 true +1 32 false +2 64 (null) 
TABLE end test("Range: negative: exclude end") do assert_equal(<<-TABLE, @table.slice(-4...-2).to_s) - count visible -0 16 true -1 32 false + count visible + (uint8) (bool) +0 16 true +1 32 false TABLE end test("[from, to]: positive") do assert_equal(<<-TABLE, @table.slice(0, 2).to_s) - count visible -0 1 true -1 2 false + count visible + (uint8) (bool) +0 1 true +1 2 false TABLE end test("[from, to]: negative") do assert_equal(<<-TABLE, @table.slice(-4, 2).to_s) - count visible -0 16 true -1 32 false + count visible + (uint8) (bool) +0 16 true +1 32 false TABLE end test("{key: Number}") do assert_equal(<<-TABLE, @table.slice(count: 16).to_s) - count visible -0 16 true + count visible + (uint8) (bool) +0 16 true TABLE end test("{key: String}") do table = Arrow::Table.new(name: Arrow::StringArray.new(["a", "b", "c"])) assert_equal(<<-TABLE, table.slice(name: 'b').to_s) - name -0 b + name + (utf8) +0 b TABLE end test("{key: true}") do assert_equal(<<-TABLE, @table.slice(visible: true).to_s) - count visible -0 1 true -1 8 true -2 16 true + count visible + (uint8) (bool) +0 1 true +1 8 true +2 16 true TABLE end test("{key: false}") do assert_equal(<<-TABLE, @table.slice(visible: false).to_s) - count visible -0 2 false -1 32 false + count visible + (uint8) (bool) +0 2 false +1 32 false TABLE end @@ -218,11 +230,12 @@ def array_like.to_ary omit("beginless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true TABLE end @@ -233,10 +246,11 @@ def array_like.to_ary omit("beginless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 1 true -1 2 false -2 4 (null) + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) TABLE end @@ -247,39 +261,43 @@ def array_like.to_ary omit("endless range isn't supported") end assert_equal(<<-TABLE, @table.slice(count: range).to_s) - count visible -0 16 true -1 32 false -2 64 (null) -3 128 (null) + count visible + (uint8) (bool) +0 16 true +1 32 false +2 64 (null) +3 128 (null) TABLE end test("{key: Range}: include end") do assert_equal(<<-TABLE, @table.slice(count: 1..16).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true TABLE end test("{key: Range}: exclude end") do assert_equal(<<-TABLE, @table.slice(count: 1...16).to_s) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true TABLE end test("{key1: Range, key2: true}") do assert_equal(<<-TABLE, @table.slice(count: 0..8, visible: false).to_s) - count visible -0 2 false + count visible + (uint8) (bool) +0 2 false TABLE end @@ -372,44 +390,47 @@ def setup test("add") do name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) assert_equal(<<-TABLE, @table.merge(:name => name_array).to_s) - count visible name -0 1 true a -1 2 false b -2 4 (null) c -3 8 true d -4 16 true e -5 32 false f -6 64 (null) g -7 128 (null) h + count visible name + (uint8) (bool) (utf8) +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h TABLE end test("remove") do assert_equal(<<-TABLE, @table.merge(:visible => nil).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end 
test("replace") do visible_array = Arrow::Int32Array.new([1] * @visible_array.length) assert_equal(<<-TABLE, @table.merge(:visible => visible_array).to_s) - count visible -0 1 1 -1 2 1 -2 4 1 -3 8 1 -4 16 1 -5 32 1 -6 64 1 -7 128 1 + count visible + (uint8) (int32) +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 TABLE end end @@ -419,15 +440,16 @@ def setup name_array = Arrow::StringArray.new(["a", "b", "c", "d", "e", "f", "g", "h"]) table = Arrow::Table.new("name" => name_array) assert_equal(<<-TABLE, @table.merge(table).to_s) - count visible name -0 1 true a -1 2 false b -2 4 (null) c -3 8 true d -4 16 true e -5 32 false f -6 64 (null) g -7 128 (null) h + count visible name + (uint8) (bool) (utf8) +0 1 true a +1 2 false b +2 4 (null) c +3 8 true d +4 16 true e +5 32 false f +6 64 (null) g +7 128 (null) h TABLE end @@ -435,15 +457,16 @@ def setup visible_array = Arrow::Int32Array.new([1] * @visible_array.length) table = Arrow::Table.new("visible" => visible_array) assert_equal(<<-TABLE, @table.merge(table).to_s) - count visible -0 1 1 -1 2 1 -2 4 1 -3 8 1 -4 16 1 -5 32 1 -6 64 1 -7 128 1 + count visible + (uint8) (int32) +0 1 1 +1 2 1 +2 4 1 +3 8 1 +4 16 1 +5 32 1 +6 64 1 +7 128 1 TABLE end end @@ -457,29 +480,31 @@ def setup sub_test_case("#remove_column") do test("String") do assert_equal(<<-TABLE, @table.remove_column("visible").to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end test("Symbol") do assert_equal(<<-TABLE, @table.remove_column(:visible).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end @@ -491,29 +516,31 @@ def setup test("Integer") do assert_equal(<<-TABLE, @table.remove_column(1).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end test("negative integer") do assert_equal(<<-TABLE, @table.remove_column(-1).to_s) - count -0 1 -1 2 -2 4 -3 8 -4 16 -5 32 -6 64 -7 128 + count + (uint8) +0 1 +1 2 +2 4 +3 8 +4 16 +5 32 +6 64 +7 128 TABLE end @@ -544,29 +571,33 @@ def setup test("names") do assert_equal(<<-TABLE, @table.select_columns(:c, :a).to_s) - c a -0 1 1 + c a + (uint8) (uint8) +0 1 1 TABLE end test("range") do assert_equal(<<-TABLE, @table.select_columns(2...4).to_s) - c d -0 1 1 + c d + (uint8) (uint8) +0 1 1 TABLE end test("indexes") do assert_equal(<<-TABLE, @table.select_columns(0, -1, 2).to_s) - a e c -0 1 1 1 + a e c + (uint8) (uint8) (uint8) +0 1 1 1 TABLE end test("mixed") do assert_equal(<<-TABLE, @table.select_columns(:a, -1, 2..3).to_s) - a e c d -0 1 1 1 1 + a e c d + (uint8) (uint8) (uint8) (uint8) +0 1 1 1 1 TABLE end @@ -575,8 +606,9 @@ def setup column.name == "a" or i.odd? end assert_equal(<<-TABLE, selected_table.to_s) - a b d -0 1 1 1 + a b d + (uint8) (uint8) (uint8) +0 1 1 1 TABLE end @@ -585,15 +617,17 @@ def setup column.name == "a" end assert_equal(<<-TABLE, selected_table.to_s) - a -0 1 + a + (uint8) +0 1 TABLE end test("empty result") do selected_table = @table.filter([false] * @table.size).select_columns(:a) assert_equal(<<-TABLE, selected_table.to_s) - a + a + (uint8) TABLE end end @@ -682,7 +716,7 @@ def test_json output = create_output(".json") # TODO: Implement this. 
# @table.save(output, format: :json) - columns = "" + columns = +"" @table.each_record.each do |record| column = { "count" => record.count, @@ -789,10 +823,11 @@ def create_output(extension) path = fixture_path("with-header.csv") table = Arrow::Table.load(path, skip_lines: /^\#/) assert_equal(<<-TABLE, table.to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int8) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end @@ -808,10 +843,11 @@ def create_output(extension) CSV end assert_equal(<<-TABLE, Arrow::Table.load(file.path).to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int64) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end @@ -826,10 +862,11 @@ def create_output(extension) file.close table = Arrow::Table.load(file.path) assert_equal(<<-TABLE, table.to_s) - name score -0 alice 10 -1 bob 29 -2 chris -1 + name score + (utf8) (int64) +0 alice 10 +1 bob 29 +2 chris -1 TABLE end end @@ -881,7 +918,7 @@ def test_http(data) output.data.to_s, content_type) do |port| input = URI("http://127.0.0.1:#{port}#{path}") - loaded_table = Arrow::Table.load(input) + loaded_table = Arrow::Table.load(input, schema: @table.schema) assert_equal(@table.to_s, loaded_table.to_s) end end @@ -962,15 +999,16 @@ def test_join packed_table = @table.pack column_n_chunks = packed_table.columns.collect {|c| c.data.n_chunks} assert_equal([[1, 1], <<-TABLE], [column_n_chunks, packed_table.to_s]) - count visible -0 1 true -1 2 false -2 4 (null) -3 8 true -4 16 true -5 32 false -6 64 (null) -7 128 (null) + count visible + (uint8) (bool) +0 1 true +1 2 false +2 4 (null) +3 8 true +4 16 true +5 32 false +6 64 (null) +7 128 (null) TABLE end @@ -1009,19 +1047,20 @@ def setup test(":list") do assert_equal(<<-TABLE, @table.to_s(format: :list)) ==================== 0 ==================== -count: 1 -visible: true +count(uint8): 1 +visible(bool): true ==================== 1 ==================== -count: 2 -visible: false +count(uint8): 2 +visible(bool): false TABLE end test(":table") do assert_equal(<<-TABLE, @table.to_s(format: :table)) - count visible -0 1 true -1 2 false + count visible + (uint8) (bool) +0 1 true +1 2 false TABLE end @@ -1033,6 +1072,35 @@ def setup end end + sub_test_case(":show_column_type") do + def setup + columns = { + "count" => Arrow::UInt8Array.new([1, 2]), + "visible" => Arrow::BooleanArray.new([true, false]), + } + @table = Arrow::Table.new(columns) + end + + test(":list") do + assert_equal(<<-TABLE, @table.to_s(format: :list, show_column_type: false)) +==================== 0 ==================== +count: 1 +visible: true +==================== 1 ==================== +count: 2 +visible: false + TABLE + end + + test(":table") do + assert_equal(<<-TABLE, @table.to_s(format: :table, show_column_type: false)) + count visible +0 1 true +1 2 false + TABLE + end + end + sub_test_case("#==") do test("Arrow::Table") do assert do @@ -1058,13 +1126,14 @@ def setup test("Array: boolean") do filter = [nil, true, true, false, true, false, true, true] assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 (null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end @@ -1072,13 +1141,14 @@ def setup array = [nil, true, true, false, true, false, true, true] filter = Arrow::BooleanArray.new(array) assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 
(null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end @@ -1090,13 +1160,14 @@ def setup ] filter = Arrow::ChunkedArray.new(filter_chunks) assert_equal(<<-TABLE, @table.filter(filter, @options).to_s) - count visible -0 (null) (null) -1 2 false -2 4 (null) -3 16 true -4 64 (null) -5 128 (null) + count visible + (uint8) (bool) +0 (null) (null) +1 2 false +2 4 (null) +3 16 true +4 64 (null) +5 128 (null) TABLE end end @@ -1105,20 +1176,22 @@ def setup test("Arrow: boolean") do indices = [1, 0, 2] assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end test("Arrow::Array") do indices = Arrow::Int16Array.new([1, 0, 2]) assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end @@ -1129,10 +1202,11 @@ def setup ] indices = Arrow::ChunkedArray.new(chunks) assert_equal(<<-TABLE, @table.take(indices).to_s) - count visible -0 2 false -1 1 true -2 4 (null) + count visible + (uint8) (bool) +0 2 false +1 1 true +2 4 (null) TABLE end end @@ -1144,9 +1218,10 @@ def setup table2 = Arrow::Table.new(b: [false]) concatenated = table1.concatenate([table2], unify_schemas: true) assert_equal(<<-TABLE, concatenated.to_s) - a b -0 true false -1 (null) false + a b + (bool) (bool) +0 true false +1 (null) false TABLE end end From 64be7a2d073759b5eea90e3a6167ca2c1d56ce79 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 26 Apr 2024 17:15:13 +0900 Subject: [PATCH 002/261] GH-41333: [C++][CMake] Prefer protobuf-config.cmake to FindProtobuf.cmake (#41360) ### Rationale for this change `protobuf::libprotobuf` provided by `FindProtobuf.cmake` (provided by CMake) may not provide needed dependencies such as Abseil. ### What changes are included in this PR? Try `protobuf-config.cmake` provided by Protobuf before `FindProtobuf.cmake`. `protobuf::libprotobuf` provided by `protobuf-config.cmake` must have needed dependencies. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. 
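For reference, the lookup order after this change is roughly the following (a simplified sketch of the module code in the diff below; `find_package_args` is the QUIET/REQUIRED argument list the module already builds):

```
# Prefer the config package shipped by Protobuf itself: its
# protobuf::libprotobuf target carries transitive dependencies
# such as Abseil.
find_package(protobuf CONFIG ${find_package_args})
if(NOT protobuf_FOUND)
  # Fall back to CMake's bundled FindProtobuf.cmake module.
  find_package(Protobuf ${find_package_args})
endif()
```
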
* GitHub Issue: #41333 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/FindProtobufAlt.cmake | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake index 15fe1b4f27ef7..f343b42f2b762 100644 --- a/cpp/cmake_modules/FindProtobufAlt.cmake +++ b/cpp/cmake_modules/FindProtobufAlt.cmake @@ -28,12 +28,22 @@ endif() if(ProtobufAlt_FIND_QUIETLY) list(APPEND find_package_args QUIET) endif() -find_package(Protobuf ${find_package_args}) -set(ProtobufAlt_FOUND ${Protobuf_FOUND}) +find_package(protobuf CONFIG ${find_package_args}) +set(ProtobufAlt_FOUND ${protobuf_FOUND}) if(ProtobufAlt_FOUND) - set(ProtobufAlt_VERSION ${Protobuf_VERSION}) - set(ProtobufAlt_VERSION_MAJOR ${Protobuf_VERSION_MAJOR}) - set(ProtobufAlt_VERSION_MINOR ${Protobuf_VERSION_MINOR}) - set(ProtobufAlt_VERSION_PATCH ${Protobuf_VERSION_PATCH}) - set(ProtobufAlt_VERSION_TWEEK ${Protobuf_VERSION_TWEEK}) + set(ProtobufAlt_VERSION ${protobuf_VERSION}) + set(ProtobufAlt_VERSION_MAJOR ${protobuf_VERSION_MAJOR}) + set(ProtobufAlt_VERSION_MINOR ${protobuf_VERSION_MINOR}) + set(ProtobufAlt_VERSION_PATCH ${protobuf_VERSION_PATCH}) + set(ProtobufAlt_VERSION_TWEEK ${protobuf_VERSION_TWEEK}) +else() + find_package(Protobuf ${find_package_args}) + set(ProtobufAlt_FOUND ${Protobuf_FOUND}) + if(ProtobufAlt_FOUND) + set(ProtobufAlt_VERSION ${Protobuf_VERSION}) + set(ProtobufAlt_VERSION_MAJOR ${Protobuf_VERSION_MAJOR}) + set(ProtobufAlt_VERSION_MINOR ${Protobuf_VERSION_MINOR}) + set(ProtobufAlt_VERSION_PATCH ${Protobuf_VERSION_PATCH}) + set(ProtobufAlt_VERSION_TWEEK ${Protobuf_VERSION_TWEEK}) + endif() endif() From 6cbdec55b6bbaee3f00a42541bf89998452a3718 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 26 Apr 2024 12:26:22 +0200 Subject: [PATCH 003/261] GH-41282: [Dev] Always prompt next major version on merge script if it exists (#41305) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change When we created the `16.1.0` milestone the merge script started prompting it instead of `17.0.0` we want to default to the next major release. ### What changes are included in this PR? Update archery logic to default to major versions online. ### Are these changes tested? I've tested locally and now it defaults to `17.0.0`: ``` Enter fix version [17.0.0]: ``` ### Are there any user-facing changes? 
No * GitHub Issue: #41282 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- dev/merge_arrow_pr.py | 14 +++------ dev/test_merge_arrow_pr.py | 62 ++++++++++++++++++++++++++++---------- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/dev/merge_arrow_pr.py b/dev/merge_arrow_pr.py index 25d3372d8b4d3..344d943fd87e1 100755 --- a/dev/merge_arrow_pr.py +++ b/dev/merge_arrow_pr.py @@ -306,15 +306,11 @@ def version_tuple(x): # Only suggest versions starting with a number, like 0.x but not JS-0.x mainline_versions = all_versions - mainline_non_patch_versions = [] - for v in mainline_versions: - (major, minor, patch) = v.split(".") - if patch == "0": - mainline_non_patch_versions.append(v) - - if len(mainline_versions) > len(mainline_non_patch_versions): - # If there is a non-patch release, suggest that instead - mainline_versions = mainline_non_patch_versions + major_versions = [v for v in mainline_versions if v.endswith('.0.0')] + + if len(mainline_versions) > len(major_versions): + # If there is a future major release, suggest that + mainline_versions = major_versions mainline_versions = [v for v in mainline_versions if f"maint-{v}" not in maintenance_branches] diff --git a/dev/test_merge_arrow_pr.py b/dev/test_merge_arrow_pr.py index 305b08f2830bb..0067c10414c65 100755 --- a/dev/test_merge_arrow_pr.py +++ b/dev/test_merge_arrow_pr.py @@ -26,14 +26,17 @@ FakeIssue = namedtuple('issue', ['fields']) FakeFields = namedtuple('fields', ['status', 'summary', 'assignee', - 'components', 'fixVersions']) + 'components', 'fixVersions', 'milestone']) FakeAssignee = namedtuple('assignee', ['displayName']) FakeStatus = namedtuple('status', ['name']) FakeComponent = namedtuple('component', ['name']) FakeVersion = namedtuple('version', ['name', 'raw']) +FakeMilestone = namedtuple('milestone', ['state']) RAW_VERSION_JSON = [ {'name': 'JS-0.4.0', 'released': False}, + {'name': '1.0.0', 'released': False}, + {'name': '2.0.0', 'released': False}, {'name': '0.9.0', 'released': False}, {'name': '0.10.0', 'released': False}, {'name': '0.8.0', 'released': True}, @@ -50,7 +53,7 @@ status = FakeStatus('In Progress') fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), [FakeComponent('C++'), FakeComponent('Format')], - []) + [], FakeMilestone('closed')._asdict()) FAKE_ISSUE_1 = FakeIssue(fields) @@ -92,6 +95,31 @@ def project_versions(self, project): return self._project_versions +class FakeGitHub: + + def __init__(self, issue=None, project_versions=None): + self._issue = issue + self._project_versions = project_versions + + @property + def issue(self): + return self._issue.fields._asdict() + + @property + def current_versions(self): + all_versions = self._project_versions or SOURCE_VERSIONS + return [ + v for v in all_versions if not v.raw.get("released") + ] + ['0.11.0'] + + @property + def current_fix_versions(self): + return 'JS-0.4.0' + + def project_versions(self, project): + return self._project_versions + + class FakeCLI: def __init__(self, responses=()): @@ -115,11 +143,11 @@ def test_jira_fix_versions(): fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions ) - assert fix_version == '0.9.0' + assert fix_version == '1.0.0' def test_jira_fix_versions_filters_maintenance(): - maintenance_branches = ["maint-0.9.0"] + maintenance_branches = ["maint-1.0.0"] jira = FakeJIRA(project_versions=SOURCE_VERSIONS, transitions=TRANSITIONS) @@ -128,13 +156,14 @@ def test_jira_fix_versions_filters_maintenance(): issue.current_versions, 
maintenance_branches=maintenance_branches ) - assert fix_version == '0.10.0' + assert fix_version == '2.0.0' -def test_jira_no_suggest_patch_release(): +def test_jira_only_suggest_major_release(): versions_json = [ {'name': '0.9.1', 'released': False}, {'name': '0.10.0', 'released': False}, + {'name': '1.0.0', 'released': False}, ] versions = [FakeVersion(raw['name'], raw) for raw in versions_json] @@ -144,7 +173,7 @@ def test_jira_no_suggest_patch_release(): fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions ) - assert fix_version == '0.10.0' + assert fix_version == '1.0.0' def test_jira_parquet_no_suggest_non_cpp(): @@ -153,8 +182,10 @@ def test_jira_parquet_no_suggest_non_cpp(): {'name': 'cpp-1.5.0', 'released': True}, {'name': 'cpp-1.6.0', 'released': False}, {'name': 'cpp-1.7.0', 'released': False}, + {'name': 'cpp-2.0.0', 'released': False}, {'name': '1.11.0', 'released': False}, - {'name': '1.12.0', 'released': False} + {'name': '1.12.0', 'released': False}, + {'name': '2.0.0', 'released': False} ] versions = [FakeVersion(raw['name'], raw) @@ -166,7 +197,7 @@ def test_jira_parquet_no_suggest_non_cpp(): fix_version = merge_arrow_pr.get_candidate_fix_version( issue.current_versions ) - assert fix_version == 'cpp-1.6.0' + assert fix_version == 'cpp-2.0.0' def test_jira_invalid_issue(): @@ -219,13 +250,12 @@ def test_jira_resolve_non_mainline(): def test_jira_resolve_released_fix_version(): # ARROW-5083 - jira = FakeJIRA(issue=FAKE_ISSUE_1, - project_versions=SOURCE_VERSIONS, - transitions=TRANSITIONS) + jira = FakeGitHub(issue=FAKE_ISSUE_1, + project_versions=SOURCE_VERSIONS) - cmd = FakeCLI(responses=['0.7.0']) + cmd = FakeCLI(responses=['1.0.0']) fix_versions_json = merge_arrow_pr.prompt_for_fix_version(cmd, jira) - assert fix_versions_json == "0.7.0" + assert fix_versions_json == "1.0.0" def test_multiple_authors_bad_input(): @@ -256,7 +286,7 @@ def test_multiple_authors_bad_input(): def test_jira_already_resolved(): status = FakeStatus('Resolved') fields = FakeFields(status, 'issue summary', FakeAssignee('groundhog'), - [FakeComponent('Java')], []) + [FakeComponent('Java')], [], None) issue = FakeIssue(fields) jira = FakeJIRA(issue=issue, @@ -287,7 +317,7 @@ def test_no_unset_point_release_fix_version(): fields = FakeFields(status, 'summary', FakeAssignee('someone'), [FakeComponent('Java')], [FakeVersion(v, versions_json[v]) - for v in ['0.17.0', '0.15.1', '0.14.2']]) + for v in ['0.17.0', '0.15.1', '0.14.2']], None) issue = FakeIssue(fields) jira = FakeJIRA( From 2710626b234d5e387a3c63988ca5899c70547dcf Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Fri, 26 Apr 2024 06:26:42 -0700 Subject: [PATCH 004/261] GH-41375: [C#] Move to .NET 8.0 (#41376) ### What changes are included in this PR? Changes to workflow infrastructure and projects to install and target net8.0 instead of net7.0. ### Are these changes tested? Yes ### Are there any user-facing changes? Users will need to install .NET 8 to run tests and examples. No impact on product code. 
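As a quick local check before running the tests, any 8.0.x SDK reported by the .NET CLI is sufficient, for example:

```
dotnet --list-sdks
```
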
Closes #41375 * GitHub Issue: #41375 Lead-authored-by: Curt Hagenlocher Co-authored-by: Sutou Kouhei Signed-off-by: Curt Hagenlocher --- .env | 2 +- .github/workflows/csharp.yml | 6 +++--- .github/workflows/dev.yml | 2 +- ci/docker/conda-integration.dockerfile | 2 +- ci/docker/ubuntu-22.04-csharp.dockerfile | 2 +- .../FlightAspServerExample/FlightAspServerExample.csproj | 2 +- .../examples/FlightClientExample/FlightClientExample.csproj | 2 +- .../FluentBuilderExample/FluentBuilderExample.csproj | 2 +- .../Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj | 2 +- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.TestWeb.csproj | 2 +- .../Apache.Arrow.Flight.Tests.csproj | 2 +- .../Apache.Arrow.IntegrationTest.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 6 ++++-- dev/archery/archery/integration/tester_csharp.py | 6 +++--- dev/release/verify-release-candidate.sh | 4 ++-- dev/tasks/verify-rc/github.macos.yml | 4 ++-- docs/source/developers/release_verification.rst | 2 +- 19 files changed, 28 insertions(+), 26 deletions(-) diff --git a/.env b/.env index f379ca14cd205..d9f875a4d454e 100644 --- a/.env +++ b/.env @@ -56,7 +56,7 @@ UBUNTU=20.04 CLANG_TOOLS=14 CUDA=11.2.2 DASK=latest -DOTNET=7.0 +DOTNET=8.0 GCC_VERSION="" GO=1.21.8 STATICCHECK=v0.4.7 diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 17ef2de81088f..12f946fe66fc9 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -46,7 +46,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 @@ -74,7 +74,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 @@ -101,7 +101,7 @@ jobs: strategy: fail-fast: false matrix: - dotnet: ['7.0.x'] + dotnet: ['8.0.x'] steps: - name: Install C# uses: actions/setup-dotnet@v4 diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 7c438be6024d6..6111d1d2e5fe3 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -115,7 +115,7 @@ jobs: - name: Install .NET uses: actions/setup-dotnet@4d6c8fcf3c8f7a60068d26b594648e99df24cee3 # v4.0.0 with: - dotnet-version: '7.0.x' + dotnet-version: '8.0.x' - name: Install Dependencies shell: bash run: | diff --git a/ci/docker/conda-integration.dockerfile b/ci/docker/conda-integration.dockerfile index a747ccbc7262f..30b9cd5199fab 100644 --- a/ci/docker/conda-integration.dockerfile +++ b/ci/docker/conda-integration.dockerfile @@ -56,7 +56,7 @@ RUN wget -nv -O - https://dl.google.com/go/go${go}.linux-${arch}.tar.gz | tar -x ENV DOTNET_ROOT=/opt/dotnet \ PATH=/opt/dotnet:$PATH -RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 7.0 -InstallDir /opt/dotnet +RUN curl -sSL https://dot.net/v1/dotnet-install.sh | bash /dev/stdin -Channel 8.0 -InstallDir /opt/dotnet ENV ARROW_ACERO=OFF \ ARROW_AZURE=OFF \ diff --git a/ci/docker/ubuntu-22.04-csharp.dockerfile b/ci/docker/ubuntu-22.04-csharp.dockerfile index aebbd8fab74e9..4d77ba060b877 100644 --- a/ci/docker/ubuntu-22.04-csharp.dockerfile +++ b/ci/docker/ubuntu-22.04-csharp.dockerfile @@ -16,7 +16,7 @@ # under the License. 
ARG arch=amd64 -ARG dotnet=7.0 +ARG dotnet=8.0 ARG platform=jammy FROM mcr.microsoft.com/dotnet/sdk:${dotnet}-${platform}-${arch} diff --git a/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj b/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj index 98e8bb324c727..79312520ba8c0 100644 --- a/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj +++ b/csharp/examples/FlightAspServerExample/FlightAspServerExample.csproj @@ -20,7 +20,7 @@ - net7.0 + net8.0 enable enable 10 diff --git a/csharp/examples/FlightClientExample/FlightClientExample.csproj b/csharp/examples/FlightClientExample/FlightClientExample.csproj index 228c5cf4ea44c..ce0ec83359769 100644 --- a/csharp/examples/FlightClientExample/FlightClientExample.csproj +++ b/csharp/examples/FlightClientExample/FlightClientExample.csproj @@ -21,7 +21,7 @@ Exe - net7.0 + net8.0 diff --git a/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj b/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj index a7bd5392f007d..b1d76d4db00e0 100644 --- a/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj +++ b/csharp/examples/FluentBuilderExample/FluentBuilderExample.csproj @@ -2,7 +2,7 @@ Exe - net7.0 + net8.0 diff --git a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj index d44b7488e3b17..f735f01b022d0 100644 --- a/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj +++ b/csharp/test/Apache.Arrow.Benchmarks/Apache.Arrow.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - net7.0 + net8.0 diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 5cc0d303e881e..b386ccf79c12c 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 false diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 5b7c10f35bed0..ae6f9f1e69667 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 false diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj index d7a2042a4581a..bd6425e7ed99b 100644 --- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 050d0f452cc4e..ed158ca8656d3 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0 false diff --git a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj index e77f329bf2a15..7f226fd08818f 100644 --- 
a/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj +++ b/csharp/test/Apache.Arrow.IntegrationTest/Apache.Arrow.IntegrationTest.csproj @@ -4,7 +4,7 @@ Exe true - net7.0 + net8.0 diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 92f6e2d662f38..06fb44e0a0e88 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -4,13 +4,15 @@ true true + + true - net7.0;net472;net462 + net8.0;net472;net462 - net7.0 + net8.0 diff --git a/dev/archery/archery/integration/tester_csharp.py b/dev/archery/archery/integration/tester_csharp.py index 9aab5b0b28ef9..02ced0701deaf 100644 --- a/dev/archery/archery/integration/tester_csharp.py +++ b/dev/archery/archery/integration/tester_csharp.py @@ -28,7 +28,7 @@ _EXE_PATH = os.path.join(_ARTIFACTS_PATH, "Apache.Arrow.IntegrationTest", - "Debug/net7.0/Apache.Arrow.IntegrationTest", + "Debug/net8.0/Apache.Arrow.IntegrationTest", ) _clr_loaded = False @@ -44,10 +44,10 @@ def _load_clr(): import clr clr.AddReference( f"{_ARTIFACTS_PATH}/Apache.Arrow.IntegrationTest/" - f"Debug/net7.0/Apache.Arrow.IntegrationTest.dll") + f"Debug/net8.0/Apache.Arrow.IntegrationTest.dll") clr.AddReference( f"{_ARTIFACTS_PATH}/Apache.Arrow.Tests/" - f"Debug/net7.0/Apache.Arrow.Tests.dll") + f"Debug/net8.0/Apache.Arrow.Tests.dll") from Apache.Arrow.IntegrationTest import CDataInterface CDataInterface.Initialize() diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 77ea01e3eec04..95be4800f7ffd 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -347,7 +347,7 @@ install_csharp() { show_info "Ensuring that C# is installed..." - if dotnet --version | grep 7\.0 > /dev/null 2>&1; then + if dotnet --version | grep 8\.0 > /dev/null 2>&1; then local csharp_bin=$(dirname $(which dotnet)) show_info "Found C# at $(which csharp) (.NET $(dotnet --version))" else @@ -355,7 +355,7 @@ install_csharp() { show_info "dotnet found but it is the wrong version and will be ignored." 
fi local csharp_bin=${ARROW_TMPDIR}/csharp/bin - local dotnet_version=7.0.102 + local dotnet_version=8.0.204 local dotnet_platform= case "$(uname)" in Linux) diff --git a/dev/tasks/verify-rc/github.macos.yml b/dev/tasks/verify-rc/github.macos.yml index 8963954dba49d..4bc3fff71b64a 100644 --- a/dev/tasks/verify-rc/github.macos.yml +++ b/dev/tasks/verify-rc/github.macos.yml @@ -51,9 +51,9 @@ jobs: distribution: 'temurin' java-version: '11' - - uses: actions/setup-dotnet@v2 + - uses: actions/setup-dotnet@v4 with: - dotnet-version: '7.0.x' + dotnet-version: '8.0.x' - uses: actions/setup-node@v4 with: diff --git a/docs/source/developers/release_verification.rst b/docs/source/developers/release_verification.rst index ec474a5729b64..8c301b44a3c42 100644 --- a/docs/source/developers/release_verification.rst +++ b/docs/source/developers/release_verification.rst @@ -152,7 +152,7 @@ As an example: * NVIDIA CUDA Build cuda_11.5.r11.5/compiler.30672275_0 * openjdk version "17.0.9" 2023-10-17 * ruby 3.0.2p107 (2021-07-07 revision 0db68f0233) [x86_64-linux-gnu] - * dotnet 7.0.115 + * dotnet 8.0.204 * Ubuntu 22.04 LTS If there were some issues during verification please report them on the From 0f56339ee803858b597418aefdabb993def19f48 Mon Sep 17 00:00:00 2001 From: Ian Cook Date: Fri, 26 Apr 2024 11:33:34 -0400 Subject: [PATCH 005/261] GH-41386: [Docs] Document Arrow Go Avro read support (#41393) Updates the Implementation Status docs page to reflect that the Go implementation can read Avro files. For the Rust implementation, I inferred from [this PR](https://github.com/apache/arrow-rs/issues/4886) and [this comment](https://github.com/apache/arrow-rs/issues/5562#issuecomment-2024885348) that we should hold off on indicating that the Rust implementation can read Avro files. * GitHub Issue: #41386 --- docs/source/status.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/status.rst b/docs/source/status.rst index f4672d6b4bc55..266381175608a 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -353,7 +353,7 @@ Third-Party Data Formats | Format | C++ | Java | Go | JS | C# | Rust | Julia | Swift | | | | | | | | | | | +=============================+=========+=========+=======+====+=======+=======+=======+=======+ -| Avro | | R | | | | | | | +| Avro | | R | R | | | | | | +-----------------------------+---------+---------+-------+----+-------+-------+-------+-------+ | CSV | R/W | R (2) | R/W | | | R/W | R/W | | +-----------------------------+---------+---------+-------+----+-------+-------+-------+-------+ From 15986ae5ffef2f274c04cf0d5eec2155fe6523a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 26 Apr 2024 19:13:39 +0200 Subject: [PATCH 006/261] GH-41390: [CI] Use setup-python GitHub action on csharp macOS job (#41392) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change The new macos-latest GH runner has changed and jobs are currently failing. ### What changes are included in this PR? Install python with setup-python. ### Are these changes tested? Will be on CI ### Are there any user-facing changes? 
No * GitHub Issue: #41390 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- .github/workflows/csharp.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 12f946fe66fc9..7ae3606a44812 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -94,7 +94,7 @@ jobs: run: ci/scripts/csharp_test.sh $(pwd) macos: - name: AMD64 macOS 11 C# ${{ matrix.dotnet }} + name: ARM64 macOS 14 C# ${{ matrix.dotnet }} runs-on: macos-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 @@ -107,6 +107,10 @@ jobs: uses: actions/setup-dotnet@v4 with: dotnet-version: ${{ matrix.dotnet }} + - name: Setup Python + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 + with: + python-version: 3.12 - name: Checkout Arrow uses: actions/checkout@v4 with: From ea314a3f8d9d4446836aa999b66659c07421f7a4 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Fri, 26 Apr 2024 18:32:32 -0400 Subject: [PATCH 007/261] GH-41358: [R] Support join "na_matches" argument (#41372) ### Rationale for this change Noticed in #41350, I made #41358 to implement this in C++, but it turns out the option was there, just buried a bit. ### What changes are included in this PR? `na_matches` is mapped through to the `key_cmp` field in `HashJoinNodeOptions`. Acero supports having a different value for this for each of the join keys, but dplyr does not, so I kept it constant for all key columns to match the dplyr behavior. ### Are these changes tested? Yes ### Are there any user-facing changes? Yes * GitHub Issue: #41358 --- r/NEWS.md | 1 + r/R/arrow-package.R | 12 +++++------ r/R/arrowExports.R | 4 ++-- r/R/dplyr-funcs-doc.R | 12 +++++------ r/R/dplyr-join.R | 8 +++++--- r/R/query-engine.R | 8 +++++--- r/man/acero.Rd | 12 +++++------ r/src/arrowExports.cpp | 11 +++++----- r/src/compute-exec.cpp | 18 ++++++++++++----- r/tests/testthat/test-dplyr-join.R | 32 ++++++++++++++++++++++++++++++ 10 files changed, 82 insertions(+), 36 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 4ed9f28a28436..05f934dac68f3 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -21,6 +21,7 @@ * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. +* The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. 
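As an aside for readers of this entry, a minimal sketch of the new option's behavior (an illustration only, mirroring the tests added later in this patch; assumes `arrow` and `dplyr` are attached):

```r
left <- tibble::tibble(x = c(1, NA, 3))
right <- tibble::tibble(x = c(1, NA, 3), y = c("a", "b", "c"))

# Default, dplyr-style: NA join keys match each other,
# so the NA row picks up y = "b".
arrow_table(left) %>%
  left_join(right, by = "x", na_matches = "na") %>%
  collect()

# SQL-style: NA join keys never match,
# so the NA row gets y = NA.
arrow_table(left) %>%
  left_join(right, by = "x", na_matches = "never") %>%
  collect()
```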
(#41358) # arrow 16.0.0 diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index f6977e626276b..7087a40c4903a 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -66,12 +66,12 @@ supported_dplyr_methods <- list( compute = NULL, collapse = NULL, distinct = "`.keep_all = TRUE` not supported", - left_join = "the `copy` and `na_matches` arguments are ignored", - right_join = "the `copy` and `na_matches` arguments are ignored", - inner_join = "the `copy` and `na_matches` arguments are ignored", - full_join = "the `copy` and `na_matches` arguments are ignored", - semi_join = "the `copy` and `na_matches` arguments are ignored", - anti_join = "the `copy` and `na_matches` arguments are ignored", + left_join = "the `copy` argument is ignored", + right_join = "the `copy` argument is ignored", + inner_join = "the `copy` argument is ignored", + full_join = "the `copy` argument is ignored", + semi_join = "the `copy` argument is ignored", + anti_join = "the `copy` argument is ignored", count = NULL, tally = NULL, rename_with = NULL, diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 752d3a266b26a..62e2182ffcd52 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -484,8 +484,8 @@ ExecNode_Aggregate <- function(input, options, key_names) { .Call(`_arrow_ExecNode_Aggregate`, input, options, key_names) } -ExecNode_Join <- function(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) { - .Call(`_arrow_ExecNode_Join`, input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right) +ExecNode_Join <- function(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches) { + .Call(`_arrow_ExecNode_Join`, input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches) } ExecNode_Union <- function(input, right_data) { diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R index 2042f800142b7..fda77bca83fc2 100644 --- a/r/R/dplyr-funcs-doc.R +++ b/r/R/dplyr-funcs-doc.R @@ -36,7 +36,7 @@ #' which returns an `arrow` [Table], or `collect()`, which pulls the resulting #' Table into an R `tibble`. 
#' -#' * [`anti_join()`][dplyr::anti_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`anti_join()`][dplyr::anti_join()]: the `copy` argument is ignored #' * [`arrange()`][dplyr::arrange()] #' * [`collapse()`][dplyr::collapse()] #' * [`collect()`][dplyr::collect()] @@ -45,22 +45,22 @@ #' * [`distinct()`][dplyr::distinct()]: `.keep_all = TRUE` not supported #' * [`explain()`][dplyr::explain()] #' * [`filter()`][dplyr::filter()] -#' * [`full_join()`][dplyr::full_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`full_join()`][dplyr::full_join()]: the `copy` argument is ignored #' * [`glimpse()`][dplyr::glimpse()] #' * [`group_by()`][dplyr::group_by()] #' * [`group_by_drop_default()`][dplyr::group_by_drop_default()] #' * [`group_vars()`][dplyr::group_vars()] #' * [`groups()`][dplyr::groups()] -#' * [`inner_join()`][dplyr::inner_join()]: the `copy` and `na_matches` arguments are ignored -#' * [`left_join()`][dplyr::left_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`inner_join()`][dplyr::inner_join()]: the `copy` argument is ignored +#' * [`left_join()`][dplyr::left_join()]: the `copy` argument is ignored #' * [`mutate()`][dplyr::mutate()]: window functions (e.g. things that require aggregation within groups) not currently supported #' * [`pull()`][dplyr::pull()]: the `name` argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow [ChunkedArray] in a future release. Provide `as_vector = TRUE/FALSE` to control this behavior, or set `options(arrow.pull_as_vector)` globally. #' * [`relocate()`][dplyr::relocate()] #' * [`rename()`][dplyr::rename()] #' * [`rename_with()`][dplyr::rename_with()] -#' * [`right_join()`][dplyr::right_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`right_join()`][dplyr::right_join()]: the `copy` argument is ignored #' * [`select()`][dplyr::select()] -#' * [`semi_join()`][dplyr::semi_join()]: the `copy` and `na_matches` arguments are ignored +#' * [`semi_join()`][dplyr::semi_join()]: the `copy` argument is ignored #' * [`show_query()`][dplyr::show_query()] #' * [`slice_head()`][dplyr::slice_head()]: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; `prop` only supported on queries where `nrow()` is knowable without evaluating #' * [`slice_max()`][dplyr::slice_max()]: slicing within groups not supported; `with_ties = TRUE` (dplyr default) is not supported; `prop` only supported on queries where `nrow()` is knowable without evaluating diff --git a/r/R/dplyr-join.R b/r/R/dplyr-join.R index 39237f574bd28..e76e041a54277 100644 --- a/r/R/dplyr-join.R +++ b/r/R/dplyr-join.R @@ -25,14 +25,15 @@ do_join <- function(x, suffix = c(".x", ".y"), ..., keep = FALSE, - na_matches, + na_matches = c("na", "never"), join_type) { # TODO: handle `copy` arg: ignore? - # TODO: handle `na_matches` arg x <- as_adq(x) y <- as_adq(y) by <- handle_join_by(by, x, y) + na_matches <- match.arg(na_matches) + # For outer joins, we need to output the join keys on both sides so we # can coalesce them afterwards. 
left_output <- if (!keep && join_type == "RIGHT_OUTER") { @@ -54,7 +55,8 @@ do_join <- function(x, left_output = left_output, right_output = right_output, suffix = suffix, - keep = keep + keep = keep, + na_matches = na_matches == "na" ) collapse.arrow_dplyr_query(x) } diff --git a/r/R/query-engine.R b/r/R/query-engine.R index 0f8a84f9b867e..fb48d790fd36e 100644 --- a/r/R/query-engine.R +++ b/r/R/query-engine.R @@ -148,7 +148,8 @@ ExecPlan <- R6Class("ExecPlan", left_output = .data$join$left_output, right_output = .data$join$right_output, left_suffix = .data$join$suffix[[1]], - right_suffix = .data$join$suffix[[2]] + right_suffix = .data$join$suffix[[2]], + na_matches = .data$join$na_matches ) } @@ -307,7 +308,7 @@ ExecNode <- R6Class("ExecNode", out$extras$source_schema$metadata[["r"]]$attributes <- NULL out }, - Join = function(type, right_node, by, left_output, right_output, left_suffix, right_suffix) { + Join = function(type, right_node, by, left_output, right_output, left_suffix, right_suffix, na_matches = TRUE) { self$preserve_extras( ExecNode_Join( self, @@ -318,7 +319,8 @@ ExecNode <- R6Class("ExecNode", left_output = left_output, right_output = right_output, output_suffix_for_left = left_suffix, - output_suffix_for_right = right_suffix + output_suffix_for_right = right_suffix, + na_matches = na_matches ) ) }, diff --git a/r/man/acero.Rd b/r/man/acero.Rd index 365795d9fc65c..ca51ef56334eb 100644 --- a/r/man/acero.Rd +++ b/r/man/acero.Rd @@ -23,7 +23,7 @@ the query on the data. To run the query, call either \code{compute()}, which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting Table into an R \code{tibble}. \itemize{ -\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:arrange]{arrange()}} \item \code{\link[dplyr:compute]{collapse()}} \item \code{\link[dplyr:compute]{collect()}} @@ -32,22 +32,22 @@ Table into an R \code{tibble}. \item \code{\link[dplyr:distinct]{distinct()}}: \code{.keep_all = TRUE} not supported \item \code{\link[dplyr:explain]{explain()}} \item \code{\link[dplyr:filter]{filter()}} -\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:glimpse]{glimpse()}} \item \code{\link[dplyr:group_by]{group_by()}} \item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}} \item \code{\link[dplyr:group_data]{group_vars()}} \item \code{\link[dplyr:group_data]{groups()}} -\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} and \code{na_matches} arguments are ignored -\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} argument is ignored +\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:mutate]{mutate()}}: window functions (e.g. things that require aggregation within groups) not currently supported \item \code{\link[dplyr:pull]{pull()}}: the \code{name} argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow \link{ChunkedArray} in a future release. 
Provide \code{as_vector = TRUE/FALSE} to control this behavior, or set \code{options(arrow.pull_as_vector)} globally. \item \code{\link[dplyr:relocate]{relocate()}} \item \code{\link[dplyr:rename]{rename()}} \item \code{\link[dplyr:rename]{rename_with()}} -\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:select]{select()}} -\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} and \code{na_matches} arguments are ignored +\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:explain]{show_query()}} \item \code{\link[dplyr:slice]{slice_head()}}: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating \item \code{\link[dplyr:slice]{slice_max()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index a4c4b614d6d75..d5aec50219e0b 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -1163,8 +1163,8 @@ extern "C" SEXP _arrow_ExecNode_Aggregate(SEXP input_sexp, SEXP options_sexp, SE // compute-exec.cpp #if defined(ARROW_R_WITH_ACERO) -std::shared_ptr ExecNode_Join(const std::shared_ptr& input, acero::JoinType join_type, const std::shared_ptr& right_data, std::vector left_keys, std::vector right_keys, std::vector left_output, std::vector right_output, std::string output_suffix_for_left, std::string output_suffix_for_right); -extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ +std::shared_ptr ExecNode_Join(const std::shared_ptr& input, acero::JoinType join_type, const std::shared_ptr& right_data, std::vector left_keys, std::vector right_keys, std::vector left_output, std::vector right_output, std::string output_suffix_for_left, std::string output_suffix_for_right, bool na_matches); +extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp, SEXP na_matches_sexp){ BEGIN_CPP11 arrow::r::Input&>::type input(input_sexp); arrow::r::Input::type join_type(join_type_sexp); @@ -1175,11 +1175,12 @@ BEGIN_CPP11 arrow::r::Input>::type right_output(right_output_sexp); arrow::r::Input::type output_suffix_for_left(output_suffix_for_left_sexp); arrow::r::Input::type output_suffix_for_right(output_suffix_for_right_sexp); - return cpp11::as_sexp(ExecNode_Join(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right)); + arrow::r::Input::type na_matches(na_matches_sexp); + return cpp11::as_sexp(ExecNode_Join(input, join_type, right_data, left_keys, right_keys, left_output, right_output, output_suffix_for_left, output_suffix_for_right, na_matches)); END_CPP11 } #else -extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP 
right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp){ +extern "C" SEXP _arrow_ExecNode_Join(SEXP input_sexp, SEXP join_type_sexp, SEXP right_data_sexp, SEXP left_keys_sexp, SEXP right_keys_sexp, SEXP left_output_sexp, SEXP right_output_sexp, SEXP output_suffix_for_left_sexp, SEXP output_suffix_for_right_sexp, SEXP na_matches_sexp){ Rf_error("Cannot call ExecNode_Join(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -5790,7 +5791,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExecNode_Filter", (DL_FUNC) &_arrow_ExecNode_Filter, 2}, { "_arrow_ExecNode_Project", (DL_FUNC) &_arrow_ExecNode_Project, 3}, { "_arrow_ExecNode_Aggregate", (DL_FUNC) &_arrow_ExecNode_Aggregate, 3}, - { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 9}, + { "_arrow_ExecNode_Join", (DL_FUNC) &_arrow_ExecNode_Join, 10}, { "_arrow_ExecNode_Union", (DL_FUNC) &_arrow_ExecNode_Union, 2}, { "_arrow_ExecNode_Fetch", (DL_FUNC) &_arrow_ExecNode_Fetch, 3}, { "_arrow_ExecNode_OrderBy", (DL_FUNC) &_arrow_ExecNode_OrderBy, 2}, diff --git a/r/src/compute-exec.cpp b/r/src/compute-exec.cpp index e0b3c62c47d7f..d0c50315c299f 100644 --- a/r/src/compute-exec.cpp +++ b/r/src/compute-exec.cpp @@ -411,10 +411,17 @@ std::shared_ptr ExecNode_Join( const std::shared_ptr& right_data, std::vector left_keys, std::vector right_keys, std::vector left_output, std::vector right_output, - std::string output_suffix_for_left, std::string output_suffix_for_right) { + std::string output_suffix_for_left, std::string output_suffix_for_right, + bool na_matches) { std::vector left_refs, right_refs, left_out_refs, right_out_refs; + std::vector key_cmps; for (auto&& name : left_keys) { left_refs.emplace_back(std::move(name)); + // Populate key_cmps in this loop, one for each key + // Note that Acero supports having different values for each key, but dplyr + // only supports one value for all keys, so we're only going to support that + // for now. + key_cmps.emplace_back(na_matches ? 
acero::JoinKeyCmp::IS : acero::JoinKeyCmp::EQ); } for (auto&& name : right_keys) { right_refs.emplace_back(std::move(name)); @@ -434,10 +441,11 @@ std::shared_ptr ExecNode_Join( return MakeExecNodeOrStop( "hashjoin", input->plan(), {input.get(), right_data.get()}, - acero::HashJoinNodeOptions{ - join_type, std::move(left_refs), std::move(right_refs), - std::move(left_out_refs), std::move(right_out_refs), compute::literal(true), - std::move(output_suffix_for_left), std::move(output_suffix_for_right)}); + acero::HashJoinNodeOptions{join_type, std::move(left_refs), std::move(right_refs), + std::move(left_out_refs), std::move(right_out_refs), + std::move(key_cmps), compute::literal(true), + std::move(output_suffix_for_left), + std::move(output_suffix_for_right)}); } // [[acero::export]] diff --git a/r/tests/testthat/test-dplyr-join.R b/r/tests/testthat/test-dplyr-join.R index e3e1e98cfca15..9a1c8b7b80fea 100644 --- a/r/tests/testthat/test-dplyr-join.R +++ b/r/tests/testthat/test-dplyr-join.R @@ -441,3 +441,35 @@ test_that("full joins handle keep", { small_dataset_df ) }) + +left <- tibble::tibble( + x = c(1, NA, 3), +) +right <- tibble::tibble( + x = c(1, NA, 3), + y = c("a", "b", "c") +) +na_matches_na <- right +na_matches_never <- tibble::tibble( + x = c(1, NA, 3), + y = c("a", NA, "c") +) +test_that("na_matches argument to join: na (default)", { + expect_equal( + arrow_table(left) %>% + left_join(right, by = "x", na_matches = "na") %>% + arrange(x) %>% + collect(), + na_matches_na %>% arrange(x) + ) +}) + +test_that("na_matches argument to join: never", { + expect_equal( + arrow_table(left) %>% + left_join(right, by = "x", na_matches = "never") %>% + arrange(x) %>% + collect(), + na_matches_never %>% arrange(x) + ) +}) From 858054bec01a9b9d820107ca0ec45865385a5e89 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sat, 27 Apr 2024 11:37:29 -0500 Subject: [PATCH 008/261] MINOR: [Docs] Correct a small typo in archery docs (#41412) ### Rationale for this change The name is archery not archer ### What changes are included in this PR? `s/archer /archery/` ### Are these changes tested? No, docs only ### Are there any user-facing changes? Yes, to the docs Authored-by: Jonathan Keane Signed-off-by: AlenkaF --- dev/archery/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/archery/README.md b/dev/archery/README.md index 9991e7402d832..0b9d5c743d122 100644 --- a/dev/archery/README.md +++ b/dev/archery/README.md @@ -23,7 +23,7 @@ Archery is documented on the Arrow website: * [Daily development using Archery](https://arrow.apache.org/docs/developers/continuous_integration/archery.html) * [Using Archery and Crossbow](https://arrow.apache.org/docs/developers/continuous_integration/crossbow.html) -* [Using Archer and Docker](https://arrow.apache.org/docs/developers/continuous_integration/docker.html) +* [Using Archery and Docker](https://arrow.apache.org/docs/developers/continuous_integration/docker.html) # Installing Archery From 5ee70ee1bc9c8c9f6ede63ea5e6c52e04446fe08 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Sat, 27 Apr 2024 15:42:30 -0400 Subject: [PATCH 009/261] GH-41367: [C++] Replace [[maybe_unused]] with Arrow macro (#41359) ### Rationale for this change This is a follow up to https://github.com/apache/arrow/pull/41111 which was created as an issue in #41367 ### What changes are included in this PR? Replace [[maybe_unused]] with Arrow macro ### Are these changes tested? Builds cleanly ### Are there any user-facing changes? 
No * GitHub Issue: #41367 Authored-by: Will Ayd Signed-off-by: Sutou Kouhei --- cpp/apidoc/Doxyfile | 11 ++++++----- cpp/cmake_modules/SetupCxxFlags.cmake | 1 + cpp/src/arrow/array/builder_base.h | 6 +++--- cpp/src/arrow/array/builder_nested.h | 2 +- cpp/src/arrow/array/builder_primitive.h | 4 ++-- cpp/src/arrow/device.h | 6 +++--- cpp/src/arrow/type.h | 6 +++--- cpp/src/arrow/util/macros.h | 4 ++++ 8 files changed, 23 insertions(+), 17 deletions(-) diff --git a/cpp/apidoc/Doxyfile b/cpp/apidoc/Doxyfile index e19c933cd454f..5be93032c00d9 100644 --- a/cpp/apidoc/Doxyfile +++ b/cpp/apidoc/Doxyfile @@ -2168,16 +2168,17 @@ INCLUDE_FILE_PATTERNS = PREDEFINED = __attribute__(x)= \ __declspec(x)= \ - PARQUET_EXPORT= \ - GANDIVA_EXPORT= \ - ARROW_EXPORT= \ ARROW_ACERO_EXPORT= \ + ARROW_ARG_UNUSED(x)=x \ + ARROW_DEPRECATED(x)= \ ARROW_DS_EXPORT= \ ARROW_ENGINE_EXPORT= \ + ARROW_EXPORT= \ + ARROW_EXTERN_TEMPLATE= \ ARROW_FLIGHT_EXPORT= \ ARROW_FLIGHT_SQL_EXPORT= \ - ARROW_EXTERN_TEMPLATE= \ - ARROW_DEPRECATED(x)= + GANDIVA_EXPORT= \ + PARQUET_EXPORT= # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index d56609c123968..ea357b47794ce 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -314,6 +314,7 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wextra") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdocumentation") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -DARROW_WARN_DOCUMENTATION") if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # size_t is 32 bit in Emscripten wasm32 - ignore conversion errors set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-shorten-64-to-32") diff --git a/cpp/src/arrow/array/builder_base.h b/cpp/src/arrow/array/builder_base.h index e6c0b2d2387f2..3a85318735f80 100644 --- a/cpp/src/arrow/array/builder_base.h +++ b/cpp/src/arrow/array/builder_base.h @@ -175,9 +175,9 @@ class ARROW_EXPORT ArrayBuilder { /// \brief Append a range of values from an array. /// /// The given array must be the same type as the builder. - virtual Status AppendArraySlice([[maybe_unused]] const ArraySpan& array, - [[maybe_unused]] int64_t offset, - [[maybe_unused]] int64_t length) { + virtual Status AppendArraySlice(const ArraySpan& ARROW_ARG_UNUSED(array), + int64_t ARROW_ARG_UNUSED(offset), + int64_t ARROW_ARG_UNUSED(length)) { return Status::NotImplemented("AppendArraySlice for builder for ", *type()); } diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 9f7b0fcdbce07..6089cf04d421f 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -248,7 +248,7 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public ArrayBuilder { /// \brief Append dimensions for a single list slot. /// /// ListViewBuilder overrides this to also append the size. 
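// Reader's sketch (illustration only, not part of the patch): per the
// macros.h hunk later in this patch, ARROW_ARG_UNUSED(x) expands to `x`
// when ARROW_WARN_DOCUMENTATION is defined, keeping parameter names
// visible so clang's -Wdocumentation can match `\param` entries, and
// expands to nothing otherwise, leaving an unnamed (and therefore never
// "unused") parameter:
//
//   #ifdef ARROW_WARN_DOCUMENTATION
//   #define ARROW_ARG_UNUSED(x) x
//   #else
//   #define ARROW_ARG_UNUSED(x)
//   #endif
//
//   virtual void UnsafeAppendDimensions(int64_t offset,
//                                       int64_t ARROW_ARG_UNUSED(size));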
- virtual void UnsafeAppendDimensions(int64_t offset, [[maybe_unused]] int64_t size) { + virtual void UnsafeAppendDimensions(int64_t offset, int64_t ARROW_ARG_UNUSED(size)) { offsets_builder_.UnsafeAppend(static_cast(offset)); } diff --git a/cpp/src/arrow/array/builder_primitive.h b/cpp/src/arrow/array/builder_primitive.h index db8d2cbaabb61..de7af1b46bdee 100644 --- a/cpp/src/arrow/array/builder_primitive.h +++ b/cpp/src/arrow/array/builder_primitive.h @@ -32,10 +32,10 @@ namespace arrow { class ARROW_EXPORT NullBuilder : public ArrayBuilder { public: explicit NullBuilder(MemoryPool* pool = default_memory_pool(), - [[maybe_unused]] int64_t alignment = kDefaultBufferAlignment) + int64_t ARROW_ARG_UNUSED(alignment) = kDefaultBufferAlignment) : ArrayBuilder(pool) {} - explicit NullBuilder([[maybe_unused]] const std::shared_ptr& type, + explicit NullBuilder(const std::shared_ptr& ARROW_ARG_UNUSED(type), MemoryPool* pool = default_memory_pool(), int64_t alignment = kDefaultBufferAlignment) : NullBuilder(pool, alignment) {} diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index 3003bad7c459c..a591167ef9a45 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -140,7 +140,7 @@ class ARROW_EXPORT Device : public std::enable_shared_from_this, /// derived from Device::Stream to allow for stream ordered events /// and memory allocations. virtual Result> MakeStream( - [[maybe_unused]] unsigned int flags) { + unsigned int ARROW_ARG_UNUSED(flags)) { return NULLPTR; } @@ -151,8 +151,8 @@ class ARROW_EXPORT Device : public std::enable_shared_from_this, /// a no-op function can be passed to indicate ownership is maintained /// externally virtual Result> WrapStream( - [[maybe_unused]] void* device_stream, - [[maybe_unused]] Stream::release_fn_t release_fn) { + void* ARROW_ARG_UNUSED(device_stream), + Stream::release_fn_t ARROW_ARG_UNUSED(release_fn)) { return NULLPTR; } diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 58c9df04ec5c3..bb05e6efdb987 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -1723,7 +1723,7 @@ class ARROW_EXPORT MonthIntervalType : public IntervalType { MonthIntervalType() : IntervalType(type_id) {} - std::string ToString([[maybe_unused]] bool show_metadata = false) const override { + std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override { return name(); } std::string name() const override { return "month_interval"; } @@ -1761,7 +1761,7 @@ class ARROW_EXPORT DayTimeIntervalType : public IntervalType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString([[maybe_unused]] bool show_metadata = false) const override { + std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override { return name(); } std::string name() const override { return "day_time_interval"; } @@ -1803,7 +1803,7 @@ class ARROW_EXPORT MonthDayNanoIntervalType : public IntervalType { int bit_width() const override { return static_cast(sizeof(c_type) * CHAR_BIT); } - std::string ToString([[maybe_unused]] bool show_metadata = false) const override { + std::string ToString(bool ARROW_ARG_UNUSED(show_metadata) = false) const override { return name(); } std::string name() const override { return "month_day_nano_interval"; } diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index d80828869b33c..d0c05a7908256 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -67,7 +67,11 @@ // [5] J. Doerfert et al. 2019. 
"Performance Exploration Through Optimistic Static // Program Annotations". https://github.com/jdoerfert/PETOSPA/blob/master/ISC19.pdf #define ARROW_UNUSED(x) (void)(x) +#ifdef ARROW_WARN_DOCUMENTATION +#define ARROW_ARG_UNUSED(x) x +#else #define ARROW_ARG_UNUSED(x) +#endif #if defined(__GNUC__) // GCC and compatible compilers (clang, Intel ICC) #define ARROW_NORETURN __attribute__((noreturn)) #define ARROW_NOINLINE __attribute__((noinline)) From 4b2cf22f5aea16f4ad0db447624b6e713b43d62c Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 28 Apr 2024 05:31:55 +0900 Subject: [PATCH 010/261] GH-41405: [Release][Docs][GLib] Use Sphinx based GLib front page (#41406) ### Rationale for this change We should use the GLib front page generated by Sphinx. ### What changes are included in this PR? Stop reverting the GLib front page change in release script. ### Are these changes tested? No. ### Are there any user-facing changes? Yes. * GitHub Issue: #41405 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/post-08-docs.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/dev/release/post-08-docs.sh b/dev/release/post-08-docs.sh index 1e457c95c033c..c59f9b96857a6 100755 --- a/dev/release/post-08-docs.sh +++ b/dev/release/post-08-docs.sh @@ -79,7 +79,6 @@ curl \ https://apache.jfrog.io/artifactory/arrow/docs/${version}/docs.tar.gz tar xvf docs.tar.gz rm -f docs.tar.gz -git checkout docs/c_glib/index.html if [ "$is_major_release" = "yes" ] ; then previous_series=${previous_version%.*} mv docs_temp docs/${previous_series} From 9090e679da91e0544171c2da9f2b9ce8ba23d389 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Sun, 28 Apr 2024 17:00:52 -0700 Subject: [PATCH 011/261] GH-41307: [Java] Use org.apache:apache parent pom version 31 (#41309) ### Rationale for this change Use/update Maven modules to `org.apache:parent:31` and clean up Maven modules to remove unnecessary configuration or outdated workarounds ### What changes are included in this PR? * Add `org.apache:parent:31` to `org.apache.arrow:arrow-bom` and `org.apache.arrow.maven.plugins:arrow-maven-plugins` to make them conformant with ASF standards * Update `org.apache.arrow:arrow-java-root` parent to `org.apache:parent:31` * Use `version.*` and other properties to override plugin versions defined by `org.apache:parent` * Move standalone plugin versions under pluginManagement at the top level * Cleanup redundant plugin version or configuration declaration * Update `maven-dependency-plugin` to 3.6.1 and add the required overrides when necessary * Update `maven-shade-plugin` to 3.5.1 (via `org.apache:parent`) * Remove enforcer check for java and maven version (handled by `org.apache:parent`) * Remove unnecessary `mvnrepository` link comments * Remove `m2e.version` property check in profiles (only needed for errorprone plugin configuration which is incompatible with M2E) * Cleanup `argLine` overrides for surefire/failsafe plugins * Remove unnecessary `../pom.xml` `` directives * Remove source/target/encoding configuration properties for `maven-compiler-plugin`, `maven-javadoc-plugin` and `maven-resources-plugin` as it is handled by `org.apache:parent` and plugin themselves * Remove unnecessary copy of codegen templates in `arrow-vector` module * Remove unnecessary junit jupiter engine dependencies for surefire/failsafe plugins. ### Are these changes tested? No net new code. tested via CI jobs ### Are there any user-facing changes? 
None * GitHub Issue: #41307 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/adapter/avro/pom.xml | 9 - java/adapter/jdbc/pom.xml | 7 - java/adapter/orc/pom.xml | 17 ++ java/bom/pom.xml | 21 +- java/c/pom.xml | 1 - java/dataset/pom.xml | 12 +- java/flight/flight-core/pom.xml | 27 +-- java/flight/flight-integration-tests/pom.xml | 2 - java/flight/flight-sql-jdbc-core/pom.xml | 10 - java/flight/flight-sql-jdbc-driver/pom.xml | 1 - java/flight/flight-sql/pom.xml | 5 - java/format/pom.xml | 2 - java/gandiva/pom.xml | 19 +- .../module-info-compiler-maven-plugin/pom.xml | 28 +-- java/maven/pom.xml | 75 +++---- java/memory/memory-core/pom.xml | 22 +- java/performance/pom.xml | 49 ----- java/pom.xml | 207 +++++++----------- java/tools/pom.xml | 22 +- java/vector/pom.xml | 91 +------- 20 files changed, 177 insertions(+), 450 deletions(-) diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 9ddc150253874..645e8c4ff2e60 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,36 +25,27 @@ http://maven.apache.org - - org.apache.arrow arrow-memory-core - - org.apache.arrow arrow-memory-netty runtime - - org.apache.arrow arrow-vector - org.immutables value - org.apache.avro avro ${dep.avro.version} - diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 5f72729bb76e7..33360c64b13b6 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -26,20 +26,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -51,7 +48,6 @@ value - com.h2database h2 @@ -94,9 +90,6 @@ jdk11+ [11,] - - !m2e.version - diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index f6aadca6de4d3..ec6f73a3e9e40 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -134,5 +134,22 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + + + org.apache.arrow:arrow-format + + + + + + diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 12b9950ad80fc..0af50c638055e 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -27,6 +27,19 @@ + + 1.8 + 1.8 + 3.11.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.0 + 3.2.2 + 3.6.3 + 3.5.0 @@ -138,11 +151,9 @@ ${project.version} - - @@ -156,12 +167,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -188,12 +197,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 diff --git a/java/c/pom.xml b/java/c/pom.xml index 1095e99bbdd3f..43a62a8303bfe 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -83,5 +83,4 @@ - diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 62ea79f55ccd4..2121119af398e 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -23,7 +23,6 @@ Java implementation of Arrow Dataset API/Framework ../../../cpp/release-build/ - 2.5.0 1.13.1 1.11.3 @@ -195,21 +194,14 @@ jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - false - - ${project.basedir}/../../testing/data - - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED diff --git a/java/flight/flight-core/pom.xml 
b/java/flight/flight-core/pom.xml index 897af0b9e1129..163b4c24031b1 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-core @@ -151,13 +150,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 shade-main @@ -244,7 +236,6 @@ org.apache.maven.plugins maven-dependency-plugin - 3.3.0 analyze @@ -264,7 +255,6 @@ org.codehaus.mojo build-helper-maven-plugin - 1.9.1 add-generated-sources-to-classpath @@ -282,7 +272,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -299,13 +288,6 @@ - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -313,18 +295,14 @@ jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - false + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.basedir}/../../../testing/data @@ -334,5 +312,4 @@ - diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 74016d81e91e5..cd2c28ba8959f 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-integration-tests @@ -63,7 +62,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 36da335b37b9a..2e0de90fcf8bc 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-core @@ -47,20 +46,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -136,11 +132,6 @@ - - - src/main/resources - - maven-surefire-plugin @@ -154,7 +145,6 @@ org.codehaus.mojo properties-maven-plugin - 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index b3afbe1defdba..4456270e7b347 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index 7ed217db68b07..cf466ab1720cf 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql @@ -119,9 +118,6 @@ jdk11+ [11,] - - !m2e.version - @@ -136,5 +132,4 @@ - diff --git a/java/format/pom.xml b/java/format/pom.xml index e9eded79de660..4483047e20960 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,7 +31,6 @@ - @@ -42,6 +41,5 @@ - diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 00acb89f1d7cf..c5703c62dfe23 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,13 +22,12 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. 
+ - 1.8 - 1.8 - 3.25.1 true ../../../cpp/release-build + org.apache.arrow @@ -51,7 +50,6 @@ com.google.protobuf protobuf-java - ${protobuf.version} com.google.guava @@ -62,6 +60,7 @@ slf4j-api + @@ -88,14 +87,6 @@ - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -105,7 +96,6 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 attach-sources @@ -118,7 +108,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 attach-javadocs @@ -131,7 +120,6 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.2 sign-artifacts @@ -146,5 +134,4 @@ - diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 6589020d6ecb5..5909b6b3484fc 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,39 +64,14 @@ org.apache.maven.plugin-tools maven-plugin-annotations - 3.11.0 + ${maven.plugin.tools.version} provided - - - maven-clean-plugin - 3.3.2 - - - maven-plugin-plugin - 3.12.0 - - - maven-jar-plugin - 3.3.0 - - - maven-install-plugin - 3.1.1 - - - maven-deploy-plugin - 3.1.1 - - - maven-invoker-plugin - 3.1.0 - com.gradle gradle-enterprise-maven-extension @@ -118,7 +93,6 @@ org.apache.maven.plugins maven-plugin-plugin - 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index f290ded2e2913..4314192eda73b 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,6 +15,13 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. --> + + org.apache + apache + 31 + + + org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -27,25 +34,38 @@ true + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.0 + 3.2.2 + 3.6.3 + 3.5.0 - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 - org.apache.maven.plugins - maven-site-plugin - 3.12.1 + pl.project13.maven + git-commit-id-plugin + 4.0.5 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 @@ -119,11 +139,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -143,43 +158,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - UTF-8 - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -205,8 +194,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -248,7 +235,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 ../dev/checkstyle/checkstyle.xml ../dev/checkstyle/checkstyle.license @@ -288,7 +274,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -353,12 +338,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index ca5bc603bd4dc..8e39ae43d116f 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -61,9 +61,6 @@ jdk11+ [11,] - - !m2e.version - @@ -92,7 +89,6 @@ org.apache.maven.plugins maven-surefire-plugin - opens-tests @@ -101,12 +97,9 @@ test - - -Dfoo=bar - - - **/TestArrowBuf.java - + + + **/TestOpens.java @@ -129,9 +122,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -Xmaxerrs @@ -150,12 +140,6 @@ 
${checker.framework.version} - - - org.immutables.value.internal.$processor$.$Processor - - org.checkerframework.checker.nullness.NullnessChecker - diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 1e99a29265724..c819e6393d78f 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,9 +22,7 @@ JMH Performance benchmarks for other Arrow libraries. - UTF-8 1.37 - 1.8 benchmarks true .* @@ -96,52 +94,7 @@ - - - - maven-clean-plugin - 3.3.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-install-plugin - 3.1.1 - - - maven-jar-plugin - 3.3.0 - - - maven-javadoc-plugin - 3.6.3 - - - maven-resources-plugin - 3.3.1 - - - maven-source-plugin - 2.2.1 - - - maven-surefire-plugin - 3.2.5 - - - - - org.apache.maven.plugins - maven-compiler-plugin - - ${javac.target} - ${javac.target} - ${javac.target} - - org.apache.maven.plugins maven-shade-plugin @@ -175,7 +128,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 ${skip.perf.benchmarks} test @@ -212,5 +164,4 @@ - diff --git a/java/pom.xml b/java/pom.xml index 16564ae828b0f..39fd1e00b64e9 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -85,7 +85,7 @@ 33.0.0-jre 4.1.108.Final 1.63.0 - 3.23.1 + 3.25.1 2.17.0 3.4.0 23.5.26 @@ -95,10 +95,28 @@ true 9+181-r4173-1 2.24.0 - 3.12.1 5.11.0 5.2.0 3.42.0 + none + -Xdoclint:none + + 1.8 + 1.8 + 3.11.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.0 + + 3.2.2 + 3.6.3 + 3.5.0 @@ -268,40 +286,16 @@ 8.3.0 test - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - 3.1.2 - - - org.apache.rat - apache-rat-plugin - 0.16.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} + true **/module-info.java **/module-info.java false @@ -314,18 +308,8 @@ - - maven-enforcer-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.1 - maven-surefire-plugin - 3.2.5 true true @@ -340,22 +324,9 @@ 1048576 - - - org.junit.jupiter - junit-jupiter-engine - ${dep.junit.jupiter.version} - - - org.apache.maven.surefire - surefire-junit-platform - 3.2.5 - - maven-failsafe-plugin - 3.2.5 ${project.build.directory} @@ -444,6 +415,22 @@ + + + org.apache.drill.tools + drill-fmpp-maven-plugin + [1.0,) + + generate + + + + + false + true + + + @@ -451,9 +438,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 - 8 **/module-info.java @@ -464,16 +449,6 @@ module-info-compiler-maven-plugin ${project.version} - - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 - - - org.apache.maven.plugins - maven-site-plugin - 3.12.1 - com.gradle gradle-enterprise-maven-extension @@ -521,6 +496,36 @@ spotless-maven-plugin 2.30.0 + + org.codehaus.mojo + build-helper-maven-plugin + 1.9.1 + + + org.codehaus.mojo + properties-maven-plugin + 1.2.1 + + + org.codehaus.mojo + exec-maven-plugin + 3.2.0 + + + pl.project13.maven + git-commit-id-plugin + 4.0.5 + + + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 + + + org.apache.drill.tools + drill-fmpp-maven-plugin + 1.21.1 + @@ -594,11 +599,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -618,42 +618,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -679,8 +654,6 @@ pl.project13.maven git-commit-id-plugin - 
4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -722,7 +695,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 **/module-info.java dev/checkstyle/checkstyle.xml @@ -786,7 +758,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -817,12 +788,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -857,7 +826,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 **/module-info.java @@ -885,28 +853,15 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 - - java-nodoclint - - [1.8,) - - - none - -Xdoclint:none - - - arrow-c-data @@ -954,7 +909,6 @@ org.apache.maven.plugins maven-compiler-plugin - true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -987,9 +941,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -XDcompilePolicy=simple -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.* @@ -1013,6 +964,16 @@ + + + + + jdk11+ + + [11,] + + + org.apache.maven.plugins maven-surefire-plugin @@ -1020,6 +981,13 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + org.apache.maven.plugins + maven-failsafe-plugin + + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + @@ -1060,7 +1028,6 @@ org.jacoco jacoco-maven-plugin - 0.8.11 @@ -1106,7 +1073,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 cdata-cmake @@ -1163,7 +1129,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1270,7 +1235,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1360,5 +1324,4 @@ - diff --git a/java/tools/pom.xml b/java/tools/pom.xml index b1507cd301f31..58b790c9f027f 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -54,6 +54,11 @@ 1.3.14 test + com.fasterxml.jackson.core jackson-core @@ -85,7 +90,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -101,7 +105,21 @@ + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:* + + + + + - diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 07af93a499907..ca932ae6f26f9 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -76,64 +76,7 @@ - - - - true - - - false - - apache - apache - https://repo.maven.apache.org/maven2/ - - - - - - - codegen - - ${basedir}/src/main/codegen - - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - - - - - - - - org.apache.maven.plugins @@ -163,33 +106,10 @@ - - maven-resources-plugin - - - - copy-fmpp-resources - - copy-resources - - initialize - - ${project.build.directory}/codegen - - - src/main/codegen - false - - - - - - org.apache.drill.tools drill-fmpp-maven-plugin - 1.21.1 generate-fmpp @@ -200,7 +120,7 @@ src/main/codegen/config.fmpp ${project.build.directory}/generated-sources - ${project.build.directory}/codegen/templates + src/main/codegen/templates @@ -208,13 +128,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 @@ -243,7 +156,6 @@ - @@ -276,5 +188,4 @@ - From a8c4f8620117f31425de45a67a275fbf8044ba1f Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 29 Apr 2024 05:55:17 +0530 Subject: [PATCH 012/261] GH-40339: [Java] StringView Initial Implementation (#40340) ### Rationale for this change StringView implementation in Java. This PR only includes the core implementation of StringView ### What changes are included in this PR? 
- [X] Adding ViewVarBinaryVector - [X] Adding ViewVarCharVector - [X] Adding corresponding test cases in the given scope - [X] Including required implementation extensions with not supported warnings - [X] Interface for Holders ### Non Goals of this PR - [ ] https://github.com/apache/arrow/issues/40937 - [ ] https://github.com/apache/arrow/issues/40936 - [ ] https://github.com/apache/arrow/issues/40932 - [ ] https://github.com/apache/arrow/issues/40943 - [ ] https://github.com/apache/arrow/issues/40944 - [ ] https://github.com/apache/arrow/issues/40942 - [ ] https://github.com/apache/arrow/issues/40945 - [ ] https://github.com/apache/arrow/issues/40941 - [ ] https://github.com/apache/arrow/issues/40946 ### Are these changes tested? Yes. Existing test cases on `VarCharVector` and `VarBinaryVector` are verified with view implementations and additional test cases have also been added to check view functionality. And explitly tests have been added to evaluate the view functionality with `ViewVarCharVector` ### Are there any user-facing changes? Yes, this introduces a new API and some public methods have been included in an interface so that it can be extended to write custom functionality like done for views. * GitHub Issue: #40339 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: vibhatha Co-authored-by: Vibhatha Lakmal Abeykoon Signed-off-by: David Li --- .../binder/ColumnBinderArrowTypeVisitor.java | 10 + .../arrow/c/BufferImportTypeVisitor.java | 10 + .../BinaryViewAvaticaParameterConverter.java | 42 + .../Utf8ViewAvaticaParameterConverter.java | 43 + .../jdbc/utils/AvaticaParameterBinder.java | 10 + .../arrow/driver/jdbc/utils/ConvertUtils.java | 12 + .../src/main/java/module-info.java | 1 + .../apache/arrow/memory/ReusableBuffer.java | 2 + .../src/main/codegen/data/ArrowTypes.tdd | 10 + .../main/codegen/data/ValueVectorTypes.tdd | 4 +- .../codegen/templates/HolderReaderImpl.java | 4 +- .../main/codegen/templates/UnionReader.java | 2 +- .../main/codegen/templates/ValueHolders.java | 10 +- .../apache/arrow/vector/AddOrGetResult.java | 2 +- .../vector/BaseLargeVariableWidthVector.java | 11 +- .../arrow/vector/BaseVariableWidthVector.java | 11 +- .../vector/BaseVariableWidthViewVector.java | 1451 ++++++++++++++++ .../arrow/vector/LargeVarBinaryVector.java | 1 + .../arrow/vector/LargeVarCharVector.java | 4 +- .../org/apache/arrow/vector/TypeLayout.java | 25 + .../apache/arrow/vector/VarBinaryVector.java | 1 + .../apache/arrow/vector/VarCharVector.java | 1 + .../vector/VariableWidthFieldVector.java | 143 ++ .../arrow/vector/ViewVarBinaryVector.java | 229 +++ .../arrow/vector/ViewVarCharVector.java | 291 ++++ .../vector/compare/RangeEqualsVisitor.java | 6 + .../vector/compare/TypeEqualsVisitor.java | 6 + .../arrow/vector/compare/VectorVisitor.java | 3 + .../org/apache/arrow/vector/types/Types.java | 44 + .../vector/util/DataSizeRoundingUtil.java | 4 + .../arrow/vector/util/ReusableByteArray.java | 7 + .../org/apache/arrow/vector/util/Text.java | 4 +- .../arrow/vector/util/VectorAppender.java | 6 + .../validate/ValidateVectorBufferVisitor.java | 6 + .../validate/ValidateVectorDataVisitor.java | 6 + .../validate/ValidateVectorTypeVisitor.java | 6 + .../validate/ValidateVectorVisitor.java | 6 + .../org/apache/arrow/vector/TestUtils.java | 10 + .../arrow/vector/TestVarCharViewVector.java | 1462 +++++++++++++++++ .../testing/ValueVectorDataPopulator.java | 12 + 40 files changed, 3898 insertions(+), 20 deletions(-) create mode 100644 
java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java create mode 100644 java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index dc708724043d0..7d50676688e0f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -148,6 +148,11 @@ public ColumnBinder visit(ArrowType.Utf8 type) { new VarCharBinder<>(varChar, jdbcType); } + @Override + public ColumnBinder visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + } + @Override public ColumnBinder visit(ArrowType.LargeUtf8 type) { LargeVarCharVector varChar = (LargeVarCharVector) vector; @@ -162,6 +167,11 @@ public ColumnBinder visit(ArrowType.Binary type) { new VarBinaryBinder<>(varBinary, jdbcType); } + @Override + public ColumnBinder visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + } + @Override public ColumnBinder visit(ArrowType.LargeBinary type) { LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index cd2a464f4fa17..bc6139cc84c54 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -209,6 +209,11 @@ public List visit(ArrowType.Utf8 type) { } } + @Override + public List visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } + @Override public List visit(ArrowType.LargeUtf8 type) { try (ArrowBuf offsets = importOffsets(type, LargeVarCharVector.OFFSET_WIDTH)) { @@ -237,6 +242,11 @@ public List visit(ArrowType.Binary type) { } } + @Override + public List visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } + @Override public List visit(ArrowType.LargeBinary type) { try (ArrowBuf offsets = importOffsets(type, LargeVarBinaryVector.OFFSET_WIDTH)) { diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java new file mode 100644 index 0000000000000..dfd4727014292 --- /dev/null +++ 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/BinaryViewAvaticaParameterConverter.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.converter.impl; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.calcite.avatica.AvaticaParameter; +import org.apache.calcite.avatica.remote.TypedValue; + +/** AvaticaParameterConverter for BinaryView Arrow types. */ +public class BinaryViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { + + public BinaryViewAvaticaParameterConverter(ArrowType.BinaryView type) { + + } + + @Override + public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { + throw new UnsupportedOperationException("Not implemented"); + } + + @Override + public AvaticaParameter createParameter(Field field) { + return createParameter(field, false); + } +} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java new file mode 100644 index 0000000000000..2c826aefb9c1c --- /dev/null +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/converter/impl/Utf8ViewAvaticaParameterConverter.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.driver.jdbc.converter.impl; + +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.calcite.avatica.AvaticaParameter; +import org.apache.calcite.avatica.remote.TypedValue; + +/** + * AvaticaParameterConverter for Utf8View Arrow types. 
+ */ +public class Utf8ViewAvaticaParameterConverter extends BaseAvaticaParameterConverter { + + public Utf8ViewAvaticaParameterConverter(ArrowType.Utf8View type) { + } + + @Override + public boolean bindParameter(FieldVector vector, TypedValue typedValue, int index) { + throw new UnsupportedOperationException("Utf8View not supported"); + } + + @Override + public AvaticaParameter createParameter(Field field) { + return createParameter(field, false); + } +} diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index b2bd8e745ecca..fd9127c226910 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -190,6 +190,11 @@ public Boolean visit(ArrowType.Utf8 type) { return new Utf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + @Override + public Boolean visit(ArrowType.Utf8View type) { + throw new UnsupportedOperationException("Utf8View is unsupported"); + } + @Override public Boolean visit(ArrowType.LargeUtf8 type) { return new LargeUtf8AvaticaParameterConverter(type).bindParameter(vector, typedValue, index); @@ -200,6 +205,11 @@ public Boolean visit(ArrowType.Binary type) { return new BinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + @Override + public Boolean visit(ArrowType.BinaryView type) { + throw new UnsupportedOperationException("BinaryView is unsupported"); + } + @Override public Boolean visit(ArrowType.LargeBinary type) { return new LargeBinaryAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 843fe0cb89d9f..93b5faaef32c7 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -23,6 +23,7 @@ import java.util.stream.Stream; import org.apache.arrow.driver.jdbc.converter.impl.BinaryAvaticaParameterConverter; +import org.apache.arrow.driver.jdbc.converter.impl.BinaryViewAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.BoolAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.DateAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.DecimalAvaticaParameterConverter; @@ -43,6 +44,7 @@ import org.apache.arrow.driver.jdbc.converter.impl.TimestampAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.UnionAvaticaParameterConverter; import org.apache.arrow.driver.jdbc.converter.impl.Utf8AvaticaParameterConverter; +import org.apache.arrow.driver.jdbc.converter.impl.Utf8ViewAvaticaParameterConverter; import org.apache.arrow.flight.sql.FlightSqlColumnMetadata; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; @@ -208,6 +210,11 @@ public AvaticaParameter visit(ArrowType.Utf8 type) { return new Utf8AvaticaParameterConverter(type).createParameter(field); } + @Override + public AvaticaParameter visit(ArrowType.Utf8View type) { 
+ return new Utf8ViewAvaticaParameterConverter(type).createParameter(field); + } + @Override public AvaticaParameter visit(ArrowType.LargeUtf8 type) { return new LargeUtf8AvaticaParameterConverter(type).createParameter(field); @@ -218,6 +225,11 @@ public AvaticaParameter visit(ArrowType.Binary type) { return new BinaryAvaticaParameterConverter(type).createParameter(field); } + @Override + public AvaticaParameter visit(ArrowType.BinaryView type) { + return new BinaryViewAvaticaParameterConverter(type).createParameter(field); + } + @Override public AvaticaParameter visit(ArrowType.LargeBinary type) { return new LargeBinaryAvaticaParameterConverter(type).createParameter(field); diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 34ba34e80bc69..5024b7f45769c 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -25,4 +25,5 @@ requires jsr305; requires org.immutables.value; requires org.slf4j; + requires org.checkerframework.checker.qual; } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java index 3530b819aadff..9e37c286ad831 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ReusableBuffer.java @@ -44,4 +44,6 @@ public interface ReusableBuffer { * @param len the number of bytes of the new data */ void set(ArrowBuf srcBytes, long start, long len); + + void set(byte[] srcBytes, long start, long len); } diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 3cf9a968791a4..9fe40f2319bfd 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -65,6 +65,11 @@ fields: [], complex: false }, + { + name: "Utf8View", + fields: [], + complex: false + }, { name: "LargeUtf8", fields: [], @@ -75,6 +80,11 @@ fields: [], complex: false }, + { + name: "BinaryView", + fields: [], + complex: false + }, { name: "LargeBinary", fields: [], diff --git a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd index 6c2a967712454..ad1f1b93bb3aa 100644 --- a/java/vector/src/main/codegen/data/ValueVectorTypes.tdd +++ b/java/vector/src/main/codegen/data/ValueVectorTypes.tdd @@ -189,7 +189,9 @@ fields: [{name: "start", type: "int"}, {name: "end", type: "int"}, {name: "buffer", type: "ArrowBuf"}], minor: [ { class: "VarBinary" , friendlyType: "byte[]" }, - { class: "VarChar" , friendlyType: "Text" } + { class: "VarChar" , friendlyType: "Text" }, + { class: "ViewVarBinary" , friendlyType: "byte[]" }, + { class: "ViewVarChar" , friendlyType: "Text" } ] }, { diff --git a/java/vector/src/main/codegen/templates/HolderReaderImpl.java b/java/vector/src/main/codegen/templates/HolderReaderImpl.java index 8394aaad41756..1151ea5d39dda 100644 --- a/java/vector/src/main/codegen/templates/HolderReaderImpl.java +++ b/java/vector/src/main/codegen/templates/HolderReaderImpl.java @@ -109,9 +109,9 @@ public void read(Nullable${name}Holder h) { byte[] value = new byte [length]; holder.buffer.getBytes(holder.start, value, 0, length); - <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary"> + <#if minor.class == "VarBinary" || minor.class == "LargeVarBinary" || 
minor.class == "ViewVarBinary"> return value; - <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar"> + <#elseif minor.class == "VarChar" || minor.class == "LargeVarChar" || minor.class == "ViewVarChar"> Text text = new Text(); text.set(value); return text; diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 822d4822987fb..956bc91e9185c 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 46; + private static final int NUM_SUPPORTED_TYPES = 48; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/main/codegen/templates/ValueHolders.java b/java/vector/src/main/codegen/templates/ValueHolders.java index 973efd870a662..2a2bbe81b2e74 100644 --- a/java/vector/src/main/codegen/templates/ValueHolders.java +++ b/java/vector/src/main/codegen/templates/ValueHolders.java @@ -27,7 +27,6 @@ package org.apache.arrow.vector.holders; <#include "/@includes/vv_imports.ftl" /> - /** * Source code generated using FreeMarker template ${.template_name} */ @@ -40,11 +39,12 @@ public final class ${className} implements ValueHolder{ /** The last index (exclusive) into the Vector. **/ public int end; - + /** The Vector holding the actual values. **/ public ${minor.class}Vector vector; - + <#else> + public static final int WIDTH = ${type.width}; <#if mode.name == "Optional">public int isSet; @@ -70,10 +70,6 @@ public String toString(){ throw new UnsupportedOperationException(); } - - - - } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java index b41dbb245e8a2..5f9decbae4eac 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/AddOrGetResult.java @@ -20,7 +20,7 @@ import org.apache.arrow.util.Preconditions; /** - * Tuple class containing a vector and whether is was created. + * Tuple class containing a vector and whether it was created. * * @param The type of vector the result is for. */ diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java index 2ef6e4bd8b374..6365493051b92 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseLargeVariableWidthVector.java @@ -42,8 +42,7 @@ /** * BaseLargeVariableWidthVector is a base class providing functionality for large strings/large bytes types. 
*/ -public abstract class BaseLargeVariableWidthVector extends BaseValueVector - implements VariableWidthVector, FieldVector, VectorDefinitionSetter { +public abstract class BaseLargeVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector { private static final int DEFAULT_RECORD_BYTE_COUNT = 12; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; private int lastValueCapacity; @@ -942,6 +941,7 @@ public void setValueCount(int valueCount) { * * @param index target index */ + @Override public void fillEmpties(int index) { handleSafe(index, emptyByteArray.length); fillHoles(index); @@ -955,6 +955,7 @@ public void fillEmpties(int index) { * * @param value desired index of last non-null element. */ + @Override public void setLastSet(int value) { lastSet = value; } @@ -964,6 +965,7 @@ public void setLastSet(int value) { * * @return index of the last non-null element */ + @Override public int getLastSet() { return lastSet; } @@ -1003,6 +1005,7 @@ public void setValueLengthSafe(int index, int length) { * @param index position of element to get * @return greater than 0 length for non-null element, 0 otherwise */ + @Override public int getValueLength(int index) { assert index >= 0; if (isSet(index) == 0) { @@ -1021,6 +1024,7 @@ public int getValueLength(int index) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void set(int index, byte[] value) { assert index >= 0; fillHoles(index); @@ -1037,6 +1041,7 @@ public void set(int index, byte[] value) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void setSafe(int index, byte[] value) { assert index >= 0; handleSafe(index, value.length); @@ -1055,6 +1060,7 @@ public void setSafe(int index, byte[] value) { * @param start start index in array of bytes * @param length length of data in array of bytes */ + @Override public void set(int index, byte[] value, int start, int length) { assert index >= 0; fillHoles(index); @@ -1091,6 +1097,7 @@ public void setSafe(int index, byte[] value, int start, int length) { * @param start start index in ByteBuffer * @param length length of data in ByteBuffer */ + @Override public void set(int index, ByteBuffer value, int start, int length) { assert index >= 0; fillHoles(index); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java index d533629cdd44e..0412b9600b773 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthVector.java @@ -42,8 +42,7 @@ /** * BaseVariableWidthVector is a base class providing functionality for strings/bytes types. 
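Both base classes now implement the new VariableWidthFieldVector interface, so helper code can target the shared abstraction instead of a concrete class. A minimal sketch, using only methods the diff marks with @Override:

    // Fill any variable width vector (VarChar, LargeVarChar, or the new view vectors).
    static void fill(VariableWidthFieldVector vector, byte[][] rows) {
      for (int i = 0; i < rows.length; i++) {
        vector.setSafe(i, rows[i]);
      }
      vector.setValueCount(rows.length);
    }
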
*/ -public abstract class BaseVariableWidthVector extends BaseValueVector - implements VariableWidthVector, FieldVector, VectorDefinitionSetter { +public abstract class BaseVariableWidthVector extends BaseValueVector implements VariableWidthFieldVector { private static final int DEFAULT_RECORD_BYTE_COUNT = 8; private static final int INITIAL_BYTE_COUNT = INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT; private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); @@ -993,6 +992,7 @@ public void setValueCount(int valueCount) { * * @param index target index */ + @Override public void fillEmpties(int index) { handleSafe(index, emptyByteArray.length); fillHoles(index); @@ -1006,6 +1006,7 @@ public void fillEmpties(int index) { * * @param value desired index of last non-null element. */ + @Override public void setLastSet(int value) { lastSet = value; } @@ -1015,6 +1016,7 @@ public void setLastSet(int value) { * * @return index of the last non-null element */ + @Override public int getLastSet() { return lastSet; } @@ -1050,6 +1052,7 @@ public void setIndexDefined(int index) { * @param index position of the element to set * @param length length of the element */ + @Override public void setValueLengthSafe(int index, int length) { assert index >= 0; handleSafe(index, length); @@ -1065,6 +1068,7 @@ public void setValueLengthSafe(int index, int length) { * @param index position of element to get * @return greater than 0 length for non-null element, 0 otherwise */ + @Override public int getValueLength(int index) { assert index >= 0; if (isSet(index) == 0) { @@ -1083,6 +1087,7 @@ public int getValueLength(int index) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void set(int index, byte[] value) { assert index >= 0; fillHoles(index); @@ -1099,6 +1104,7 @@ public void set(int index, byte[] value) { * @param index position of the element to set * @param value array of bytes to write */ + @Override public void setSafe(int index, byte[] value) { assert index >= 0; handleSafe(index, value.length); @@ -1153,6 +1159,7 @@ public void setSafe(int index, byte[] value, int start, int length) { * @param start start index in ByteBuffer * @param length length of data in ByteBuffer */ + @Override public void set(int index, ByteBuffer value, int start, int length) { assert index >= 0; fillHoles(index); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java new file mode 100644 index 0000000000000..2f80775a48f58 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -0,0 +1,1451 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.vector.util.DataSizeRoundingUtil.roundUpToMultipleOf16; + +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.ReusableBuffer; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * BaseVariableWidthViewVector is a base class providing functionality for strings/bytes types in view format. + * + */ +public abstract class BaseVariableWidthViewVector extends BaseValueVector implements VariableWidthFieldVector { + // A single element of a view comprises 16 bytes + protected static final int ELEMENT_SIZE = 16; + public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096; + private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE; + private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); + private int lastValueCapacity; + private long lastValueAllocationSizeInBytes; + + /* + * Variable Width View Vector comprises the following format + * + * Short strings, length <= 12 + * | Bytes 0-3 | Bytes 4-15 | + * |------------|---------------------------------------| + * | length | data (padded with 0) | + * |------------|---------------------------------------| + * + * Long strings, length > 12 + * | Bytes 0-3 | Bytes 4-7 | Bytes 8-11 | Bytes 12-15 | + * |------------|------------|------------|-------------| + * | length | prefix | buf.index | offset | + * |------------|------------|------------|-------------| + * + * */ + // 12 byte unsigned int to track inline views + protected static final int INLINE_SIZE = 12; + // The first 4 bytes of view are allocated for length + protected static final int LENGTH_WIDTH = 4; + // The second 4 bytes of view are allocated for prefix width + protected static final int PREFIX_WIDTH = 4; + // The third 4 bytes of view are allocated for buffer index + protected static final int BUF_INDEX_WIDTH = 4; + protected static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; + protected ArrowBuf validityBuffer; + // The view buffer is used to store the variable width view elements + protected ArrowBuf viewBuffer; + // The external buffer which stores the long strings + protected List dataBuffers; + protected int initialDataBufferSize; + protected int valueCount; + protected int lastSet; + protected final Field field; + + + /** + * Constructs a new instance. 
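A minimal usage sketch of the layout above, assuming ViewVarCharVector offers the usual (name, allocator) convenience constructor like the other vector classes, with the standard RootAllocator and StandardCharsets imports:

    try (BufferAllocator allocator = new RootAllocator();
         ViewVarCharVector vector = new ViewVarCharVector("v", allocator)) {
      vector.allocateNew(2);
      // 4 bytes: fits in the 16-byte view, stored inline after the length word
      vector.setSafe(0, "tiny".getBytes(StandardCharsets.UTF_8));
      // 20 bytes: spills to a data buffer; the view keeps length, 4-byte prefix, buffer index, offset
      vector.setSafe(1, "longer than 12 bytes".getBytes(StandardCharsets.UTF_8));
      vector.setValueCount(2);
    }
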
+ * + * @param field The field materialized by this vector + * @param allocator The allocator to use for creating/resizing buffers + */ + public BaseVariableWidthViewVector(Field field, final BufferAllocator allocator) { + super(allocator); + this.field = field; + lastValueAllocationSizeInBytes = INITIAL_BYTE_COUNT; + lastValueCapacity = INITIAL_VIEW_VALUE_ALLOCATION; + valueCount = 0; + lastSet = -1; + validityBuffer = allocator.getEmpty(); + viewBuffer = allocator.getEmpty(); + dataBuffers = new ArrayList<>(); + } + + @Override + public String getName() { + return field.getName(); + } + + /* TODO: + * see if getNullCount() can be made faster -- O(1) + */ + + /* TODO: + * Once the entire hierarchy has been refactored, move common functions + * like getNullCount(), splitAndTransferValidityBuffer to top level + * base class BaseValueVector. + * + * Along with this, some class members (validityBuffer) can also be + * abstracted out to top level base class. + * + * Right now BaseValueVector is the top level base class for other + * vector types in ValueVector hierarchy (non-nullable) and those + * vectors have not yet been refactored/removed so moving things to + * the top class as of now is not a good idea. + */ + + /* TODO: + * Implement TransferPair functionality + * https://github.com/apache/arrow/issues/40932 + * + */ + + /** + * Get buffer that manages the validity (NULL or NON-NULL nature) of + * elements in the vector. Consider it as a buffer for internal bit vector + * data structure. + * + * @return buffer + */ + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + /** + * Get the buffer that stores the data for elements in the vector. + * + * @return buffer + */ + @Override + public ArrowBuf getDataBuffer() { + return viewBuffer; + } + + /** + * BaseVariableWidthViewVector doesn't support offset buffer. + * + * @return throws UnsupportedOperationException + */ + @Override + public ArrowBuf getOffsetBuffer() { + throw new UnsupportedOperationException("Offset buffer is not supported in BaseVariableWidthViewVector"); + } + + /** + * BaseVariableWidthViewVector doesn't support offset buffer. + * + * @return throws UnsupportedOperationException + */ + @Override + public long getOffsetBufferAddress() { + throw new UnsupportedOperationException("Offset buffer is not supported in BaseVariableWidthViewVector"); + } + + /** + * Get the memory address of buffer that manages the validity + * (NULL or NON-NULL nature) of elements in the vector. + * + * @return starting address of the buffer + */ + @Override + public long getValidityBufferAddress() { + return validityBuffer.memoryAddress(); + } + + /** + * Get the memory address of buffer that stores the data for elements + * in the vector. + * + * @return starting address of the buffer + */ + @Override + public long getDataBufferAddress() { + return viewBuffer.memoryAddress(); + } + + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector. + * + * @param valueCount desired number of elements in the vector + */ + @Override + public void setInitialCapacity(int valueCount) { + final long size = (long) valueCount * ELEMENT_SIZE; + checkDataBufferSize(size); + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; + } + + /** + * Sets the desired value capacity for the vector. This function doesn't + * allocate any memory for the vector. 
+ * + * @param valueCount desired number of elements in the vector + * @param density average number of bytes per variable width view element + */ + @Override + public void setInitialCapacity(int valueCount, double density) { + final long size = (long) valueCount * ELEMENT_SIZE; + initialDataBufferSize = (int) (valueCount * density); + checkDataBufferSize(size); + lastValueAllocationSizeInBytes = (int) size; + lastValueCapacity = valueCount; + } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getTotalValueLengthUpToIndex(valueCount); + return totalListSize / valueCount; + } + + /** + * Get the current capacity which does not exceed either validity buffer or value buffer. + * Note: Here the `getValueCapacity` has a relationship with the value buffer. + * + * @return number of elements that vector can hold. + */ + @Override + public int getValueCapacity() { + final int validityCapacity = getValidityBufferValueCapacity(); + final int valueBufferCapacity = Math.max(capAtMaxInt(viewBuffer.capacity() / ELEMENT_SIZE), 0); + return Math.min(valueBufferCapacity, validityCapacity); + } + + private int getValidityBufferValueCapacity() { + return capAtMaxInt(validityBuffer.capacity() * 8); + } + + /** + * zero out the vector and the data in associated buffers. + */ + public void zeroVector() { + initValidityBuffer(); + viewBuffer.setZero(0, viewBuffer.capacity()); + clearDataBuffers(); + } + + /* zero out the validity buffer */ + private void initValidityBuffer() { + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + /** + * Reset the vector to initial state. + * Note that this method doesn't release any memory. + */ + @Override + public void reset() { + zeroVector(); + lastSet = -1; + valueCount = 0; + } + + /** + * Close the vector and release the associated buffers. + */ + @Override + public void close() { + clear(); + } + + /** + * Same as {@link #close()}. + */ + @Override + public void clear() { + validityBuffer = releaseBuffer(validityBuffer); + viewBuffer = releaseBuffer(viewBuffer); + clearDataBuffers(); + lastSet = -1; + valueCount = 0; + } + + /** + * Release the data buffers and clear the list. + */ + public void clearDataBuffers() { + for (ArrowBuf buffer : dataBuffers) { + releaseBuffer(buffer); + } + dataBuffers.clear(); + } + + /** + * Get the inner vectors. + * + * @deprecated This API will be removed as the current implementations no longer support inner vectors. + * + * @return the inner vectors for this field as defined by the TypeLayout + */ + @Deprecated + @Override + public List getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. Use getFieldBuffers"); + } + + /** + * Initialize the children in schema for this Field. This operation is a + * NO-OP for scalar types since they don't have any children. + * @param children the schema + * @throws IllegalArgumentException if children is a non-empty list for scalar types. + */ + @Override + public void initializeChildrenFromFields(List children) { + if (!children.isEmpty()) { + throw new IllegalArgumentException("primitive type vector cannot have children"); + } + } + + /** + * Get the inner child vectors. 
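The density overload above can pre-size both the view buffer and the data buffers; a short sketch, reusing the vector from the earlier example:

    // Reserve room for ~1,000 views whose payloads average 20 bytes each.
    vector.setInitialCapacity(1000, 20.0);
    vector.allocateNew();                          // memory is only allocated here
    double bytesPerElement = vector.getDensity();  // observed average once populated
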
+ * @return list of child vectors for complex types, empty list for scalar vector types + */ + @Override + public List getChildrenFromFields() { + return Collections.emptyList(); + } + + + /** + * Load the buffers of this vector with provided source buffers. + * The caller manages the source buffers and populates them before invoking + * this method. + * @param fieldNode the fieldNode indicating the value count + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + // TODO: https://github.com/apache/arrow/issues/40931 + throw new UnsupportedOperationException("loadFieldBuffers is not supported for BaseVariableWidthViewVector"); + } + + /** + * Get the buffers belonging to this vector. + * @return the inner buffers. + */ + @Override + public List getFieldBuffers() { + List result = new ArrayList<>(2 + dataBuffers.size()); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(viewBuffer); + // append data buffers + result.addAll(dataBuffers); + + return result; + } + + /** + * Set the reader and writer indexes for the inner buffers. + */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + viewBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + viewBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + viewBuffer.writerIndex(valueCount * ELEMENT_SIZE); + } + } + + /** + * Same as {@link #allocateNewSafe()}. + */ + @Override + public void allocateNew() { + allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); + } + + /** + * Allocate memory for the vector. We internally use a default value count + * of 4096 to allocate memory for at least these many elements in the + * vector. See {@link #allocateNew(long, int)} for allocating memory for specific + * number of elements in the vector. + * + * @return false if memory allocation fails, true otherwise. + */ + @Override + public boolean allocateNewSafe() { + try { + allocateNew(lastValueAllocationSizeInBytes, lastValueCapacity); + return true; + } catch (Exception e) { + return false; + } + } + + /** + * Allocate memory for the vector to support storing at least the provided number of + * elements in the vector. This method must be called prior to using the ValueVector. + * + * @param totalBytes desired total memory capacity + * @param valueCount the desired number of elements in the vector + * @throws OutOfMemoryException if memory allocation fails + */ + @Override + public void allocateNew(long totalBytes, int valueCount) { + assert totalBytes >= 0; + + checkDataBufferSize(totalBytes); + + /* we are doing a new allocation -- release the current buffers */ + clear(); + + try { + allocateBytes(totalBytes, valueCount); + } catch (Exception e) { + clear(); + throw e; + } + } + + @Override + public void allocateNew(int valueCount) { + allocateNew(lastValueAllocationSizeInBytes, valueCount); + } + + /* Check if the data buffer size is within bounds. */ + private void checkDataBufferSize(long size) { + if (size > MAX_BUFFER_SIZE || size < 0) { + throw new OversizedAllocationException("Memory required for vector " + + "is (" + size + "), which is overflow or more than max allowed (" + MAX_BUFFER_SIZE + "). 
" + + "You could consider using LargeVarCharVector/LargeVarBinaryVector for large strings/large bytes types"); + } + } + + /* allocate the inner buffers */ + private void allocateBytes(final long valueBufferSize, final int valueCount) { + /* allocate data buffer */ + viewBuffer = allocator.buffer(valueBufferSize); + viewBuffer.readerIndex(0); + + validityBuffer = allocator.buffer((valueCount + 7) / 8); + initValidityBuffer(); + + lastValueCapacity = getValueCapacity(); + lastValueAllocationSizeInBytes = capAtMaxInt(viewBuffer.capacity()); + } + + /** + * Resize the vector to increase the capacity. The internal behavior is to + * double the current value capacity. + */ + @Override + public void reAlloc() { + reallocViewBuffer(); + reallocViewDataBuffer(); + reallocValidityBuffer(); + } + + /** + * Reallocate the view buffer. View Buffer stores the views for + * VIEWVARCHAR or VIEWVARBINARY elements in the vector. The behavior is to double + * the size of buffer. + * @throws OversizedAllocationException if the desired new size is more than + * max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocViewBuffer() { + long currentViewBufferCapacity = viewBuffer.capacity(); + + long newAllocationSize = currentViewBufferCapacity * 2; + if (newAllocationSize == 0) { + if (lastValueAllocationSizeInBytes > 0) { + newAllocationSize = lastValueAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_BYTE_COUNT * 2L; + } + } + + reallocViewBuffer(newAllocationSize); + } + + /** + * Reallocate the data buffer associated with view buffer. + */ + public void reallocViewDataBuffer() { + long currentDataBufferCapacity = 0; + if (!dataBuffers.isEmpty()) { + currentDataBufferCapacity = dataBuffers.get(dataBuffers.size() - 1).capacity(); + } + + long newAllocationSize = currentDataBufferCapacity * 2; + if (newAllocationSize == 0) { + if (lastValueAllocationSizeInBytes > 0) { + newAllocationSize = lastValueAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_BYTE_COUNT * 2L; + } + } + + reallocViewDataBuffer(newAllocationSize); + } + + /** + * Reallocate the view buffer to given size. View Buffer stores the views for + * VIEWVARCHAR or VIEWVARBINARY elements in the vector. The actual allocated size may be larger + * than the request one because it will round up the provided value to the nearest + * power of two. + * + * @param desiredAllocSize the desired new allocation size + * @throws OversizedAllocationException if the desired new size is more than + * max allowed + * @throws OutOfMemoryException if the internal memory allocation fails + */ + public void reallocViewBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + assert newAllocationSize >= 1; + + checkDataBufferSize(newAllocationSize); + // for each set operation, we have to allocate 16 bytes + // here we are adjusting the desired allocation-based allocation size + // to align with the 16bytes requirement. + newAllocationSize = roundUpToMultipleOf16(newAllocationSize); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, viewBuffer, 0, viewBuffer.capacity()); + + viewBuffer.getReferenceManager().release(); + viewBuffer = newBuf; + lastValueAllocationSizeInBytes = viewBuffer.capacity(); + } + + /** + * Reallocate the data buffer for views. 
+ * + * @param desiredAllocSize allocation size in bytes + */ + public void reallocViewDataBuffer(long desiredAllocSize) { + if (desiredAllocSize == 0) { + return; + } + + if (dataBuffers.isEmpty()) { + return; + } + + ArrowBuf currentBuf = dataBuffers.get(dataBuffers.size() - 1); + if (currentBuf.capacity() - currentBuf.writerIndex() >= desiredAllocSize) { + return; + } + + final long newAllocationSize = CommonUtil.nextPowerOfTwo(desiredAllocSize); + assert newAllocationSize >= 1; + + checkDataBufferSize(newAllocationSize); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + dataBuffers.add(newBuf); + } + + /** + * Reallocate Validity buffer. + */ + public void reallocValidityBuffer() { + int targetValidityCount = capAtMaxInt((validityBuffer.capacity() * 8) * 2); + if (targetValidityCount == 0) { + if (lastValueCapacity > 0) { + targetValidityCount = lastValueCapacity; + } else { + targetValidityCount = 2 * INITIAL_VALUE_ALLOCATION; + } + } + + long validityBufferSize = computeValidityBufferSize(targetValidityCount); + + final ArrowBuf newValidityBuffer = allocator.buffer(validityBufferSize); + newValidityBuffer.setBytes(0, validityBuffer, 0, validityBuffer.capacity()); + newValidityBuffer.setZero(validityBuffer.capacity(), newValidityBuffer.capacity() - validityBuffer.capacity()); + validityBuffer.getReferenceManager().release(); + validityBuffer = newValidityBuffer; + + lastValueCapacity = getValueCapacity(); + } + + private long computeValidityBufferSize(int valueCount) { + return (valueCount + 7) / 8; + } + + /** + * Get the size (number of bytes) of underlying view buffer. + * @return number of bytes in the view buffer + */ + @Override + public int getByteCapacity() { + return capAtMaxInt(viewBuffer.capacity()); + } + + @Override + public int sizeOfValueBuffer() { + throw new UnsupportedOperationException("sizeOfValueBuffer is not supported for BaseVariableWidthViewVector"); + } + + /** + * Get the size (number of bytes) of underlying elements in the view buffer. + * @return number of bytes used by data in the view buffer + */ + public int sizeOfViewBufferElements() { + if (valueCount == 0) { + return 0; + } + int totalSize = 0; + for (int i = 0; i < valueCount; i++) { + totalSize += getValueLength(i); + } + return totalSize; + } + + /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. + * @return size of underlying buffers. + */ + @Override + public int getBufferSize() { + return getBufferSizeFor(this.valueCount); + } + + /** + * Get the potential buffer size for a particular number of records. + * @param valueCount desired number of elements in the vector + * @return estimated size of underlying buffers if the vector holds + * a given number of elements + */ + @Override + public int getBufferSizeFor(final int valueCount) { + if (valueCount == 0) { + return 0; + } + + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + final int viewBufferSize = valueCount * ELEMENT_SIZE; + final int dataBufferSize = getDataBufferSize(); + return validityBufferSize + viewBufferSize + dataBufferSize; + } + + private int getDataBufferSize() { + int dataBufferSize = 0; + for (ArrowBuf buf : dataBuffers) { + dataBufferSize += (int) buf.writerIndex(); + } + return dataBufferSize; + } + + /** + * Get information about how this field is materialized. + * @return the field corresponding to this vector + */ + @Override + public Field getField() { + return field; + } + + /** + * Return the underlying buffers associated with this vector. 
Note that this doesn't + * impact the reference counts for this buffer, so it should only be used for in-context + * access. Also note that this buffer changes regularly, thus + * external classes shouldn't hold a reference to it (unless they change it). + * <p> + * Note: This method only returns the validityBuffer and the viewBuffer. + * It doesn't return the data buffers. + * <p>
+ * TODO: Implement a strategy to retrieve the data buffers. + * + * @param clear Whether to clear the vector before returning; the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that are used by this + * vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + final ArrowBuf[] buffers; + setReaderAndWriterIndex(); + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + buffers = new ArrowBuf[2]; + buffers[0] = validityBuffer; + buffers[1] = viewBuffer; + } + if (clear) { + for (final ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; + } + + /** + * Validate the scalar values held by this vector. + */ + public void validateScalars() { + // No validation by default. + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(field, allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @param callBack not used + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + return getTransferPair(ref, allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public TransferPair getTransferPair(BufferAllocator allocator) { + return getTransferPair(getName(), allocator); + } + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param ref name of the target vector + * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public abstract TransferPair getTransferPair(String ref, BufferAllocator allocator); + + /** + * Construct a transfer pair of this vector and another vector of the same type. + * @param field The field materialized by this vector. + * @param allocator allocator for the target vector + * @return TransferPair + */ + @Override + public abstract TransferPair getTransferPair(Field field, BufferAllocator allocator); + + /** + * Transfer this vector's data to another vector. + * The memory associated with this vector is transferred to the allocator of the target vector + * for accounting and management purposes. + * @param target destination vector for transfer + */ + public void transferTo(BaseVariableWidthViewVector target) { + throw new UnsupportedOperationException("transferTo function not supported!"); + } + + /** + * Slice this vector at the desired index and length and transfer the + * corresponding data to the target vector. + * @param startIndex start position of the split in source vector. + * @param length length of the split. 
+ * @param target destination vector + */ + public void splitAndTransferTo(int startIndex, int length, + BaseVariableWidthViewVector target) { + throw new UnsupportedOperationException("splitAndTransferTo function not supported!"); + } + + /*----------------------------------------------------------------* + | | + | common getters and setters | + | | + *----------------------------------------------------------------*/ + + + /** + * Get the number of elements that are null in the vector. + * + * @return the number of null elements. + */ + @Override + public int getNullCount() { + return BitVectorHelper.getNullCount(validityBuffer, valueCount); + } + + /** + * Check if the given index is within the current value capacity + * of the vector. + * + * @param index position to check + * @return true if the index is within the current value capacity + */ + public boolean isSafe(int index) { + return index < getValueCapacity(); + } + + /** + * Check if an element at given index is null. + * + * @param index position of an element + * @return true if an element at given index is null + */ + @Override + public boolean isNull(int index) { + return (isSet(index) == 0); + } + + /** + * Same as {@link #isNull(int)}. + * + * @param index position of an element + * @return 1 if element at given index is not null, 0 otherwise + */ + public int isSet(int index) { + final int byteIndex = index >> 3; + final byte b = validityBuffer.getByte(byteIndex); + final int bitIndex = index & 7; + return (b >> bitIndex) & 0x01; + } + + /** + * Get the value count of vector. This will always be zero unless + * setValueCount(int) has been called prior to calling this. + * + * @return valueCount for the vector + */ + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Sets the value count for the vector. + * + * @param valueCount value count + */ + @Override + public void setValueCount(int valueCount) { + assert valueCount >= 0; + this.valueCount = valueCount; + while (valueCount > getValueCapacity()) { + reallocViewBuffer(); + reallocValidityBuffer(); + } + lastSet = valueCount - 1; + setReaderAndWriterIndex(); + } + + /** + * Create holes in the vector upto the given index (exclusive). + * Holes will be created from the current last-set position in + * the vector. + * + * @param index target index + */ + @Override + public void fillEmpties(int index) { + handleSafe(index, EMPTY_BYTE_ARRAY.length); + lastSet = index - 1; + } + + /** + * Set the index of the last non-null element in the vector. + * It is important to call this method with appropriate value + * before calling {@link #setValueCount(int)}. + * + * @param value desired index of last non-null element. + */ + @Override + public void setLastSet(int value) { + lastSet = value; + } + + /** + * Get the index of the last non-null element in the vector. + * + * @return index of the last non-null element + */ + @Override + public int getLastSet() { + return lastSet; + } + + /** + * Mark the particular position in the vector as non-null. + * + * @param index position of the element. + */ + @Override + public void setIndexDefined(int index) { + // We need to check and reallocate the validity buffer + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + BitVectorHelper.setBit(validityBuffer, index); + } + + /** + * Sets the value length for an element. 
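The lastSet bookkeeping above supports the usual sparse-write pattern; a minimal sketch:

    vector.setSafe(0, "a".getBytes(StandardCharsets.UTF_8));
    vector.setSafe(3, "b".getBytes(StandardCharsets.UTF_8));  // slots 1 and 2 were skipped
    vector.setValueCount(4);  // the skipped slots keep a 0 validity bit and read back as null
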
+ * + * @param index position of the element to set + * @param length length of the element + */ + @Override + public void setValueLengthSafe(int index, int length) { + assert index >= 0; + handleSafe(index, length); + lastSet = index; + } + + /** + * Get the length of the variable width element at the specified index. + * + * @param index position of an element to get + * @return length greater than 0 for a non-null element, 0 otherwise + */ + @Override + public int getValueLength(int index) { + assert index >= 0; + if (index < 0 || index >= viewBuffer.capacity() / ELEMENT_SIZE) { + throw new IndexOutOfBoundsException("Index out of bounds: " + index); + } + if (isSet(index) == 0) { + return 0; + } + return viewBuffer.getInt(((long) index * ELEMENT_SIZE)); + } + + /** + * Set the variable length element at the specified index to the supplied + * byte array. This is the same as using {@link #set(int, byte[], int, int)} + * with start as zero and length as value.length. + * + * @param index position of the element to set + * @param value array of bytes to write + */ + public void set(int index, byte[] value) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, 0, value.length); + lastSet = index; + } + + /** + * Same as {@link #set(int, byte[])} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param value array of bytes to write + */ + @Override + public void setSafe(int index, byte[] value) { + assert index >= 0; + // check if the current index can be populated + handleSafe(index, value.length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, 0, value.length); + lastSet = index; + } + + /** + * Set the variable length element at the specified index to the supplied + * byte array. + * + * @param index position of the element to set + * @param value array of bytes to write + * @param start start index in an array of bytes + * @param length length of data in an array of bytes + */ + public void set(int index, byte[] value, int start, int length) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, byte[], int, int)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param value array of bytes to write + * @param start start index in an array of bytes + * @param length length of data in an array of bytes + */ + public void setSafe(int index, byte[] value, int start, int length) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value, start, length); + lastSet = index; + } + + /** + * Set the variable length element at the specified index to the + * content in the supplied ByteBuffer. 
+ * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start index in ByteBuffer + * @param length length of data in ByteBuffer + */ + public void set(int index, ByteBuffer value, int start, int length) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value.array(), start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, ByteBuffer, int, int)} except that it handles the + * case where index and length of a new element are beyond the existing + * capacity of the vector. + * + * @param index position of the element to set + * @param value ByteBuffer with data + * @param start start index in ByteBuffer + * @param length length of data in ByteBuffer + */ + public void setSafe(int index, ByteBuffer value, int start, int length) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, value.array(), start, length); + lastSet = index; + } + + /** + * Set the element at the given index to null. + * + * @param index position of an element + */ + @Override + public void setNull(int index) { + // We need to check and reallocate the validity buffer + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + BitVectorHelper.unsetBit(validityBuffer, index); + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. + * @param index position of the new value + * @param isSet Zero for NULL value, 1 otherwise + * @param start start position of data in buffer + * @param end end position of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void set(int index, int isSet, int start, int end, ArrowBuf buffer) { + assert index >= 0; + final int dataLength = end - start; + BitVectorHelper.setValidityBit(validityBuffer, index, isSet); + setBytes(index, buffer, start, dataLength); + lastSet = index; + } + + /** + * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * @param index position of the new value + * @param isSet Zero for NULL value, 1 otherwise + * @param start start position of data in buffer + * @param end end position of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void setSafe(int index, int isSet, int start, int end, ArrowBuf buffer) { + assert index >= 0; + final int dataLength = end - start; + handleSafe(index, dataLength); + BitVectorHelper.setValidityBit(validityBuffer, index, isSet); + setBytes(index, buffer, start, dataLength); + lastSet = index; + } + + /** + * Store the given value at a particular position in the vector. isSet indicates + * whether the value is NULL or not. 
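A sketch of the ArrowBuf-based setter described above, reusing the allocator and vector names from the earlier sketches:

    byte[] payload = "payload".getBytes(StandardCharsets.UTF_8);
    try (ArrowBuf source = allocator.buffer(payload.length)) {
      source.setBytes(0, payload);
      // isSet = 1 marks the slot non-null; start/end delimit the bytes inside `source`
      vector.setSafe(0, 1, 0, payload.length, source);
      vector.setValueCount(1);
    }
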
+ * @param index position of the new value + * @param start start position of data in buffer + * @param length length of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void set(int index, int start, int length, ArrowBuf buffer) { + assert index >= 0; + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, buffer, start, length); + lastSet = index; + } + + /** + * Same as {@link #set(int, int, int, int, ArrowBuf)} except that it handles the case + * when index is greater than or equal to current value capacity of the + * vector. + * @param index position of the new value + * @param start start position of data in buffer + * @param length length of data in buffer + * @param buffer data buffer containing the variable width element to be stored + * in the vector + */ + public void setSafe(int index, int start, int length, ArrowBuf buffer) { + assert index >= 0; + handleSafe(index, length); + BitVectorHelper.setBit(validityBuffer, index); + setBytes(index, buffer, start, length); + lastSet = index; + } + + + /*----------------------------------------------------------------* + | | + | helper methods for setters | + | | + *----------------------------------------------------------------*/ + + + protected ArrowBuf allocateOrGetLastDataBuffer(int length) { + long dataBufferSize; + if (initialDataBufferSize > 0) { + dataBufferSize = Math.max(initialDataBufferSize, length); + } else { + dataBufferSize = Math.max(lastValueAllocationSizeInBytes, length); + } + + if (dataBuffers.isEmpty() || dataBuffers.get(dataBuffers.size() - 1).capacity() - + dataBuffers.get(dataBuffers.size() - 1).writerIndex() < length) { + ArrowBuf newBuf = allocator.buffer(dataBufferSize); + dataBuffers.add(newBuf); + } + + return dataBuffers.get(dataBuffers.size() - 1); + } + + /** + * This method is used to create a view buffer for a variable width vector. + * It handles both inline and data buffers. + *
<p>
+ * If the length of the value is less than or equal to {@link #INLINE_SIZE}, the value is stored in the valueBuffer + * directly as an inline buffer. + * The valueBuffer stores the length of the value followed by the value itself. + * If the length of the value is greater than {@link #INLINE_SIZE}, a new buffer is allocated and added to dataBuffers + * to hold the value. + * The viewBuffer in this case stores the length of the value, a prefix of the value, the index of the + * new buffer in dataBuffers, and the offset of the value in the new buffer. + * + * @param index The index at which the new value will be inserted. + * @param value The byte array that contains the data to be inserted. + * @param start The start index in the byte array from where the data for the new value begins. + * @param length The length of the data in the byte array that belongs to the new value. + */ + protected final void setBytes(int index, byte[] value, int start, int length) { + int writePosition = index * ELEMENT_SIZE; + + // to clear the memory segment of view being written to + // this is helpful in case of overwriting the value + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + + if (value.length <= INLINE_SIZE) { + // allocate inline buffer + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set data + viewBuffer.setBytes(writePosition, value, start, length); + } else { + // allocate data buffer + ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); + + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set prefix + viewBuffer.setBytes(writePosition, value, start, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + viewBuffer.setInt(writePosition, dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); + + currentBuf.setBytes(currentBuf.writerIndex(), value, start, length); + currentBuf.writerIndex(currentBuf.writerIndex() + length); + } + } + + /** + * This method is used to create a view buffer for a variable width vector. + * Similar to {@link #setBytes(int index, byte[] value, int start, int length)} + * + * @param index The index at which the new value will be inserted. + * @param valueBuf The byte array that contains the data to be inserted. + * @param start The start index in the byte array from where the data for the new value begins. + * @param length The length of the data in the byte array that belongs to the new value. 
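To make the view encoding concrete, the four 4-byte words of a long-string view can be decoded by hand with the constants defined in this class:

    long base = (long) index * ELEMENT_SIZE;               // 16 bytes per view
    int length = viewBuffer.getInt(base);                  // bytes 0-3: value length
    int prefix = viewBuffer.getInt(base + LENGTH_WIDTH);   // bytes 4-7: first 4 data bytes
    int bufIndex = viewBuffer.getInt(base + LENGTH_WIDTH + PREFIX_WIDTH);  // bytes 8-11
    int offset = viewBuffer.getInt(
        base + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);             // bytes 12-15
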
+ */ + protected final void setBytes(int index, ArrowBuf valueBuf, int start, int length) { + int writePosition = index * ELEMENT_SIZE; + + // to clear the memory segment of view being written to + // this is helpful in case of overwriting the value + viewBuffer.setZero(writePosition, ELEMENT_SIZE); + + if (length <= INLINE_SIZE) { + // allocate inline buffer + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set data + viewBuffer.setBytes(writePosition, valueBuf, start, length); + } else { + // allocate data buffer + ArrowBuf currentBuf = allocateOrGetLastDataBuffer(length); + + // set length + viewBuffer.setInt(writePosition, length); + writePosition += LENGTH_WIDTH; + // set prefix + viewBuffer.setBytes(writePosition, valueBuf, start, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + viewBuffer.setInt(writePosition, dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + viewBuffer.setInt(writePosition, (int) currentBuf.writerIndex()); + + currentBuf.setBytes(currentBuf.writerIndex(), valueBuf, start, length); + currentBuf.writerIndex(currentBuf.writerIndex() + length); + } + } + + /** + * Get the total length of the elements up to the given index. + * @param index The index of the element in the vector. + * @return The total length up to the element at the given index. + */ + public final int getTotalValueLengthUpToIndex(int index) { + int totalLength = 0; + for (int i = 0; i < index - 1; i++) { + totalLength += getValueLength(i); + } + return totalLength; + } + + protected final void handleSafe(int index, int dataLength) { + final long lastSetCapacity = lastSet < 0 ? 0 : (long) index * ELEMENT_SIZE; + final long targetCapacity = roundUpToMultipleOf16(lastSetCapacity + dataLength); + // for views, we need each buffer with 16 byte alignment, so we need to check the last written index + // in the viewBuffer and allocate a new buffer which has 16 byte alignment for adding new values. + long writePosition = (long) index * ELEMENT_SIZE; + if (viewBuffer.capacity() <= writePosition || viewBuffer.capacity() < targetCapacity) { + /* + * Everytime we want to increase the capacity of the viewBuffer, we need to make sure that the new capacity + * meets 16 byte alignment. + * If the targetCapacity is larger than the writePosition, we may not necessarily + * want to allocate the targetCapacity to viewBuffer since when it is >={@link #INLINE_SIZE} either way + * we are writing to the dataBuffer. + */ + reallocViewBuffer(Math.max(writePosition, targetCapacity)); + } + + while (index >= getValueCapacity()) { + reallocValidityBuffer(); + } + } + + /** + * Copy a cell value from a particular index in source vector to a particular position in this + * vector. + * TODO: Improve functionality to support copying views. + * Enhance CopyFrom + * + * @param fromIndex position to copy from in source vector + * @param thisIndex position to copy to in this vector + * @param from source vector + */ + @Override + public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { + throw new UnsupportedOperationException("copyFrom is not supported for VariableWidthVector"); + } + + /** + * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the + * capacity of the vector needs to be expanded before copy. + * TODO: Improve functionality to support copying views. 
+   *
+   * @param fromIndex position to copy from in source vector
+   * @param thisIndex position to copy to in this vector
+   * @param from source vector
+   */
+  @Override
+  public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) {
+    throw new UnsupportedOperationException("copyFromSafe is not supported for BaseVariableWidthViewVector");
+  }
+
+  @Override
+  public ArrowBufPointer getDataPointer(int index) {
+    return getDataPointer(index, new ArrowBufPointer());
+  }
+
+  @Override
+  public ArrowBufPointer getDataPointer(int index, ArrowBufPointer reuse) {
+    if (isNull(index)) {
+      reuse.set(null, 0, 0);
+    } else {
+      int length = getValueLength(index);
+      if (length <= INLINE_SIZE) {
+        int start = index * ELEMENT_SIZE + LENGTH_WIDTH;
+        reuse.set(viewBuffer, start, length);
+      } else {
+        final int bufIndex =
+            viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
+        final int dataOffset =
+            viewBuffer.getInt(
+                ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
+        ArrowBuf dataBuf = dataBuffers.get(bufIndex);
+        reuse.set(dataBuf, dataOffset, length);
+      }
+    }
+    return reuse;
+  }
+
+  @Override
+  public int hashCode(int index) {
+    return hashCode(index, null);
+  }
+
+  @Override
+  public int hashCode(int index, ArrowBufHasher hasher) {
+    if (isNull(index)) {
+      return ArrowBufPointer.NULL_HASH_CODE;
+    }
+    final int length = getValueLength(index);
+    if (length <= INLINE_SIZE) {
+      int start = index * ELEMENT_SIZE + LENGTH_WIDTH;
+      return ByteFunctionHelpers.hash(hasher, this.getDataBuffer(), start, start + length);
+    } else {
+      final int bufIndex =
+          viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
+      final int dataOffset =
+          viewBuffer.getInt(
+              ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
+      ArrowBuf dataBuf = dataBuffers.get(bufIndex);
+      return ByteFunctionHelpers.hash(hasher, dataBuf, dataOffset, dataOffset + length);
+    }
+  }
+
+  /**
+   * Retrieves the data of a variable-width element at a given index in the vector.
+   *

+   * <p>If the length of the data is greater than {@link #INLINE_SIZE}, the data is stored in a data buffer.
+   * The method retrieves the buffer index and data offset from the viewBuffer, and then retrieves the data from the
+   * corresponding buffer in the dataBuffers list.
+   *
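+   * <p>For illustration only (a sketch of the arithmetic used below, not a normative statement
+   * of the format): with a 16-byte view element, the buffer index of a non-inline value at
+   * {@code index} is read at {@code index * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH}, and the
+   * data offset at {@code index * ELEMENT_SIZE + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH},
+   * mirroring the writes performed by {@link #setBytes(int, byte[], int, int)}.
+   *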

+   * <p>If the length of the data is less than or equal to {@link #INLINE_SIZE}, the data is stored directly in the
+   * viewBuffer.
+   * The method retrieves the data directly from the viewBuffer.
+   *
+   * @param index position of the element in the vector
+   * @return byte array containing the data of the element
+   */
+  protected byte[] getData(int index) {
+    final int dataLength = getValueLength(index);
+    byte[] result = new byte[dataLength];
+    if (dataLength > INLINE_SIZE) {
+      // data is in the data buffer
+      // get buffer index
+      final int bufferIndex =
+          viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
+      // get data offset
+      final int dataOffset =
+          viewBuffer.getInt(
+              ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
+      dataBuffers.get(bufferIndex).getBytes(dataOffset, result, 0, dataLength);
+    } else {
+      // data is inline in the view buffer, right after the length
+      viewBuffer.getBytes(
+          (long) index * ELEMENT_SIZE + LENGTH_WIDTH, result, 0, dataLength);
+    }
+    return result;
+  }
+
+  protected void getData(int index, ReusableBuffer<?> buffer) {
+    final int dataLength = getValueLength(index);
+    if (dataLength > INLINE_SIZE) {
+      // data is in the data buffer
+      // get buffer index
+      final int bufferIndex =
+          viewBuffer.getInt(((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH);
+      // get data offset
+      final int dataOffset =
+          viewBuffer.getInt(
+              ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
+      ArrowBuf dataBuf = dataBuffers.get(bufferIndex);
+      buffer.set(dataBuf, dataOffset, dataLength);
+    } else {
+      // data is inline in the view buffer, right after the length
+      buffer.set(viewBuffer, ((long) index * ELEMENT_SIZE) + LENGTH_WIDTH, dataLength);
+    }
+  }
+
+  @Override
+  public <OUT, IN> OUT accept(VectorVisitor<OUT, IN> visitor, IN value) {
+    return visitor.visit(this, value);
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
index 8560ba3a68b04..25c83260ef3ed 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarBinaryVector.java
@@ -119,6 +119,7 @@ public byte[] get(int index) {
    * @param index position of element.
    * @param buffer the buffer to write into.
    */
+  @Override
   public void read(int index, ReusableBuffer<?> buffer) {
     final long startOffset = getStartOffset(index);
     final long dataLength = getEndOffset(index) - startOffset;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
index df424c87488a0..bc3a1e09aaa79 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/LargeVarCharVector.java
@@ -103,6 +103,7 @@ public Types.MinorType getMinorType() {
    * @param index position of element to get
    * @return array of bytes for non-null element, null otherwise
    */
+  @Override
   public byte[] get(int index) {
     assert index >= 0;
     if (isSet(index) == 0) {
@@ -140,6 +141,7 @@ public Text getObject(int index) {
    * @param index position of element.
    * @param buffer the buffer to write into.
    */
+  @Override
   public void read(int index, ReusableBuffer<?> buffer) {
     final long startOffset = getStartOffset(index);
     final long dataLength = getEndOffset(index) - startOffset;
@@ -298,7 +300,7 @@ public void validateScalars() {
    *----------------------------------------------------------------*/
 
   /**
-   * Construct a TransferPair comprising of this and a target vector of
+   * Construct a TransferPair comprising this and a target vector of
    * the same type.
    *
    * @param ref name of the target vector
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
index ae465418cf2fd..18032528c86d8 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java
@@ -46,6 +46,7 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
 import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8View;
 
 /**
  * The buffer layout of vectors for a given type.
@@ -173,11 +174,23 @@ public TypeLayout visit(Binary type) {
       return newVariableWidthTypeLayout();
     }
 
+    @Override
+    public TypeLayout visit(ArrowType.BinaryView type) {
+      // TODO: https://github.com/apache/arrow/issues/40934
+      throw new UnsupportedOperationException("BinaryView not supported");
+    }
+
     @Override
     public TypeLayout visit(Utf8 type) {
       return newVariableWidthTypeLayout();
     }
 
+    @Override
+    public TypeLayout visit(Utf8View type) {
+      // TODO: https://github.com/apache/arrow/issues/40934
+      throw new UnsupportedOperationException("Utf8View not supported");
+    }
+
     @Override
     public TypeLayout visit(LargeUtf8 type) {
       return newLargeVariableWidthTypeLayout();
@@ -347,11 +360,23 @@ public Integer visit(Binary type) {
       return VARIABLE_WIDTH_BUFFER_COUNT;
     }
 
+    @Override
+    public Integer visit(ArrowType.BinaryView type) {
+      // TODO: https://github.com/apache/arrow/issues/40935
+      return VARIABLE_WIDTH_BUFFER_COUNT;
+    }
+
     @Override
     public Integer visit(Utf8 type) {
       return VARIABLE_WIDTH_BUFFER_COUNT;
     }
 
+    @Override
+    public Integer visit(Utf8View type) {
+      // TODO: https://github.com/apache/arrow/issues/40935
+      return VARIABLE_WIDTH_BUFFER_COUNT;
+    }
+
     @Override
     public Integer visit(LargeUtf8 type) {
       return VARIABLE_WIDTH_BUFFER_COUNT;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
index ab67ebad965aa..82d4feda9a991 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarBinaryVector.java
@@ -120,6 +120,7 @@ public byte[] get(int index) {
    * @param index position of element.
    * @param buffer the buffer to write into.
    */
+  @Override
   public void read(int index, ReusableBuffer<?> buffer) {
     final int startOffset = getStartOffset(index);
     final int dataLength = getEndOffset(index) - startOffset;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
index c6d5a7090bc6f..fde9459e60084 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VarCharVector.java
@@ -137,6 +137,7 @@ public Text getObject(int index) {
    * @param index position of element.
    * @param buffer the buffer to write into.
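+   *
+   * <p>Illustrative sketch only (not part of this change): {@link org.apache.arrow.vector.util.Text}
+   * is a {@link ReusableBuffer}, so one instance can be recycled across reads to avoid
+   * per-element allocation:
+   * <pre>{@code
+   * Text reuse = new Text();
+   * for (int i = 0; i < vector.getValueCount(); i++) {
+   *   if (!vector.isNull(i)) {
+   *     vector.read(i, reuse); // fills 'reuse' in place
+   *   }
+   * }
+   * }</pre>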
    */
+  @Override
   public void read(int index, ReusableBuffer<?> buffer) {
     final int startOffset = getStartOffset(index);
     final int dataLength = getEndOffset(index) - startOffset;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java
new file mode 100644
index 0000000000000..58b6940a81a14
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/VariableWidthFieldVector.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import java.nio.ByteBuffer;
+
+import org.apache.arrow.memory.ReusableBuffer;
+
+/**
+ * A base interface for common functionality in variable width vectors.
+ */
+public interface VariableWidthFieldVector extends VariableWidthVector, FieldVector, VectorDefinitionSetter {
+
+  /**
+   * Set the variable length element at the specified index to the supplied byte array.
+   *
+   * @param index position of the element to set
+   * @param value array of bytes with data
+   */
+  void set(int index, byte[] value);
+
+  /**
+   * Set the variable length element at the specified index to the supplied byte array.
+   *
+   * @param index position of the element to set
+   * @param value array of bytes with data
+   * @param start start position in the array
+   * @param length length of the data to write
+   */
+  void set(int index, byte[] value, int start, int length);
+
+  /**
+   * Set the variable length element at the specified index to the supplied ByteBuffer.
+   *
+   * @param index position of the element to set
+   * @param value ByteBuffer with data
+   * @param start start position in the ByteBuffer
+   * @param length length of the data to write
+   */
+  void set(int index, ByteBuffer value, int start, int length);
+
+  /**
+   * Set the variable length element at the specified index to the supplied byte array, handling
+   * the case where the index and length of the new element are beyond the existing capacity of
+   * the vector.
+   *
+   * @param index position of the element to set
+   * @param value array of bytes to write
+   */
+  void setSafe(int index, byte[] value);
+
+  /**
+   * Set the variable length element at the specified index to the supplied byte array, handling
+   * the case where the index and length of the new element are beyond the existing capacity.
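+   * <p>Illustrative sketch only (not a requirement added by this interface): unlike {@code set},
+   * {@code setSafe} is expected to grow the underlying buffers on demand, so callers need not
+   * pre-size the vector:
+   * <pre>{@code
+   * byte[] bytes = "hello".getBytes(StandardCharsets.UTF_8);
+   * vector.setSafe(100_000, bytes, 0, bytes.length); // reallocates as needed
+   * }</pre>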
+   *
+   * @param index position of the element to set
+   * @param value array of bytes with data
+   * @param start start position in the array
+   * @param length length of the data to write
+   */
+  void setSafe(int index, byte[] value, int start, int length);
+
+  /**
+   * Set the variable length element at the specified index to the supplied ByteBuffer, handling
+   * the case where the index and length of the new element are beyond the existing capacity.
+   *
+   * @param index position of the element to set
+   * @param value ByteBuffer with data
+   * @param start start position in the ByteBuffer
+   * @param length length of the data to write
+   */
+  void setSafe(int index, ByteBuffer value, int start, int length);
+
+  /**
+   * Get the variable length element at the specified index.
+   *
+   * @param index position of the element to get
+   * @return byte array with the data
+   */
+  byte[] get(int index);
+
+  /**
+   * Get the variable length element at the specified index using a ReusableBuffer.
+   *
+   * @param index position of the element to get
+   * @param buffer ReusableBuffer to write the data to
+   */
+  void read(int index, ReusableBuffer<?> buffer);
+
+  /**
+   * Get the index of the last non-null element in the vector.
+   *
+   * @return index of the last non-null element
+   */
+  int getLastSet();
+
+  /**
+   * Set the index of the last non-null element in the vector.
+   *
+   * @param value desired index of last non-null element
+   */
+  void setLastSet(int value);
+
+  /**
+   * Get the length of the variable length element at the specified index.
+   *
+   * @param index position of an element to get
+   * @return length of the element (greater than 0) for a non-null element, 0 otherwise
+   */
+  int getValueLength(int index);
+
+  /**
+   * Create holes in the vector up to the given index (exclusive).
+   * Holes will be created from the current last-set position in
+   * the vector.
+   *
+   * @param index target index
+   */
+  void fillEmpties(int index);
+
+  /**
+   * Sets the value length for an element.
+   *
+   * @param index position of the element to set
+   * @param length length of the element
+   */
+  void setValueLengthSafe(int index, int length);
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
new file mode 100644
index 0000000000000..393df96b2969e
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
@@ -0,0 +1,229 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
+import org.apache.arrow.vector.complex.impl.ViewVarBinaryReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableViewVarBinaryHolder;
+import org.apache.arrow.vector.holders.ViewVarBinaryHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.TransferPair;
+
+/**
+ * ViewVarBinaryVector implements a variable width view vector of binary values which could be NULL. A
+ * validity buffer (bit vector) is maintained to track which elements in the vector are null.
+ */
+public final class ViewVarBinaryVector extends BaseVariableWidthViewVector {
+
+  /**
+   * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in the vector.
+   *
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarBinaryVector(String name, BufferAllocator allocator) {
+    this(name, FieldType.nullable(MinorType.VIEWVARBINARY.getType()), allocator);
+  }
+
+  /**
+   * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in the vector.
+   *
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarBinaryVector(String name, FieldType fieldType, BufferAllocator allocator) {
+    this(new Field(name, fieldType, null), allocator);
+  }
+
+  /**
+   * Instantiate a ViewVarBinaryVector. This doesn't allocate any memory for the data in the vector.
+   *
+   * @param field field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarBinaryVector(Field field, BufferAllocator allocator) {
+    super(field, allocator);
+  }
+
+  @Override
+  protected FieldReader getReaderImpl() {
+    return new ViewVarBinaryReaderImpl(ViewVarBinaryVector.this);
+  }
+
+  /**
+   * Get a minor type for this vector. The vector holds values belonging to a particular type.
+   *
+   * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+   */
+  @Override
+  public MinorType getMinorType() {
+    return MinorType.VIEWVARBINARY;
+  }
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |          vector value retrieval methods                         |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+  /**
+   * Get the variable length element at the specified index as a byte array.
+   *
+   * @param index position of an element to get
+   * @return array of bytes for a non-null element, null otherwise
+   */
+  public byte[] get(int index) {
+    assert index >= 0;
+    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+      return null;
+    }
+    return getData(index);
+  }
+
+  /**
+   * Read the value at the given position to the given output buffer. The caller is responsible for
+   * checking for nullity first.
+   *
+   * @param index position of an element.
+   * @param buffer the buffer to write into.
+   */
+  @Override
+  public void read(int index, ReusableBuffer<?> buffer) {
+    getData(index, buffer);
+  }
+
+  /**
+   * Get the variable length element at a specified index as a byte array.
+   *
+   * @param index position of an element to get
+   * @return byte array for a non-null element, null otherwise
+   */
+  @Override
+  public byte[] getObject(int index) {
+    return get(index);
+  }
+
+  /**
+   * Get the variable length element at the specified index and set the state in the provided holder.
+   *
+   * @param index position of an element to get
+   * @param holder data holder to be populated by this function
+   */
+  public void get(int index, NullableViewVarBinaryHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40936
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |          vector value setter methods                             |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+  /**
+   * Set the variable length element at the specified index to the data buffer supplied in the
+   * holder.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void set(int index, ViewVarBinaryHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40936
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /**
+   * Same as {@link #set(int, ViewVarBinaryHolder)} except that it handles the case where index and
+   * length of a new element are beyond the existing capacity of the vector.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void setSafe(int index, ViewVarBinaryHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40936
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /**
+   * Set the variable length element at the specified index to the data buffer supplied in the
+   * holder.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void set(int index, NullableViewVarBinaryHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40936
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /**
+   * Same as {@link #set(int, NullableViewVarBinaryHolder)} except that it handles the case where index
+   * and length of a new element are beyond the existing capacity of the vector.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void setSafe(int index, NullableViewVarBinaryHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40936
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |                      vector transfer                             |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+  /**
+   * Construct a TransferPair comprising this and a target vector of the same type.
+   *
+   * @param ref name of the target vector
+   * @param allocator allocator for the target vector
+   * @return {@link TransferPair}
+   */
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+    // TODO: https://github.com/apache/arrow/issues/40932
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+    // TODO: https://github.com/apache/arrow/issues/40932
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+
+  /**
+   * Construct a TransferPair with a desired target vector of the same type.
+   *
+   * @param to target vector
+   * @return {@link TransferPair}
+   */
+  @Override
+  public TransferPair makeTransferPair(ValueVector to) {
+    // TODO: https://github.com/apache/arrow/issues/40932
+    throw new UnsupportedOperationException("Unsupported operation");
+  }
+}
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
new file mode 100644
index 0000000000000..010df02e0bce0
--- /dev/null
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.vector;
+
+import static org.apache.arrow.vector.NullCheckingForGet.NULL_CHECKING_ENABLED;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.ReusableBuffer;
+import org.apache.arrow.vector.complex.impl.ViewVarCharReaderImpl;
+import org.apache.arrow.vector.complex.reader.FieldReader;
+import org.apache.arrow.vector.holders.NullableViewVarCharHolder;
+import org.apache.arrow.vector.holders.ViewVarCharHolder;
+import org.apache.arrow.vector.types.Types.MinorType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.arrow.vector.util.Text;
+import org.apache.arrow.vector.util.TransferPair;
+import org.apache.arrow.vector.validate.ValidateUtil;
+
+/**
+ * ViewVarCharVector implements a view of a variable width vector of VARCHAR
+ * values which could be NULL. A validity buffer (bit vector) is maintained
+ * to track which elements in the vector are null. A viewBuffer keeps track
+ * of all values in the vector, and external data buffers hold the contents of
+ * longer strings (longer than 12 bytes).
+ */
+public final class ViewVarCharVector extends BaseVariableWidthViewVector {
+
+  /**
+   * Instantiate a ViewVarCharVector. This doesn't allocate any memory for
+   * the data in the vector.
+   * @param name name of the vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarCharVector(String name, BufferAllocator allocator) {
+    this(name, FieldType.nullable(MinorType.VIEWVARCHAR.getType()), allocator);
+  }
+
+  /**
+   * Instantiate a ViewVarCharVector. This doesn't allocate any memory for
+   * the data in the vector.
+   * @param name name of the vector
+   * @param fieldType type of Field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarCharVector(String name, FieldType fieldType, BufferAllocator allocator) {
+    this(new Field(name, fieldType, null), allocator);
+  }
+
+  /**
+   * Instantiate a ViewVarCharVector. This doesn't allocate any memory for
+   * the data in the vector.
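+   * <p>Illustrative usage sketch only (names and sizes are arbitrary):
+   * <pre>{@code
+   * try (BufferAllocator allocator = new RootAllocator();
+   *      ViewVarCharVector vector = new ViewVarCharVector("v", allocator)) {
+   *   vector.allocateNew(48, 3);
+   *   vector.set(0, "hello".getBytes(StandardCharsets.UTF_8));
+   *   vector.setValueCount(1);
+   * }
+   * }</pre>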
+   *
+   * @param field field materialized by this vector
+   * @param allocator allocator for memory management.
+   */
+  public ViewVarCharVector(Field field, BufferAllocator allocator) {
+    super(field, allocator);
+  }
+
+  @Override
+  protected FieldReader getReaderImpl() {
+    return new ViewVarCharReaderImpl(ViewVarCharVector.this);
+  }
+
+  /**
+   * Get a minor type for this vector. The vector holds values belonging
+   * to a particular type.
+   * @return {@link org.apache.arrow.vector.types.Types.MinorType}
+   */
+  @Override
+  public MinorType getMinorType() {
+    return MinorType.VIEWVARCHAR;
+  }
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |          vector value retrieval methods                          |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+  /**
+   * Get the variable length element at the specified index as a byte array.
+   *
+   * @param index position of an element to get
+   * @return array of bytes for a non-null element, null otherwise
+   */
+  public byte[] get(int index) {
+    assert index >= 0;
+    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+      return null;
+    }
+    return getData(index);
+  }
+
+  /**
+   * Get the variable length element at the specified index as Text.
+   *
+   * @param index position of an element to get
+   * @return Text object for a non-null element, null otherwise
+   */
+  @Override
+  public Text getObject(int index) {
+    assert index >= 0;
+    if (NULL_CHECKING_ENABLED && isSet(index) == 0) {
+      return null;
+    }
+
+    final Text result = new Text();
+    read(index, result);
+    return result;
+  }
+
+  /**
+   * Read the value at the given position to the given output buffer.
+   * The caller is responsible for checking for nullity first.
+   *
+   * @param index position of an element.
+   * @param buffer the buffer to write into.
+   */
+  @Override
+  public void read(int index, ReusableBuffer<?> buffer) {
+    getData(index, buffer);
+  }
+
+  /**
+   * Get the variable length element at the specified index and set the state
+   * in the provided holder.
+   *
+   * @param index position of an element to get
+   * @param holder data holder to be populated by this function
+   */
+  public void get(int index, NullableViewVarCharHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40937
+    throw new UnsupportedOperationException("NullableViewVarCharHolder get operation not supported");
+  }
+
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |          vector value setter methods                             |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+
+  /**
+   * Set the variable length element at the specified index to the data
+   * buffer supplied in the holder.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void setSafe(int index, ViewVarCharHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40937
+    throw new UnsupportedOperationException("ViewVarCharHolder setSafe operation not supported");
+  }
+
+  /**
+   * Set the variable length element at the specified index to the data
+   * buffer supplied in the holder.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void set(int index, NullableViewVarCharHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40937
+    throw new UnsupportedOperationException("NullableViewVarCharHolder set operation not supported");
+  }
+
+  /**
+   * Same as {@link #set(int, NullableViewVarCharHolder)} except that it handles the
+   * case where index and length of a new element are beyond the existing
+   * capacity of the vector.
+   *
+   * @param index position of the element to set
+   * @param holder holder that carries data buffer.
+   */
+  public void setSafe(int index, NullableViewVarCharHolder holder) {
+    // TODO: https://github.com/apache/arrow/issues/40937
+    throw new UnsupportedOperationException("NullableViewVarCharHolder setSafe operation not supported");
+  }
+
+  /**
+   * Set the variable length element at the specified index to the
+   * content of the supplied Text.
+   *
+   * @param index position of the element to set
+   * @param text Text object with data
+   */
+  public void set(int index, Text text) {
+    set(index, text.getBytes(), 0, (int) text.getLength());
+  }
+
+  /**
+   * Same as {@link #set(int, Text)} except that it handles the
+   * case where index and length of a new element are beyond the existing
+   * capacity of the vector.
+   *
+   * @param index position of the element to set.
+   * @param text Text object with data
+   */
+  public void setSafe(int index, Text text) {
+    setSafe(index, text.getBytes(), 0, (int) text.getLength());
+  }
+
+  @Override
+  public void validateScalars() {
+    for (int i = 0; i < getValueCount(); ++i) {
+      byte[] value = get(i);
+      if (value != null) {
+        ValidateUtil.validateOrThrow(Text.validateUTF8NoThrow(value),
+            "Non-UTF-8 data in ViewVarCharVector at position " + i + ".");
+      }
+    }
+  }
+
+  /*----------------------------------------------------------------*
+  |                                                                  |
+  |                      vector transfer                             |
+  |                                                                  |
+  *----------------------------------------------------------------*/
+
+  /**
+   * Construct a TransferPair comprising this and a target vector of the same type.
+   *
+   * @param ref name of the target vector
+   * @param allocator allocator for the target vector
+   * @return {@link TransferPair} (UnsupportedOperationException)
+   */
+  @Override
+  public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
+    // TODO: https://github.com/apache/arrow/issues/40932
+    throw new UnsupportedOperationException(
+        "ViewVarCharVector does not support getTransferPair(String, BufferAllocator)");
+  }
+
+  /**
+   * Construct a TransferPair with a desired target vector of the same type.
+   *
+   * @param field The field materialized by this vector.
+   * @param allocator allocator for the target vector
+   * @return {@link TransferPair} (UnsupportedOperationException)
+   */
+  @Override
+  public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
+    // TODO: https://github.com/apache/arrow/issues/40932
+    throw new UnsupportedOperationException(
+        "ViewVarCharVector does not support getTransferPair(Field, BufferAllocator)");
+  }
+
+  /**
+   * Construct a TransferPair with a desired target vector of the same type.
+ * + * @param target the target for the transfer + * @return {@link TransferPair} (UnsupportedOperationException) + */ + @Override + public TransferPair makeTransferPair(ValueVector target) { + // TODO: https://github.com/apache/arrow/issues/40932 + throw new UnsupportedOperationException( + "ViewVarCharVector does not support makeTransferPair(ValueVector)"); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 5323ddda838c8..56220d270fa9b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -27,6 +27,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; @@ -162,6 +163,11 @@ public Boolean visit(BaseLargeVariableWidthVector left, Range range) { return compareBaseLargeVariableWidthVectors(range); } + @Override + public Boolean visit(BaseVariableWidthViewVector left, Range range) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Boolean visit(ListVector left, Range range) { if (!validate(left)) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index 443ee1f96e273..9bbe5c1b8997c 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; @@ -85,6 +86,11 @@ public Boolean visit(BaseLargeVariableWidthVector left, Void value) { return compareField(left.getField(), right.getField()); } + @Override + public Boolean visit(BaseVariableWidthViewVector left, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Boolean visit(ListVector left, Void value) { return compareField(left.getField(), right.getField()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java index aee090706b3c8..de88f25e6753d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/VectorVisitor.java @@ -20,6 +20,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.complex.DenseUnionVector; @@ -42,6 
+43,8 @@ public interface VectorVisitor<OUT, IN> {
   OUT visit(BaseLargeVariableWidthVector left, IN value);
 
+  OUT visit(BaseVariableWidthViewVector left, IN value);
+
   OUT visit(ListVector left, IN value);
 
   OUT visit(FixedSizeListVector left, IN value);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index 0b0e0d66a98f0..89d8441d42aa9 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -65,6 +65,8 @@
 import org.apache.arrow.vector.ValueVector;
 import org.apache.arrow.vector.VarBinaryVector;
 import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.ViewVarBinaryVector;
+import org.apache.arrow.vector.ViewVarCharVector;
 import org.apache.arrow.vector.complex.DenseUnionVector;
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
@@ -114,10 +116,13 @@
 import org.apache.arrow.vector.complex.impl.UnionWriter;
 import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl;
 import org.apache.arrow.vector.complex.impl.VarCharWriterImpl;
+import org.apache.arrow.vector.complex.impl.ViewVarBinaryWriterImpl;
+import org.apache.arrow.vector.complex.impl.ViewVarCharWriterImpl;
 import org.apache.arrow.vector.complex.writer.FieldWriter;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor;
 import org.apache.arrow.vector.types.pojo.ArrowType.Binary;
+import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView;
 import org.apache.arrow.vector.types.pojo.ArrowType.Bool;
 import org.apache.arrow.vector.types.pojo.ArrowType.Date;
 import org.apache.arrow.vector.types.pojo.ArrowType.Decimal;
@@ -138,6 +143,7 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp;
 import org.apache.arrow.vector.types.pojo.ArrowType.Union;
 import org.apache.arrow.vector.types.pojo.ArrowType.Utf8;
+import org.apache.arrow.vector.types.pojo.ArrowType.Utf8View;
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.CallBack;
@@ -504,6 +510,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
       return new VarCharWriterImpl((VarCharVector) vector);
     }
   },
+  VIEWVARCHAR(Utf8View.INSTANCE) {
+    @Override
+    public FieldVector getNewVector(
+        Field field,
+        BufferAllocator allocator,
+        CallBack schemaChangeCallback) {
+      return new ViewVarCharVector(field, allocator);
+    }
+
+    @Override
+    public FieldWriter getNewFieldWriter(ValueVector vector) {
+      return new ViewVarCharWriterImpl((ViewVarCharVector) vector);
+    }
+  },
   LARGEVARCHAR(LargeUtf8.INSTANCE) {
     @Override
     public FieldVector getNewVector(
@@ -546,6 +566,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
       return new VarBinaryWriterImpl((VarBinaryVector) vector);
     }
   },
+  VIEWVARBINARY(BinaryView.INSTANCE) {
+    @Override
+    public FieldVector getNewVector(
+        Field field,
+        BufferAllocator allocator,
+        CallBack schemaChangeCallback) {
+      return new ViewVarBinaryVector(field, allocator);
+    }
+
+    @Override
+    public FieldWriter getNewFieldWriter(ValueVector vector) {
+      return new ViewVarBinaryWriterImpl((ViewVarBinaryVector) vector);
+    }
+  },
   DECIMAL(null) {
     @Override
     public FieldVector getNewVector(
@@ -923,6 +957,11 @@ public MinorType visit(Utf8 type) {
       return MinorType.VARCHAR;
     }
 
+    @Override
+    public MinorType visit(Utf8View type) {
+      return
MinorType.VIEWVARCHAR; + } + @Override public Types.MinorType visit(LargeUtf8 type) { return MinorType.LARGEVARCHAR; @@ -933,6 +972,11 @@ public MinorType visit(Binary type) { return MinorType.VARBINARY; } + @Override + public MinorType visit(BinaryView type) { + return MinorType.VIEWVARBINARY; + } + @Override public MinorType visit(LargeBinary type) { return MinorType.LARGEVARBINARY; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java b/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java index 3af2c98374070..5f59933975133 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/DataSizeRoundingUtil.java @@ -93,6 +93,10 @@ public static long divideBy8Ceil(long input) { return (input + 7) >>> (long) DIVIDE_BY_8_SHIFT_BITS; } + public static long roundUpToMultipleOf16(long num) { + return (num + 15) & 0xFFFFFFFFFFFFFFF0L; + } + private DataSizeRoundingUtil() { } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java index d938cd833a41a..10a195e1e7b0a 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/ReusableByteArray.java @@ -64,6 +64,13 @@ public void set(ArrowBuf srcBytes, long start, long len) { length = (int) len; } + @Override + public void set(byte[] srcBytes, long start, long len) { + setCapacity((int) len, false); + System.arraycopy(srcBytes, (int) start, bytes, 0, (int) len); + length = (int) len; + } + @Override public boolean equals(Object o) { if (o == this) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java index 95e35ce6938c3..ea631c59ce2f2 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/Text.java @@ -235,9 +235,7 @@ public void set(Text other) { * @param len the number of bytes of the new string */ public void set(byte[] utf8, int start, int len) { - setCapacity(len, false); - System.arraycopy(utf8, start, bytes, 0, len); - this.length = len; + super.set(utf8, start, len); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java index 068717c7acbc7..def8ef96877ed 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/util/VectorAppender.java @@ -26,6 +26,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.ExtensionTypeVector; @@ -205,6 +206,11 @@ public ValueVector visit(BaseLargeVariableWidthVector deltaVector, Void value) { return targetVector; } + @Override + public ValueVector visit(BaseVariableWidthViewVector left, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public ValueVector visit(ListVector deltaVector, Void value) { 
Preconditions.checkArgument(typeVisitor.equals(deltaVector), diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index d4abaa1945b94..0a67db0455b41 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.FieldVector; @@ -133,6 +134,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { int valueCount = vector.getValueCount(); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java index 6d33be7a0dbac..ddcb658c1a95d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorDataVisitor.java @@ -23,6 +23,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.ExtensionTypeVector; import org.apache.arrow.vector.NullVector; import org.apache.arrow.vector.ValueVector; @@ -103,6 +104,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override public Void visit(ListVector vector, Void value) { validateOffsetBuffer(vector, vector.getValueCount()); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java index 3d1c5a4f27f7c..bbdabdb1226ad 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorTypeVisitor.java @@ -22,6 +22,7 @@ import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.BaseLargeVariableWidthVector; import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -308,6 +309,11 @@ public Void visit(BaseLargeVariableWidthVector vector, Void value) { return null; } + @Override + public Void visit(BaseVariableWidthViewVector vector, Void value) { + throw new UnsupportedOperationException("View vectors are not supported."); + } + @Override 
 public Void visit(ListVector vector, Void value) {
     validateVectorCommon(vector, ArrowType.List.class);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
index 7e99b1f90fb61..786a1142a2b0b 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorVisitor.java
@@ -23,6 +23,7 @@
 import org.apache.arrow.vector.BaseFixedWidthVector;
 import org.apache.arrow.vector.BaseLargeVariableWidthVector;
 import org.apache.arrow.vector.BaseVariableWidthVector;
+import org.apache.arrow.vector.BaseVariableWidthViewVector;
 import org.apache.arrow.vector.ExtensionTypeVector;
 import org.apache.arrow.vector.FieldVector;
 import org.apache.arrow.vector.NullVector;
@@ -98,6 +99,11 @@ public Void visit(BaseLargeVariableWidthVector left, Void value) {
     return null;
   }
 
+  @Override
+  public Void visit(BaseVariableWidthViewVector left, Void value) {
+    throw new UnsupportedOperationException("View vectors are not supported.");
+  }
+
   @Override
   public Void visit(ListVector vector, Void value) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
index 7e64dd3864636..be83e573c7c46 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUtils.java
@@ -29,11 +29,21 @@ public static VarCharVector newVarCharVector(String name, BufferAllocator alloca
         FieldType.nullable(new ArrowType.Utf8()).createNewSingleVector(name, allocator, null);
   }
 
+  public static ViewVarCharVector newViewVarCharVector(String name, BufferAllocator allocator) {
+    return (ViewVarCharVector)
+        FieldType.nullable(new ArrowType.Utf8View()).createNewSingleVector(name, allocator, null);
+  }
+
   public static VarBinaryVector newVarBinaryVector(String name, BufferAllocator allocator) {
     return (VarBinaryVector)
         FieldType.nullable(new ArrowType.Binary()).createNewSingleVector(name, allocator, null);
   }
 
+  public static ViewVarBinaryVector newViewVarBinaryVector(String name, BufferAllocator allocator) {
+    return (ViewVarBinaryVector)
+        FieldType.nullable(new ArrowType.BinaryView()).createNewSingleVector(name, allocator, null);
+  }
+
   public static <T> T newVector(Class<T> c, String name, ArrowType type, BufferAllocator allocator) {
     return c.cast(FieldType.nullable(type).createNewSingleVector(name, allocator, null));
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
new file mode 100644
index 0000000000000..efb5afac91b13
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java
@@ -0,0 +1,1462 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.apache.arrow.vector.TestUtils.newVector; +import static org.apache.arrow.vector.TestUtils.newViewVarBinaryVector; +import static org.apache.arrow.vector.TestUtils.newViewVarCharVector; +import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; +import java.util.Random; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.testing.ValueVectorDataPopulator; +import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.util.ReusableByteArray; +import org.apache.arrow.vector.util.Text; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + + +public class TestVarCharViewVector { + + // short string (length <= 12) + private static final byte[] STR0 = "0123456".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR1 = "012345678912".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR2 = "0123456789123".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR3 = "01234567891234567".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR4 = "01234567".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR5 = "A1234A".getBytes(StandardCharsets.UTF_8); + // short string (length <= 12) + private static final byte[] STR6 = "B1234567B".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR7 = "K01234567891234567K".getBytes(StandardCharsets.UTF_8); + // long string (length > 12) + private static final byte[] STR8 = "M012345678912345678M".getBytes(StandardCharsets.UTF_8); + private static final String EMPTY_SCHEMA_PATH = ""; + + private BufferAllocator allocator; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @AfterEach + public void shutdown() { + allocator.close(); + } + + public static void setBytes(int index, byte[] bytes, ViewVarCharVector vector) { + BitVectorHelper.setBit(vector.validityBuffer, index); + vector.setBytes(index, bytes, 0, bytes.length); + } + + @Test + public void testInlineAllocation() { + try 
(final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(48, 3); + final int valueCount = 3; + viewVarCharVector.set(0, STR0); + viewVarCharVector.set(1, STR1); + viewVarCharVector.set(2, STR4); + viewVarCharVector.setValueCount(valueCount); + + byte[] view1 = viewVarCharVector.get(0); + byte[] view2 = viewVarCharVector.get(1); + byte[] view3 = viewVarCharVector.get(2); + + assertNotNull(view1); + assertNotNull(view2); + assertNotNull(view3); + + String str1 = new String(STR0, StandardCharsets.UTF_8); + String str2 = new String(STR1, StandardCharsets.UTF_8); + String str3 = new String(STR4, StandardCharsets.UTF_8); + + assertEquals(new String(view1, StandardCharsets.UTF_8), str1); + assertEquals(new String(view2, StandardCharsets.UTF_8), str2); + assertEquals(new String(view3, StandardCharsets.UTF_8), str3); + + assertTrue(viewVarCharVector.dataBuffers.isEmpty()); + + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), + StandardCharsets.UTF_8), str1); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), + StandardCharsets.UTF_8), str2); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), + StandardCharsets.UTF_8), str3); + } + } + + @Test + public void testDataBufferBasedAllocationInSameBuffer() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(48, 4); + final int valueCount = 4; + String str4 = generateRandomString(34); + viewVarCharVector.set(0, STR1); + viewVarCharVector.set(1, STR2); + viewVarCharVector.set(2, STR3); + viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); + viewVarCharVector.setValueCount(valueCount); + + byte[] view1 = viewVarCharVector.get(0); + byte[] view2 = viewVarCharVector.get(1); + byte[] view3 = viewVarCharVector.get(2); + byte[] view4 = viewVarCharVector.get(3); + + assertNotNull(view1); + assertNotNull(view2); + assertNotNull(view3); + assertNotNull(view4); + + String str1 = new String(STR1, StandardCharsets.UTF_8); + String str2 = new String(STR2, StandardCharsets.UTF_8); + String str3 = new String(STR3, StandardCharsets.UTF_8); + + assertEquals(new String(view1, StandardCharsets.UTF_8), str1); + assertEquals(new String(view2, StandardCharsets.UTF_8), str2); + assertEquals(new String(view3, StandardCharsets.UTF_8), str3); + assertEquals(new String(view4, StandardCharsets.UTF_8), str4); + + assertEquals(1, viewVarCharVector.dataBuffers.size()); + + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), + StandardCharsets.UTF_8), str1); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), + StandardCharsets.UTF_8), str2); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), + StandardCharsets.UTF_8), str3); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), + StandardCharsets.UTF_8), str4); + } + } + + @Test + public void testDataBufferBasedAllocationInOtherBuffer() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(48, 4); + final int valueCount = 4; + String str4 = generateRandomString(35); + viewVarCharVector.set(0, STR1); + viewVarCharVector.set(1, STR2); + viewVarCharVector.set(2, STR3); + viewVarCharVector.set(3, 
str4.getBytes(StandardCharsets.UTF_8)); + viewVarCharVector.setValueCount(valueCount); + + byte[] view1 = viewVarCharVector.get(0); + byte[] view2 = viewVarCharVector.get(1); + byte[] view3 = viewVarCharVector.get(2); + byte[] view4 = viewVarCharVector.get(3); + + assertNotNull(view1); + assertNotNull(view2); + assertNotNull(view3); + assertNotNull(view4); + + String str1 = new String(STR1, StandardCharsets.UTF_8); + String str2 = new String(STR2, StandardCharsets.UTF_8); + String str3 = new String(STR3, StandardCharsets.UTF_8); + + assertEquals(new String(view1, StandardCharsets.UTF_8), str1); + assertEquals(new String(view2, StandardCharsets.UTF_8), str2); + assertEquals(new String(view3, StandardCharsets.UTF_8), str3); + assertEquals(new String(view4, StandardCharsets.UTF_8), str4); + + assertEquals(2, viewVarCharVector.dataBuffers.size()); + + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), + StandardCharsets.UTF_8), str1); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), + StandardCharsets.UTF_8), str2); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), + StandardCharsets.UTF_8), str3); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), + StandardCharsets.UTF_8), str4); + } + } + + @Test + public void testMixedAllocation() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(128, 6); + final int valueCount = 6; + String str4 = generateRandomString(35); + String str6 = generateRandomString(40); + viewVarCharVector.set(0, STR1); + viewVarCharVector.set(1, STR2); + viewVarCharVector.set(2, STR3); + viewVarCharVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); + viewVarCharVector.set(4, STR1); + viewVarCharVector.set(5, str6.getBytes(StandardCharsets.UTF_8)); + viewVarCharVector.setValueCount(valueCount); + + byte[] view1 = viewVarCharVector.get(0); + byte[] view2 = viewVarCharVector.get(1); + byte[] view3 = viewVarCharVector.get(2); + byte[] view4 = viewVarCharVector.get(3); + byte[] view5 = viewVarCharVector.get(4); + byte[] view6 = viewVarCharVector.get(5); + + assertNotNull(view1); + assertNotNull(view2); + assertNotNull(view3); + assertNotNull(view4); + assertNotNull(view5); + assertNotNull(view6); + + String str1 = new String(STR1, StandardCharsets.UTF_8); + String str2 = new String(STR2, StandardCharsets.UTF_8); + String str3 = new String(STR3, StandardCharsets.UTF_8); + + assertEquals(new String(view1, StandardCharsets.UTF_8), str1); + assertEquals(new String(view2, StandardCharsets.UTF_8), str2); + assertEquals(new String(view3, StandardCharsets.UTF_8), str3); + assertEquals(new String(view4, StandardCharsets.UTF_8), str4); + assertEquals(new String(view5, StandardCharsets.UTF_8), str1); + assertEquals(new String(view6, StandardCharsets.UTF_8), str6); + + assertEquals(1, viewVarCharVector.dataBuffers.size()); + + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(0)).getBuffer(), + StandardCharsets.UTF_8), str1); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(1)).getBuffer(), + StandardCharsets.UTF_8), str2); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(2)).getBuffer(), + StandardCharsets.UTF_8), str3); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(3)).getBuffer(), + StandardCharsets.UTF_8), str4); + 
assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(4)).getBuffer(), + StandardCharsets.UTF_8), str1); + assertEquals(new String(Objects.requireNonNull(viewVarCharVector.getObject(5)).getBuffer(), + StandardCharsets.UTF_8), str6); + } + } + + @Test + public void testAllocationIndexOutOfBounds() { + assertThrows(IndexOutOfBoundsException.class, () -> { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(32, 3); + final int valueCount = 3; + viewVarCharVector.set(0, STR1); + viewVarCharVector.set(1, STR2); + viewVarCharVector.set(2, STR2); + viewVarCharVector.setValueCount(valueCount); + } + }); + } + + @Test + public void testSizeOfViewBufferElements() { + try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + int valueCount = 100; + int currentSize = 0; + vector.setInitialCapacity(valueCount); + vector.allocateNew(); + vector.setValueCount(valueCount); + for (int i = 0; i < valueCount; i++) { + currentSize += i; + vector.setSafe(i, new byte[i]); + } + assertEquals(currentSize, vector.sizeOfViewBufferElements()); + } + } + + @Test + public void testNullableVarType1() { + + // Create a new value vector for 1024 variable-width values. + try (final ViewVarCharVector vector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); + + vector.set(0, STR1); + vector.set(1, STR2); + vector.set(2, STR3); + vector.setSafe(3, STR3, 1, STR3.length - 1); + vector.setSafe(4, STR3, 2, STR3.length - 2); + ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); + vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); + vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); + + // Set with convenience function + Text txt = new Text("foo"); + vector.setSafe(7, txt.getBytes(), 0, (int) txt.getLength()); + + // Check the sample strings. + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); + + // Check returning a Text object + assertEquals(txt, vector.getObject(7)); + + // Ensure an index that was never set reads back as null. + assertNull(vector.get(8)); + } + } + + @Test + public void testGetTextRepeatedly() { + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + ValueVectorDataPopulator.setVector(vector, STR1, STR2); + vector.setValueCount(2); + + /* check the vector output */ + Text text = new Text(); + vector.read(0, text); + assertArrayEquals(STR1, text.getBytes()); + vector.read(1, text); + assertArrayEquals(STR2, text.getBytes()); + } + } + + @Test + public void testNullableVarType2() { + try (final ViewVarBinaryVector vector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); + vector.set(0, STR1); + vector.set(1, STR2); + vector.set(2, STR3); + vector.setSafe(3, STR3, 1, STR3.length - 1); + vector.setSafe(4, STR3, 2, STR3.length - 2); + ByteBuffer str3ByteBuffer = ByteBuffer.wrap(STR3); + vector.setSafe(5, str3ByteBuffer, 1, STR3.length - 1); + vector.setSafe(6, str3ByteBuffer, 2, STR3.length - 2); + + // Check the sample strings.
+ assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(3)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(4)); + assertArrayEquals(Arrays.copyOfRange(STR3, 1, STR3.length), vector.get(5)); + assertArrayEquals(Arrays.copyOfRange(STR3, 2, STR3.length), vector.get(6)); + + // Ensure an index that was never set reads back as null. + assertNull(vector.get(7)); + } + } + + @Test + public void testGetBytesRepeatedly() { + try (ViewVarBinaryVector vector = new ViewVarBinaryVector("", allocator)) { + vector.allocateNew(5, 1); + + final String str = "hello world!!!"; + final String str2 = "foo"; + vector.setSafe(0, str.getBytes(StandardCharsets.UTF_8)); + vector.setSafe(1, str2.getBytes(StandardCharsets.UTF_8)); + + // verify results + ReusableByteArray reusableByteArray = new ReusableByteArray(); + vector.read(0, reusableByteArray); + assertArrayEquals( + str.getBytes(StandardCharsets.UTF_8), + Arrays.copyOfRange( + reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); + byte[] oldBuffer = reusableByteArray.getBuffer(); + + vector.read(1, reusableByteArray); + assertArrayEquals( + str2.getBytes(StandardCharsets.UTF_8), + Arrays.copyOfRange( + reusableByteArray.getBuffer(), 0, (int) reusableByteArray.getLength())); + + // There should not have been any reallocation since the newer value is smaller in length. + assertSame(oldBuffer, reusableByteArray.getBuffer()); + } + } + + @Test + public void testReAllocVariableWidthViewVector() { + try (final ViewVarCharVector vector = newVector(ViewVarCharVector.class, EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, allocator)) { + final int capacityLimit = 4095; + final int overLimitIndex = 200; + vector.setInitialCapacity(capacityLimit); + vector.allocateNew(); + + int initialCapacity = vector.getValueCapacity(); + assertTrue(initialCapacity >= capacityLimit); + + /* Put values in indexes that fall within the initial allocation */ + vector.setSafe(0, STR1, 0, STR1.length); + vector.setSafe(initialCapacity - 1, STR2, 0, STR2.length); + + /* the set calls above should NOT have triggered a realloc */ + assertEquals(initialCapacity, vector.getValueCapacity()); + + /* Now try to put values in space that falls beyond the initial allocation */ + vector.setSafe(initialCapacity + overLimitIndex, STR3, 0, STR3.length); + + /* Check valueCapacity is more than initial allocation */ + assertTrue(initialCapacity * 2 <= vector.getValueCapacity()); + + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(initialCapacity - 1)); + assertArrayEquals(STR3, vector.get(initialCapacity + overLimitIndex)); + + // Set the valueCount to be more than valueCapacity of current allocation. This is possible for ValueVectors + // as we don't call setSafe for null values, but we do call setValueCount when the current batch is processed.
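+ // (Illustrative sketch, not part of the original test: the batch-writing pattern + // this mirrors is roughly + // for (int i = 0; i < n; i++) { if (values[i] != null) vector.setSafe(i, values[i]); } + // vector.setValueCount(n); // n may exceed the capacity reached via setSafe + // which is why setValueCount must tolerate counts beyond the current capacity.)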
+ vector.setValueCount(vector.getValueCapacity() + overLimitIndex); + } + } + + @Test + public void testSetSafeWithArrowBufNoExcessAllocs() { + final int numValues = BaseVariableWidthViewVector.INITIAL_VALUE_ALLOCATION * 2; + final byte[] valueBytes = "hello world!!!".getBytes(StandardCharsets.UTF_8); + final int valueBytesLength = valueBytes.length; + final int isSet = 1; + try (final ViewVarCharVector fromVector = + newVector( + ViewVarCharVector.class, + EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, + allocator); + final ViewVarCharVector toVector = + newVector( + ViewVarCharVector.class, + EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, + allocator)) { + /* + * Populate the `fromVector` with `numValues` byte arrays, each of size `valueBytesLength`. + */ + fromVector.setInitialCapacity(numValues); + fromVector.allocateNew(); + for (int i = 0; i < numValues; ++i) { + fromVector.setSafe(i, valueBytes, 0 /*start*/, valueBytesLength); + } + fromVector.setValueCount(numValues); + ArrowBuf fromDataBuffer = fromVector.getDataBuffer(); + assertTrue(numValues * valueBytesLength <= fromDataBuffer.capacity()); + + /* + * Copy the entries one-by-one from 'fromVector' to 'toVector', but use the setSafe with + * ArrowBuf API (instead of setSafe with byte-array). + */ + toVector.setInitialCapacity(numValues); + toVector.allocateNew(); + for (int i = 0; i < numValues; i++) { + int start = fromVector.getTotalValueLengthUpToIndex(i); + // getTotalValueLengthUpToIndex gives byte offsets that are consistent + // across variable-width implementations + int end = fromVector.getTotalValueLengthUpToIndex(i + 1); + toVector.setSafe(i, isSet, start, end, fromDataBuffer); + } + + /* + * Since the 'fromVector' and 'toVector' have the same initial capacity, and were populated + * with the same varchar elements, the allocations, and hence the final capacity, should be + * the same. + */ + assertEquals(fromDataBuffer.capacity(), toVector.getDataBuffer().capacity()); + } + } + + @Test + public void testSetLastSetUsage() { + try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) { + vector.allocateNew(1024 * 10, 1024); + + setBytes(0, STR1, vector); + setBytes(1, STR2, vector); + setBytes(2, STR3, vector); + setBytes(3, STR4, vector); + + /* Check current lastSet */ + assertEquals(-1, vector.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + + /* + * If we don't do setLastSet(3) before setValueCount(), then the latter will corrupt + * the value vector by filling in all positions [0, valueCount - 1] with empty byte arrays. + * Run the test with the next line commented out, and we should see incorrect vector output.
+ */ + vector.setLastSet(3); + vector.setValueCount(20); + + /* Check current lastSet */ + assertEquals(19, vector.getLastSet()); + + /* Check the vector output again */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + + assertEquals(0, vector.getValueLength(4)); + assertEquals(0, vector.getValueLength(5)); + assertEquals(0, vector.getValueLength(6)); + assertEquals(0, vector.getValueLength(7)); + assertEquals(0, vector.getValueLength(8)); + assertEquals(0, vector.getValueLength(9)); + assertEquals(0, vector.getValueLength(10)); + assertEquals(0, vector.getValueLength(11)); + assertEquals(0, vector.getValueLength(12)); + assertEquals(0, vector.getValueLength(13)); + assertEquals(0, vector.getValueLength(14)); + assertEquals(0, vector.getValueLength(15)); + assertEquals(0, vector.getValueLength(16)); + assertEquals(0, vector.getValueLength(17)); + assertEquals(0, vector.getValueLength(18)); + assertEquals(0, vector.getValueLength(19)); + } + } + + @Test + public void testFillEmptiesUsage() { + try (final ViewVarCharVector vector = new ViewVarCharVector("myvector", allocator)) { + vector.allocateNew(1024 * 10, 1024); + + setBytes(0, STR1, vector); + setBytes(1, STR2, vector); + setBytes(2, STR3, vector); + setBytes(3, STR4, vector); + + /* Check current lastSet */ + assertEquals(-1, vector.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + + vector.setLastSet(3); + /* fill empty byte arrays from index [4, 9] */ + vector.fillEmpties(10); + + /* Check current lastSet */ + assertEquals(9, vector.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + assertEquals(0, vector.getValueLength(4)); + assertEquals(0, vector.getValueLength(5)); + assertEquals(0, vector.getValueLength(6)); + assertEquals(0, vector.getValueLength(7)); + assertEquals(0, vector.getValueLength(8)); + assertEquals(0, vector.getValueLength(9)); + + setBytes(10, STR1, vector); + setBytes(11, STR2, vector); + + vector.setLastSet(11); + /* fill empty byte arrays from index [12, 14] */ + vector.setValueCount(15); + + /* Check current lastSet */ + assertEquals(14, vector.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + assertEquals(0, vector.getValueLength(4)); + assertEquals(0, vector.getValueLength(5)); + assertEquals(0, vector.getValueLength(6)); + assertEquals(0, vector.getValueLength(7)); + assertEquals(0, vector.getValueLength(8)); + assertEquals(0, vector.getValueLength(9)); + assertArrayEquals(STR1, vector.get(10)); + assertArrayEquals(STR2, vector.get(11)); + assertEquals(0, vector.getValueLength(12)); + assertEquals(0, vector.getValueLength(13)); + assertEquals(0, vector.getValueLength(14)); + } + } + + @Test + public void testGetBufferAddress1() { + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + + setVector(vector, STR1, STR2, STR3, STR4); + vector.setValueCount(15); + + /* check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + 
assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR3, vector.get(2)); + assertArrayEquals(STR4, vector.get(3)); + + List<ArrowBuf> buffers = vector.getFieldBuffers(); + long bitAddress = vector.getValidityBufferAddress(); + long dataAddress = vector.getDataBufferAddress(); + + assertEquals(3, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(dataAddress, buffers.get(1).memoryAddress()); + } + } + + @Test + public void testSetInitialCapacityInViews() { + try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + + /* use the default 16 data bytes on average per element */ + final int viewSize = BaseVariableWidthViewVector.ELEMENT_SIZE; + int defaultCapacity = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION / viewSize; + vector.setInitialCapacity(defaultCapacity); + vector.allocateNew(); + assertEquals(defaultCapacity, vector.getValueCapacity()); + assertEquals(CommonUtil.nextPowerOfTwo(defaultCapacity * viewSize), vector.getDataBuffer().capacity()); + + double density = 4.0; + final int valueCount = 5; + vector.setInitialCapacity(valueCount, density); + vector.allocateNew(); + assertEquals(8, vector.getValueCapacity()); + assertEquals(128, vector.getDataBuffer().capacity()); + int initialDataBufferSize = (int) (valueCount * density); + // making sure a data buffer is allocated + vector.set(4, "01234567890123456".getBytes(StandardCharsets.UTF_8)); + assertEquals(1, vector.dataBuffers.size()); + ArrowBuf dataBuf = vector.dataBuffers.get(0); + try (ArrowBuf tempBuf = vector.allocator.buffer(initialDataBufferSize)) { + // replicating a new buffer allocation process when a new buffer is added to the + // data buffer when inserting an element with length > 12 + assertEquals(tempBuf.capacity(), dataBuf.capacity()); + } + } + } + + @Test + public void testGetPointerVariableWidthViews() { + final String[] sampleData = new String[]{ + "abc", "1234567890123", "def", null, "hello world java", "aaaaa", "world", "2019", null, "0717"}; + + try (ViewVarCharVector vec1 = new ViewVarCharVector("vec1", allocator); + ViewVarCharVector vec2 = new ViewVarCharVector("vec2", allocator)) { + + vec1.allocateNew((long) sampleData.length * 16, sampleData.length); + vec2.allocateNew((long) sampleData.length * 16, sampleData.length); + + for (int i = 0; i < sampleData.length; i++) { + String str = sampleData[i]; + if (str != null) { + vec1.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); + vec2.set(i, sampleData[i].getBytes(StandardCharsets.UTF_8)); + } else { + vec1.setNull(i); + vec2.setNull(i); + } + } + + ArrowBufPointer ptr1 = new ArrowBufPointer(); + ArrowBufPointer ptr2 = new ArrowBufPointer(); + + for (int i = 0; i < sampleData.length; i++) { + vec1.getDataPointer(i, ptr1); + vec2.getDataPointer(i, ptr2); + + assertTrue(ptr1.equals(ptr2)); + assertTrue(ptr2.equals(ptr1)); + } + } + } + + @Test + public void testGetNullFromVariableWidthViewVector() { + try (final ViewVarCharVector varCharViewVector = new ViewVarCharVector("viewvarcharvec", allocator); + final ViewVarBinaryVector varBinaryViewVector = new ViewVarBinaryVector("viewvarbinary", allocator)) { + varCharViewVector.allocateNew(16, 1); + varBinaryViewVector.allocateNew(16, 1); + + varCharViewVector.setNull(0); + varBinaryViewVector.setNull(0); + + assertNull(varCharViewVector.get(0)); + assertNull(varBinaryViewVector.get(0)); + } + } + + @Test + public void testVariableWidthViewVectorNullHashCode() { + try (ViewVarCharVector viewVarChar = new ViewVarCharVector("view var char vector", allocator)) { + viewVarChar.allocateNew(100, 1); + viewVarChar.setValueCount(1); + + viewVarChar.set(0, "abc".getBytes(StandardCharsets.UTF_8)); + viewVarChar.setNull(0); + + assertEquals(0, viewVarChar.hashCode(0)); + } + } + + @Test + public void testUnloadVariableWidthViewVector() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("view var char", allocator)) { + viewVarCharVector.allocateNew(16, 2); + viewVarCharVector.setValueCount(2); + viewVarCharVector.set(0, "abcd".getBytes(StandardCharsets.UTF_8)); + + List<ArrowBuf> bufs = viewVarCharVector.getFieldBuffers(); + assertEquals(2, bufs.size()); + + ArrowBuf viewBuf = bufs.get(1); + + assertEquals(32, viewBuf.writerIndex()); + final String longString = "012345678901234"; + viewVarCharVector.set(1, longString.getBytes(StandardCharsets.UTF_8)); + + bufs = viewVarCharVector.getFieldBuffers(); + assertEquals(3, bufs.size()); + + ArrowBuf referenceBuf = bufs.get(2); + assertEquals(longString.length(), referenceBuf.writerIndex()); + } + } + + @Test + public void testUnSupportedOffSet() { + // an offset buffer is not a feature of ViewVarCharVector + assertThrows(UnsupportedOperationException.class, () -> { + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + + setVector(vector, STR1, STR2); + vector.setValueCount(2); + + /* check the vector output */ + assertArrayEquals(STR1, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + + vector.getOffsetBuffer(); + } + }); + } + + private void validateViewBuffer(int index, ViewVarCharVector vector, byte[] expectedData, + int expectedBufId, int expectedOffSet) { + final ArrowBuf viewBuffer = vector.viewBuffer; + int writePosition = index * BaseVariableWidthViewVector.ELEMENT_SIZE; + final int prefixBufWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; + final int lengthBufWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; + int length = viewBuffer.getInt(writePosition); + + // validate length of the view + assertEquals(expectedData.length, length); + + byte[] prefixBytes = new byte[prefixBufWidth]; + viewBuffer.getBytes(writePosition + lengthBufWidth, prefixBytes); + + // validate the prefix + byte[] expectedPrefixBytes = new byte[prefixBufWidth]; + System.arraycopy(expectedData, 0, expectedPrefixBytes, 0, prefixBufWidth); + assertArrayEquals(expectedPrefixBytes, prefixBytes); + + if (length > 12) { + // validate bufId + int bufId = viewBuffer.getInt(writePosition + lengthBufWidth + prefixBufWidth); + assertEquals(expectedBufId, bufId); + // validate offset + int offset = viewBuffer.getInt(writePosition + + lengthBufWidth + + prefixBufWidth + + BaseVariableWidthViewVector.BUF_INDEX_WIDTH); + assertEquals(expectedOffSet, offset); + } + // validate retrieved data + assertArrayEquals(expectedData, vector.get(index)); + } + + @Test + public void testOverwriteShortFromLongString() { + /*NA: not applicable */ + // Overwriting at the beginning of the buffer.
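+ // (Clarifying sketch of the 16-byte view element layout that validateViewBuffer + // decodes, following the Arrow binary view format: + // length <= 12: | length (4B) | data, inlined and zero-padded (12B) | + // length > 12: | length (4B) | prefix (4B) | bufId (4B) | offset (4B) | + // i.e. short values live inline in viewBuffer while long values point into + // one of the dataBuffers.)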
+ try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set short string + vector.set(0, STR0); + vector.setValueCount(1); + assertEquals(0, vector.dataBuffers.size()); + assertArrayEquals(STR0, vector.get(0)); + + validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1); + + // set long string + vector.set(0, STR3); + vector.setValueCount(1); + assertEquals(1, vector.dataBuffers.size()); + assertArrayEquals(STR3, vector.get(0)); + + validateViewBuffer(0, vector, STR3, 0, 0); + } + + // Overwriting in the middle of the buffer when existing buffers are all shorts. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(48, 3); + // set short string 1 + vector.set(0, STR0); + // set short string 2 + vector.set(1, STR5); + // set short string 3 + vector.set(2, STR6); + vector.setValueCount(3); + + // overwrite index 1 with a long string + vector.set(1, STR7); + vector.setValueCount(3); + + validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1); + validateViewBuffer(1, vector, STR7, 0, 0); + validateViewBuffer(2, vector, STR6, /*NA*/-1, /*NA*/-1); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(80, 5); + // set short string 1 + vector.set(0, STR0); + // set long string 1 + vector.set(1, STR3); + // set short string 2 + vector.set(2, STR5); + // set short string 3 + vector.set(3, STR6); + // set long string 2 + vector.set(4, STR7); + vector.setValueCount(5); + + // overwrite index 2 with a long string + vector.set(2, STR8); + vector.setValueCount(5); + + validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1); + validateViewBuffer(1, vector, STR3, 0, 0); + // Since we did overwrite index 2 with STR8, and as we are using an append-only approach, + // it will be appended to the data buffer. + // Thus, it will be stored in the dataBuffer in order, i.e. [STR3, STR7, STR8]. + validateViewBuffer(2, vector, STR8, 0, STR3.length + STR7.length); + validateViewBuffer(3, vector, STR6, /*NA*/-1, /*NA*/-1); + validateViewBuffer(4, vector, STR7, 0, STR3.length); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + // Here the short string is overwritten with a long string, and its length is larger than + // the remaining capacity of the existing data buffer. + // This would allocate a new buffer in the data buffers. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(80, 5); + // set short string 1 + vector.set(0, STR0); + // set long string 1 + vector.set(1, STR3); + // set short string 2 + vector.set(2, STR5); + // set short string 3 + vector.set(3, STR6); + // set long string 2 + vector.set(4, STR7); + + vector.setValueCount(5); + + // overwrite index 2 with a long string + String longString = generateRandomString(128); + byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8); + // since the append-only approach is used and the remaining capacity + // is not enough to store the new string, a new buffer will be allocated.
+ final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + assertTrue(remainingCapacity < longStringBytes.length); + vector.set(2, longStringBytes); + vector.setValueCount(5); + + validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1); + validateViewBuffer(1, vector, STR3, 0, 0); + // the overwritten long string will be stored in the new data buffer. + validateViewBuffer(2, vector, longStringBytes, 1, 0); + validateViewBuffer(3, vector, STR6, /*NA*/-1, /*NA*/-1); + validateViewBuffer(4, vector, STR7, 0, STR3.length); + } + } + + @Test + public void testOverwriteLongFromShortString() { + // Overwriting at the beginning of the buffer. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.set(0, STR3); + vector.setValueCount(1); + // set short string + vector.set(0, STR0); + vector.setValueCount(1); + + validateViewBuffer(0, vector, STR0, /*NA*/-1, /*NA*/-1); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(48, 3); + // set long string 1 + vector.set(0, STR3); + // set long string 2 + vector.set(1, STR8); + // set long string 3 + vector.set(2, STR7); + vector.setValueCount(3); + + // overwrite index 1 with a short string + vector.set(1, STR6); + vector.setValueCount(3); + + validateViewBuffer(0, vector, STR3, 0, 0); + validateViewBuffer(1, vector, STR6, /*NA*/-1, /*NA*/-1); + // since the append-only approach is used, + // STR8 will still be in the first data buffer in dataBuffers. + validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(80, 5); + // set long string 1 + vector.set(0, STR3); + // set short string 1 + vector.set(1, STR5); + // set long string 2 + vector.set(2, STR7); + // set long string 3 + vector.set(3, STR8); + // set short string 2 + vector.set(4, STR6); + vector.setValueCount(5); + + // overwrite index 2 with a short string + vector.set(2, STR0); + vector.setValueCount(5); + + validateViewBuffer(0, vector, STR3, 0, 0); + validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1); + validateViewBuffer(2, vector, STR0, /*NA*/-1, /*NA*/-1); + // since the append-only approach is used, + // STR7 will still be in the first data buffer in dataBuffers. + validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length); + validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1); + } + } + + @Test + public void testOverwriteLongFromAShorterLongString() { + // Overwriting at the beginning of the buffer. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.set(0, STR7); + vector.setValueCount(1); + // set shorter long string, since the append-only approach is used and the remaining capacity + // is not enough to store the new string, a new buffer will be allocated.
+ final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + assertTrue(remainingCapacity < STR3.length); + // set shorter long string + vector.set(0, STR3); + vector.setValueCount(1); + + validateViewBuffer(0, vector, STR3, 1, 0); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + // extra memory is allocated + vector.allocateNew(128, 3); + // set long string 1 + vector.set(0, STR3); + // set long string 2 + vector.set(1, STR8); + // set long string 3 + vector.set(2, STR7); + vector.setValueCount(3); + + // overwrite index 1 with a shorter long string. + // Since the append-only approach is used + // and the remaining capacity is enough, the new value is stored in the same data buffer. + final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + assertTrue(remainingCapacity > STR2.length); + vector.set(1, STR2); + vector.setValueCount(3); + + validateViewBuffer(0, vector, STR3, 0, 0); + // since the append-only approach is used, + // STR8 will still be in the first data buffer in dataBuffers. + validateViewBuffer(1, vector, STR2, 0, STR3.length + STR8.length + STR7.length); + validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(128, 5); + // set long string 1 + vector.set(0, STR3); + // set short string 1 + vector.set(1, STR5); + // set long string 2 + vector.set(2, STR7); + // set long string 3 + vector.set(3, STR8); + // set short string 2 + vector.set(4, STR6); + vector.setValueCount(5); + + // overwrite index 2 with a shorter long string. + // Since the append-only approach is used + // and the remaining capacity is enough, the new value is stored in the same data buffer. + final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + assertTrue(remainingCapacity > STR2.length); + vector.set(2, STR2); + vector.setValueCount(5); + + validateViewBuffer(0, vector, STR3, 0, 0); + validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1); + // since the append-only approach is used, + // STR7 will still be in the first data buffer in dataBuffers. + validateViewBuffer(2, vector, STR2, 0, STR3.length + + STR7.length + STR8.length); + validateViewBuffer(3, vector, STR8, 0, STR3.length + STR7.length); + validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1); + } + } + + @Test + public void testOverwriteLongFromALongerLongString() { + // Overwriting at the beginning of the buffer. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.set(0, STR3); + vector.setValueCount(1); + // set longer long string, since the append-only approach is used and the remaining capacity + // is not enough to store the new string, a new buffer will be allocated.
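+ // (Clarifying note, not in the original test: an overwrite never rewrites bytes in + // place. A new long value is appended to the last data buffer when it has room; + // otherwise a fresh buffer is appended to dataBuffers, and the bytes of the + // overwritten value simply remain behind, unreferenced.)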
+ final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + assertTrue(remainingCapacity < STR7.length); + // set longer long string + vector.set(0, STR7); + vector.setValueCount(1); + + validateViewBuffer(0, vector, STR7, 1, 0); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + // extra memory is allocated + vector.allocateNew(48, 3); + // set long string 1 + vector.set(0, STR3); + // set long string 2 + vector.set(1, STR8); + // set long string 3 + vector.set(2, STR7); + vector.setValueCount(3); + + // overwrite index 1 with a longer long string; + // the remaining capacity is not enough to store it in the same data buffer, + // so a new buffer is added to the dataBuffers + final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + String longerString = generateRandomString(35); + byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); + assertTrue(remainingCapacity < longerStringBytes.length); + + vector.set(1, longerStringBytes); + vector.setValueCount(3); + + validateViewBuffer(0, vector, STR3, 0, 0); + validateViewBuffer(1, vector, longerStringBytes, 1, 0); + // since the append-only approach is used, + // STR8 will still be in the first data buffer in dataBuffers. + validateViewBuffer(2, vector, STR7, 0, STR3.length + STR8.length); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(128, 5); + // set long string 1 + vector.set(0, STR3); + // set short string 1 + vector.set(1, STR5); + // set long string 2 + vector.set(2, STR7); + // set long string 3 + vector.set(3, STR2); + // set short string 2 + vector.set(4, STR6); + vector.setValueCount(5); + + // overwrite index 2 with a longer long string; + // the remaining capacity is enough to store it in the same data buffer + final ArrowBuf currentDataBuf = vector.dataBuffers.get(0); + final long remainingCapacity = currentDataBuf.capacity() - currentDataBuf.writerIndex(); + String longerString = generateRandomString(24); + byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); + assertTrue(remainingCapacity > longerStringBytes.length); + + vector.set(2, longerStringBytes); + vector.setValueCount(5); + + validateViewBuffer(0, vector, STR3, 0, 0); + validateViewBuffer(1, vector, STR5, /*NA*/-1, /*NA*/-1); + // since the append-only approach is used, + // STR7 will still be in the first data buffer in dataBuffers. + validateViewBuffer(2, vector, longerStringBytes, 0, STR3.length + STR7.length + STR2.length); + validateViewBuffer(3, vector, STR2, 0, STR3.length + STR7.length); + validateViewBuffer(4, vector, STR6, /*NA*/-1, /*NA*/-1); + } + } + + @Test + public void testSafeOverwriteShortFromLongString() { + // Overwriting at the beginning of the buffer.
+ try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set short string + vector.setSafe(0, STR0); + vector.setValueCount(1); + assertEquals(0, vector.dataBuffers.size()); + assertArrayEquals(STR0, vector.get(0)); + + // set long string + vector.setSafe(0, STR3); + vector.setValueCount(1); + assertEquals(1, vector.dataBuffers.size()); + assertArrayEquals(STR3, vector.get(0)); + } + + // Overwriting in the middle of the buffer when existing buffers are all shorts. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 3); + // set short string 1 + vector.setSafe(0, STR0); + // set short string 2 + vector.setSafe(1, STR5); + // set short string 3 + vector.setSafe(2, STR6); + vector.setValueCount(3); + + // overwrite index 1 with a long string + vector.setSafe(1, STR7); + vector.setValueCount(3); + + assertArrayEquals(STR0, vector.get(0)); + assertArrayEquals(STR7, vector.get(1)); + assertArrayEquals(STR6, vector.get(2)); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 5); + // set short string 1 + vector.setSafe(0, STR0); + // set long string 1 + vector.setSafe(1, STR3); + // set short string 2 + vector.setSafe(2, STR5); + // set short string 3 + vector.setSafe(3, STR6); + // set long string 2 + vector.setSafe(4, STR7); + vector.setValueCount(5); + + // overwrite index 2 with a long string + vector.setSafe(2, STR8); + vector.setValueCount(5); + + assertArrayEquals(STR0, vector.get(0)); + assertArrayEquals(STR3, vector.get(1)); + assertArrayEquals(STR8, vector.get(2)); + assertArrayEquals(STR6, vector.get(3)); + assertArrayEquals(STR7, vector.get(4)); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 5); + // set short string 1 + vector.setSafe(0, STR0); + // set long string 1 + vector.setSafe(1, STR3); + // set short string 2 + vector.setSafe(2, STR5); + // set short string 3 + vector.setSafe(3, STR6); + // set long string 2 + vector.setSafe(4, STR7); + + vector.setValueCount(5); + + // overwrite index 2 with a long string + String longString = generateRandomString(128); + byte[] longStringBytes = longString.getBytes(StandardCharsets.UTF_8); + + vector.setSafe(2, longStringBytes); + vector.setValueCount(5); + + assertArrayEquals(STR0, vector.get(0)); + assertArrayEquals(STR3, vector.get(1)); + assertArrayEquals(longStringBytes, vector.get(2)); + assertArrayEquals(STR6, vector.get(3)); + assertArrayEquals(STR7, vector.get(4)); + } + } + + @Test + public void testSafeOverwriteLongFromShortString() { + // Overwriting at the beginning of the buffer. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.setSafe(0, STR3); + vector.setValueCount(1); + // set short string + vector.setSafe(0, STR0); + vector.setValueCount(1); + + assertArrayEquals(STR0, vector.get(0)); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs.
+ try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 3); + // set long string 1 + vector.setSafe(0, STR3); + // set long string 2 + vector.setSafe(1, STR8); + // set long string 3 + vector.setSafe(2, STR7); + vector.setValueCount(3); + + // overwrite index 1 with a short string + vector.setSafe(1, STR6); + vector.setValueCount(3); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(STR6, vector.get(1)); + assertArrayEquals(STR7, vector.get(2)); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 5); + // set long string 1 + vector.setSafe(0, STR3); + // set short string 1 + vector.setSafe(1, STR5); + // set long string 2 + vector.setSafe(2, STR7); + // set long string 3 + vector.setSafe(3, STR8); + // set short string 2 + vector.setSafe(4, STR6); + vector.setValueCount(5); + + // overwrite index 2 with a short string + vector.setSafe(2, STR0); + vector.setValueCount(5); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(STR5, vector.get(1)); + assertArrayEquals(STR0, vector.get(2)); + assertArrayEquals(STR8, vector.get(3)); + assertArrayEquals(STR6, vector.get(4)); + } + } + + @Test + public void testSafeOverwriteLongFromAShorterLongString() { + // Overwriting at the beginning of the buffer. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.setSafe(0, STR7); + vector.setValueCount(1); + // set shorter long string + vector.setSafe(0, STR3); + vector.setValueCount(1); + + assertArrayEquals(STR3, vector.get(0)); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + // extra memory is allocated + vector.allocateNew(16, 3); + // set long string 1 + vector.setSafe(0, STR3); + // set long string 2 + vector.setSafe(1, STR8); + // set long string 3 + vector.setSafe(2, STR7); + vector.setValueCount(3); + + // overwrite index 1 with a shorter long string + vector.setSafe(1, STR2); + vector.setValueCount(3); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(STR2, vector.get(1)); + assertArrayEquals(STR7, vector.get(2)); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 5); + // set long string 1 + vector.setSafe(0, STR3); + // set short string 1 + vector.setSafe(1, STR5); + // set long string 2 + vector.setSafe(2, STR7); + // set long string 3 + vector.setSafe(3, STR8); + // set short string 2 + vector.setSafe(4, STR6); + vector.setValueCount(5); + + // overwrite index 2 with a shorter long string + vector.setSafe(2, STR2); + vector.setValueCount(5); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(STR5, vector.get(1)); + assertArrayEquals(STR2, vector.get(2)); + assertArrayEquals(STR8, vector.get(3)); + assertArrayEquals(STR6, vector.get(4)); + } + } + + @Test + public void testSafeOverwriteLongFromALongerLongString() { + // Overwriting at the beginning of the buffer. 
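+ // (Note for clarity: unlike the set() variants exercised above, setSafe() grows the + // vector as needed, so the testSafeOverwrite* family asserts only on the values + // read back rather than on the dataBuffers layout.)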
+ try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 1); + // set long string + vector.setSafe(0, STR3); + vector.setValueCount(1); + // set longer long string + vector.setSafe(0, STR7); + vector.setValueCount(1); + + assertArrayEquals(STR7, vector.get(0)); + } + + // Overwriting in the middle of the buffer when existing buffers are all longs. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + // extra memory is allocated + vector.allocateNew(16, 3); + // set long string 1 + vector.setSafe(0, STR3); + // set long string 2 + vector.setSafe(1, STR8); + // set long string 3 + vector.setSafe(2, STR7); + vector.setValueCount(3); + + String longerString = generateRandomString(35); + byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); + + vector.setSafe(1, longerStringBytes); + vector.setValueCount(3); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(longerStringBytes, vector.get(1)); + assertArrayEquals(STR7, vector.get(2)); + } + + // Overwriting in the middle of the buffer with a mix of short and long strings. + try (final ViewVarCharVector vector = new ViewVarCharVector("myviewvector", allocator)) { + vector.allocateNew(16, 5); + // set long string 1 + vector.setSafe(0, STR3); + // set short string 1 + vector.setSafe(1, STR5); + // set long string 2 + vector.setSafe(2, STR7); + // set long string 3 + vector.setSafe(3, STR2); + // set short string 2 + vector.setSafe(4, STR6); + vector.setValueCount(5); + + String longerString = generateRandomString(24); + byte[] longerStringBytes = longerString.getBytes(StandardCharsets.UTF_8); + + vector.setSafe(2, longerStringBytes); + vector.setValueCount(5); + + assertArrayEquals(STR3, vector.get(0)); + assertArrayEquals(STR5, vector.get(1)); + assertArrayEquals(longerStringBytes, vector.get(2)); + assertArrayEquals(STR2, vector.get(3)); + assertArrayEquals(STR6, vector.get(4)); + + } + } + + private String generateRandomString(int length) { + Random random = new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 9bfcb3c635d86..45e6e630792a9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -61,6 +61,7 @@ import org.apache.arrow.vector.UInt8Vector; import org.apache.arrow.vector.VarBinaryVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VariableWidthFieldVector; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; @@ -586,6 +587,17 @@ public static void setVector(VarCharVector vector, byte[]... values) { vector.setValueCount(length); } + public static void setVector(VariableWidthFieldVector vector, byte[]... values) { + final int length = values.length; + vector.allocateNewSafe(); + for (int i = 0; i < length; i++) { + if (values[i] != null) { + vector.set(i, values[i]); + } + } + vector.setValueCount(length); + } + /** * Populate values for LargeVarCharVector. 
*/ From ff679790e7a93969a13aa6842e2f70e7d6a208e5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 14:05:04 +0900 Subject: [PATCH 013/261] MINOR: [Java] Bump org.cyclonedx:cyclonedx-maven-plugin from 2.7.11 to 2.8.0 in /java (#41210) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.cyclonedx:cyclonedx-maven-plugin](https://github.com/CycloneDX/cyclonedx-maven-plugin) from 2.7.11 to 2.8.0.

Release notes

Sourced from org.cyclonedx:cyclonedx-maven-plugin's releases for 2.8.0: 🚀 new features and improvements, 🐛 bug fixes, and 📦 dependency updates.

Commits
  • 90e3817 [maven-release-plugin] prepare release cyclonedx-maven-plugin-2.8.0
  • eed838e convert external reference type by value instead of default CONSTANT_NAME
  • 3fd83bf Bump org.apache.maven.plugins:maven-compiler-plugin
  • 343c62d check if configured schemaVersion is supported
  • d001542 distribution-intake external reference is more accurate
  • fa5541d Bump actions/checkout from 4.1.1 to 4.1.2
  • a43cd05 Bump org.apache.commons:commons-compress
  • 31ff1f4 Bump org.junit:junit-bom from 5.10.1 to 5.10.2
  • ce8a6e7 Bump release-drafter/release-drafter from 5 to 6
  • 16dcb5b Bump commons-codec:commons-codec from 1.16.0 to 1.16.1
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.cyclonedx:cyclonedx-maven-plugin&package-manager=maven&previous-version=2.7.11&new-version=2.8.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:
  • `@ dependabot rebase` will rebase this PR
  • `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
  • `@ dependabot merge` will merge this PR after your CI passes on it
  • `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
  • `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
  • `@ dependabot reopen` will reopen this PR if it is closed
  • `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
  • `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
  • `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
  • `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/maven/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 4314192eda73b..558532012a1ae 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -65,7 +65,7 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 + 2.8.0 diff --git a/java/pom.xml b/java/pom.xml index 39fd1e00b64e9..f3639858d7818 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -519,7 +519,7 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 + 2.8.0 org.apache.drill.tools From 6eb0b37386ecbfc4108e914d6dadb8b049a6f549 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Mon, 29 Apr 2024 08:39:07 -0500 Subject: [PATCH 014/261] GH-41402: [CI][R] Update our backwards compatibility CI and any other R 4.4 cleanups (#41403) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Keep up with the state of the world and ensure we are maintaining backwards compatibility. Resolves #41402 ### What changes are included in this PR? * Bump to 4.4 as the release * Remove old 3.6 jobs now that we no longer support it; clean up code where we hardcode things for 3.6 and below * Move many of our CI jobs to [rhub's new containers](https://github.com/r-hub/containers). We were accidentally running stale R devel (from December 2023) because the other rhub images stopped being updated. (One exception to be done as a follow-on: #41416) * Resolve a number of extended test failures With this PR R extended tests should be all green with the exceptions of: * Two sanitizer jobs (test-fedora-r-clang-sanitizer, test-ubuntu-r-sanitizer) — which are being investigated / fixed in #41421 * Valgrind — I'm running one last run with a new suppression file. * Binary jobs — these work but fail at upload, see https://github.com/apache/arrow/pull/41403#discussion_r1582245207 * Windows R Release — failing on main, #41398 ### Are these changes tested? By definition. ### Are there any user-facing changes? No.
* GitHub Issue: #41402 Lead-authored-by: Jonathan Keane Co-authored-by: Jacob Wujciak-Jens Signed-off-by: Jonathan Keane --- .env | 6 +- .github/workflows/r.yml | 4 +- ci/docker/linux-apt-docs.dockerfile | 2 +- ci/docker/linux-apt-lint.dockerfile | 2 +- ci/docker/linux-apt-r.dockerfile | 2 +- ci/etc/valgrind-cran.supp | 20 ++++++- ci/scripts/r_sanitize.sh | 4 +- ci/scripts/r_test.sh | 7 ++- ci/scripts/r_valgrind.sh | 2 +- ...github.linux.arrow.version.back.compat.yml | 2 + dev/tasks/r/github.linux.offline.build.yml | 2 +- dev/tasks/r/github.linux.versions.yml | 2 +- dev/tasks/r/github.packages.yml | 10 ++-- dev/tasks/tasks.yml | 12 ++-- docker-compose.yml | 5 +- r/DESCRIPTION | 2 +- r/R/dplyr-funcs-type.R | 2 +- r/R/util.R | 14 ----- r/tests/testthat/test-Array.R | 5 -- r/tests/testthat/test-RecordBatch.R | 16 ++---- r/tests/testthat/test-Table.R | 4 -- r/tests/testthat/test-altrep.R | 7 ++- r/tests/testthat/test-chunked-array.R | 5 -- r/tests/testthat/test-dplyr-collapse.R | 10 ---- r/tests/testthat/test-dplyr-funcs-datetime.R | 32 ++++++----- r/tests/testthat/test-dplyr-funcs-type.R | 3 +- r/tests/testthat/test-dplyr-glimpse.R | 5 -- r/tests/testthat/test-scalar.R | 4 -- r/tools/test-nixlibs.R | 7 ++- r/vignettes/developers/docker.Rmd | 50 ++++++++--------- r/vignettes/install.Rmd | 55 +++++++++---------- 31 files changed, 139 insertions(+), 164 deletions(-) diff --git a/.env b/.env index d9f875a4d454e..ab2e4b4fbe7fb 100644 --- a/.env +++ b/.env @@ -71,12 +71,12 @@ NUMBA=latest NUMPY=latest PANDAS=latest PYTHON=3.8 -R=4.2 +R=4.4 SPARK=master TURBODBC=latest -# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-gcc-release:latest -R_IMAGE=ubuntu-gcc-release +# These correspond to images on Docker Hub that contain R, e.g. rhub/ubuntu-release:latest +R_IMAGE=ubuntu-release R_ORG=rhub R_TAG=latest diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 05c85fa6dc2c2..8228aaad7ce37 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -121,7 +121,7 @@ jobs: strategy: fail-fast: false matrix: - r: ["4.3"] + r: ["4.4"] ubuntu: [20.04] force-tests: ["true"] env: @@ -192,7 +192,7 @@ jobs: fail-fast: false matrix: config: - - { org: "rhub", image: "debian-gcc-devel", tag: "latest", devtoolset: "" } + - { org: "rhub", image: "ubuntu-gcc12", tag: "latest", devtoolset: "" } env: R_ORG: ${{ matrix.config.org }} R_IMAGE: ${{ matrix.config.image }} diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index 76b5ae6f14363..ec424b4e6eaa0 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -18,7 +18,7 @@ ARG base FROM ${base} -ARG r=4.2 +ARG r=4.4 ARG jdk=8 # See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ diff --git a/ci/docker/linux-apt-lint.dockerfile b/ci/docker/linux-apt-lint.dockerfile index 2b94a48871847..9ec80440a3c21 100644 --- a/ci/docker/linux-apt-lint.dockerfile +++ b/ci/docker/linux-apt-lint.dockerfile @@ -40,7 +40,7 @@ RUN apt-get update && \ && apt-get clean \ && rm -rf /var/lib/apt/lists/* -ARG r=4.2 +ARG r=4.4 RUN wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ # NOTE: Only R >= 4.0 is available in this repo diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index d93732abb0032..a68354e3abf8d 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -35,7 +35,7 @@ ENV LANG=C.UTF-8 # Build R # 
[1] https://www.digitalocean.com/community/tutorials/how-to-install-r-on-ubuntu-18-04 # [2] https://linuxize.com/post/how-to-install-r-on-ubuntu-18-04/#installing-r-packages-from-cran -ARG r=3.6 +ARG r=4.4 RUN apt-get update -y && \ apt-get install -y \ dirmngr \ diff --git a/ci/etc/valgrind-cran.supp b/ci/etc/valgrind-cran.supp index 4d29220260823..e93c2a3465f79 100644 --- a/ci/etc/valgrind-cran.supp +++ b/ci/etc/valgrind-cran.supp @@ -16,7 +16,7 @@ # under the License. { - # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. + # `testthat::skip()`s cause a valgrind error that does not show up on CRAN. Memcheck:Cond fun:gregexpr_Regexc @@ -32,3 +32,21 @@ fun:getvar fun:bcEval } +{ + # This also doesn't seem to cause issues on CRAN, so suppress it. + + Memcheck:Leak + match-leak-kinds: possible + fun:malloc + fun:libdeflate_alloc_compressor + fun:do_memCompress + fun:bcEval_loop + fun:bcEval + fun:Rf_eval + fun:R_execClosure + fun:applyClosure_core + fun:Rf_applyClosure + fun:Rf_eval + fun:do_set + fun:Rf_eval +} diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index 600ee0fa2cbe5..f7ed07f0c864b 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -49,7 +49,7 @@ export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" # run tests pushd tests -${R_BIN} < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } +${R_BIN} --no-save < testthat.R > testthat.out 2>&1 || { cat testthat.out; exit 1; } cat testthat.out if grep -q "runtime error" testthat.out; then @@ -58,7 +58,7 @@ fi # run examples popd -${R_BIN} -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } +${R_BIN} --no-save -e 'library(arrow); testthat::test_examples(".")' >> examples.out 2>&1 || { cat examples.out; exit 1; } cat examples.out if grep -q "runtime error" examples.out; then diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 72078ab3c06c2..95a49ee83a79b 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -46,7 +46,12 @@ if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} export R_LD_LIBRARY_PATH=${LD_LIBRARY_PATH} fi -export _R_CHECK_COMPILATION_FLAGS_KNOWN_=${ARROW_R_CXXFLAGS} + +export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} ${ARROW_R_CXXFLAGS}" +# These should generally be picked up, but are slightly wrong in rhub's containers it appears +# https://github.com/r-hub/containers/pull/63 +export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-parentheses -Werror=format-security -Wp,-D_FORTIFY_SOURCE=3" + if [ "$ARROW_R_DEV" = "TRUE" ]; then # These are sometimes used in the Arrow C++ build and are not a problem export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-attributes -msse4.2 -Wno-noexcept-type -Wno-subobject-linkage" diff --git a/ci/scripts/r_valgrind.sh b/ci/scripts/r_valgrind.sh index a14cb803ca898..0e40d792111c4 100755 --- a/ci/scripts/r_valgrind.sh +++ b/ci/scripts/r_valgrind.sh @@ -33,7 +33,7 @@ ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz pushd tests # to generate suppression files run: -# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.supp +# ${R_BIN} --vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --gen-suppressions=all --log-file=memcheck.log" -f testthat.R ${R_BIN} 
--vanilla -d "valgrind --tool=memcheck --leak-check=full --track-origins=yes --suppressions=/${1}/ci/etc/valgrind-cran.supp" -f testthat.R |& tee testthat.out # valgrind --error-exitcode=1 should return an erroring exit code that we can catch, diff --git a/dev/tasks/r/github.linux.arrow.version.back.compat.yml b/dev/tasks/r/github.linux.arrow.version.back.compat.yml index 804f0d2127059..086705dbb9cf4 100644 --- a/dev/tasks/r/github.linux.arrow.version.back.compat.yml +++ b/dev/tasks/r/github.linux.arrow.version.back.compat.yml @@ -73,6 +73,8 @@ jobs: config: # We use the R version that was released at the time of the arrow release in order # to make sure we can download binaries from RSPM. + - { old_arrow_version: '14.0.2.1', r: '4.3' } + - { old_arrow_version: '13.0.0.1', r: '4.3' } - { old_arrow_version: '12.0.1.1', r: '4.3' } - { old_arrow_version: '11.0.0.3', r: '4.2' } - { old_arrow_version: '10.0.1', r: '4.2' } diff --git a/dev/tasks/r/github.linux.offline.build.yml b/dev/tasks/r/github.linux.offline.build.yml index 7a747ac480084..9ac0ebc40835e 100644 --- a/dev/tasks/r/github.linux.offline.build.yml +++ b/dev/tasks/r/github.linux.offline.build.yml @@ -84,7 +84,7 @@ jobs: {{ macros.github_set_sccache_envvars()|indent(8)}} run: | cd arrow/r - R CMD INSTALL --install-tests --no-test-load --no-docs --no-help --no-byte-compile arrow_with_deps.tar.gz + R CMD INSTALL --install-tests --no-test-load --no-byte-compile arrow_with_deps.tar.gz - name: Run the tests run: R -e 'if(tools::testInstalledPackage("arrow") != 0L) stop("There was a test failure.")' - name: Dump test logs diff --git a/dev/tasks/r/github.linux.versions.yml b/dev/tasks/r/github.linux.versions.yml index 48093e9fd5b32..753efe61d048e 100644 --- a/dev/tasks/r/github.linux.versions.yml +++ b/dev/tasks/r/github.linux.versions.yml @@ -30,9 +30,9 @@ jobs: r_version: # We test devel, release, and oldrel in regular CI. # This is for older versions - - "3.6" - "4.0" - "4.1" + - "4.2" env: R_ORG: "rstudio" R_IMAGE: "r-base" diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 2979f57bb6146..9ca7e59a957de 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -227,7 +227,7 @@ jobs: working-directory: 'arrow' extra-packages: cpp11 - name: Set CRAN like openssl - if: contains(matrix.platform.runs_on, 'arm64') + if: contains(matrix.platform.name, 'arm64') run: | # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the # default setting of the brew --prefix as root dir to avoid version conflicts. @@ -300,16 +300,14 @@ jobs: # an OS that is not in the allowlist, so we have to opt-in to use the # binary. Other env vars used in r_docker_configure.sh can be added # here (like devtoolset) and wired up in the later steps. 
- - {image: "rhub/debian-clang-devel", libarrow_binary: "TRUE"} + - {image: "rhub/ubuntu-clang", libarrow_binary: "TRUE"} # fedora-clang-devel cannot use binaries bc of libc++ (uncomment to see the error) # - {image: "rhub/fedora-clang-devel", libarrow_binary: "TRUE"} - - {image: "rhub/ubuntu-gcc-release"} # currently ubuntu-20.04 (focal) - - {image: "rocker/r-ubuntu:22.04"} # openssl3 - - {image: "rocker/r-ver"} # whatever is latest ubuntu LTS + - {image: "rhub/ubuntu-release"} # currently ubuntu-22.04 - {image: "rocker/r-ver:4.0.0"} # ubuntu-20.04 - - {image: "rocker/r-ver:3.6.3", libarrow_binary: "TRUE"} # debian:buster (10) - {image: "rstudio/r-base:4.1-focal"} # ubuntu-20.04 - {image: "rstudio/r-base:4.2-centos7", devtoolset: "8"} + - {image: "rstudio/r-base:4.3-noble"} steps: # Get the arrow checkout just for the docker config scripts # Don't need submodules for this (hence false arg to macro): they fail on diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 3e7f7ea0c43a8..52a235c688eda 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -888,12 +888,12 @@ tasks: - r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip - r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip + - r-pkg__bin__windows__contrib__4.4__arrow_{no_rc_r_version}\.zip - r-pkg__bin__windows__contrib__4.3__arrow_{no_rc_r_version}\.zip - - r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip + - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-x86_64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__contrib__4.2__arrow_{no_rc_r_version}\.tgz + - r-pkg__bin__macosx__big-sur-arm64__contrib__4.4__arrow_{no_rc_r_version}\.tgz - r-pkg__bin__macosx__big-sur-arm64__contrib__4.3__arrow_{no_rc_r_version}\.tgz - - r-pkg__bin__macosx__big-sur-arm64__contrib__4.2__arrow_{no_rc_r_version}\.tgz - r-pkg__src__contrib__arrow_{no_rc_r_version}\.tar\.gz @@ -1356,7 +1356,7 @@ tasks: r_tag: latest r_custom_ccache: true -{% for r_org, r_image, r_tag in [("rhub", "ubuntu-gcc-release", "latest"), +{% for r_org, r_image, r_tag in [("rhub", "ubuntu-release", "latest"), ("rocker", "r-ver", "latest"), ("rstudio", "r-base", "4.2-focal"), ("rstudio", "r-base", "4.1-opensuse153")] %} @@ -1377,9 +1377,9 @@ tasks: template: r/azure.linux.yml params: r_org: rhub - r_image: debian-gcc-devel-lto + r_image: gcc13 r_tag: latest - flags: '-e NOT_CRAN=false -e INSTALL_ARGS=--use-LTO' + flags: '-e INSTALL_ARGS=--use-LTO' # This one has -flto=auto test-r-ubuntu-22.04: diff --git a/docker-compose.yml b/docker-compose.yml index 60edf1420bc0f..d771fc2d22a35 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1459,8 +1459,8 @@ services: # (including building the C++ library) on any Docker image that contains R # # Usage: - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r - # R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r + # R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r + # R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r image: ${REPO}:r-${R_ORG}-${R_IMAGE}-${R_TAG} build: context: . 
@@ -1523,6 +1523,7 @@ services: cache_from: - ${REPO}:r-rhub-fedora-clang-devel-latest args: + # TODO: change this to rhub/clang-asan base: rhub/fedora-clang-devel-san r_dev: ${ARROW_R_DEV} devtoolset_version: ${DEVTOOLSET_VERSION} diff --git a/r/DESCRIPTION b/r/DESCRIPTION index eeff8168b361c..38cbaa94a3c25 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -21,7 +21,7 @@ Description: 'Apache' 'Arrow' is a cross-language language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware. This package provides an interface to the 'Arrow C++' library. -Depends: R (>= 3.4) +Depends: R (>= 4.0) License: Apache License (>= 2.0) URL: https://github.com/apache/arrow/, https://arrow.apache.org/docs/r/ BugReports: https://github.com/apache/arrow/issues diff --git a/r/R/dplyr-funcs-type.R b/r/R/dplyr-funcs-type.R index f244682737cb4..efb3c6b756a16 100644 --- a/r/R/dplyr-funcs-type.R +++ b/r/R/dplyr-funcs-type.R @@ -140,7 +140,7 @@ register_bindings_type_cast <- function() { fix.empty.names = TRUE, stringsAsFactors = FALSE) { # we need a specific value of stringsAsFactors because the default was - # TRUE in R <= 3.6 + # TRUE in R <= 3.6 and folks might still be cargoculting to stay in the past. if (!identical(stringsAsFactors, FALSE)) { arrow_not_supported("stringsAsFactors = TRUE") } diff --git a/r/R/util.R b/r/R/util.R index a7cb5b3792d29..14e4544ab1e54 100644 --- a/r/R/util.R +++ b/r/R/util.R @@ -15,20 +15,6 @@ # specific language governing permissions and limitations # under the License. -# for compatibility with R versions earlier than 4.0.0 -if (!exists("deparse1")) { - deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) { - paste(deparse(expr, width.cutoff, ...), collapse = collapse) - } -} - -# for compatibility with R versions earlier than 3.6.0 -if (!exists("str2lang")) { - str2lang <- function(s) { - parse(text = s, keep.source = FALSE)[[1]] - } -} - oxford_paste <- function(x, conjunction = "and", quote = TRUE, diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index bb005605de318..98068bdea20d5 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -818,11 +818,6 @@ test_that("Handling string data with embedded nuls", { ) array_with_nul <- arrow_array(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 
3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - # no error on conversion, because altrep laziness v <- expect_error(as.vector(array_with_nul), NA) diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index f29b75dbf4095..5987f5a4b7c17 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -595,14 +595,10 @@ test_that("RecordBatch supports cbind", { ) # Rejects Table and ChunkedArray arguments - if (getRversion() >= "4.0.0") { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - expect_error( - cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), - regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" - ) - } + expect_error( + cbind(record_batch(a = 1:2), arrow_table(b = 3:4)), + regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" + ) expect_error( cbind(record_batch(a = 1:2), b = chunked_array(1, 2)), regexp = "Cannot cbind a RecordBatch with Tables or ChunkedArrays" @@ -622,10 +618,6 @@ test_that("Handling string data with embedded nuls", { batch_with_nul <- record_batch(a = 1:5, b = raws) batch_with_nul$b <- batch_with_nul$b$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") df <- as.data.frame(batch_with_nul) expect_error( diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index ce3254a158eee..f6cec3b2b7683 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -581,10 +581,6 @@ test_that("Table supports cbind", { }) test_that("cbind.Table handles record batches and tables", { - # R 3.6 cbind dispatch rules cause cbind to fall back to default impl if - # there are multiple arguments with distinct cbind implementations - skip_if(getRversion() < "4.0.0", "R 3.6 cbind dispatch rules prevent this behavior") - expect_equal( cbind(arrow_table(a = 1L:2L), record_batch(b = 4:5)), arrow_table(a = 1L:2L, b = 4:5) diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 7a66d0e778282..50bd40988e550 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -15,8 +15,6 @@ # specific language governing permissions and limitations # under the License. 
-skip_on_r_older_than("3.6") - test_that("altrep test functions do not include base altrep", { expect_false(is_arrow_altrep(1:10)) expect_identical(test_arrow_altrep_is_materialized(1:10), NA) @@ -373,6 +371,11 @@ test_that("altrep min/max/sum identical to R versions for double", { expect_altrep_roundtrip(x, max) expect_altrep_roundtrip(x, sum) + # On valgrind the NA_real_ is sometimes transformed to NaN + # https://stat.ethz.ch/pipermail/r-devel/2021-April/080683.html + # so we skip these there to avoid complicated NA == NaN logic, + # and they are tested on a number of other platforms / conditions + skip_on_linux_devel() x <- c(1, 2, NA_real_) expect_altrep_roundtrip(x, min, na.rm = TRUE) expect_altrep_roundtrip(x, max, na.rm = TRUE) diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 223f5022d3b94..bb01df427f713 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -475,11 +475,6 @@ test_that("Handling string data with embedded nuls", { ) chunked_array_with_nul <- ChunkedArray$create(raws)$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") - v <- expect_error(as.vector(chunked_array_with_nul), NA) expect_error( diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R index a8aa5556f1e0d..f50fa8945db11 100644 --- a/r/tests/testthat/test-dplyr-collapse.R +++ b/r/tests/testthat/test-dplyr-collapse.R @@ -185,16 +185,6 @@ See $.data for the source Arrow object", fixed = TRUE ) - skip_if(getRversion() < "3.6.0", "TODO investigate why these aren't equal") - # On older R versions: - # ── Failure (test-dplyr-collapse.R:172:3): Properties of collapsed query ──────── - # head(q, 1) %>% collect() not equal to tibble::tibble(lgl = FALSE, total = 8L, extra = 40). - # Component "total": Mean relative difference: 0.3846154 - # Component "extra": Mean relative difference: 0.3846154 - # ── Failure (test-dplyr-collapse.R:176:3): Properties of collapsed query ──────── - # tail(q, 1) %>% collect() not equal to tibble::tibble(lgl = NA, total = 25L, extra = 125). 
- # Component "total": Mean relative difference: 0.9230769 - # Component "extra": Mean relative difference: 0.9230769 expect_equal( q %>% arrange(lgl) %>% diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index 4d3226798d3ff..6f520f6e3223b 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -180,7 +180,7 @@ test_that("strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -198,7 +198,7 @@ test_that("strptime works for individual formats", { skip_on_cran() # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") expect_equal( @@ -269,7 +269,7 @@ test_that("timestamp round trip correctly via strftime and strptime", { skip_on_cran() # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") tz <- "Pacific/Marquesas" @@ -291,7 +291,9 @@ test_that("timestamp round trip correctly via strftime and strptime", { # Some formats are not supported on Windows if (!tolower(Sys.info()[["sysname"]]) == "windows") { - formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%r", "%T%z") + # "%r" could also be here, though it is only valid in some locales (those + # that use 12 hour formats, so skip for now) + formats <- c(formats, "%a", "%A", "%b", "%B", "%OS", "%I%p", "%T%z") } for (fmt in formats) { @@ -2080,7 +2082,7 @@ test_that("as_datetime() works with other functions", { test_that("parse_date_time() works with year, month, and date components", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2139,7 +2141,7 @@ test_that("parse_date_time() works with year, month, and date components", { test_that("parse_date_time() works with a mix of formats and orders", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_df <- tibble( string_combi = c("2021-09-1", "2/09//2021", "09.3.2021") @@ -2169,7 +2171,7 @@ test_that("year, month, day date/time parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2221,7 +2223,7 @@ test_that("ym, my & yq parsers", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% @@ -2270,7 +2272,7 @@ test_that("ym, my & yq parsers", { test_that("parse_date_time's other formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2401,7 +2403,7 @@ test_that("lubridate's fast_strptime", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 
library skip_if_not_available("re2") compare_dplyr_binding( @@ -2508,7 +2510,7 @@ test_that("parse_date_time with hours, minutes and seconds components", { # the unseparated strings are versions of "1987-08-22 20:13:59" (with %y) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( @@ -2638,7 +2640,7 @@ test_that("parse_date_time with month names and HMS", { skip_on_os("windows") # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") test_dates_times2 <- tibble( @@ -2737,7 +2739,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { # https://issues.apache.org/jira/browse/ARROW-17146 # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6 & the minimal nightly builds) + # RE2 library (not available in the minimal nightly builds) skip_if_not_available("re2") expect_warning( @@ -2766,7 +2768,7 @@ test_that("parse_date_time with `quiet = FALSE` not supported", { test_that("parse_date_time with truncated formats", { # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") test_truncation_df <- tibble( @@ -2853,7 +2855,7 @@ test_that("parse_date_time with `exact = TRUE`, and with regular R objects", { ) # these functions' internals use some string processing which requires the - # RE2 library (not available on Windows with R 3.6) + # RE2 library skip_if_not_available("re2") compare_dplyr_binding( .input %>% diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R index 2624e16156bce..ecb6b3b7b45b6 100644 --- a/r/tests/testthat/test-dplyr-funcs-type.R +++ b/r/tests/testthat/test-dplyr-funcs-type.R @@ -754,11 +754,10 @@ test_that("structs/nested data frames/tibbles can be created", { ) # check that data.frame is mapped too - # stringsAsFactors default is TRUE in R 3.6, which is still tested on CI compare_dplyr_binding( .input %>% transmute( - df_col = data.frame(regular_col1, regular_col2, stringsAsFactors = FALSE) + df_col = data.frame(regular_col1, regular_col2) ) %>% collect() %>% mutate(df_col = as.data.frame(df_col)), diff --git a/r/tests/testthat/test-dplyr-glimpse.R b/r/tests/testthat/test-dplyr-glimpse.R index c93273bdeef34..d39fef9e82cca 100644 --- a/r/tests/testthat/test-dplyr-glimpse.R +++ b/r/tests/testthat/test-dplyr-glimpse.R @@ -15,11 +15,6 @@ # specific language governing permissions and limitations # under the License. -# The glimpse output for tests with `example_data` is different on R < 3.6 -# because the `lgl` column is generated with `sample()` and the RNG -# algorithm is different in older R versions. 
-skip_on_r_older_than("3.6") - library(dplyr, warn.conflicts = FALSE) test_that("glimpse() Table/ChunkedArray", { diff --git a/r/tests/testthat/test-scalar.R b/r/tests/testthat/test-scalar.R index 06f956504350e..8335dc95cd85c 100644 --- a/r/tests/testthat/test-scalar.R +++ b/r/tests/testthat/test-scalar.R @@ -94,10 +94,6 @@ test_that("Handling string data with embedded nuls", { ) scalar_with_nul <- scalar(raws, binary())$cast(utf8()) - # The behavior of the warnings/errors is slightly different with and without - # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately - # on `as.vector()` where as with it, the error only happens on materialization) - skip_on_r_older_than("3.6") v <- expect_error(as.vector(scalar_with_nul), NA) expect_error( v[1], diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index ed5192d806990..6996f234ced45 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -23,8 +23,9 @@ TESTING <- TRUE # The functions use `on_macos` from the env they were sourced in, so we need tool # explicitly set it in that environment. +# We capture.output for a cleaner testthat output. nixlibs_env <- environment() -source("nixlibs.R", local = nixlibs_env) +capture.output(source("nixlibs.R", local = nixlibs_env)) test_that("identify_binary() based on LIBARROW_BINARY", { expect_null(identify_binary("FALSE")) @@ -157,6 +158,10 @@ test_that("check_allowlist", { }) test_that("find_latest_nightly()", { + skip_if( + getRversion() > "4.4.0", + "long last version components (>8) fail to max on r-devel" + ) tf <- tempfile() tf_uri <- paste0("file://", tf) on.exit(unlink(tf)) diff --git a/r/vignettes/developers/docker.Rmd b/r/vignettes/developers/docker.Rmd index de2795cfa6bb5..13f60904c9484 100644 --- a/r/vignettes/developers/docker.Rmd +++ b/r/vignettes/developers/docker.Rmd @@ -5,23 +5,23 @@ description: > output: rmarkdown::html_vignette --- -Arrow is compatible with a huge number of combinations of OSs, OS versions, -compilers, R versions, and other variables. Sometimes these combinations of -variables means that behaviours are found in some environments which cannot be -replicated in others. In addition, there are different ways of building Arrow, -for example, using environment variables to specify the building of optional +Arrow is compatible with a huge number of combinations of OSs, OS versions, +compilers, R versions, and other variables. Sometimes these combinations of +variables means that behaviours are found in some environments which cannot be +replicated in others. In addition, there are different ways of building Arrow, +for example, using environment variables to specify the building of optional components. -What all this means is that you may need to use a different setup to the one in -which you are working, when diagnosing a bug or testing out a new feature which -you have reason to believe may be affected by these variables. One way to do +What all this means is that you may need to use a different setup to the one in +which you are working, when diagnosing a bug or testing out a new feature which +you have reason to believe may be affected by these variables. One way to do this is so spin up a Docker image containing the desired setup. This article provides a basic guide to using Docker in your R development. ## How do I run a Docker container? 
-There are a number of images which have been created for the convenience of +There are a number of images which have been created for the convenience of Arrow devs and you can find them on [the DockerHub repo](https://hub.docker.com/r/apache/arrow-dev/tags). The code below shows an example command you could use to run a Docker container. @@ -29,7 +29,7 @@ The code below shows an example command you could use to run a Docker container. This should be run in the root directory of a checkout of the arrow repo. ```shell -docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-gcc-release-latest +docker run -it -e ARROW_DEPENDENCY_SOURCE=AUTO -v $(pwd):/arrow apache/arrow-dev:r-rhub-ubuntu-release-latest ``` Components: @@ -39,13 +39,13 @@ Components: * `-e ARROW_DEPENDENCY_SOURCE=AUTO` - set the environment variable `ARROW_DEPENDENCY_SOURCE` to the value `AUTO` * `-v $(pwd):/arrow` - mount the current directory at `/arrow` in the container * `apache/arrow-dev` - the DockerHub repo to get this container from -* `r-rhub-ubuntu-gcc-release-latest` - the image tag +* `r-rhub-ubuntu-release-latest` - the image tag -Once you run this command, if you don't have a copy of that particular image +Once you run this command, if you don't have a copy of that particular image saved locally, it will first be downloaded before a container is spun up. -In the example above, mounting the directory in which the Arrow repo was stored -on the local machine, meant that that code could be built and tested on the +In the example above, mounting the directory in which the Arrow repo was stored +on the local machine, meant that that code could be built and tested on the container. ## How do I exit this image? @@ -73,29 +73,29 @@ sudo docker ps -a ## Running existing workflows from docker-compose.yml There are a number of workflows outlined in the file `docker-compose.yml` in the -arrow repo root directory. For example, you can use the workflow called `r` to -test building and installing the R package. This is advantageous as you can use -existing utility scripts and install it onto a container which already has R on +arrow repo root directory. For example, you can use the workflow called `r` to +test building and installing the R package. This is advantageous as you can use +existing utility scripts and install it onto a container which already has R on it. -These workflows are also parameterized, which means you can specify different +These workflows are also parameterized, which means you can specify different options (or just use the defaults, which can be found in `.env`) ### Example - The manual way -If you wanted to run [RHub's latest `ubuntu-gcc-release` image](https://hub.docker.com/r/rhub/ubuntu-gcc-release), you could +If you wanted to run [RHub's latest `ubuntu-release` image](https://hub.docker.com/r/rhub/ubuntu-release), you could run: -```shell -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r +```shell +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r ``` ### Example - Using Archery Alternatively, you may prefer to use the [Archery tool to run docker images](https://arrow.apache.org/docs/developers/docker.html). 
-This has the advantage of making it simpler to build some of the existing Arrow -CI jobs which have hierarchical dependencies, and so for example, you could +This has the advantage of making it simpler to build some of the existing Arrow +CI jobs which have hierarchical dependencies, and so for example, you could build the R package on a container which already has the C++ code pre-built. This is the same tool which our CI uses - via a tool called [Crossbow](https://arrow.apache.org/docs/developers/crossbow.html). @@ -103,5 +103,5 @@ This is the same tool which our CI uses - via a tool called [Crossbow](https://a If you want to run the `r` workflow discussed above, you could run: ```shell -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest archery docker run r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest archery docker run r ``` diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index df43a9de36fc2..cc90c5ff08c60 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -69,7 +69,7 @@ The prebuilt binaries come with S3 and GCS support enabled, so you will need to ## Install release version (easy way) -On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier that installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier. +On macOS and Windows, when you run `install.packages("arrow")` and install arrow from CRAN, you get an R binary package that contains a precompiled version of libarrow. Installing binaries is much easier that installing from source, but CRAN does not host binaries for Linux. This means that the default behaviour when you run `install.packages()` on Linux is to retrieve the source version of the R package and compile both the R package _and_ libarrow from source. We'll talk about this scenario in the next section (the "less easy" way), but first we'll suggest two faster alternatives that are usually much easier. ### Binary R package with libarrow binary via RSPM/conda @@ -154,15 +154,15 @@ knitr::include_graphics("./r_source_libarrow_source.png") ``` The "less easy" way to install arrow is to install both the R package and the underlying Arrow C++ library (libarrow) from source. This method is somewhat more -difficult because compiling and installing R packages with C++ dependencies -generally requires installing system packages, which you may not have privileges -to do, and/or building the C++ dependencies separately, which introduces all sorts +difficult because compiling and installing R packages with C++ dependencies +generally requires installing system packages, which you may not have privileges +to do, and/or building the C++ dependencies separately, which introduces all sorts of additional ways for things to go wrong. -Installing from the full source build of arrow, compiling both C++ and R -bindings, will handle most of the dependency management for you, but it is -much slower than using binaries. 
However, if using binaries isn't an option -for you,or you wish to customize your Linux installation, the instructions in +Installing from the full source build of arrow, compiling both C++ and R +bindings, will handle most of the dependency management for you, but it is +much slower than using binaries. However, if using binaries isn't an option +for you,or you wish to customize your Linux installation, the instructions in this section explain how to do that. ### Basic configuration @@ -369,10 +369,10 @@ satisfy C++ dependencies. ## Offline installation -The `install-arrow.R` file mentioned in the previous section includes a -function called `create_package_with_all_dependencies()`. Normally, when -installing on a computer with internet access, the build process will -download third-party dependencies as needed. This function provides a +The `install-arrow.R` file mentioned in the previous section includes a +function called `create_package_with_all_dependencies()`. Normally, when +installing on a computer with internet access, the build process will +download third-party dependencies as needed. This function provides a way to download them in advance, which can be useful when installing Arrow on a computer without internet access. The process is as follows: @@ -380,11 +380,11 @@ on a computer without internet access. The process is as follows: **Step 1.** Using a computer with internet access, download dependencies: * Install the arrow package **or** source the script directly using the following command: - + ```r source("https://raw.githubusercontent.com/apache/arrow/main/r/R/install-arrow.R") ``` - + * Use the `create_package_with_all_dependencies()` function to create the installation bundle: ```r @@ -399,27 +399,27 @@ on a computer without internet access. The process is as follows: ```r install.packages( - "my_arrow_pkg.tar.gz", + "my_arrow_pkg.tar.gz", dependencies = c("Depends", "Imports", "LinkingTo") ) ``` This installation will build from source, so `cmake` must be available - + * Run `arrow_info()` to check installed capabilities Notes: -- arrow _can_ be installed on a computer without internet access -without using this function, but many useful features will be disabled, +- arrow _can_ be installed on a computer without internet access +without using this function, but many useful features will be disabled, as they depend on third-party components. More precisely, `arrow::arrow_info()$capabilities()` will be `FALSE` for every capability. - If you are using binary packages you shouldn't need to this function. You can download the appropriate binary from your package repository, transfer -that to the offline computer, and install that. +that to the offline computer, and install that. - If you're using RStudio Package Manager on Linux (RSPM), and you want to make a source bundle with this function, make sure to set the first repository @@ -523,11 +523,11 @@ so that we can improve the script. ## Contributing -We are constantly working to make the installation process as painless as +We are constantly working to make the installation process as painless as possible. If you find ways to improve the process, please [report an issue](https://github.com/apache/arrow/issues) so that we can document it. 
Similarly, if you find that your Linux distribution -or version is not supported, we would welcome the contribution of Docker -images (hosted on Docker Hub) that we can use in our continuous integration +or version is not supported, we would welcome the contribution of Docker +images (hosted on Docker Hub) that we can use in our continuous integration and hopefully improve our coverage. If you do contribute a Docker image, it should be as minimal as possible, containing only R and the dependencies it requires. For reference, see the images that @@ -537,19 +537,16 @@ You can test the arrow R package installation using the `docker-compose` setup included in the `apache/arrow` git repository. For example, ``` -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose build r -R_ORG=rhub R_IMAGE=ubuntu-gcc-release R_TAG=latest docker-compose run r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose build r +R_ORG=rhub R_IMAGE=ubuntu-release R_TAG=latest docker-compose run r ``` installs the arrow R package, including libarrow, on the -[rhub/ubuntu-gcc-release](https://hub.docker.com/r/rhub/ubuntu-gcc-release) +[rhub/ubuntu-release](https://hub.docker.com/r/rhub/ubuntu-release) image. ## Further reading - To learn about installing development versions, see the article on [installing nightly builds](./install_nightly.html). -- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you on set up your development environment. +- If you're contributing to the Arrow project, see the [Arrow R developers guide](./developing.html) for resources to help you on set up your development environment. - Arrow developers may also wish to read a more detailed discussion of the code run during the installation process, described in the [install details article](./developers/install_details.html). - - - From c87073737b6ffef9715549a199499b92630e8e5f Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 29 Apr 2024 11:32:01 -0400 Subject: [PATCH 015/261] MINOR: [R] refactor arrow_mask to include aggregations list (#41414) ### Rationale for this change Keeping the `..aggregations` list in parent.frame felt a little wrong. As we're starting to use this in more places (like mutate in #41350, and potentially more places), I wanted to try to improve this. I tried a bunch of things before to put it somewhere better (like in the mask) but failed. Finally I found one that worked. ### What changes are included in this PR? Just a refactor ### Are these changes tested? Existing tests pass. ### Are there any user-facing changes? Nope. --- r/R/dplyr-eval.R | 8 +++----- r/R/dplyr-funcs-agg.R | 23 ++++++++++++----------- r/R/dplyr-summarize.R | 41 ++++++++++++++++++----------------------- 3 files changed, 33 insertions(+), 39 deletions(-) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index 3aaa29696b8c8..ff1619ce944d0 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -125,13 +125,9 @@ arrow_mask <- function(.data, aggregation = FALSE) { f_env <- new_environment(.cache$functions) if (aggregation) { - # Add the aggregation functions to the environment, and set the enclosing - # environment to the parent frame so that, when called from summarize_eval(), - # they can reference and assign into `..aggregations` defined there. - pf <- parent.frame() + # Add the aggregation functions to the environment. 
for (f in names(agg_funcs)) { f_env[[f]] <- agg_funcs[[f]] - environment(f_env[[f]]) <- pf } } else { # Add functions that need to error hard and clear. @@ -156,6 +152,8 @@ arrow_mask <- function(.data, aggregation = FALSE) { # TODO: figure out what rlang::as_data_pronoun does/why we should use it # (because if we do we get `Error: Can't modify the data pronoun` in mutate()) out$.data <- .data$selected_columns + # Add the aggregations list to collect any that get pulled out when evaluating + out$.aggregations <- empty_named_list() out } diff --git a/r/R/dplyr-funcs-agg.R b/r/R/dplyr-funcs-agg.R index ab1df1d2f15a5..d84f8f28f0dff 100644 --- a/r/R/dplyr-funcs-agg.R +++ b/r/R/dplyr-funcs-agg.R @@ -17,7 +17,7 @@ # Aggregation functions # -# These all insert into an ..aggregations list (in a parent frame) a list containing: +# These all insert into an .aggregations list in the mask, a list containing: # @param fun string function name # @param data list of 0 or more Expressions # @param options list of function options, as passed to call_function @@ -154,11 +154,11 @@ register_bindings_aggregate <- function() { set_agg <- function(...) { agg_data <- list2(...) - # Find the environment where ..aggregations is stored + # Find the environment where .aggregations is stored target <- find_aggregations_env() - aggs <- get("..aggregations", target) + aggs <- get(".aggregations", target) lapply(agg_data[["data"]], function(expr) { - # If any of the fields referenced in the expression are in ..aggregations, + # If any of the fields referenced in the expression are in .aggregations, # then we can't aggregate over them. # This is mainly for combinations of dataset columns and aggregations, # like sum(x - mean(x)), i.e. window functions. @@ -169,23 +169,24 @@ set_agg <- function(...) { } }) - # Record the (fun, data, options) in ..aggregations + # Record the (fun, data, options) in .aggregations # and return a FieldRef pointing to it tmpname <- paste0("..temp", length(aggs)) aggs[[tmpname]] <- agg_data - assign("..aggregations", aggs, envir = target) + assign(".aggregations", aggs, envir = target) Expression$field_ref(tmpname) } find_aggregations_env <- function() { - # Find the environment where ..aggregations is stored, + # Find the environment where .aggregations is stored, # it's in parent.env of something in the call stack - for (f in sys.frames()) { - if (exists("..aggregations", envir = f)) { - return(f) + n <- 1 + while (TRUE) { + if (exists(".aggregations", envir = caller_env(n))) { + return(caller_env(n)) } + n <- n + 1 } - stop("Could not find ..aggregations") } ensure_one_arg <- function(args, fun) { diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 5bb81dc2b34fc..56de14db6dd44 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -80,34 +80,32 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # ExecNode), and in the expressions, replace them with FieldRefs so that # further operations can happen (in what will become a ProjectNode that works # on the result of the Aggregate). - # To do this, we create a list in this function scope, and in arrow_mask(), - # and we make sure this environment here is the parent env of the binding - # functions, so that when they receive an expression, they can pull out - # aggregations and insert them into the list, which they can find because it - # is in the parent env. 
+ # To do this, arrow_mask() includes a list called .aggregations, + # and the aggregation functions will pull out those terms and insert into + # that list. # nolint end - ..aggregations <- empty_named_list() - - # We'll collect any transformations after the aggregation here - ..post_mutate <- empty_named_list() mask <- arrow_mask(.data, aggregation = TRUE) + # We'll collect any transformations after the aggregation here. + # summarize_eval() returns NULL when the outer expression is an aggregation, + # i.e. there is no projection to do after + post_mutate <- empty_named_list() for (i in seq_along(exprs)) { # Iterate over the indices and not the names because names may be repeated # (which overwrites the previous name) name <- names(exprs)[i] - ..post_mutate[[name]] <- summarize_eval(name, exprs[[i]], mask) + post_mutate[[name]] <- summarize_eval(name, exprs[[i]], mask) } # Apply the results to the .data object. # First, the aggregations - .data$aggregations <- ..aggregations + .data$aggregations <- mask$.aggregations # Then collapse the query so that the resulting query object can have # additional operations applied to it out <- collapse.arrow_dplyr_query(.data) - # Now, add the projections in ..post_mutate (if any) - for (post in names(..post_mutate)) { + # Now, add the projections in post_mutate (if any) + for (post in names(post_mutate)) { # One last check: it's possible that an expression like y - mean(y) would # successfully evaluate, but it's not supported. It gets transformed to: # nolint start @@ -121,7 +119,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # We can tell the expression is invalid if it references fields not in # the schema of the data after summarize(). Evaulating its type will # throw an error if it's invalid. - tryCatch(..post_mutate[[post]]$type(out$.data$schema), error = function(e) { + tryCatch(post_mutate[[post]]$type(out$.data$schema), error = function(e) { msg <- paste( "Expression", as_label(exprs[[post]]), "is not a valid aggregation expression or is" @@ -129,7 +127,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { arrow_not_supported(msg) }) # If it's valid, add it to the .data object - out$selected_columns[[post]] <- ..post_mutate[[post]] + out$selected_columns[[post]] <- post_mutate[[post]] } # Make sure column order is correct (and also drop ..temp columns) @@ -266,10 +264,10 @@ format_aggregation <- function(x) { # This function evaluates an expression and returns the post-summarize # projection that results, or NULL if there is none because the top-level # expression was an aggregation. Any aggregations are pulled out and collected -# in the ..aggregations list outside this function. +# in the .aggregations list outside this function. summarize_eval <- function(name, quosure, mask) { # Add previous aggregations to the mask, so they can be referenced - for (n in names(get("..aggregations", parent.frame()))) { + for (n in names(mask$.aggregations)) { mask[[n]] <- mask$.data[[n]] <- Expression$field_ref(n) } # Evaluate: @@ -286,14 +284,11 @@ summarize_eval <- function(name, quosure, mask) { # Handle case where outer expr is ..temp field ref. This came from an # aggregation at the top level. So the resulting name should be `name`. # not `..tempN`. Rename the corresponding aggregation. 
- post_aggs <- get("..aggregations", parent.frame()) result_field_name <- value$field_name - if (result_field_name %in% names(post_aggs)) { + if (result_field_name %in% names(mask$.aggregations)) { # Do this by assigning over `name` in case something else was in `name` - post_aggs[[name]] <- post_aggs[[result_field_name]] - post_aggs[[result_field_name]] <- NULL - # Assign back into the parent environment - assign("..aggregations", post_aggs, parent.frame()) + mask$.aggregations[[name]] <- mask$.aggregations[[result_field_name]] + mask$.aggregations[[result_field_name]] <- NULL # Return NULL because there is no post-mutate projection, it's just # the aggregation return(NULL) From e3db586eb343e80dce58d8cbf6eef91aba14dfff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 11:33:30 -0400 Subject: [PATCH 016/261] MINOR: [Go] Bump github.com/apache/thrift from 0.19.0 to 0.20.0 in /go (#40777) Bumps [github.com/apache/thrift](https://github.com/apache/thrift) from 0.19.0 to 0.20.0.
Release notes

Sourced from github.com/apache/thrift's releases.

Version 0.20.0

Please head over to the official release download source: http://thrift.apache.org/download

The assets listed below are added by GitHub based on the release tag and they will therefore not match the checksums published on the Thrift project website.

Changelog

Sourced from github.com/apache/thrift's changelog.

0.20.0

Known Open Issues (Blocker or Critical)

  • THRIFT-3877 - C++ library don't work with HTTP (csharp server, cpp client; need cross test enhancement)
  • THRIFT-5468 - Swift service generator doesn't support oneway
  • THRIFT-5654 - LNK4042 and LNK2019 in go_validator_generator.cc

Changelog sections (entries omitted in this excerpt): Build Process, C++, Compiler (General), Delphi, Documentation, Erlang, Go, Haxe, Java, netstd

... (truncated)

Commits
  • 0d0ac75 Preparing 0.20.0
  • 417eafd Add license header to pypi workflow file
  • 9a253e7 THRIFT-5688: Add PyPI publishing github actions
  • dd1217f THRIFT-5745: Implement slog.LogValuer on go TStructs
  • ab6f3ef THRIFT-5744: Switch to slog for go library
  • 318731b Updated manual version info and CHANGES
  • 0c637d7 Bump com.diffplug.spotless from 6.23.3 to 6.25.0 in /lib/java
  • 9d96f20 Fix ambigous typescript definitions
  • 7d4c7fa THRIFT-5750 deprecate "ansistr_binary_" option
  • 7507fa0 THRIFT-5753 PHP 8.1 deprecated warning about return type in jsonSerialize fun...
  • Additional commits viewable in compare view
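
Two of the commits above (THRIFT-5744 and THRIFT-5745) move the Go library's logging onto the standard `log/slog` package and have generated `TStruct`s implement `slog.LogValuer`. As a minimal sketch of what that interface means for a consumer (the `Payload` type and its fields below are hypothetical stand-ins, not thrift-generated code):

```go
package main

import (
	"log/slog"
	"os"
)

// Payload stands in for a thrift-generated TStruct; the type and its
// fields are invented for illustration only.
type Payload struct {
	ID   int64
	Name string
}

// LogValue implements slog.LogValuer (Go 1.21+), so handlers render the
// struct as structured fields rather than with reflection or %v.
func (p Payload) LogValue() slog.Value {
	return slog.GroupValue(
		slog.Int64("id", p.ID),
		slog.String("name", p.Name),
	)
}

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	// The handler calls Payload.LogValue() when emitting the record.
	logger.Info("sending request", "payload", Payload{ID: 7, Name: "example"})
}
```

For Arrow's Go module the bump itself only touches `go.mod` and `go.sum`, as the diff further below shows.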

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/apache/thrift&package-manager=go_modules&previous-version=0.19.0&new-version=0.20.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 972940ee3c299..79c3cc3981231 100644 --- a/go/go.mod +++ b/go/go.mod @@ -21,7 +21,7 @@ go 1.21 require ( github.com/JohnCGriffin/overflow v0.0.0-20211019200055-46fa312c352c github.com/andybalholm/brotli v1.1.0 - github.com/apache/thrift v0.19.0 + github.com/apache/thrift v0.20.0 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 github.com/goccy/go-json v0.10.2 github.com/golang/snappy v0.0.4 diff --git a/go/go.sum b/go/go.sum index 0a45cb751f77e..e8c2fde15181a 100644 --- a/go/go.sum +++ b/go/go.sum @@ -8,8 +8,8 @@ github.com/alecthomas/repr v0.2.0 h1:HAzS41CIzNW5syS8Mf9UwXhNH1J9aix/BvDRf1Ml2Yk github.com/alecthomas/repr v0.2.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/brotli v1.1.0 h1:eLKJA0d02Lf0mVpIDgYnqXcUn0GqVmEFny3VuID1U3M= github.com/andybalholm/brotli v1.1.0/go.mod h1:sms7XGricyQI9K10gOSf56VKKWS4oLer58Q+mhRPtnY= -github.com/apache/thrift v0.19.0 h1:sOqkWPzMj7w6XaYbJQG7m4sGqVolaW/0D28Ln7yPzMk= -github.com/apache/thrift v0.19.0/go.mod h1:SUALL216IiaOw2Oy+5Vs9lboJ/t9g40C+G07Dc0QC1I= +github.com/apache/thrift v0.20.0 h1:631+KvYbsBZxmuJjYwhezVsrfc/TbqtZV4QcxOX1fOI= +github.com/apache/thrift v0.20.0/go.mod h1:hOk1BQqcp2OLzGsyVXdfMk7YFlMxK3aoEVhjD06QhB8= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= From 00df70c6dca6b7cf9a274e131ea88ed588133aec Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Mon, 29 Apr 2024 19:25:35 +0100 Subject: [PATCH 017/261] GH-41398: [R][CI] Windows job failing after R 4.4 release (#41409) ### Rationale for this change We can't throw warnings on cran. ### What changes are included in this PR? Update function to match changes in libarrow added in GH-39864 ### Are these changes tested? CI ### Are there any user-facing changes? 
No * GitHub Issue: #41398 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- r/src/extension-impl.cpp | 8 +++++++- r/src/extension.h | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/r/src/extension-impl.cpp b/r/src/extension-impl.cpp index a13b252b2832f..14c771cc98e4f 100644 --- a/r/src/extension-impl.cpp +++ b/r/src/extension-impl.cpp @@ -87,7 +87,9 @@ arrow::Result> RExtensionType::Deserialize( return std::shared_ptr(cloned.release()); } -std::string RExtensionType::ToString() const { +std::string RExtensionType::ToString() const { return ToString(false); } + +std::string RExtensionType::ToString(bool show_metadata) const { arrow::Result result = SafeCallIntoR([&]() { cpp11::environment instance = r6_instance(); cpp11::function instance_ToString(instance["ToString"]); @@ -98,7 +100,11 @@ std::string RExtensionType::ToString() const { // In the event of an error (e.g., we are not on the main thread // and we are not inside RunWithCapturedR()), just call the default method if (!result.ok()) { +#if ARROW_VERSION_MAJOR >= 16 + return ExtensionType::ToString(show_metadata); +#else return ExtensionType::ToString(); +#endif } else { return result.ValueUnsafe(); } diff --git a/r/src/extension.h b/r/src/extension.h index fbd3ad484691a..6e6c6f7c29761 100644 --- a/r/src/extension.h +++ b/r/src/extension.h @@ -52,6 +52,8 @@ class RExtensionType : public arrow::ExtensionType { std::string Serialize() const { return extension_metadata_; } + std::string ToString(bool show_metadata = false) const; + // wrapper for libarrow < 16 std::string ToString() const; cpp11::sexp Convert(const std::shared_ptr& array) const; From 2ef4059566eb3dfc5cceb85d8ea8fa83e33234bb Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 29 Apr 2024 17:19:41 -0400 Subject: [PATCH 018/261] GH-29537: [R] Support mutate/summarize with implicit join (#41350) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Since it doesn't look like Acero will be getting window functions any time soon, implement support in `mutate()` for transformations that involve aggregations, like `x - mean(x)`, via left_join. ### What changes are included in this PR? Following #41223, I realized I could reuse that evaluation path in `mutate()`. Evaluating expressions accumulates `..aggregations` and `mutate_stuff`; in summarize() we apply aggregations and then mutate on the result. If expressions in the `mutate_stuff` reference columns in the original data and not just the result of aggregations, we reject it. Here, if there are aggregations, we apply them on a copy of the query up to that point, and join the result back onto the query, then apply the mutations on that. It's not a problem for those mutate expressions to reference both columns in the original data and the results of the aggregations because both are present. There are ~three~ two caveats: * Join has non-deterministic order, so while `mutate()` doesn't generally affect row order, if this code path is activated, row order may not be stable. With datasets, it's not guaranteed anyway. * ~Acero's join seems to have a limitation currently where missing values are not joined to each other. If your join key has NA in it, and you do a left_join, your new columns will all be NA, even if there is a corresponding value in the right dataset. 
I made https://github.com/apache/arrow/issues/41358 to address that, and in the meantime, I've added a workaround (https://github.com/apache/arrow/pull/41350/commits/b9de50452e926fe5f39aeb3887a04e203302b960) that's not awesome but has the right behavior.~ Fixed and rebased. * I believe it is possible in dplyr to get this behavior in other verbs: filter, arrange, even summarize. I've only done this for mutate. Are we ok with that? ### Are these changes tested? Yes ### Are there any user-facing changes? This works now: ``` r library(arrow) library(dplyr) mtcars |> arrow_table() |> select(cyl, mpg, hp) |> group_by(cyl) |> mutate(stdize_mpg = (mpg - mean(mpg)) / sd(mpg)) |> collect() #> # A tibble: 32 × 4 #> # Groups: cyl [3] #> cyl mpg hp stdize_mpg #> #> 1 6 21 110 0.865 #> 2 6 21 110 0.865 #> 3 4 22.8 93 -0.857 #> 4 6 21.4 110 1.14 #> 5 8 18.7 175 1.41 #> 6 6 18.1 105 -1.13 #> 7 8 14.3 245 -0.312 #> 8 4 24.4 62 -0.502 #> 9 4 22.8 95 -0.857 #> 10 6 19.2 123 -0.373 #> # ℹ 22 more rows ``` Created on 2024-04-23 with [reprex v2.1.0](https://reprex.tidyverse.org) * GitHub Issue: #29537 --- r/R/arrow-package.R | 5 +-- r/R/dplyr-funcs-agg.R | 1 - r/R/dplyr-funcs-doc.R | 2 +- r/R/dplyr-mutate.R | 39 ++++++++++++------ r/man/acero.Rd | 2 +- r/tests/testthat/test-dataset-dplyr.R | 11 ------ r/tests/testthat/test-dplyr-mutate.R | 57 ++++++++++++--------------- r/vignettes/data_wrangling.Rmd | 28 +------------ 8 files changed, 58 insertions(+), 87 deletions(-) diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R index 7087a40c4903a..44dfbbcd5c7e7 100644 --- a/r/R/arrow-package.R +++ b/r/R/arrow-package.R @@ -48,10 +48,7 @@ supported_dplyr_methods <- list( group_vars = NULL, group_by_drop_default = NULL, ungroup = NULL, - mutate = c( - "window functions (e.g. things that require aggregation within groups)", - "not currently supported" - ), + mutate = NULL, transmute = NULL, arrange = NULL, rename = NULL, diff --git a/r/R/dplyr-funcs-agg.R b/r/R/dplyr-funcs-agg.R index d84f8f28f0dff..9411ce5ce6faf 100644 --- a/r/R/dplyr-funcs-agg.R +++ b/r/R/dplyr-funcs-agg.R @@ -164,7 +164,6 @@ set_agg <- function(...) { # like sum(x - mean(x)), i.e. window functions. # This will reject (sum(sum(x)) as well, but that's not a useful operation. if (any(expr$field_names_in_expression() %in% names(aggs))) { - # TODO: support in ARROW-13926 arrow_not_supported("aggregate within aggregate expression") } }) diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R index fda77bca83fc2..7f0627c33d010 100644 --- a/r/R/dplyr-funcs-doc.R +++ b/r/R/dplyr-funcs-doc.R @@ -53,7 +53,7 @@ #' * [`groups()`][dplyr::groups()] #' * [`inner_join()`][dplyr::inner_join()]: the `copy` argument is ignored #' * [`left_join()`][dplyr::left_join()]: the `copy` argument is ignored -#' * [`mutate()`][dplyr::mutate()]: window functions (e.g. things that require aggregation within groups) not currently supported +#' * [`mutate()`][dplyr::mutate()] #' * [`pull()`][dplyr::pull()]: the `name` argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow [ChunkedArray] in a future release. Provide `as_vector = TRUE/FALSE` to control this behavior, or set `options(arrow.pull_as_vector)` globally. 
#' * [`relocate()`][dplyr::relocate()] #' * [`rename()`][dplyr::rename()] diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 287532dee08a9..880f7799e6316 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -45,17 +45,11 @@ mutate.arrow_dplyr_query <- function(.data, return(out) } - # Restrict the cases we support for now - has_aggregations <- any(unlist(lapply(exprs, all_funs)) %in% names(agg_funcs)) - if (has_aggregations) { - # ARROW-13926 - # mutate() on a grouped dataset does calculations within groups - # This doesn't matter on scalar ops (arithmetic etc.) but it does - # for things with aggregations (e.g. subtracting the mean) - return(abandon_ship(call, .data, "window functions not currently supported in Arrow")) - } - - mask <- arrow_mask(out) + # Create a mask with aggregation functions in it + # If there are any aggregations, we will need to compute them and + # and join the results back in, for "window functions" like x - mean(x) + mask <- arrow_mask(out, aggregation = TRUE) + # Evaluate the mutate expressions results <- list() for (i in seq_along(exprs)) { # Iterate over the indices and not the names because names may be repeated @@ -81,6 +75,24 @@ mutate.arrow_dplyr_query <- function(.data, mask[[new_var]] <- mask$.data[[new_var]] <- results[[new_var]] } + if (length(mask$.aggregations)) { + # Make a copy of .data, do the aggregations on it, and then left_join on + # the group_by variables. + agg_query <- as_adq(.data) + # These may be computed by .by, make sure they're set + agg_query$group_by_vars <- grv + agg_query$aggregations <- mask$.aggregations + agg_query <- collapse.arrow_dplyr_query(agg_query) + if (length(grv)) { + out <- left_join(out, agg_query, by = grv) + } else { + # If there are no group_by vars, add a scalar column to both and join on that + agg_query$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) + out$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) + out <- left_join(out, agg_query, by = "..tempjoin") + } + } + old_vars <- names(out$selected_columns) # Note that this is names(exprs) not names(results): # if results$new_var is NULL, that means we are supposed to remove it @@ -91,6 +103,11 @@ mutate.arrow_dplyr_query <- function(.data, out$selected_columns[[new_var]] <- results[[new_var]] } + # Prune any ..temp columns from the result, which would have come from + # .aggregations + temps <- grepl("^\\.\\.temp", names(out$selected_columns)) + out$selected_columns <- out$selected_columns[!temps] + # Deduplicate new_vars and remove NULL columns from new_vars new_vars <- intersect(union(new_vars, grv), names(out$selected_columns)) diff --git a/r/man/acero.Rd b/r/man/acero.Rd index ca51ef56334eb..9ef9cd7dda6fb 100644 --- a/r/man/acero.Rd +++ b/r/man/acero.Rd @@ -40,7 +40,7 @@ Table into an R \code{tibble}. \item \code{\link[dplyr:group_data]{groups()}} \item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} argument is ignored \item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} argument is ignored -\item \code{\link[dplyr:mutate]{mutate()}}: window functions (e.g. things that require aggregation within groups) not currently supported +\item \code{\link[dplyr:mutate]{mutate()}} \item \code{\link[dplyr:pull]{pull()}}: the \code{name} argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow \link{ChunkedArray} in a future release. 
Provide \code{as_vector = TRUE/FALSE} to control this behavior, or set \code{options(arrow.pull_as_vector)} globally. \item \code{\link[dplyr:relocate]{relocate()}} \item \code{\link[dplyr:rename]{rename()}} diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index b8d93841921d7..1e36ea8bd4966 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -163,17 +163,6 @@ See $.data for the source Arrow object", ) }) -test_that("mutate() features not yet implemented", { - ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) - expect_error( - ds %>% - group_by(int) %>% - mutate(avg = mean(int)), - "window functions not currently supported in Arrow\nCall collect() first to pull data into R.", - fixed = TRUE - ) -}) - test_that("filter scalar validation doesn't crash (ARROW-7772)", { ds <- open_dataset(dataset_dir, partitioning = schema(part = uint8())) expect_error( diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R index 0889fffedd508..71c1e52d33c1d 100644 --- a/r/tests/testthat/test-dplyr-mutate.R +++ b/r/tests/testthat/test-dplyr-mutate.R @@ -378,18 +378,16 @@ test_that("dplyr::mutate's examples", { # The mutate operation may yield different results on grouped # tibbles because the expressions are computed within groups. # The following normalises `mass` by the global average: - # TODO(ARROW-13926): support window functions compare_dplyr_binding( .input %>% select(name, mass, species) %>% mutate(mass_norm = mass / mean(mass, na.rm = TRUE)) %>% collect(), - starwars, - warning = "window function" + starwars ) }) -test_that("Can mutate after group_by as long as there are no aggregations", { +test_that("Can mutate after group_by, including with some aggregations", { compare_dplyr_binding( .input %>% select(int, chr) %>% @@ -417,31 +415,31 @@ test_that("Can mutate after group_by as long as there are no aggregations", { collect(), tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% group_by(chr) %>% mutate(avg_int = mean(int)) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(mean = int, chr) %>% # rename `int` to `mean` and use `mean(mean)` in `mutate()` to test that # `all_funs()` detects `mean()` despite the collision with a column name group_by(chr) %>% mutate(avg_int = mean(mean)) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) }) -test_that("Can mutate with .by argument as long as there are no aggregations", { +test_that("Can mutate with .by argument, even with some aggregations", { compare_dplyr_binding( .input %>% select(int, chr) %>% @@ -479,25 +477,25 @@ test_that("Can mutate with .by argument as long as there are no aggregations", { collect(), tbl ) - expect_warning( - tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(int, chr) %>% mutate(avg_int = mean(int), .by = chr) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) - expect_warning( - 
tbl %>% - Table$create() %>% + compare_dplyr_binding( + .input %>% select(mean = int, chr) %>% # rename `int` to `mean` and use `mean(mean)` in `mutate()` to test that # `all_funs()` detects `mean()` despite the collision with a column name mutate(avg_int = mean(mean), .by = chr) %>% + # Because this silently does a join, the rows can get unsorted + arrange(chr) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + tbl ) }) @@ -682,7 +680,6 @@ test_that("mutate() and transmute() with namespaced functions", { }) test_that("Can use across() within mutate()", { - # expressions work in the right order compare_dplyr_binding( .input %>% @@ -717,17 +714,15 @@ test_that("Can use across() within mutate()", { example_data ) - # gives the right error with window functions - expect_warning( - arrow_table(example_data) %>% + compare_dplyr_binding( + .input %>% mutate( x = int + 2, across(c("int", "dbl"), list(mean = mean, sd = sd, round)), exp(dbl2) ) %>% collect(), - "window functions not currently supported in Arrow; pulling data into R", - fixed = TRUE + example_data ) }) diff --git a/r/vignettes/data_wrangling.Rmd b/r/vignettes/data_wrangling.Rmd index 305a91c156eb1..1d074ef0cfedb 100644 --- a/r/vignettes/data_wrangling.Rmd +++ b/r/vignettes/data_wrangling.Rmd @@ -165,33 +165,7 @@ sw2 %>% transmute(name, height, mass, res = residuals(lm(mass ~ height))) ``` -Because window functions are not supported, computing an aggregation like `mean()` on a grouped table or within a rowwise operation like `filter()` is not supported: - -```{r} -sw %>% - select(1:4) %>% - filter(!is.na(hair_color)) %>% - group_by(hair_color) %>% - filter(height < mean(height, na.rm = TRUE)) -``` - -This operation is sometimes referred to as a windowed aggregate and can be accomplished in Arrow by computing the aggregation separately, for example within a join operation: - -```{r} -sw %>% - select(1:4) %>% - filter(!is.na(hair_color)) %>% - left_join( - sw %>% - group_by(hair_color) %>% - summarize(mean_height = mean(height, na.rm = TRUE)) - ) %>% - filter(height < mean_height) %>% - select(!mean_height) %>% - collect() -``` - -Alternatively, [DuckDB](https:\www.duckdb.org) supports Arrow natively, so you can pass the `Table` object to DuckDB without paying a performance penalty using the helper function `to_duckdb()` and pass the object back to Arrow with `to_arrow()`: +For some operations, you can use [DuckDB](https://www.duckdb.org). It supports Arrow natively, so you can pass the `Dataset` or query object to DuckDB without paying a performance penalty using the helper function `to_duckdb()` and pass the object back to Arrow with `to_arrow()`: ```{r} sw %>% From d60ff53394788aef9a6070dfdf46a2bcade128ad Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 30 Apr 2024 08:46:26 +0900 Subject: [PATCH 019/261] GH-41427: [Go] Fix stateless prepared statements (#41428) ### Rationale for this change Stateless prepared statements didn't actually work ### What changes are included in this PR? Update the handle after binding parameters ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #41427 Authored-by: David Li Signed-off-by: David Li --- go/arrow/flight/flightsql/client.go | 93 ++++++++++-------------- go/arrow/flight/flightsql/client_test.go | 10 +-- 2 files changed, 45 insertions(+), 58 deletions(-) diff --git a/go/arrow/flight/flightsql/client.go b/go/arrow/flight/flightsql/client.go index e594191c35fdf..c6794820dc172 100644 --- a/go/arrow/flight/flightsql/client.go +++ b/go/arrow/flight/flightsql/client.go @@ -1119,24 +1119,10 @@ func (p *PreparedStatement) Execute(ctx context.Context, opts ...grpc.CallOption return nil, err } - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return nil, err - } - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return nil, err - } - if err = wr.Close(); err != nil { - return nil, err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return nil, err - } + desc, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return nil, err } - return p.client.getFlightInfo(ctx, desc, opts...) } @@ -1156,23 +1142,9 @@ func (p *PreparedStatement) ExecutePut(ctx context.Context, opts ...grpc.CallOpt return err } - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return err - } - - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return err - } - if err = wr.Close(); err != nil { - return err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return err - } + _, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return err } return nil @@ -1200,23 +1172,9 @@ func (p *PreparedStatement) ExecutePoll(ctx context.Context, retryDescriptor *fl } if retryDescriptor == nil { - if p.hasBindParameters() { - pstream, err := p.client.Client.DoPut(ctx, opts...) - if err != nil { - return nil, err - } - - wr, err := p.writeBindParameters(pstream, desc) - if err != nil { - return nil, err - } - if err = wr.Close(); err != nil { - return nil, err - } - pstream.CloseSend() - if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { - return nil, err - } + desc, err = p.bindParameters(ctx, desc, opts...) + if err != nil { + return nil, err } } return p.client.Client.PollFlightInfo(ctx, desc, opts...) @@ -1248,7 +1206,7 @@ func (p *PreparedStatement) ExecuteUpdate(ctx context.Context, opts ...grpc.Call return } if p.hasBindParameters() { - wr, err = p.writeBindParameters(pstream, desc) + wr, err = p.writeBindParametersToStream(pstream, desc) if err != nil { return } @@ -1283,7 +1241,36 @@ func (p *PreparedStatement) hasBindParameters() bool { return (p.paramBinding != nil && p.paramBinding.NumRows() > 0) || (p.streamBinding != nil) } -func (p *PreparedStatement) writeBindParameters(pstream pb.FlightService_DoPutClient, desc *pb.FlightDescriptor) (*flight.Writer, error) { +func (p *PreparedStatement) bindParameters(ctx context.Context, desc *pb.FlightDescriptor, opts ...grpc.CallOption) (*flight.FlightDescriptor, error) { + if p.hasBindParameters() { + pstream, err := p.client.Client.DoPut(ctx, opts...) 
+ if err != nil { + return nil, err + } + wr, err := p.writeBindParametersToStream(pstream, desc) + if err != nil { + return nil, err + } + if err = wr.Close(); err != nil { + return nil, err + } + pstream.CloseSend() + if err = p.captureDoPutPreparedStatementHandle(pstream); err != nil { + return nil, err + } + + cmd := pb.CommandPreparedStatementQuery{PreparedStatementHandle: p.handle} + desc, err = descForCommand(&cmd) + if err != nil { + return nil, err + } + return desc, nil + } + return desc, nil +} + +// XXX: this does not capture the updated handle. Prefer bindParameters. +func (p *PreparedStatement) writeBindParametersToStream(pstream pb.FlightService_DoPutClient, desc *pb.FlightDescriptor) (*flight.Writer, error) { if p.paramBinding != nil { wr := flight.NewRecordWriter(pstream, ipc.WithSchema(p.paramBinding.Schema())) wr.SetFlightDescriptor(desc) diff --git a/go/arrow/flight/flightsql/client_test.go b/go/arrow/flight/flightsql/client_test.go index 727fe02aa7063..33da79167c4ae 100644 --- a/go/arrow/flight/flightsql/client_test.go +++ b/go/arrow/flight/flightsql/client_test.go @@ -448,9 +448,9 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() { expectedDesc := getDesc(&pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(handle)}) // mocked DoPut result - doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(updatedHandle)} + doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(updatedHandle)} resdata, _ := proto.Marshal(doPutPreparedStatementResult) - putResult := &pb.PutResult{ AppMetadata: resdata } + putResult := &pb.PutResult{AppMetadata: resdata} // mocked client stream for DoPut mockedPut := &mockDoPutClient{} @@ -461,7 +461,7 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteParamBinding() { mockedPut.On("CloseSend").Return(nil) mockedPut.On("Recv").Return(putResult, nil) - infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(handle)} + infoCmd := &pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(updatedHandle)} desc := getDesc(infoCmd) s.mockClient.On("GetFlightInfo", desc.Type, desc.Cmd, s.callOpts).Return(&emptyFlightInfo, nil) @@ -525,9 +525,9 @@ func (s *FlightSqlClientSuite) TestPreparedStatementExecuteReaderBinding() { expectedDesc := getDesc(&pb.CommandPreparedStatementQuery{PreparedStatementHandle: []byte(query)}) // mocked DoPut result - doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(query)} + doPutPreparedStatementResult := &pb.DoPutPreparedStatementResult{PreparedStatementHandle: []byte(query)} resdata, _ := proto.Marshal(doPutPreparedStatementResult) - putResult := &pb.PutResult{ AppMetadata: resdata } + putResult := &pb.PutResult{AppMetadata: resdata} // mocked client stream for DoPut mockedPut := &mockDoPutClient{} From 6dc662324c2c46a7b5066b91cd0ace93a275ecf7 Mon Sep 17 00:00:00 2001 From: DenisTarasyuk <131180287+DenisTarasyuk@users.noreply.github.com> Date: Tue, 30 Apr 2024 03:59:51 +0300 Subject: [PATCH 020/261] GH-41433: [C++][Gandiva] Fix ascii_utf8 function to return same result on x86 and Arm (#41434) ### Rationale for this change Fixing ascii_utf8 function that has different return result on x86 and Arm due to default char type sign difference on those platforms. Added tests to cover existing x86 behavior for ascii symbols with code >127. ### What changes are included in this PR? 1. 
Added type cast to signed char to save existing x86 behavior on Arm platform. 2. Added tests cases for negative results. ### Are these changes tested? UT included. ### Are there any user-facing changes? None * GitHub Issue: #41433 Authored-by: DenisTarasyuk Signed-off-by: Sutou Kouhei --- cpp/src/gandiva/precompiled/string_ops.cc | 2 +- cpp/src/gandiva/precompiled/string_ops_test.cc | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/gandiva/precompiled/string_ops.cc b/cpp/src/gandiva/precompiled/string_ops.cc index 5aa0eb38eafd7..3849cf7bdf9a5 100644 --- a/cpp/src/gandiva/precompiled/string_ops.cc +++ b/cpp/src/gandiva/precompiled/string_ops.cc @@ -1377,7 +1377,7 @@ gdv_int32 ascii_utf8(const char* data, gdv_int32 data_len) { if (data_len == 0) { return 0; } - return static_cast(data[0]); + return static_cast(static_cast(data[0])); } // Returns the ASCII character having the binary equivalent to A. diff --git a/cpp/src/gandiva/precompiled/string_ops_test.cc b/cpp/src/gandiva/precompiled/string_ops_test.cc index 89213592e7ea2..aaa25db0a9f8d 100644 --- a/cpp/src/gandiva/precompiled/string_ops_test.cc +++ b/cpp/src/gandiva/precompiled/string_ops_test.cc @@ -51,6 +51,8 @@ TEST(TestStringOps, TestAscii) { EXPECT_EQ(ascii_utf8("", 0), 0); EXPECT_EQ(ascii_utf8("123", 3), 49); EXPECT_EQ(ascii_utf8("999", 3), 57); + EXPECT_EQ(ascii_utf8("\x80", 1), -128); + EXPECT_EQ(ascii_utf8("\xFF", 1), -1); } TEST(TestStringOps, TestChrBigInt) { From 747c8a28306f1e14439cf374b04cb8ed68e08cd2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 19:29:36 -0700 Subject: [PATCH 021/261] MINOR: [C#] Bump xunit.runner.visualstudio from 2.5.8 to 2.8.0 in /csharp (#41441) Bumps [xunit.runner.visualstudio](https://github.com/xunit/visualstudio.xunit) from 2.5.8 to 2.8.0.
Commits:
  • 6438bb8 v2.8.0
  • 2afd4cd Pick up latest dependencies
  • b8be108 Add multiplier format support to RunSettings
  • 3c2e493 Update to 2.7.2-pre.17 and support Xunit.ParallelAlgorithm in RunSetttings
  • 144931e Missing height on version
  • 4315921 Fix concurrency bug in AssemblyHelper (#407)
  • 8617393 Bump up to 2.5.9-pre
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit.runner.visualstudio&package-manager=nuget&previous-version=2.5.8&new-version=2.8.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index b386ccf79c12c..df53da2098509 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -9,7 +9,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index ae6f9f1e69667..65b4ac027e29f 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index ed158ca8656d3..cde2004e8e48d 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 06fb44e0a0e88..491a0c087b1cd 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -18,7 +18,7 @@ - + all runtime; build; native; contentfiles; analyzers From 131dbd60b52d595583aae3c883fbddce26199d68 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 19:37:41 -0700 Subject: [PATCH 022/261] MINOR: [C#] Bump ZstdSharp.Port and System.Runtime.CompilerServices.Unsafe in /csharp (#41440) Bumps [ZstdSharp.Port](https://github.com/oleg-st/ZstdSharp) and [System.Runtime.CompilerServices.Unsafe](https://github.com/dotnet/runtime). These dependencies needed to be updated together. Updates `ZstdSharp.Port` from 0.7.6 to 0.8.0
Release notes, sourced from ZstdSharp.Port's releases: 0.8.0 ports zstd v1.5.6 and adds a workaround for .NET Native.
Updates `System.Runtime.CompilerServices.Unsafe` from 4.7.1 to 6.0.0
Release notes, sourced from System.Runtime.CompilerServices.Unsafe's releases: links to the .NET 6.0 preview/RC/GA releases and the .NET 5.0.x patch releases, noting that .NET 5 is now out of support and .NET 6 is recommended (truncated).
Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression/Apache.Arrow.Compression.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj index 0ce8c89bb1d1b..c34d880f90060 100644 --- a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj +++ b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj @@ -13,7 +13,7 @@ - + From de37ee88690fc2ca8e48341d59e7dba327d8fe2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 Apr 2024 20:12:54 -0700 Subject: [PATCH 023/261] MINOR: [C#] Bump xunit from 2.7.1 to 2.8.0 in /csharp (#41439) Bumps [xunit](https://github.com/xunit/xunit) from 2.7.1 to 2.8.0.
Commits:
  • be260b3 v2.8.0
  • a8ceb66 #783: Add -useansicolor flag to console runner (v2)
  • 7b0ff93 Don't show /aggressive with unlimited threads
  • 46cdf06 Support parallel algorithm in MSBuild runner
  • b4aa876 Support multipler syntax in MSBuild runner
  • 6790b48 Add aggressive display to TestFrameworkEnvironment reported by XunitTestAssem...
  • 3dd7e91 Update mocks to make CollectionBehaviorAttribute property values optional
  • 4c82dea Asking for default threads should set 0, not null
  • d73cdef Should not try to use a semaphore when we've been asked for unlimited threads
  • 3722e54 Enable multiplier style max threads support
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit&package-manager=nuget&previous-version=2.7.1&new-version=2.8.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index df53da2098509..2b1720561004e 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 65b4ac027e29f..c8fb40f2d6702 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index cde2004e8e48d..ba60451f25f68 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 491a0c087b1cd..90b498d4e9b03 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -17,7 +17,7 @@ - + all runtime; build; native; contentfiles; analyzers From e4f31462dbd668c3bcb6ce96442f3c1632c4d8c8 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Tue, 30 Apr 2024 06:38:40 +0200 Subject: [PATCH 024/261] GH-41317: [C++] Fix crash on invalid Parquet file (#41366) ### Rationale for this change Fixes the crash detailed in #41317 in TableBatchReader::ReadNext() on a corrupted Parquet file ### What changes are included in this PR? Add a validation that all read columns have the same size ### Are these changes tested? I've tested on the reproducer I provided in #41317 that it now triggers a clean error: ``` Traceback (most recent call last): File "test.py", line 3, in [_ for _ in parquet_file.iter_batches()] File "test.py", line 3, in [_ for _ in parquet_file.iter_batches()] File "pyarrow/_parquet.pyx", line 1587, in iter_batches File "pyarrow/error.pxi", line 91, in pyarrow.lib.check_status pyarrow.lib.ArrowInvalid: columns do not have the same size ``` I'm not sure if/how unit tests for corrupted datasets should be added ### Are there any user-facing changes? 
No **This PR contains a "Critical Fix".** * GitHub Issue: #41317 Authored-by: Even Rouault Signed-off-by: mwish --- cpp/src/arrow/table.cc | 2 ++ cpp/src/arrow/table.h | 2 ++ cpp/src/parquet/arrow/reader.cc | 10 ++++++++++ 3 files changed, 14 insertions(+) diff --git a/cpp/src/arrow/table.cc b/cpp/src/arrow/table.cc index 967e78f6b4db1..5dc5e4c1a9a8c 100644 --- a/cpp/src/arrow/table.cc +++ b/cpp/src/arrow/table.cc @@ -619,6 +619,7 @@ TableBatchReader::TableBatchReader(const Table& table) for (int i = 0; i < table.num_columns(); ++i) { column_data_[i] = table.column(i).get(); } + DCHECK(table_.Validate().ok()); } TableBatchReader::TableBatchReader(std::shared_ptr table) @@ -632,6 +633,7 @@ TableBatchReader::TableBatchReader(std::shared_ptr
table) for (int i = 0; i < owned_table_->num_columns(); ++i) { column_data_[i] = owned_table_->column(i).get(); } + DCHECK(table_.Validate().ok()); } std::shared_ptr TableBatchReader::schema() const { return table_.schema(); } diff --git a/cpp/src/arrow/table.h b/cpp/src/arrow/table.h index a7508430c132b..79675fa92b1f3 100644 --- a/cpp/src/arrow/table.h +++ b/cpp/src/arrow/table.h @@ -241,6 +241,8 @@ class ARROW_EXPORT Table { /// /// The conversion is zero-copy: each record batch is a view over a slice /// of the table's columns. +/// +/// The table is expected to be valid prior to using it with the batch reader. class ARROW_EXPORT TableBatchReader : public RecordBatchReader { public: /// \brief Construct a TableBatchReader for the given table diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index d6ad7c25bc7c1..285e2a597389d 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -1043,6 +1043,16 @@ Status FileReaderImpl::GetRecordBatchReader(const std::vector& row_groups, } } + // Check all columns has same row-size + if (!columns.empty()) { + int64_t row_size = columns[0]->length(); + for (size_t i = 1; i < columns.size(); ++i) { + if (columns[i]->length() != row_size) { + return ::arrow::Status::Invalid("columns do not have the same size"); + } + } + } + auto table = ::arrow::Table::Make(batch_schema, std::move(columns)); auto table_reader = std::make_shared<::arrow::TableBatchReader>(*table); From 97e169a115bcf4e18fffd6c788f6fde648969664 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Tue, 30 Apr 2024 17:00:02 +0800 Subject: [PATCH 025/261] MINOR: [Java] Upgrade bcpkix-jdkon15 dependency to bcpkix-jdkon18 (#41458) ### Rationale for this change Since bcpkix-jdk15on is no longer being maintained and bcpkix-jdkon18 is fully compatible with it, we can replace bcpkix-jdk15on with bcpkix-jdkon18. This will ensure continued support and security for our applications. FYI: https://www.bouncycastle.org/latest_releases.html ### What changes are included in this PR? - Upgrade bcpkix-jdkon15 dependency to bcpkix-jdkon18 ### Are these changes tested? - yes We used the JcaPEMWriter class to convert certificates stored within a KeyStore object into PEM format and subsequently write them to a designated JcaPEMWriter object. Existing test suites provide comprehensive coverage for this functionality. 
Authored-by: Calvin Kirs Signed-off-by: David Li --- java/flight/flight-sql-jdbc-core/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 2e0de90fcf8bc..ef3f2469b73dd 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -126,8 +126,8 @@ org.bouncycastle - bcpkix-jdk15on - 1.70 + bcpkix-jdk18on + 1.78.1 From b609de374c7c00e1537eb8092e1ff2db718d2b61 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Tue, 30 Apr 2024 13:42:31 +0200 Subject: [PATCH 026/261] GH-40342: [Python] Fix pickling of LocalFileSystem for cython 2 (#41459) Small follow-up fix for the failure introduced by https://github.com/apache/arrow/pull/40356 * GitHub Issue: #40342 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/_fs.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyarrow/_fs.pyx b/python/pyarrow/_fs.pyx index 0e635b2c8a28a..dbfb6ed114553 100644 --- a/python/pyarrow/_fs.pyx +++ b/python/pyarrow/_fs.pyx @@ -18,6 +18,7 @@ # cython: language_level = 3 from cpython.datetime cimport datetime, PyDateTime_DateTime +from cython cimport binding from pyarrow.includes.common cimport * from pyarrow.includes.libarrow_python cimport PyDateTime_to_TimePoint @@ -421,6 +422,7 @@ cdef class FileSystem(_Weakrefable): "SubTreeFileSystem") @staticmethod + @binding(True) # Required for cython < 3 def _from_uri(uri): fs, _path = FileSystem.from_uri(uri) return fs From e22197f39e41446789dcc52e931995fe20a784a4 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Tue, 30 Apr 2024 09:41:44 -0400 Subject: [PATCH 027/261] GH-41400: [MATLAB] Bump `libmexclass` version to commit `ca3cea6` (#41436) ### Rationale for this change @ kevingurney and I recently resolved multiple issues related to `mathworks/libmexclass` not supporting ARM-based macOS builds (i.e. builds on `macos-14`): - mathworks/libmexclass#76 - mathworks/libmexclass#77 We should bump the version of mathworks/libmexclass used by the MATLAB interface to the latest available commit ([ca3cea6](https://github.com/mathworks/libmexclass/commit/ca3cea6bf1ba5e9d86210bd207d643493e8d45f6) as of now) in order to enable building the MATLAB interface to Arrow on `macos-14` (which is ARM-based). ### What changes are included in this PR? - Bumped version of `mathworks/libmexclass` used by the MATLAB interface to [ca3cea6](https://github.com/mathworks/libmexclass/commit/ca3cea6bf1ba5e9d86210bd207d643493e8d45f6) ### Are these changes tested? - Yes. The existing test points verify verify upgrading `mathworks/libmexclass` does not break the MATLAB interface. ### Are there any user-facing changes? - No. ### Future Directions - #41435 - #41385 * GitHub Issue: #41400 Authored-by: Sarah Gilmore Signed-off-by: Sarah Gilmore --- matlab/tools/cmake/BuildMatlabArrowInterface.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index cb746e08b1f8e..e1641842ca8b9 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -24,8 +24,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_NAME libmexclass) # libmexclass is accessible for CI without permission issues. 
set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_REPOSITORY "https://github.com/mathworks/libmexclass.git") # Use a specific Git commit hash to avoid libmexclass version changing unexpectedly. -set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "d04f88d") - +set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_GIT_TAG "ca3cea6") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_FETCH_CONTENT_SOURCE_SUBDIR "libmexclass/cpp") # ------------------------------------------ From 0ef7351986ee8b967e210d0f9c7a9c8e4d4038fd Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 1 May 2024 02:01:39 +0800 Subject: [PATCH 028/261] GH-41407: [C++] Use static method to fill scalar scratch space to prevent ub (#41421) ### Rationale for this change In #40237, I introduced scalar scratch space filling in concrete scalar sub-class constructor, in which there is a static down-casting of `this` to sub-class pointer. Though this is common in CRTP, it happens in base cast constructor. And this is reported in #41407 to be UB by UBSAN's "vptr" sanitizing. I'm not a language lawyer to tell if this is a true/false-positive. So I proposed two approaches: 1. The easy way: add suppression in [1], like we already did for `shared_ptr`. But apparently this won't be feasible if this is a true-positive (need some language lawyer's help to confirm). 2. The hard way: totally avoid this so-to-speak UB but may introduce more boilerplate code. This PR is the hard way. [1] https://github.com/apache/arrow/blob/main/r/tools/ubsan.supp ### What changes are included in this PR? Make `FillScratchSpace` static. ### Are these changes tested? The existing UT should cover it well. ### Are there any user-facing changes? None. * GitHub Issue: #41407 Lead-authored-by: Ruoxi Sun Co-authored-by: Rossi Sun Co-authored-by: Benjamin Kietzman Signed-off-by: Benjamin Kietzman --- cpp/src/arrow/scalar.cc | 73 +++++++++++++++----------- cpp/src/arrow/scalar.h | 112 +++++++++++++++++++++++++++++++--------- 2 files changed, 130 insertions(+), 55 deletions(-) diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index 8e8d3903663e4..7d8084e17c279 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -563,15 +563,17 @@ Status Scalar::ValidateFull() const { BaseBinaryScalar::BaseBinaryScalar(std::string s, std::shared_ptr type) : BaseBinaryScalar(Buffer::FromString(std::move(s)), std::move(type)) {} -void BinaryScalar::FillScratchSpace() { +void BinaryScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? static_cast(value->size()) : int32_t(0)}); } -void BinaryViewScalar::FillScratchSpace() { +void BinaryViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { static_assert(sizeof(BinaryViewType::c_type) <= internal::kScalarScratchSpaceSize); - auto* view = new (&scratch_space_) BinaryViewType::c_type; + auto* view = new (scratch_space) BinaryViewType::c_type; if (value) { *view = util::ToBinaryView(std::string_view{*value}, 0, 0); } else { @@ -579,9 +581,10 @@ void BinaryViewScalar::FillScratchSpace() { } } -void LargeBinaryScalar::FillScratchSpace() { +void LargeBinaryScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int64_t(0), value ? 
static_cast(value->size()) : int64_t(0)}); } @@ -612,36 +615,40 @@ BaseListScalar::BaseListScalar(std::shared_ptr value, } ListScalar::ListScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, list(value->type()), is_valid) {} + : ListScalar(value, list(value->type()), is_valid) {} -void ListScalar::FillScratchSpace() { +void ListScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? static_cast(value->length()) : int32_t(0)}); } LargeListScalar::LargeListScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, large_list(value->type()), is_valid) {} + : LargeListScalar(value, large_list(value->type()), is_valid) {} -void LargeListScalar::FillScratchSpace() { - FillScalarScratchSpace(scratch_space_, +void LargeListScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { + FillScalarScratchSpace(scratch_space, {int64_t(0), value ? value->length() : int64_t(0)}); } ListViewScalar::ListViewScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, list_view(value->type()), is_valid) {} + : ListViewScalar(value, list_view(value->type()), is_valid) {} -void ListViewScalar::FillScratchSpace() { +void ListViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? static_cast(value->length()) : int32_t(0)}); } LargeListViewScalar::LargeListViewScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, large_list_view(value->type()), is_valid) {} + : LargeListViewScalar(value, large_list_view(value->type()), is_valid) {} -void LargeListViewScalar::FillScratchSpace() { - FillScalarScratchSpace(scratch_space_, +void LargeListViewScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { + FillScalarScratchSpace(scratch_space, {int64_t(0), value ? value->length() : int64_t(0)}); } @@ -652,11 +659,12 @@ inline std::shared_ptr MakeMapType(const std::shared_ptr& pa } MapScalar::MapScalar(std::shared_ptr value, bool is_valid) - : BaseListScalar(value, MakeMapType(value->type()), is_valid) {} + : MapScalar(value, MakeMapType(value->type()), is_valid) {} -void MapScalar::FillScratchSpace() { +void MapScalar::FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value) { FillScalarScratchSpace( - scratch_space_, + scratch_space, {int32_t(0), value ? 
static_cast(value->length()) : int32_t(0)}); } @@ -705,7 +713,9 @@ Result> StructScalar::field(FieldRef ref) const { RunEndEncodedScalar::RunEndEncodedScalar(std::shared_ptr value, std::shared_ptr type) - : Scalar{std::move(type), value->is_valid}, value{std::move(value)} { + : Scalar{std::move(type), value->is_valid}, + ArraySpanFillFromScalarScratchSpace(*this->type), + value{std::move(value)} { ARROW_CHECK_EQ(this->type->id(), Type::RUN_END_ENCODED); } @@ -716,18 +726,18 @@ RunEndEncodedScalar::RunEndEncodedScalar(const std::shared_ptr& type) RunEndEncodedScalar::~RunEndEncodedScalar() = default; -void RunEndEncodedScalar::FillScratchSpace() { - auto run_end = run_end_type()->id(); +void RunEndEncodedScalar::FillScratchSpace(uint8_t* scratch_space, const DataType& type) { + Type::type run_end = checked_cast(type).run_end_type()->id(); switch (run_end) { case Type::INT16: - FillScalarScratchSpace(scratch_space_, {int16_t(1)}); + FillScalarScratchSpace(scratch_space, {int16_t(1)}); break; case Type::INT32: - FillScalarScratchSpace(scratch_space_, {int32_t(1)}); + FillScalarScratchSpace(scratch_space, {int32_t(1)}); break; default: DCHECK_EQ(run_end, Type::INT64); - FillScalarScratchSpace(scratch_space_, {int64_t(1)}); + FillScalarScratchSpace(scratch_space, {int64_t(1)}); } } @@ -806,6 +816,7 @@ Result TimestampScalar::FromISO8601(std::string_view iso8601, SparseUnionScalar::SparseUnionScalar(ValueType value, int8_t type_code, std::shared_ptr type) : UnionScalar(std::move(type), type_code, /*is_valid=*/true), + ArraySpanFillFromScalarScratchSpace(type_code), value(std::move(value)) { const auto child_ids = checked_cast(*this->type).child_ids(); if (type_code >= 0 && static_cast(type_code) < child_ids.size() && @@ -833,13 +844,13 @@ std::shared_ptr SparseUnionScalar::FromValue(std::shared_ptr val return std::make_shared(field_values, type_code, std::move(type)); } -void SparseUnionScalar::FillScratchSpace() { - auto* union_scratch_space = reinterpret_cast(&scratch_space_); +void SparseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) { + auto* union_scratch_space = reinterpret_cast(scratch_space); union_scratch_space->type_code = type_code; } -void DenseUnionScalar::FillScratchSpace() { - auto* union_scratch_space = reinterpret_cast(&scratch_space_); +void DenseUnionScalar::FillScratchSpace(uint8_t* scratch_space, int8_t type_code) { + auto* union_scratch_space = reinterpret_cast(scratch_space); union_scratch_space->type_code = type_code; FillScalarScratchSpace(union_scratch_space->offsets, {int32_t(0), int32_t(1)}); } diff --git a/cpp/src/arrow/scalar.h b/cpp/src/arrow/scalar.h index a7ee6a417d9a1..982a4c5113c92 100644 --- a/cpp/src/arrow/scalar.h +++ b/cpp/src/arrow/scalar.h @@ -141,7 +141,12 @@ struct ARROW_EXPORT ArraySpanFillFromScalarScratchSpace { alignas(int64_t) mutable uint8_t scratch_space_[kScalarScratchSpaceSize]; private: - ArraySpanFillFromScalarScratchSpace() { static_cast(this)->FillScratchSpace(); } + template + explicit ArraySpanFillFromScalarScratchSpace(Args&&... 
args) { + Impl::FillScratchSpace(scratch_space_, std::forward(args)...); + } + + ArraySpanFillFromScalarScratchSpace() = delete; friend Impl; }; @@ -278,20 +283,32 @@ struct ARROW_EXPORT BaseBinaryScalar : public internal::PrimitiveScalarBase { struct ARROW_EXPORT BinaryScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit BinaryScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit BinaryScalar(std::shared_ptr value) : BinaryScalar(std::move(value), binary()) {} - explicit BinaryScalar(std::string s) : BaseBinaryScalar(std::move(s), binary()) {} + explicit BinaryScalar(std::string s) : BinaryScalar(std::move(s), binary()) {} BinaryScalar() : BinaryScalar(binary()) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -312,23 +329,35 @@ struct ARROW_EXPORT StringScalar : public BinaryScalar { struct ARROW_EXPORT BinaryViewScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = BinaryViewType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit BinaryViewScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryViewScalar(std::shared_ptr value, std::shared_ptr type) + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + BinaryViewScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit BinaryViewScalar(std::shared_ptr value) : BinaryViewScalar(std::move(value), binary_view()) {} explicit BinaryViewScalar(std::string s) - : BaseBinaryScalar(std::move(s), binary_view()) {} + : BinaryViewScalar(std::move(s), binary_view()) {} BinaryViewScalar() : BinaryViewScalar(binary_view()) {} std::string_view view() const override { return std::string_view(*this->value); } private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -350,24 +379,33 @@ struct ARROW_EXPORT StringViewScalar : public BinaryViewScalar { struct ARROW_EXPORT LargeBinaryScalar : public BaseBinaryScalar, private internal::ArraySpanFillFromScalarScratchSpace { - using BaseBinaryScalar::BaseBinaryScalar; using TypeClass = LargeBinaryType; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + explicit LargeBinaryScalar(std::shared_ptr type) + : BaseBinaryScalar(std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + LargeBinaryScalar(std::shared_ptr value, std::shared_ptr type) - : BaseBinaryScalar(std::move(value), 
std::move(type)) {} + : BaseBinaryScalar(std::move(value), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} + + LargeBinaryScalar(std::string s, std::shared_ptr type) + : BaseBinaryScalar(std::move(s), std::move(type)), + ArraySpanFillFromScalarScratchSpace(this->value) {} explicit LargeBinaryScalar(std::shared_ptr value) : LargeBinaryScalar(std::move(value), large_binary()) {} explicit LargeBinaryScalar(std::string s) - : BaseBinaryScalar(std::move(s), large_binary()) {} + : LargeBinaryScalar(std::move(s), large_binary()) {} LargeBinaryScalar() : LargeBinaryScalar(large_binary()) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -550,14 +588,19 @@ struct ARROW_EXPORT ListScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = ListType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + ListScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit ListScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -567,14 +610,19 @@ struct ARROW_EXPORT LargeListScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = LargeListType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + LargeListScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit LargeListScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -584,14 +632,19 @@ struct ARROW_EXPORT ListViewScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = ListViewType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + ListViewScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit ListViewScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -601,14 +654,19 @@ struct ARROW_EXPORT LargeListViewScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = LargeListViewType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + LargeListViewScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + 
ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit LargeListViewScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -618,14 +676,19 @@ struct ARROW_EXPORT MapScalar : public BaseListScalar, private internal::ArraySpanFillFromScalarScratchSpace { using TypeClass = MapType; - using BaseListScalar::BaseListScalar; using ArraySpanFillFromScalarScratchSpace = internal::ArraySpanFillFromScalarScratchSpace; + MapScalar(std::shared_ptr value, std::shared_ptr type, + bool is_valid = true) + : BaseListScalar(std::move(value), std::move(type), is_valid), + ArraySpanFillFromScalarScratchSpace(this->value) {} + explicit MapScalar(std::shared_ptr value, bool is_valid = true); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, + const std::shared_ptr& value); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -707,7 +770,7 @@ struct ARROW_EXPORT SparseUnionScalar std::shared_ptr type); private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -733,10 +796,11 @@ struct ARROW_EXPORT DenseUnionScalar DenseUnionScalar(ValueType value, int8_t type_code, std::shared_ptr type) : UnionScalar(std::move(type), type_code, value->is_valid), + ArraySpanFillFromScalarScratchSpace(type_code), value(std::move(value)) {} private: - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, int8_t type_code); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; @@ -772,7 +836,7 @@ struct ARROW_EXPORT RunEndEncodedScalar private: const TypeClass& ree_type() const { return internal::checked_cast(*type); } - void FillScratchSpace(); + static void FillScratchSpace(uint8_t* scratch_space, const DataType& type); friend ArraySpan; friend ArraySpanFillFromScalarScratchSpace; From 5e986be59f08135d2fdaeb819c87120b0bf7436a Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Wed, 1 May 2024 06:20:04 +0800 Subject: [PATCH 029/261] GH-41183: [C++][Python] Expose recursive flatten for lists on list_flatten kernel function and pyarrow bindings (#41295) ### Rationale for this change Expose recursive flatten for logical lists on list_flatten kernel function and pyarrow bindings. ### What changes are included in this PR? 1. Expose recursive flatten for logical lists on `list_flatten` kernel function 2. Support [Large]ListView for some kernel functions: `list_flatten`,`list_value_length`, `list_element` 3. Support recursive flatten for pyarrow bindinds and simplify [Large]ListView's pyarrow bindings 4. Refactor vector_nested_test.cc for better support [Large]ListView types. ### Are these changes tested? Yes ### Are there any user-facing changes? Yes. 1. Some kernel functions like: list_flatten, list_value_length, list_element would support [Large]ListView types 2. `list_flatten` and related pyarrow bindings could support flatten recursively with an ListFlattenOptions. 
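As a quick illustration of the new behavior (a minimal sketch, not taken from this PR's test suite; it assumes the C++ `ListFlattenOptions` is exposed in Python as a `recursive` keyword on `pyarrow.compute.list_flatten`, which is what the binding changes below suggest):

``` python
import pyarrow as pa
import pyarrow.compute as pc

# A doubly nested list array: list<list<int64>>, with a null outer element.
arr = pa.array([[[1, 2], [3]], None, [[4, None]]])

# Default (non-recursive) flatten removes one level of nesting and, as
# before, skips the null parent: a list<int64> array [[1, 2], [3], [4, None]].
print(pc.list_flatten(arr))

# With recursive flattening the kernel keeps flattening until a non-list
# array remains: an int64 array [1, 2, 3, 4, null].
print(pc.list_flatten(arr, recursive=True))
```

The default stays non-recursive, so existing callers of `list_flatten` are unaffected.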
* GitHub Issue: #41183 Lead-authored-by: ZhangHuiGui Co-authored-by: ZhangHuiGui <2689496754@qq.com> Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/api_vector.cc | 7 + cpp/src/arrow/compute/api_vector.h | 12 + .../arrow/compute/kernels/codegen_internal.cc | 21 +- .../arrow/compute/kernels/codegen_internal.h | 3 +- .../arrow/compute/kernels/scalar_nested.cc | 49 +++- .../compute/kernels/scalar_nested_test.cc | 17 +- .../arrow/compute/kernels/vector_nested.cc | 54 +++-- .../compute/kernels/vector_nested_test.cc | 129 +++++++++-- python/pyarrow/_compute.pyx | 20 ++ python/pyarrow/array.pxi | 215 +++++++----------- python/pyarrow/compute.py | 1 + python/pyarrow/includes/libarrow.pxd | 5 + python/pyarrow/lib.pxd | 4 +- python/pyarrow/tests/test_array.py | 8 +- python/pyarrow/tests/test_compute.py | 1 + 15 files changed, 364 insertions(+), 182 deletions(-) diff --git a/cpp/src/arrow/compute/api_vector.cc b/cpp/src/arrow/compute/api_vector.cc index d47ee42ebf239..f0d5c0fcc3d72 100644 --- a/cpp/src/arrow/compute/api_vector.cc +++ b/cpp/src/arrow/compute/api_vector.cc @@ -153,6 +153,8 @@ static auto kRankOptionsType = GetFunctionOptionsType( DataMember("tiebreaker", &RankOptions::tiebreaker)); static auto kPairwiseOptionsType = GetFunctionOptionsType( DataMember("periods", &PairwiseOptions::periods)); +static auto kListFlattenOptionsType = GetFunctionOptionsType( + DataMember("recursive", &ListFlattenOptions::recursive)); } // namespace } // namespace internal @@ -224,6 +226,10 @@ PairwiseOptions::PairwiseOptions(int64_t periods) : FunctionOptions(internal::kPairwiseOptionsType), periods(periods) {} constexpr char PairwiseOptions::kTypeName[]; +ListFlattenOptions::ListFlattenOptions(bool recursive) + : FunctionOptions(internal::kListFlattenOptionsType), recursive(recursive) {} +constexpr char ListFlattenOptions::kTypeName[]; + namespace internal { void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kFilterOptionsType)); @@ -237,6 +243,7 @@ void RegisterVectorOptions(FunctionRegistry* registry) { DCHECK_OK(registry->AddFunctionOptionsType(kCumulativeOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kRankOptionsType)); DCHECK_OK(registry->AddFunctionOptionsType(kPairwiseOptionsType)); + DCHECK_OK(registry->AddFunctionOptionsType(kListFlattenOptionsType)); } } // namespace internal diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 919572f16ee69..e5bcc37329661 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -245,6 +245,18 @@ class ARROW_EXPORT PairwiseOptions : public FunctionOptions { int64_t periods = 1; }; +/// \brief Options for list_flatten function +class ARROW_EXPORT ListFlattenOptions : public FunctionOptions { + public: + explicit ListFlattenOptions(bool recursive = false); + static constexpr char const kTypeName[] = "ListFlattenOptions"; + static ListFlattenOptions Defaults() { return ListFlattenOptions(); } + + /// \brief If true, the list is flattened recursively until a non-list + /// array is formed. 
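+  ///
+  /// Illustrative use (an assumed sketch; `input` stands for any
+  /// list-like Datum and is not defined in this patch):
+  ///
+  ///   ListFlattenOptions options(/*recursive=*/true);
+  ///   ARROW_ASSIGN_OR_RAISE(Datum flat,
+  ///                         CallFunction("list_flatten", {input}, &options));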
+ bool recursive = false; +}; + /// @} /// \brief Filter with a boolean selection filter diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.cc b/cpp/src/arrow/compute/kernels/codegen_internal.cc index 00a833742f957..0fd9cae7a8d71 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.cc +++ b/cpp/src/arrow/compute/kernels/codegen_internal.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/compute/api_vector.h" #include "arrow/type_fwd.h" namespace arrow { @@ -56,9 +57,23 @@ Result LastType(KernelContext*, const std::vector& types return types.back(); } -Result ListValuesType(KernelContext*, const std::vector& args) { - const auto& list_type = checked_cast(*args[0].type); - return list_type.value_type().get(); +Result ListValuesType(KernelContext* ctx, + const std::vector& args) { + auto list_type = checked_cast(args[0].type); + auto value_type = list_type->value_type().get(); + + auto recursive = + ctx->state() ? OptionsWrapper::Get(ctx).recursive : false; + if (!recursive) { + return value_type; + } + + for (auto value_kind = value_type->id(); + is_list(value_kind) || is_list_view(value_kind); value_kind = value_type->id()) { + list_type = checked_cast(list_type->value_type().get()); + value_type = list_type->value_type().get(); + } + return value_type; } void EnsureDictionaryDecoded(std::vector* types) { diff --git a/cpp/src/arrow/compute/kernels/codegen_internal.h b/cpp/src/arrow/compute/kernels/codegen_internal.h index 097ee1de45b6a..9e46a21887f8c 100644 --- a/cpp/src/arrow/compute/kernels/codegen_internal.h +++ b/cpp/src/arrow/compute/kernels/codegen_internal.h @@ -423,7 +423,8 @@ static void VisitTwoArrayValuesInline(const ArraySpan& arr0, const ArraySpan& ar Result FirstType(KernelContext*, const std::vector& types); Result LastType(KernelContext*, const std::vector& types); -Result ListValuesType(KernelContext*, const std::vector& types); +Result ListValuesType(KernelContext* ctx, + const std::vector& types); // ---------------------------------------------------------------------- // Helpers for iterating over common DataType instances for adding kernels to diff --git a/cpp/src/arrow/compute/kernels/scalar_nested.cc b/cpp/src/arrow/compute/kernels/scalar_nested.cc index 733ab9c0dc287..b99f065a0b158 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested.cc @@ -23,6 +23,7 @@ #include "arrow/compute/api_scalar.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" +#include "arrow/type_fwd.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_generate.h" @@ -41,10 +42,17 @@ Status ListValueLength(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou const ArraySpan& arr = batch[0].array; ArraySpan* out_arr = out->array_span_mutable(); auto out_values = out_arr->GetValues(1); - const offset_type* offsets = arr.GetValues(1); - // Offsets are always well-defined and monotonic, even for null values - for (int64_t i = 0; i < arr.length; ++i) { - *out_values++ = offsets[i + 1] - offsets[i]; + if (is_list_view(*arr.type)) { + const auto* sizes = arr.GetValues(2); + if (arr.length > 0) { + memcpy(out_values, sizes, arr.length * sizeof(offset_type)); + } + } else { + const offset_type* offsets = arr.GetValues(1); + // Offsets are always well-defined and monotonic, even for null values + for (int64_t i = 0; i < arr.length; ++i) { + *out_values++ = offsets[i + 1] - offsets[i]; + } } return Status::OK(); } @@ -59,6 +67,30 @@ Status 
FixedSizeListValueLength(KernelContext* ctx, const ExecSpan& batch, return Status::OK(); } +template +void AddListValueLengthKernel(ScalarFunction* func, + const std::shared_ptr& out_type) { + auto in_type = {InputType(InListType::type_id)}; + ScalarKernel kernel(in_type, out_type, ListValueLength); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +template <> +void AddListValueLengthKernel( + ScalarFunction* func, const std::shared_ptr& out_type) { + auto in_type = {InputType(Type::FIXED_SIZE_LIST)}; + ScalarKernel kernel(in_type, out_type, FixedSizeListValueLength); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +void AddListValueLengthKernels(ScalarFunction* func) { + AddListValueLengthKernel(func, int32()); + AddListValueLengthKernel(func, int64()); + AddListValueLengthKernel(func, int32()); + AddListValueLengthKernel(func, int64()); + AddListValueLengthKernel(func, int32()); +} + const FunctionDoc list_value_length_doc{ "Compute list lengths", ("`lists` must have a list-like type.\n" @@ -399,6 +431,8 @@ void AddListElementKernels(ScalarFunction* func) { void AddListElementKernels(ScalarFunction* func) { AddListElementKernels(func); AddListElementKernels(func); + AddListElementKernels(func); + AddListElementKernels(func); AddListElementKernels(func); } @@ -824,12 +858,7 @@ const FunctionDoc map_lookup_doc{ void RegisterScalarNested(FunctionRegistry* registry) { auto list_value_length = std::make_shared( "list_value_length", Arity::Unary(), list_value_length_doc); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::LIST)}, int32(), - ListValueLength)); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::FIXED_SIZE_LIST)}, int32(), - FixedSizeListValueLength)); - DCHECK_OK(list_value_length->AddKernel({InputType(Type::LARGE_LIST)}, int64(), - ListValueLength)); + AddListValueLengthKernels(list_value_length.get()); DCHECK_OK(registry->AddFunction(std::move(list_value_length))); auto list_element = diff --git a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc index a72ec99620b82..32bea8246954d 100644 --- a/cpp/src/arrow/compute/kernels/scalar_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_nested_test.cc @@ -30,11 +30,21 @@ namespace arrow { namespace compute { static std::shared_ptr GetOffsetType(const DataType& type) { - return type.id() == Type::LIST ? 
int32() : int64(); + switch (type.id()) { + case Type::LIST: + case Type::LIST_VIEW: + return int32(); + case Type::LARGE_LIST: + case Type::LARGE_LIST_VIEW: + return int64(); + default: + Unreachable("Unexpected type"); + } } TEST(TestScalarNested, ListValueLength) { - for (auto ty : {list(int32()), large_list(int32())}) { + for (auto ty : {list(int32()), large_list(int32()), list_view(int32()), + large_list_view(int32())}) { CheckScalarUnary("list_value_length", ty, "[[0, null, 1], null, [2, 3], []]", GetOffsetType(*ty), "[3, null, 2, 0]"); } @@ -47,7 +57,8 @@ TEST(TestScalarNested, ListValueLength) { TEST(TestScalarNested, ListElementNonFixedListWithNulls) { auto sample = "[[7, 5, 81], [6, null, 4, 7, 8], [3, 12, 2, 0], [1, 9], null]"; for (auto ty : NumericTypes()) { - for (auto list_type : {list(ty), large_list(ty)}) { + for (auto list_type : + {list(ty), large_list(ty), list_view(ty), large_list_view(ty)}) { auto input = ArrayFromJSON(list_type, sample); auto null_input = ArrayFromJSON(list_type, "[null]"); for (auto index_type : IntTypes()) { diff --git a/cpp/src/arrow/compute/kernels/vector_nested.cc b/cpp/src/arrow/compute/kernels/vector_nested.cc index 08930e589f7b4..8c77c261c6a98 100644 --- a/cpp/src/arrow/compute/kernels/vector_nested.cc +++ b/cpp/src/arrow/compute/kernels/vector_nested.cc @@ -18,6 +18,7 @@ // Vector kernels involving nested types #include "arrow/array/array_base.h" +#include "arrow/compute/api_vector.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/visit_type_inline.h" @@ -29,8 +30,13 @@ namespace { template Status ListFlatten(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + auto recursive = OptionsWrapper::Get(ctx).recursive; typename TypeTraits::ArrayType list_array(batch[0].array.ToArrayData()); - ARROW_ASSIGN_OR_RAISE(auto result, list_array.Flatten(ctx->memory_pool())); + + auto pool = ctx->memory_pool(); + ARROW_ASSIGN_OR_RAISE(auto result, (recursive ? list_array.FlattenRecursively(pool) + : list_array.Flatten(pool))); + out->value = std::move(result->data()); return Status::OK(); } @@ -107,10 +113,15 @@ struct ListParentIndicesArray { const FunctionDoc list_flatten_doc( "Flatten list values", - ("`lists` must have a list-like type.\n" - "Return an array with the top list level flattened.\n" - "Top-level null values in `lists` do not emit anything in the input."), - {"lists"}); + ("`lists` must have a list-like type (lists, list-views, and\n" + "fixed-size lists).\n" + "Return an array with the top list level flattened unless\n" + "`recursive` is set to true in ListFlattenOptions. 
When that\n" + "is the case, flattening happens recursively until a non-list\n" + "array is formed.\n" + "\n" + "Null list values do not emit anything to the output."), + {"lists"}, "ListFlattenOptions"); const FunctionDoc list_parent_indices_doc( "Compute parent indices of nested list values", @@ -153,17 +164,34 @@ class ListParentIndicesFunction : public MetaFunction { } }; +const ListFlattenOptions* GetDefaultListFlattenOptions() { + static const auto kDefaultListFlattenOptions = ListFlattenOptions::Defaults(); + return &kDefaultListFlattenOptions; +} + +template +void AddBaseListFlattenKernels(VectorFunction* func) { + auto in_type = {InputType(InListType::type_id)}; + auto out_type = OutputType(ListValuesType); + VectorKernel kernel(in_type, out_type, ListFlatten, + OptionsWrapper::Init); + DCHECK_OK(func->AddKernel(std::move(kernel))); +} + +void AddBaseListFlattenKernels(VectorFunction* func) { + AddBaseListFlattenKernels(func); + AddBaseListFlattenKernels(func); + AddBaseListFlattenKernels(func); + AddBaseListFlattenKernels(func); + AddBaseListFlattenKernels(func); +} + } // namespace void RegisterVectorNested(FunctionRegistry* registry) { - auto flatten = - std::make_shared("list_flatten", Arity::Unary(), list_flatten_doc); - DCHECK_OK(flatten->AddKernel({Type::LIST}, OutputType(ListValuesType), - ListFlatten)); - DCHECK_OK(flatten->AddKernel({Type::FIXED_SIZE_LIST}, OutputType(ListValuesType), - ListFlatten)); - DCHECK_OK(flatten->AddKernel({Type::LARGE_LIST}, OutputType(ListValuesType), - ListFlatten)); + auto flatten = std::make_shared( + "list_flatten", Arity::Unary(), list_flatten_doc, GetDefaultListFlattenOptions()); + AddBaseListFlattenKernels(flatten.get()); DCHECK_OK(registry->AddFunction(std::move(flatten))); DCHECK_OK(registry->AddFunction(std::make_shared())); diff --git a/cpp/src/arrow/compute/kernels/vector_nested_test.cc b/cpp/src/arrow/compute/kernels/vector_nested_test.cc index eef1b6835ffb5..56604ebd16cc0 100644 --- a/cpp/src/arrow/compute/kernels/vector_nested_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_nested_test.cc @@ -19,6 +19,7 @@ #include "arrow/chunked_array.h" #include "arrow/compute/api.h" +#include "arrow/compute/api_vector.h" #include "arrow/compute/kernels/test_util.h" #include "arrow/result.h" #include "arrow/testing/gtest_util.h" @@ -29,38 +30,113 @@ namespace compute { using arrow::internal::checked_cast; -TEST(TestVectorNested, ListFlatten) { - for (auto ty : {list(int16()), large_list(int16())}) { - auto input = ArrayFromJSON(ty, "[[0, null, 1], null, [2, 3], []]"); - auto expected = ArrayFromJSON(int16(), "[0, null, 1, 2, 3]"); +using ListAndListViewTypes = + ::testing::Types; + +// ---------------------------------------------------------------------- +// [Large]List and [Large]ListView tests +template +class TestVectorNestedSpecialized : public ::testing::Test { + public: + using TypeClass = T; + + void SetUp() override { + value_type_ = int16(); + type_ = std::make_shared(value_type_); + } + + public: + void TestListFlatten() { + auto input = ArrayFromJSON(type_, "[[0, null, 1], null, [2, 3], []]"); + auto expected = ArrayFromJSON(value_type_, "[0, null, 1, 2, 3]"); CheckVectorUnary("list_flatten", input, expected); // Construct a list with a non-empty null slot auto tweaked = TweakValidityBit(input, 0, false); - expected = ArrayFromJSON(int16(), "[2, 3]"); + expected = ArrayFromJSON(value_type_, "[2, 3]"); CheckVectorUnary("list_flatten", tweaked, expected); } -TEST(TestVectorNested, ListFlattenNulls) { - const auto ty = 
list(int32()); - auto input = ArrayFromJSON(ty, "[null, null]"); - auto expected = ArrayFromJSON(int32(), "[]"); - CheckVectorUnary("list_flatten", input, expected); -} + void TestListFlattenNulls() { + value_type_ = int32(); + type_ = std::make_shared(value_type_); + auto input = ArrayFromJSON(type_, "[null, null]"); + auto expected = ArrayFromJSON(value_type_, "[]"); + CheckVectorUnary("list_flatten", input, expected); + } -TEST(TestVectorNested, ListFlattenChunkedArray) { - for (auto ty : {list(int16()), large_list(int16())}) { - ARROW_SCOPED_TRACE(ty->ToString()); - auto input = ChunkedArrayFromJSON(ty, {"[[0, null, 1], null]", "[[2, 3], []]"}); - auto expected = ChunkedArrayFromJSON(int16(), {"[0, null, 1]", "[2, 3]"}); + void TestListFlattenChunkedArray() { + ARROW_SCOPED_TRACE(type_->ToString()); + auto input = ChunkedArrayFromJSON(type_, {"[[0, null, 1], null]", "[[2, 3], []]"}); + auto expected = ChunkedArrayFromJSON(value_type_, {"[0, null, 1]", "[2, 3]"}); CheckVectorUnary("list_flatten", input, expected); ARROW_SCOPED_TRACE("empty"); - input = ChunkedArrayFromJSON(ty, {}); - expected = ChunkedArrayFromJSON(int16(), {}); + input = ChunkedArrayFromJSON(type_, {}); + expected = ChunkedArrayFromJSON(value_type_, {}); CheckVectorUnary("list_flatten", input, expected); } + + void TestListFlattenRecursively() { + auto inner_type = std::make_shared(value_type_); + type_ = std::make_shared(inner_type); + + ListFlattenOptions opts; + opts.recursive = true; + + // List types with two nesting levels: list> + auto input = ArrayFromJSON(type_, R"([ + [[0, 1, 2], null, [3, null]], + [null], + [[2, 9], [4], [], [6, 5]] + ])"); + auto expected = ArrayFromJSON(value_type_, "[0, 1, 2, 3, null, 2, 9, 4, 6, 5]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + + // Empty nested list should flatten until non-list type is reached + input = ArrayFromJSON(type_, R"([null])"); + expected = ArrayFromJSON(value_type_, "[]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + + // List types with three nesting levels: list>> + type_ = std::make_shared(std::make_shared(fixed_size_list(value_type_, 2))); + input = ArrayFromJSON(type_, R"([ + [ + [[null, 0]], + [[3, 7], null] + ], + [ + [[4, null], [5, 8]], + [[8, null]], + null + ], + [ + null + ] + ])"); + expected = ArrayFromJSON(value_type_, "[null, 0, 3, 7, 4, null, 5, 8, 8, null]"); + CheckVectorUnary("list_flatten", input, expected, &opts); + } + + protected: + std::shared_ptr type_; + std::shared_ptr value_type_; +}; + +TYPED_TEST_SUITE(TestVectorNestedSpecialized, ListAndListViewTypes); + +TYPED_TEST(TestVectorNestedSpecialized, ListFlatten) { this->TestListFlatten(); } + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenNulls) { + this->TestListFlattenNulls(); +} + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenChunkedArray) { + this->TestListFlattenChunkedArray(); +} + +TYPED_TEST(TestVectorNestedSpecialized, ListFlattenRecursively) { + this->TestListFlattenRecursively(); } TEST(TestVectorNested, ListFlattenFixedSizeList) { @@ -92,6 +168,21 @@ TEST(TestVectorNested, ListFlattenFixedSizeListNulls) { CheckVectorUnary("list_flatten", input, expected); } +TEST(TestVectorNested, ListFlattenFixedSizeListRecursively) { + ListFlattenOptions opts; + opts.recursive = true; + + auto inner_type = fixed_size_list(int32(), 2); + auto type = fixed_size_list(inner_type, 2); + auto input = ArrayFromJSON(type, R"([ + [[0, 1], [null, 3]], + [[7, null], [2, 5]], + [null, null] + ])"); + auto expected = ArrayFromJSON(int32(), "[0, 1, 
null, 3, 7, null, 2, 5]"); + CheckVectorUnary("list_flatten", input, expected, &opts); +} + TEST(TestVectorNested, ListParentIndices) { for (auto ty : {list(int16()), large_list(int16())}) { auto input = ArrayFromJSON(ty, "[[0, null, 1], null, [2, 3], [], [4, 5]]"); diff --git a/python/pyarrow/_compute.pyx b/python/pyarrow/_compute.pyx index a267d53599436..44a3d5e740701 100644 --- a/python/pyarrow/_compute.pyx +++ b/python/pyarrow/_compute.pyx @@ -2035,6 +2035,26 @@ class PairwiseOptions(_PairwiseOptions): self._set_options(period) +cdef class _ListFlattenOptions(FunctionOptions): + def _set_options(self, recursive): + self.wrapped.reset(new CListFlattenOptions(recursive)) + + +class ListFlattenOptions(_ListFlattenOptions): + """ + Options for `list_flatten` function + + Parameters + ---------- + recursive : bool, default False + When True, the list array is flattened recursively until an array + of non-list values is formed. + """ + + def __init__(self, recursive=False): + self._set_options(recursive) + + cdef class _ArraySortOptions(FunctionOptions): def _set_options(self, order, null_placement): self.wrapped.reset(new CArraySortOptions( diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 60fc09ea861b6..6a11b19ffcdf5 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2141,22 +2141,99 @@ cdef class Decimal256Array(FixedSizeBinaryArray): cdef class BaseListArray(Array): - def flatten(self): + def flatten(self, recursive=False): """ - Unnest this ListArray/LargeListArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. + Unnest this [Large]ListArray/[Large]ListViewArray/FixedSizeListArray + according to 'recursive'. Note that this method is different from ``self.values`` in that it takes care of the slicing offset as well as null elements backed by non-empty sub-lists. + Parameters + ---------- + recursive : bool, default False, optional + When True, flatten this logical list-array recursively until an + array of non-list values is formed. + + When False, flatten only the top level. + Returns ------- result : Array + + Examples + -------- + + Basic logical list-array's flatten + >>> import pyarrow as pa + >>> values = [1, 2, 3, 4] + >>> offsets = [2, 1, 0] + >>> sizes = [2, 2, 2] + >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) + >>> array + + [ + [ + 3, + 4 + ], + [ + 2, + 3 + ], + [ + 1, + 2 + ] + ] + >>> array.flatten() + + [ + 3, + 4, + 2, + 3, + 1, + 2 + ] + + When recursive=True, nested list arrays are flattened recursively + until an array of non-list values is formed. + + >>> array = pa.array([ + ... None, + ... [ + ... [1, None, 2], + ... None, + ... [3, 4] + ... ], + ... [], + ... [ + ... [], + ... [5, 6], + ... None + ... ], + ... [ + ... [7, 8] + ... ] + ... ], type=pa.list_(pa.list_(pa.int64()))) + >>> array.flatten(True) + + [ + 1, + null, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ] """ - return _pc().list_flatten(self) + options = _pc().ListFlattenOptions(recursive) + return _pc().list_flatten(self, options=options) def value_parent_indices(self): """ @@ -2527,7 +2604,7 @@ cdef class LargeListArray(BaseListArray): return pyarrow_wrap_array(( self.ap).offsets()) -cdef class ListViewArray(Array): +cdef class ListViewArray(BaseListArray): """ Concrete class for Arrow arrays of a list view data type. 
""" @@ -2747,69 +2824,8 @@ cdef class ListViewArray(Array): """ return pyarrow_wrap_array(( self.ap).sizes()) - def flatten(self, memory_pool=None): - """ - Unnest this ListViewArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. - - Note that this method is different from ``self.values`` in that - it takes care of the slicing offset as well as null elements backed - by non-empty sub-lists. - - Parameters - ---------- - memory_pool : MemoryPool, optional - - Returns - ------- - result : Array - - Examples - -------- - >>> import pyarrow as pa - >>> values = [1, 2, 3, 4] - >>> offsets = [2, 1, 0] - >>> sizes = [2, 2, 2] - >>> array = pa.ListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 3, - 4 - ], - [ - 2, - 3 - ], - [ - 1, - 2 - ] - ] - >>> array.flatten() - - [ - 3, - 4, - 2, - 3, - 1, - 2 - ] - """ - cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) - with nogil: - out = GetResultValue(( self.ap).Flatten(cpool)) - cdef Array result = pyarrow_wrap_array(out) - result.validate() - return result - - -cdef class LargeListViewArray(Array): +cdef class LargeListViewArray(BaseListArray): """ Concrete class for Arrow arrays of a large list view data type. @@ -3037,67 +3053,6 @@ cdef class LargeListViewArray(Array): """ return pyarrow_wrap_array(( self.ap).sizes()) - def flatten(self, memory_pool=None): - """ - Unnest this LargeListViewArray by one level. - - The returned Array is logically a concatenation of all the sub-lists - in this Array. - - Note that this method is different from ``self.values`` in that - it takes care of the slicing offset as well as null elements backed - by non-empty sub-lists. - - Parameters - ---------- - memory_pool : MemoryPool, optional - - Returns - ------- - result : Array - - Examples - -------- - - >>> import pyarrow as pa - >>> values = [1, 2, 3, 4] - >>> offsets = [2, 1, 0] - >>> sizes = [2, 2, 2] - >>> array = pa.LargeListViewArray.from_arrays(offsets, sizes, values) - >>> array - - [ - [ - 3, - 4 - ], - [ - 2, - 3 - ], - [ - 1, - 2 - ] - ] - >>> array.flatten() - - [ - 3, - 4, - 2, - 3, - 1, - 2 - ] - """ - cdef CMemoryPool* cpool = maybe_unbox_memory_pool(memory_pool) - with nogil: - out = GetResultValue(( self.ap).Flatten(cpool)) - cdef Array result = pyarrow_wrap_array(out) - result.validate() - return result - cdef class MapArray(ListArray): """ diff --git a/python/pyarrow/compute.py b/python/pyarrow/compute.py index 205ab393b8b09..83612f66d21e2 100644 --- a/python/pyarrow/compute.py +++ b/python/pyarrow/compute.py @@ -44,6 +44,7 @@ IndexOptions, JoinOptions, ListSliceOptions, + ListFlattenOptions, MakeStructOptions, MapLookupOptions, MatchSubstringOptions, diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 6dae45ab80b1c..f461513e8b3cf 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2589,6 +2589,11 @@ cdef extern from "arrow/compute/api.h" namespace "arrow::compute" nogil: CPairwiseOptions(int64_t period) int64_t period + cdef cppclass CListFlattenOptions\ + "arrow::compute::ListFlattenOptions"(CFunctionOptions): + CListFlattenOptions(c_bool recursive) + c_bool recursive + cdef cppclass CArraySortOptions \ "arrow::compute::ArraySortOptions"(CFunctionOptions): CArraySortOptions(CSortOrder, CNullPlacement) diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index b1187a77c2a6e..bfd266a807c40 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ 
-437,11 +437,11 @@ cdef class LargeListArray(BaseListArray): pass -cdef class ListViewArray(Array): +cdef class ListViewArray(BaseListArray): pass -cdef class LargeListViewArray(Array): +cdef class LargeListViewArray(BaseListArray): pass diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 156d58326b961..6a190957879d3 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -2757,6 +2757,7 @@ def test_list_array_flatten(offset_type, list_type_factory): assert arr1.values.equals(arr0) assert arr2.flatten().flatten().equals(arr0) assert arr2.values.values.equals(arr0) + assert arr2.flatten(True).equals(arr0) @pytest.mark.parametrize('list_type', [ @@ -2778,7 +2779,9 @@ def test_list_value_parent_indices(list_type): @pytest.mark.parametrize(('offset_type', 'list_type'), [(pa.int32(), pa.list_(pa.int32())), (pa.int32(), pa.list_(pa.int32(), list_size=2)), - (pa.int64(), pa.large_list(pa.int32()))]) + (pa.int64(), pa.large_list(pa.int32())), + (pa.int32(), pa.list_view(pa.int32())), + (pa.int64(), pa.large_list_view(pa.int32()))]) def test_list_value_lengths(offset_type, list_type): # FixedSizeListArray needs fixed list sizes @@ -2876,6 +2879,8 @@ def test_fixed_size_list_array_flatten(): assert arr0.type.equals(typ0) assert arr1.flatten().equals(arr0) assert arr2.flatten().flatten().equals(arr0) + assert arr2.flatten().equals(arr1) + assert arr2.flatten(True).equals(arr0) def test_fixed_size_list_array_flatten_with_slice(): @@ -3844,6 +3849,7 @@ def test_list_view_flatten(list_array_type, list_type_factory, offset_type): assert arr2.values.equals(arr1) assert arr2.flatten().flatten().equals(arr0) assert arr2.values.values.equals(arr0) + assert arr2.flatten(True).equals(arr0) # test out of order offsets values = [1, 2, 3, 4] diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 98cbd920b509b..17cc546f834ca 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -152,6 +152,7 @@ def test_option_class_equality(): pc.IndexOptions(pa.scalar(1)), pc.JoinOptions(), pc.ListSliceOptions(0, -1, 1, True), + pc.ListFlattenOptions(recursive=False), pc.MakeStructOptions(["field", "names"], field_nullability=[True, True], field_metadata=[pa.KeyValueMetadata({"a": "1"}),

From 0d7fac0d49eae7f139735c3e7c9256fc304a698a Mon Sep 17 00:00:00 2001
From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com>
Date: Wed, 1 May 2024 06:26:05 +0800
Subject: [PATCH 030/261] GH-41418: [C++] Add [Large]ListView and Map nested types for scalar_if_else's kernel functions (#41419)

### Rationale for this change

Add [Large]ListView and Map nested types for scalar_if_else's kernel functions.

### What changes are included in this PR?

1. Add the list-view related types to the `case_when` and `coalesce` kernel functions, and move the logic that registers nested-type kernels into a unified function for easier maintenance.
2. Add `MapType` support and a related test for `if_else`.

### Are these changes tested?

Yes

### Are there any user-facing changes?
No * GitHub Issue: #41418 Authored-by: ZhangHuiGui <2689496754@qq.com> Signed-off-by: Felipe Oliveira Carvalho --- .../arrow/compute/kernels/scalar_if_else.cc | 107 ++++++++++++++---- .../kernels/scalar_if_else_benchmark.cc | 50 +++++--- .../compute/kernels/scalar_if_else_test.cc | 19 +++- 3 files changed, 138 insertions(+), 38 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index ee181c053c053..13874d9d65e70 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -1309,9 +1309,10 @@ void AddFixedWidthIfElseKernel(const std::shared_ptr& scalar_fun } void AddNestedIfElseKernels(const std::shared_ptr& scalar_function) { - for (const auto type_id : {Type::LIST, Type::LARGE_LIST, Type::LIST_VIEW, - Type::LARGE_LIST_VIEW, Type::FIXED_SIZE_LIST, Type::STRUCT, - Type::DENSE_UNION, Type::SPARSE_UNION, Type::DICTIONARY}) { + for (const auto type_id : + {Type::LIST, Type::LARGE_LIST, Type::LIST_VIEW, Type::LARGE_LIST_VIEW, + Type::FIXED_SIZE_LIST, Type::MAP, Type::STRUCT, Type::DENSE_UNION, + Type::SPARSE_UNION, Type::DICTIONARY}) { ScalarKernel kernel({boolean(), InputType(type_id), InputType(type_id)}, LastType, NestedIfElseExec::Exec); kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; @@ -1847,6 +1848,48 @@ struct CaseWhenFunctor> { } }; +// TODO(GH-41453): a more efficient implementation for list-views is possible +template +struct CaseWhenFunctor> { + using offset_type = typename Type::offset_type; + using BuilderType = typename TypeTraits::BuilderType; + static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + /// TODO(wesm): should this be a DCHECK? Or checked elsewhere + if (batch[0].null_count() > 0) { + return Status::Invalid("cond struct must not have outer nulls"); + } + if (batch[0].is_scalar()) { + return ExecVarWidthScalarCaseWhen(ctx, batch, out); + } + return ExecArray(ctx, batch, out); + } + + static Status ExecArray(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + return ExecVarWidthArrayCaseWhen( + ctx, batch, out, + // ReserveData + [&](ArrayBuilder* raw_builder) { + auto builder = checked_cast(raw_builder); + auto child_builder = builder->value_builder(); + + int64_t reservation = 0; + for (int arg = 1; arg < batch.num_values(); arg++) { + const ExecValue& source = batch[arg]; + if (!source.is_array()) { + const auto& scalar = checked_cast(*source.scalar); + if (!scalar.value) continue; + reservation = + std::max(reservation, batch.length * scalar.value->length()); + } else { + const ArraySpan& array = source.array; + reservation = std::max(reservation, array.child_data[0].length); + } + } + return child_builder->Reserve(reservation); + }); + } +}; + // No-op reserve function, pulled out to avoid apparent miscompilation on MinGW Status ReserveNoData(ArrayBuilder*) { return Status::OK(); } @@ -2712,6 +2755,25 @@ void AddBinaryCaseWhenKernels(const std::shared_ptr& scalar_fu } } +template +void AddNestedCaseWhenKernel(const std::shared_ptr& scalar_function) { + AddCaseWhenKernel(scalar_function, ArrowNestedType::type_id, + CaseWhenFunctor::Exec); +} + +void AddNestedCaseWhenKernels(const std::shared_ptr& scalar_function) { + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + 
AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); + AddNestedCaseWhenKernel(scalar_function); +} + void AddCoalesceKernel(const std::shared_ptr& scalar_function, detail::GetTypeId get_id, ArrayKernelExec exec) { ScalarKernel kernel(KernelSignature::Make({InputType(get_id.id)}, FirstType, @@ -2731,6 +2793,25 @@ void AddPrimitiveCoalesceKernels(const std::shared_ptr& scalar_f } } +template +void AddNestedCoalesceKernel(const std::shared_ptr& scalar_function) { + AddCoalesceKernel(scalar_function, ArrowNestedType::type_id, + CoalesceFunctor::Exec); +} + +void AddNestedCoalesceKernels(const std::shared_ptr& scalar_function) { + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); + AddNestedCoalesceKernel(scalar_function); +} + void AddChooseKernel(const std::shared_ptr& scalar_function, detail::GetTypeId get_id, ArrayKernelExec exec) { ScalarKernel kernel(KernelSignature::Make({Type::INT64, InputType(get_id.id)}, LastType, @@ -2822,15 +2903,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { AddCaseWhenKernel(func, Type::DECIMAL128, CaseWhenFunctor::Exec); AddCaseWhenKernel(func, Type::DECIMAL256, CaseWhenFunctor::Exec); AddBinaryCaseWhenKernels(func, BaseBinaryTypes()); - AddCaseWhenKernel(func, Type::FIXED_SIZE_LIST, - CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::LIST, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::LARGE_LIST, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::MAP, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::STRUCT, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::DENSE_UNION, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::SPARSE_UNION, CaseWhenFunctor::Exec); - AddCaseWhenKernel(func, Type::DICTIONARY, CaseWhenFunctor::Exec); + AddNestedCaseWhenKernels(func); DCHECK_OK(registry->AddFunction(std::move(func))); } { @@ -2848,15 +2921,7 @@ void RegisterScalarIfElse(FunctionRegistry* registry) { for (const auto& ty : BaseBinaryTypes()) { AddCoalesceKernel(func, ty, GenerateTypeAgnosticVarBinaryBase(ty)); } - AddCoalesceKernel(func, Type::FIXED_SIZE_LIST, - CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::LIST, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::LARGE_LIST, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::MAP, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::STRUCT, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::DENSE_UNION, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::SPARSE_UNION, CoalesceFunctor::Exec); - AddCoalesceKernel(func, Type::DICTIONARY, CoalesceFunctor::Exec); + AddNestedCoalesceKernels(func); DCHECK_OK(registry->AddFunction(std::move(func))); } { diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index 58bc560f52842..5988908853d50 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -284,8 +284,11 @@ static void CaseWhenBench(benchmark::State& state) { state.SetItemsProcessed(state.iterations() * (len - offset)); } -static void CaseWhenBenchList(benchmark::State& 
state) { - auto type = list(int64()); +template +static void CaseWhenBenchList(benchmark::State& state, + const std::shared_ptr& type) { + using ArrayType = typename TypeTraits::ArrayType; + auto fld = field("", type); int64_t len = state.range(0); @@ -295,17 +298,17 @@ static void CaseWhenBenchList(benchmark::State& state) { auto cond_field = field("cond", boolean(), key_value_metadata({{"null_probability", "0.01"}})); - auto cond = rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}), - key_value_metadata({{"null_probability", "0.0"}})), - len); - auto val1 = rand.ArrayOf(*fld, len); - auto val2 = rand.ArrayOf(*fld, len); - auto val3 = rand.ArrayOf(*fld, len); - auto val4 = rand.ArrayOf(*fld, len); + auto cond = std::static_pointer_cast( + rand.ArrayOf(*field("", struct_({cond_field, cond_field, cond_field}), + key_value_metadata({{"null_probability", "0.0"}})), + len)) + ->Slice(offset); + auto val1 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val2 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val3 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); + auto val4 = std::static_pointer_cast(rand.ArrayOf(*fld, len))->Slice(offset); for (auto _ : state) { - ABORT_NOT_OK( - CaseWhen(cond->Slice(offset), {val1->Slice(offset), val2->Slice(offset), - val3->Slice(offset), val4->Slice(offset)})); + ABORT_NOT_OK(CaseWhen(cond, {val1, val2, val3, val4})); } // Set bytes processed to ~length of output @@ -372,6 +375,21 @@ static void CaseWhenBenchStringContiguous(benchmark::State& state) { return CaseWhenBenchContiguous(state); } +template +static void CaseWhenBenchVarLengthListLike(benchmark::State& state) { + auto value_type = TypeTraits::type_singleton(); + auto list_type = std::make_shared(value_type); + return CaseWhenBenchList(state, list_type); +} + +static void CaseWhenBenchListInt64(benchmark::State& state) { + return CaseWhenBenchVarLengthListLike(state); +} + +static void CaseWhenBenchListViewInt64(benchmark::State& state) { + CaseWhenBenchVarLengthListLike(state); +} + struct CoalesceParams { int64_t length; int64_t num_arguments; @@ -533,9 +551,11 @@ BENCHMARK(CaseWhenBench64)->Args({kNumItems, 99}); BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 0}); BENCHMARK(CaseWhenBench64Contiguous)->Args({kNumItems, 99}); -// CaseWhen: Lists -BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 0}); -BENCHMARK(CaseWhenBenchList)->Args({kFewItems, 99}); +// CaseWhen: List-like types +BENCHMARK(CaseWhenBenchListInt64)->Args({kFewItems, 0}); +BENCHMARK(CaseWhenBenchListInt64)->Args({kFewItems, 99}); +BENCHMARK(CaseWhenBenchListViewInt64)->Args({kFewItems, 0}); +BENCHMARK(CaseWhenBenchListViewInt64)->Args({kFewItems, 99}); // CaseWhen: Strings BENCHMARK(CaseWhenBenchString)->Args({kFewItems, 0}); diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc index c4c46b5efe84d..9a0ca325277dc 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_test.cc @@ -896,6 +896,21 @@ TEST_F(TestIfElseKernel, ParameterizedTypes) { {cond, ArrayFromJSON(type0, "[0]"), ArrayFromJSON(type1, "[1]")})); } +TEST_F(TestIfElseKernel, MapNested) { + auto type = map(int64(), utf8()); + CheckWithDifferentShapes( + ArrayFromJSON(boolean(), "[true, true, false, false]"), + ArrayFromJSON(type, R"([null, [[2, "foo"], [4, null]], [[3, "test"]], []])"), + ArrayFromJSON(type, R"([[[1, "b"]], [[2, "c"]], [[7, "abc"]], 
null])"), + ArrayFromJSON(type, R"([null, [[2, "foo"], [4, null]], [[7, "abc"]], null])")); + + CheckWithDifferentShapes( + ArrayFromJSON(boolean(), "[null, null, null, null]"), + ArrayFromJSON(type, R"([null, [[1, "c"]], [[4, null]], [[6, "ok"]]])"), + ArrayFromJSON(type, R"([[[-1, null]], [[3, "c"]], null, [[6, "ok"]]])"), + ArrayFromJSON(type, R"([null, null, null, null])")); +} + template class TestIfElseUnion : public ::testing::Test {}; @@ -1920,7 +1935,7 @@ TYPED_TEST(TestCaseWhenBinary, Random) { template class TestCaseWhenList : public ::testing::Test {}; -TYPED_TEST_SUITE(TestCaseWhenList, ListArrowTypes); +TYPED_TEST_SUITE(TestCaseWhenList, ListAndListViewArrowTypes); TYPED_TEST(TestCaseWhenList, ListOfString) { auto type = std::make_shared(utf8()); @@ -2555,7 +2570,7 @@ class TestCoalesceList : public ::testing::Test {}; TYPED_TEST_SUITE(TestCoalesceNumeric, IfElseNumericBasedTypes); TYPED_TEST_SUITE(TestCoalesceBinary, BaseBinaryArrowTypes); -TYPED_TEST_SUITE(TestCoalesceList, ListArrowTypes); +TYPED_TEST_SUITE(TestCoalesceList, ListAndListViewArrowTypes); TYPED_TEST(TestCoalesceNumeric, Basics) { auto type = default_type_instance(); From 6b278be178975fe7174b961a3bf33502acb79295 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Tue, 30 Apr 2024 16:50:14 -0700 Subject: [PATCH 031/261] GH-41471: [Java] Fix performance uber-jar (#41473) ### Rationale for this change Performance `benchmarks.jar` uber-jar is mostly empty and is missing critical metadata information which would allow someone to run performance benchmarks using a simple commandline like: ``` $ java -cp performance/target/benchmarks.jar ArrowBufBenchmarks ``` ### What changes are included in this PR? Move benchmark classes from `src/test/java` to `src/main/java` and change the dependencies' scope as well so that `maven-shade-plugin` can actually pick up the classes to package. Also add missing jmh annotation generator to `maven-compiler-plugin` so that JMH metadata can be generated ### Are these changes tested? Local testing only. ### Are there any user-facing changes? I didn't find any user-facing documentation regarding JMH benchmarks. 
If there are some, it may be helpful to include a simplified command line * GitHub Issue: #41471 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/performance/pom.xml | 30 +++++++++---------- .../arrow/adapter/AvroAdapterBenchmarks.java | 0 .../adapter/jdbc/JdbcAdapterBenchmarks.java | 2 ++ .../search/ParallelSearcherBenchmarks.java | 2 ++ .../arrow/memory/AllocatorBenchmarks.java | 0 .../arrow/memory/ArrowBufBenchmarks.java | 0 .../util/ArrowBufPointerBenchmarks.java | 0 .../util/ByteFunctionHelpersBenchmarks.java | 3 +- .../vector/BaseValueVectorBenchmarks.java | 0 .../vector/BitVectorHelperBenchmarks.java | 2 ++ .../arrow/vector/DecimalVectorBenchmarks.java | 0 .../apache/arrow/vector/Float8Benchmarks.java | 2 ++ .../arrow/vector/FloatingPointBenchmarks.java | 2 ++ .../apache/arrow/vector/IntBenchmarks.java | 2 ++ .../arrow/vector/VarCharBenchmarks.java | 2 ++ .../vector/VariableWidthVectorBenchmarks.java | 2 ++ .../arrow/vector/VectorLoaderBenchmark.java | 2 ++ .../arrow/vector/VectorUnloaderBenchmark.java | 2 ++ .../DictionaryEncoderBenchmarks.java | 0 .../vector/ipc/WriteChannelBenchmark.java | 2 ++ .../message/ArrowRecordBatchBenchmarks.java | 2 ++ .../vector/util/TransferPairBenchmarks.java | 2 ++ 22 files changed, 43 insertions(+), 16 deletions(-) rename java/performance/src/{test => main}/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java (99%) rename java/performance/src/{test => main}/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/memory/AllocatorBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/memory/ArrowBufBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java (98%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java (98%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/Float8Benchmarks.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/FloatingPointBenchmarks.java (98%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/IntBenchmarks.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/VarCharBenchmarks.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/VectorLoaderBenchmark.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java (100%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java (97%) rename java/performance/src/{test => main}/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java (97%) rename java/performance/src/{test => 
main}/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java (97%) diff --git a/java/performance/pom.xml b/java/performance/pom.xml index c819e6393d78f..e9023ece080a3 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -40,61 +40,61 @@ org.openjdk.jmh jmh-core ${jmh.version} - test - - - org.openjdk.jmh - jmh-generator-annprocess - ${jmh.version} - provided org.apache.arrow arrow-vector ${arrow.vector.classifier} - test org.apache.arrow arrow-memory-core - test org.apache.arrow arrow-memory-netty - test + runtime org.apache.avro avro ${dep.avro.version} - test org.apache.arrow arrow-avro - test com.h2database h2 2.2.224 - test + runtime org.apache.arrow arrow-jdbc - test org.apache.arrow arrow-algorithm - test + + org.apache.maven.plugins + maven-compiler-plugin + + + + org.openjdk.jmh + jmh-generator-annprocess + ${jmh.version} + + + + org.apache.maven.plugins maven-shade-plugin diff --git a/java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/adapter/AvroAdapterBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java similarity index 99% rename from java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java index fd3940b4c872c..f6dab83b7cd0c 100644 --- a/java/performance/src/test/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/adapter/jdbc/JdbcAdapterBenchmarks.java @@ -54,6 +54,7 @@ * Benchmarks for Jdbc adapter. */ public class JdbcAdapterBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VALUE_COUNT = 3000; @@ -355,5 +356,6 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java index 1c3af77e73a05..c9fc5cc4bef9c 100644 --- a/java/performance/src/test/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcherBenchmarks.java @@ -43,6 +43,7 @@ * Benchmarks for {@link ParallelSearcher}. 
*/ public class ParallelSearcherBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024 * 1024; @@ -112,4 +113,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/AllocatorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/AllocatorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/ArrowBufBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/ArrowBufBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/util/ArrowBufPointerBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java index 4d0dfcb5da80d..f1dc2d79eff83 100644 --- a/java/performance/src/test/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/memory/util/ByteFunctionHelpersBenchmarks.java @@ -48,7 +48,7 @@ public class ByteFunctionHelpersBenchmarks { */ @State(Scope.Benchmark) public static class ArrowEqualState { - + // checkstyle:off: MissingJavadocMethod private static final int BUFFER_CAPACITY = 7; private BufferAllocator allocator; @@ -135,4 +135,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/BaseValueVectorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java index 5f6e5ca28fbab..e29b889c6e7a8 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java +++ 
b/java/performance/src/main/java/org/apache/arrow/vector/BitVectorHelperBenchmarks.java @@ -41,6 +41,7 @@ * Benchmarks for {@link BitVectorHelper}. */ public class BitVectorHelperBenchmarks { + // checkstyle:off: MissingJavadocMethod /** * State object for general benchmarks. @@ -226,4 +227,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/DecimalVectorBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java index 874e0d9f82ee7..36a633e5e1b6e 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/Float8Benchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/Float8Benchmarks.java @@ -40,6 +40,7 @@ */ @State(Scope.Benchmark) public class Float8Benchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -119,4 +120,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java similarity index 98% rename from java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java index 079672e9f2a98..2938591737f06 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/FloatingPointBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/FloatingPointBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class FloatingPointBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -130,5 +131,6 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java index 036768d445e55..99674058970a6 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/IntBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/IntBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class IntBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -107,4 +108,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git 
a/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java index 1ab4b7bc20dad..a7ce4e04fee87 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VarCharBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VarCharBenchmarks.java @@ -39,6 +39,7 @@ */ @State(Scope.Benchmark) public class VarCharBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -99,4 +100,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java index 7eee981f13327..62c54606e6da6 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VariableWidthVectorBenchmarks.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class VariableWidthVectorBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_CAPACITY = 16 * 1024; @@ -127,4 +128,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java index 416d126419e56..e8e8c0cfbc1f3 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VectorLoaderBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VectorLoaderBenchmark.java @@ -40,6 +40,7 @@ * Benchmarks for {@link VectorLoader}. 
*/ public class VectorLoaderBenchmark { + // checkstyle:off: MissingJavadocMethod private static final int ALLOCATOR_CAPACITY = 1024 * 1024; @@ -114,4 +115,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java index d125172450004..b464f888fa85f 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/VectorUnloaderBenchmark.java @@ -41,6 +41,7 @@ */ @State(Scope.Benchmark) public class VectorUnloaderBenchmark { + // checkstyle:off: MissingJavadocMethod private static final int ALLOCATOR_CAPACITY = 1024 * 1024; @@ -106,4 +107,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java similarity index 100% rename from java/performance/src/test/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/dictionary/DictionaryEncoderBenchmarks.java diff --git a/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java rename to java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java index 7a2537cbb8820..18efff11db9ff 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/ipc/WriteChannelBenchmark.java @@ -41,6 +41,7 @@ * Benchmarks for {@link WriteChannel}. */ public class WriteChannelBenchmark { + // checkstyle:off: MissingJavadocMethod /** * State object for align benchmark. 
@@ -84,4 +85,5 @@ public static void main(String[] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java index c0882821e9cc4..b608bb4c1c590 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatchBenchmarks.java @@ -42,6 +42,7 @@ */ @State(Scope.Benchmark) public class ArrowRecordBatchBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_CAPACITY = 16 * 1024; @@ -95,4 +96,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } diff --git a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java b/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java similarity index 97% rename from java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java rename to java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java index 5142f4bdb8d0d..486862859f122 100644 --- a/java/performance/src/test/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java +++ b/java/performance/src/main/java/org/apache/arrow/vector/util/TransferPairBenchmarks.java @@ -42,6 +42,7 @@ */ @State(Scope.Benchmark) public class TransferPairBenchmarks { + // checkstyle:off: MissingJavadocMethod private static final int VECTOR_LENGTH = 1024; @@ -120,4 +121,5 @@ public static void main(String [] args) throws RunnerException { new Runner(opt).run(); } + // checkstyle:on: MissingJavadocMethod } From 0f7e9af43796a81d126c59ee1342c6dbf8efaf08 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Tue, 30 Apr 2024 17:27:26 -0800 Subject: [PATCH 032/261] GH-39990: [Docs][CI] Add sphinx-lint for docs linting (#40022) ### What changes are included in this PR? This adds developer tooling to the repo for linting the docs by adding the sphinx-lint tool to archery and our pre-commit hooks. In both locations, only two rules are enabled at the moment (Discussed in https://github.com/apache/arrow/pull/40006): `trailing-whitespace` and `missing-final-newline`. This PR also fixes the individual issues covered by the new tooling. ### Are these changes tested? Yes, though manually. I tested that this works by running `archery lint --docs` and `pre-commit` with and without changes that should get caught by the rules. It works as expected. ### Are there any user-facing changes? Yes, 1. Developers that use pre-commit hooks will see a change in behavior when they modify docs 2. Developers using archery will see a new --docs option in `archery lint` 3.
Developers working on the docs may see CI failures related to the new checks * Closes: #39990 * GitHub Issue: #39990 Authored-by: Bryce Mecum Signed-off-by: Bryce Mecum --- .pre-commit-config.yaml | 6 ++ ci/conda_env_sphinx.txt | 1 + dev/archery/archery/cli.py | 6 +- dev/archery/archery/utils/lint.py | 52 ++++++++++++- dev/archery/setup.py | 2 +- docs/requirements.txt | 1 + docs/source/cpp/acero/developer_guide.rst | 6 +- docs/source/cpp/acero/overview.rst | 8 +- docs/source/cpp/acero/substrait.rst | 46 ++++++------ docs/source/cpp/acero/user_guide.rst | 74 +++++++++---------- docs/source/cpp/api/scalar.rst | 2 +- docs/source/cpp/build_system.rst | 2 +- docs/source/cpp/compute.rst | 56 +++++++------- docs/source/cpp/dataset.rst | 22 +++--- docs/source/cpp/datatypes.rst | 14 ++-- .../examples/compute_and_write_example.rst | 6 +- docs/source/cpp/flight.rst | 4 +- docs/source/cpp/gandiva.rst | 26 +++---- .../cpp/gandiva/expr_projector_filter.rst | 26 +++---- docs/source/cpp/gandiva/external_func.rst | 14 ++-- docs/source/cpp/getting_started.rst | 12 ++- docs/source/cpp/memory.rst | 30 ++++---- docs/source/cpp/parquet.rst | 34 ++++----- docs/source/cpp/tables.rst | 6 +- docs/source/cpp/threading.rst | 2 +- .../source/cpp/tutorials/compute_tutorial.rst | 12 +-- .../cpp/tutorials/datasets_tutorial.rst | 30 ++++---- docs/source/cpp/tutorials/io_tutorial.rst | 20 ++--- .../continuous_integration/index.rst | 2 +- docs/source/developers/cpp/building.rst | 10 +-- docs/source/developers/cpp/windows.rst | 4 +- .../guide/architectural_overview.rst | 4 +- .../source/developers/guide/communication.rst | 4 +- .../source/developers/guide/documentation.rst | 3 +- docs/source/developers/guide/index.rst | 10 +-- docs/source/developers/guide/resources.rst | 2 +- .../guide/step_by_step/finding_issues.rst | 2 +- .../developers/guide/step_by_step/set_up.rst | 2 +- .../developers/guide/step_by_step/styling.rst | 2 +- .../developers/guide/tutorials/index.rst | 2 +- .../guide/tutorials/python_tutorial.rst | 36 ++++----- docs/source/developers/java/building.rst | 2 +- docs/source/developers/overview.rst | 3 +- docs/source/developers/release.rst | 18 ++--- .../developers/release_verification.rst | 2 +- docs/source/developers/reviewing.rst | 6 +- .../CDataInterface/PyCapsuleInterface.rst | 26 +++---- docs/source/format/Glossary.rst | 2 +- docs/source/format/Integration.rst | 8 +- docs/source/java/algorithm.rst | 28 +++---- docs/source/java/flight.rst | 2 +- docs/source/java/flight_sql_jdbc_driver.rst | 4 +- docs/source/java/memory.rst | 40 +++++----- docs/source/java/quickstartguide.rst | 2 +- docs/source/java/vector.rst | 4 +- docs/source/python/api/compute.rst | 8 +- docs/source/python/api/substrait.rst | 2 +- docs/source/python/compute.rst | 16 ++-- docs/source/python/dataset.rst | 54 +++++++------- docs/source/python/dlpack.rst | 2 +- docs/source/python/filesystems.rst | 4 +- docs/source/python/getstarted.rst | 16 ++-- docs/source/python/getting_involved.rst | 4 +- docs/source/python/integration/python_r.rst | 24 +++--- docs/source/python/ipc.rst | 10 +-- docs/source/python/json.rst | 2 +- docs/source/python/orc.rst | 2 +- docs/source/python/parquet.rst | 2 +- docs/source/python/timestamps.rst | 26 +++---- 69 files changed, 488 insertions(+), 434 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2e598e0a95064..bf5ca08d53c32 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -132,3 +132,9 @@ repos: ?^cpp/cmake_modules/UseCython\.cmake$| 
?^cpp/src/arrow/util/config\.h\.cmake$| ) + - repo: https://github.com/sphinx-contrib/sphinx-lint + rev: v0.9.1 + hooks: + - id: sphinx-lint + files: ^docs/ + args: ['--disable', 'all', '--enable', 'trailing-whitespace,missing-final-newline', 'docs'] diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 0a356d5722c42..83afa69a653a9 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -26,6 +26,7 @@ pydata-sphinx-theme=0.14 sphinx-autobuild sphinx-design sphinx-copybutton +sphinx-lint sphinxcontrib-jquery sphinx==6.2 # Requirement for doctest-cython diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 5fa41e28a3208..8a26d9266f22d 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -261,6 +261,7 @@ def build(ctx, src, build_dir, force, targets, **kwargs): "Check all sources files for license texts via Apache RAT."), LintCheck('r', "Lint R files."), LintCheck('docker', "Lint Dockerfiles with hadolint."), + LintCheck('docs', "Lint docs with sphinx-lint."), ] @@ -285,9 +286,10 @@ def decorate_lint_command(cmd): help="Run IWYU on all C++ files if enabled") @click.option("-a", "--all", is_flag=True, default=False, help="Enable all checks.") +@click.argument("path", required=False) @decorate_lint_command @click.pass_context -def lint(ctx, src, fix, iwyu_all, **checks): +def lint(ctx, src, fix, iwyu_all, path, **checks): if checks.pop('all'): # "--all" is given => enable all non-selected checks for k, v in checks.items(): @@ -297,7 +299,7 @@ def lint(ctx, src, fix, iwyu_all, **checks): raise click.UsageError( "Need to enable at least one lint check (try --help)") try: - linter(src, fix, iwyu_all=iwyu_all, **checks) + linter(src, fix, iwyu_all=iwyu_all, path=path, **checks) except LintValidationException: sys.exit(1) diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index 15f22ca2e6e5c..108c9ded361e7 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -436,10 +436,55 @@ def docker_linter(src): cwd=src.path)) -def linter(src, fix=False, *, clang_format=False, cpplint=False, +class SphinxLint(Command): + def __init__(self, src, path=None, sphinx_lint_bin=None, disable=None, enable=None): + self.src = src + self.path = path + self.bin = default_bin(sphinx_lint_bin, "sphinx-lint") + self.disable = disable or "all" + self.enable = enable + + def lint(self, *args, check=False): + docs_path = os.path.join(self.src.path, "docs") + + args = [] + + if self.disable: + args.extend(["--disable", self.disable]) + + if self.enable: + args.extend(["--enable", self.enable]) + + if self.path is not None: + args.extend([self.path]) + else: + args.extend([docs_path]) + + return self.run(*args, check=check) + + +def docs_linter(src, path=None): + """Run sphinx-lint on docs.""" + logger.info("Running docs linter (sphinx-lint)") + + sphinx_lint = SphinxLint( + src, + path=path, + disable="all", + enable="trailing-whitespace,missing-final-newline" + ) + + if not sphinx_lint.available: + logger.error("sphinx-lint linter requested but sphinx-lint binary not found") + return + + yield LintResult.from_cmd(sphinx_lint.lint()) + + +def linter(src, fix=False, path=None, *, clang_format=False, cpplint=False, clang_tidy=False, iwyu=False, iwyu_all=False, python=False, numpydoc=False, cmake_format=False, rat=False, - r=False, docker=False): + r=False, docker=False, docs=False): """Run all linters.""" with tmpdir(prefix="arrow-lint-") as root: build_dir = os.path.join(root, "cpp-build") 
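The `SphinxLint` command added above is a thin wrapper around the `sphinx-lint` executable. As a minimal standalone sketch of the argument assembly it performs (not part of the patch; `run_sphinx_lint` is a hypothetical helper, and the defaults mirror the values `docs_linter` passes in):

```python
import os
import subprocess

def run_sphinx_lint(src_path, path=None, disable="all",
                    enable="trailing-whitespace,missing-final-newline"):
    # Assemble the same argv that SphinxLint.lint() builds above:
    #   sphinx-lint --disable all --enable <rules> <path>
    args = ["sphinx-lint"]
    if disable:
        args.extend(["--disable", disable])
    if enable:
        args.extend(["--enable", enable])
    # Lint an explicit path when given, otherwise the whole docs/ tree.
    args.append(path if path is not None else os.path.join(src_path, "docs"))
    return subprocess.run(args, check=False)

# Usage (hypothetical paths): run_sphinx_lint("arrow", path="docs/source/python")
```

This mirrors how `archery lint --docs [PATH]` reaches `docs_linter(src, path)`: an explicit PATH narrows the lint run, while omitting it lints the entire docs/ directory.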
@@ -481,6 +526,9 @@ def linter(src, fix=False, *, clang_format=False, cpplint=False, if docker: results.extend(docker_linter(src)) + if docs: + results.extend(docs_linter(src, path)) + # Raise error if one linter failed, ensuring calling code can exit with # non-zero. for result in results: diff --git a/dev/archery/setup.py b/dev/archery/setup.py index 23a1600910d04..cd3e2e9ca0834 100755 --- a/dev/archery/setup.py +++ b/dev/archery/setup.py @@ -41,7 +41,7 @@ 'integration': ['cffi'], 'integration-java': ['jpype1'], 'lint': ['numpydoc==1.1.0', 'autopep8', 'flake8==6.1.0', 'cython-lint', - 'cmake_format==0.6.13'], + 'cmake_format==0.6.13', 'sphinx-lint==0.9.1'], 'numpydoc': ['numpydoc==1.1.0'], 'release': ['pygithub', jinja_req, 'jira', 'semver', 'gitpython'], } diff --git a/docs/requirements.txt b/docs/requirements.txt index 252344a74a58f..8891680814dff 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -10,5 +10,6 @@ pydata-sphinx-theme~=0.14 sphinx-autobuild sphinx-design sphinx-copybutton +sphinx-lint sphinx==6.2 pandas diff --git a/docs/source/cpp/acero/developer_guide.rst b/docs/source/cpp/acero/developer_guide.rst index 331cd833b58af..80ca68556fc40 100644 --- a/docs/source/cpp/acero/developer_guide.rst +++ b/docs/source/cpp/acero/developer_guide.rst @@ -187,7 +187,7 @@ Examples task (described below) as completed which allows the plan to finish. * The ``fetch`` node, in ``InputReceived``, may decide that it has all the data it needs. It can then call ``StopProducing`` on its input. - + Initialization / Construction / Destruction ------------------------------------------- @@ -271,7 +271,7 @@ distributed systems. Once that has been done then it should be possible to do a meaning exchanging between multiple exec plan instances on a single system) if desired. .. figure:: dist_plan.svg - + A distributed plan can provide parallelism even if the plans themselves run serially Pipeline Parallelism @@ -472,7 +472,7 @@ Benchmarking The most complete macro benchmarking for Acero is provided by https://github.com/voltrondata-labs/arrowbench These include a set of TPC-H benchmarks, executed from the R-dplyr integration, which are run on every Arrow commit and -reported to Conbench at https://conbench.ursa.dev/ +reported to Conbench at https://conbench.ursa.dev/ In addition to these TPC-H benchmarks there are a number of micro-benchmarks for various nodes (hash-join, asof-join, etc.) Finally, the compute functions themselves should mostly have micro-benchmarks. For more on micro benchmarks you diff --git a/docs/source/cpp/acero/overview.rst b/docs/source/cpp/acero/overview.rst index c569f82b099b6..8be4cbc1b1772 100644 --- a/docs/source/cpp/acero/overview.rst +++ b/docs/source/cpp/acero/overview.rst @@ -206,7 +206,7 @@ is very similar to a RecordBatch. It can have zero or more columns and all of t must have the same length. There are a few key differences from ExecBatch: .. figure:: rb_vs_eb.svg - + Both the record batch and the exec batch have strong ownership of the arrays & buffers * An `ExecBatch` does not have a schema. This is because an `ExecBatch` is assumed to be @@ -217,7 +217,7 @@ must have the same length. There are a few key differences from ExecBatch: also has a length property which describes how many rows are in a batch. So another way to view a `Scalar` is a constant array with `length` elements. * An `ExecBatch` contains additional information used by the exec plan. For example, an - `index` can be used to describe a batch's position in an ordered stream. 
We expect + `index` can be used to describe a batch's position in an ordered stream. We expect that `ExecBatch` will also evolve to contain additional fields such as a selection vector. .. figure:: scalar_vs_array.svg @@ -266,5 +266,5 @@ various query representations (e.g. Substrait). The Declaration objects are the with the DeclarationToXyz methods, are the current public API for Acero. .. figure:: decl_vs_ep.svg - - A declaration is a blueprint that is used to instantiate exec plan instances \ No newline at end of file + + A declaration is a blueprint that is used to instantiate exec plan instances diff --git a/docs/source/cpp/acero/substrait.rst b/docs/source/cpp/acero/substrait.rst index 797b2407f93cd..a5532733627c1 100644 --- a/docs/source/cpp/acero/substrait.rst +++ b/docs/source/cpp/acero/substrait.rst @@ -111,7 +111,7 @@ Aggregate Relations * Each measure's arguments must be direct references. * A measure may not have a filter * A measure may not have sorts -* A measure's invocation must be AGGREGATION_INVOCATION_ALL or +* A measure's invocation must be AGGREGATION_INVOCATION_ALL or AGGREGATION_INVOCATION_UNSPECIFIED * A measure's phase must be AGGREGATION_PHASE_INITIAL_TO_RESULT @@ -146,73 +146,73 @@ Types - Caveat * - boolean - boolean - - + - * - i8 - int8 - - + - * - i16 - int16 - - + - * - i32 - int32 - - + - * - i64 - int64 - - + - * - fp32 - float32 - - + - * - fp64 - float64 - - + - * - string - string - - + - * - binary - binary - - + - * - timestamp - timestamp - - + - * - timestamp_tz - timestamp - - + - * - date - date32 - - + - * - time - time64 - - + - * - interval_year - - + - - Not currently supported * - interval_day - - + - - Not currently supported * - uuid - - + - - Not currently supported * - FIXEDCHAR - - + - - Not currently supported * - VARCHAR - - + - - Not currently supported * - FIXEDBINARY - fixed_size_binary - - + - * - DECIMAL - decimal128 - - + - * - STRUCT - struct - Arrow struct fields will have no name (empty string) * - NSTRUCT - - + - - Not currently supported * - LIST - list - - + - * - MAP - map - K must not be nullable diff --git a/docs/source/cpp/acero/user_guide.rst b/docs/source/cpp/acero/user_guide.rst index eca1a0104708b..adcc17216e5ae 100644 --- a/docs/source/cpp/acero/user_guide.rst +++ b/docs/source/cpp/acero/user_guide.rst @@ -32,14 +32,14 @@ Using Acero The basic workflow for Acero is this: #. First, create a graph of :class:`Declaration` objects describing the plan - + #. Call one of the DeclarationToXyz methods to execute the Declaration. a. A new ExecPlan is created from the graph of Declarations. Each Declaration will correspond to one ExecNode in the plan. In addition, a sink node will be added, depending on which DeclarationToXyz method was used. - b. The ExecPlan is executed. Typically this happens as part of the DeclarationToXyz call but in + b. The ExecPlan is executed. Typically this happens as part of the DeclarationToXyz call but in DeclarationToReader the reader is returned before the plan is finished executing. c. Once the plan is finished it is destroyed @@ -315,7 +315,7 @@ of a specific execution node. ``source`` ---------- -A ``source`` operation can be considered as an entry point to create a streaming execution plan. +A ``source`` operation can be considered as an entry point to create a streaming execution plan. :class:`SourceNodeOptions` are used to create the ``source`` operation. The ``source`` operation is the most generic and flexible type of source currently available but it can be quite tricky to configure. 
First you should review the other source node types to ensure there @@ -326,7 +326,7 @@ function should take no arguments and should return an ``arrow::Future>``. This function might be reading a file, iterating through an in memory structure, or receiving data from a network connection. The arrow library refers to these functions as ``arrow::AsyncGenerator`` -and there are a number of utilities for working with these functions. For this example we use +and there are a number of utilities for working with these functions. For this example we use a vector of record batches that we've already stored in memory. In addition, the schema of the data must be known up front. Acero must know the schema of the data at each stage of the execution graph before any processing has begun. This means we must supply the @@ -368,10 +368,10 @@ Example of using ``source`` (usage of sink is explained in detail in :ref:`sink< In the previous example, :ref:`source node `, a source node was used to input the data. But when developing an application, if the data is already in memory as a table, it is much easier, and more performant to use :class:`TableSourceNodeOptions`. -Here the input data can be passed as a ``std::shared_ptr`` along with a ``max_batch_size``. +Here the input data can be passed as a ``std::shared_ptr`` along with a ``max_batch_size``. The ``max_batch_size`` is to break up large record batches so that they can be processed in parallel. It is important to note that the table batches will not get merged to form larger batches when the source -table has a smaller batch size. +table has a smaller batch size. Example of using ``table_source`` @@ -387,7 +387,7 @@ Example of using ``table_source`` ``filter`` ---------- -``filter`` operation, as the name suggests, provides an option to define data filtering +``filter`` operation, as the name suggests, provides an option to define data filtering criteria. It selects rows where the given expression evaluates to true. Filters can be written using :class:`arrow::compute::Expression`, and the expression should have a return type of boolean. For example, if we wish to keep rows where the value @@ -415,7 +415,7 @@ functions, i.e. elementwise functions that return one value for each input row independent of the value of all other rows). This is exposed via :class:`ProjectNodeOptions` which requires, an :class:`arrow::compute::Expression` and name for each of the output columns (if names are not -provided, the string representations of exprs will be used). +provided, the string representations of exprs will be used). Project example: @@ -456,7 +456,7 @@ can be selected from :ref:`this list of aggregation functions The aggregation can provide results as a group or scalar. For instances, an operation like `hash_count` provides the counts per each unique record -as a grouped result while an operation like `sum` provides a single record. +as a grouped result while an operation like `sum` provides a single record. Scalar Aggregation example: @@ -481,14 +481,14 @@ Group Aggregation example: ``sink`` -------- -``sink`` operation provides output and is the final node of a streaming -execution definition. :class:`SinkNodeOptions` interface is used to pass +``sink`` operation provides output and is the final node of a streaming +execution definition. :class:`SinkNodeOptions` interface is used to pass the required options. Similar to the source operator the sink operator exposes the output with a function that returns a record batch future each time it is called. 
It is expected the caller will repeatedly call this function until the generator function is exhausted (returns ``std::optional::nullopt``). If this function is not called often enough then record batches will accumulate in memory. An execution plan should only have one -"terminal" node (one sink node). An :class:`ExecPlan` can terminate early due to cancellation or +"terminal" node (one sink node). An :class:`ExecPlan` can terminate early due to cancellation or an error, before the output is fully consumed. However, the plan can be safely destroyed independently of the sink, which will hold the unconsumed batches by `exec_plan->finished()`. @@ -526,12 +526,12 @@ Example:: arrow::Future<> finish = arrow::Future<>::Make(); struct CustomSinkNodeConsumer : public cp::SinkNodeConsumer { - CustomSinkNodeConsumer(std::atomic *batches_seen, arrow::Future<>finish): + CustomSinkNodeConsumer(std::atomic *batches_seen, arrow::Future<>finish): batches_seen(batches_seen), finish(std::move(finish)) {} // Consumption logic can be written here arrow::Status Consume(cp::ExecBatch batch) override { // data can be consumed in the expected way - // transfer to another system or just do some work + // transfer to another system or just do some work // and write to disk (*batches_seen)++; return arrow::Status::OK(); @@ -541,9 +541,9 @@ Example:: std::atomic *batches_seen; arrow::Future<> finish; - + }; - + std::shared_ptr consumer = std::make_shared(&batches_seen, finish); @@ -567,14 +567,14 @@ Consuming-Sink example: ``order_by_sink`` ----------------- -``order_by_sink`` operation is an extension to the ``sink`` operation. -This operation provides the ability to guarantee the ordering of the -stream by providing the :class:`OrderBySinkNodeOptions`. -Here the :class:`arrow::compute::SortOptions` are provided to define which columns +``order_by_sink`` operation is an extension to the ``sink`` operation. +This operation provides the ability to guarantee the ordering of the +stream by providing the :class:`OrderBySinkNodeOptions`. +Here the :class:`arrow::compute::SortOptions` are provided to define which columns are used for sorting and whether to sort by ascending or descending values. .. note:: This node is a "pipeline breaker" and will fully materialize the dataset in memory. - In the future, spillover mechanisms will be added which should alleviate this + In the future, spillover mechanisms will be added which should alleviate this constraint. @@ -593,14 +593,14 @@ Order-By-Sink example: ``select_k_sink`` ----------------- -``select_k_sink`` option enables selecting the top/bottom K elements, -similar to a SQL ``ORDER BY ... LIMIT K`` clause. -:class:`SelectKOptions` which is a defined by -using :struct:`OrderBySinkNode` definition. This option returns a sink node that receives +``select_k_sink`` option enables selecting the top/bottom K elements, +similar to a SQL ``ORDER BY ... LIMIT K`` clause. +:class:`SelectKOptions` which is a defined by +using :struct:`OrderBySinkNode` definition. This option returns a sink node that receives inputs and then compute top_k/bottom_k. .. note:: This node is a "pipeline breaker" and will fully materialize the input in memory. - In the future, spillover mechanisms will be added which should alleviate this + In the future, spillover mechanisms will be added which should alleviate this constraint. SelectK example: @@ -617,7 +617,7 @@ SelectK example: .. 
_stream_execution_table_sink_docs: -The ``table_sink`` node provides the ability to receive the output as an in-memory table. +The ``table_sink`` node provides the ability to receive the output as an in-memory table. This is simpler to use than the other sink nodes provided by the streaming execution engine but it only makes sense when the output fits comfortably in memory. The node is created using :class:`TableSinkNodeOptions`. @@ -637,7 +637,7 @@ Example of using ``table_sink`` --------- ``scan`` is an operation used to load and process datasets. It should be preferred over the -more generic ``source`` node when your input is a dataset. The behavior is defined using +more generic ``source`` node when your input is a dataset. The behavior is defined using :class:`arrow::dataset::ScanNodeOptions`. More information on datasets and the various scan options can be found in :doc:`../dataset`. @@ -683,10 +683,10 @@ Write example: ``union`` ------------- -``union`` merges multiple data streams with the same schema into one, similar to +``union`` merges multiple data streams with the same schema into one, similar to a SQL ``UNION ALL`` clause. -The following example demonstrates how this can be achieved using +The following example demonstrates how this can be achieved using two data sources. Union example: @@ -704,15 +704,15 @@ Union example: ------------- ``hash_join`` operation provides the relational algebra operation, join using hash-based -algorithm. :class:`HashJoinNodeOptions` contains the options required in -defining a join. The hash_join supports +algorithm. :class:`HashJoinNodeOptions` contains the options required in +defining a join. The hash_join supports `left/right/full semi/anti/outerjoins -`_. +`_. Also the join-key (i.e. the column(s) to join on), and suffixes (i.e a suffix term like "_x" -which can be appended as a suffix for column names duplicated in both left and right -relations.) can be set via the join options. +which can be appended as a suffix for column names duplicated in both left and right +relations.) can be set via the join options. `Read more on hash-joins -`_. +`_. Hash-Join example: @@ -726,7 +726,7 @@ Hash-Join example: Summary ======= -There are examples of these nodes which can be found in +There are examples of these nodes which can be found in ``cpp/examples/arrow/execution_plan_documentation_examples.cc`` in the Arrow source. Complete Example: diff --git a/docs/source/cpp/api/scalar.rst b/docs/source/cpp/api/scalar.rst index 04e78450d7744..be9f9686bf110 100644 --- a/docs/source/cpp/api/scalar.rst +++ b/docs/source/cpp/api/scalar.rst @@ -44,4 +44,4 @@ Utilities .. doxygenclass:: arrow::ScalarVisitor :project: arrow_cpp :members: - :undoc-members: \ No newline at end of file + :undoc-members: diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 60df117eb510e..0c94d7e5ce5dc 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -47,7 +47,7 @@ file into an executable linked with the Arrow C++ shared library: .. 
code-block:: cmake cmake_minimum_required(VERSION 3.16) - + project(MyExample) find_package(Arrow REQUIRED) diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index e7310d2c0c711..546b6e5716df7 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -49,8 +49,8 @@ Computation inputs are represented as a general :class:`Datum` class, which is a tagged union of several shapes of data such as :class:`Scalar`, :class:`Array` and :class:`ChunkedArray`. Many compute functions support both array (chunked or not) and scalar inputs, however some will mandate -particular input types. For example, while ``array_sort_indices`` requires its -first and only input to be an array, the generalized ``sort_indices`` +particular input types. For example, while ``array_sort_indices`` requires its +first and only input to be an array, the generalized ``sort_indices`` function accepts an array, chunked array, record batch or table. .. _invoking-compute-functions: @@ -572,28 +572,28 @@ representation based on the rounding criterion. | trunc | Unary | Numeric | Float32/Float64/Decimal | | | +-------------------+------------+-------------+-------------------------+----------------------------------+--------+ -* \(1) By default rounding functions change a value to the nearest - integer using HALF_TO_EVEN to resolve ties. Options are available to control - the rounding criterion. All ``round`` functions have the +* \(1) By default rounding functions change a value to the nearest + integer using HALF_TO_EVEN to resolve ties. Options are available to control + the rounding criterion. All ``round`` functions have the ``round_mode`` option to set the rounding mode. * \(2) Round to a number of digits where the ``ndigits`` option of :struct:`RoundOptions` specifies the rounding precision in terms of number of digits. A negative value corresponds to digits in the non-fractional part. For example, -2 corresponds to rounding to the nearest multiple of 100 (zeroing the ones and tens digits). Default value of ``ndigits`` is 0 - which rounds to the nearest integer. For integer inputs a non-negative + which rounds to the nearest integer. For integer inputs a non-negative ``ndigits`` value is ignored and the input is returned unchanged. For integer - inputs, if ``-ndigits`` is larger than the maximum number of digits the + inputs, if ``-ndigits`` is larger than the maximum number of digits the input type can hold, an error is returned. * \(3) Round to a multiple where the ``multiple`` option of :struct:`RoundToMultipleOptions` specifies the rounding scale. The rounding - multiple has to be a positive value and can be casted to input type. - For example, 100 corresponds to rounding to the nearest multiple of 100 - (zeroing the ones and tens digits). Default value of ``multiple`` is 1 which + multiple has to be a positive value and can be casted to input type. + For example, 100 corresponds to rounding to the nearest multiple of 100 + (zeroing the ones and tens digits). Default value of ``multiple`` is 1 which rounds to the nearest integer. * \(4) Round the first input to multiple of the second input. The rounding - multiple has to be a positive value and can be casted to the first input type. - For example, 100 corresponds to rounding to the nearest multiple of 100 + multiple has to be a positive value and can be casted to the first input type. + For example, 100 corresponds to rounding to the nearest multiple of 100 (zeroing the ones and tens digits). 
For ``round`` functions, the following rounding modes are available. @@ -634,8 +634,8 @@ The example values are given for default values of ``ndigits`` and ``multiple``. | | | -3.5 -> -3, -4.5 -> -5 | +-----------------------+--------------------------------------------------------------+---------------------------+ -The following table gives examples of how ``ndigits`` (for the ``round`` -and ``round_binary`` functions) and ``multiple`` (for ``round_to_multiple``) +The following table gives examples of how ``ndigits`` (for the ``round`` +and ``round_binary`` functions) and ``multiple`` (for ``round_to_multiple``) influence the operation performed, respectively. +--------------------+-------------------+---------------------------+ @@ -1621,12 +1621,12 @@ Array-wise ("vector") functions Cumulative Functions ~~~~~~~~~~~~~~~~~~~~ -Cumulative functions are vector functions that perform a running accumulation on -their input using a given binary associative operation with an identity element -(a monoid) and output an array containing the corresponding intermediate running -values. The input is expected to be of numeric type. By default these functions -do not detect overflow. They are also available in an overflow-checking variant, -suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when +Cumulative functions are vector functions that perform a running accumulation on +their input using a given binary associative operation with an identity element +(a monoid) and output an array containing the corresponding intermediate running +values. The input is expected to be of numeric type. By default these functions +do not detect overflow. They are also available in an overflow-checking variant, +suffixed ``_checked``, which returns an ``Invalid`` :class:`Status` when overflow is detected. +-------------------------+-------+-------------+-------------+--------------------------------+-----------+ @@ -1649,8 +1649,8 @@ overflow is detected. * \(1) CumulativeOptions has two optional parameters. The first parameter :member:`CumulativeOptions::start` is a starting value for the running - accumulation. It has a default value of 0 for `sum`, 1 for `prod`, min of - input type for `max`, and max of input type for `min`. Specified values of + accumulation. It has a default value of 0 for `sum`, 1 for `prod`, min of + input type for `max`, and max of input type for `min`. Specified values of ``start`` must be castable to the input type. The second parameter :member:`CumulativeOptions::skip_nulls` is a boolean. When set to false (the default), the first encountered null is propagated. When set to @@ -1861,9 +1861,9 @@ replaced, based on the remaining inputs. Pairwise functions ~~~~~~~~~~~~~~~~~~~~ -Pairwise functions are unary vector functions that perform a binary operation on +Pairwise functions are unary vector functions that perform a binary operation on a pair of elements in the input array, typically on adjacent elements. The n-th -output is computed by applying the binary operation to the n-th and (n-p)-th inputs, +output is computed by applying the binary operation to the n-th and (n-p)-th inputs, where p is the period. The default period is 1, in which case the binary operation is applied to adjacent pairs of inputs. The period can also be negative, in which case the n-th output is computed by applying the binary @@ -1877,9 +1877,9 @@ operation to the n-th and (n+abs(p))-th inputs. 
| pairwise_diff_checked | Unary | Numeric/Temporal | Numeric/Temporal | :struct:`PairwiseOptions` | \(1)(3) | +------------------------+-------+----------------------+----------------------+--------------------------------+----------+ -* \(1) Computes the first order difference of an array, It internally calls - the scalar function ``Subtract`` (or the checked variant) to compute - differences, so its behavior and supported types are the same as - ``Subtract``. The period can be specified in :struct:`PairwiseOptions`. +* \(1) Computes the first order difference of an array, It internally calls + the scalar function ``Subtract`` (or the checked variant) to compute + differences, so its behavior and supported types are the same as + ``Subtract``. The period can be specified in :struct:`PairwiseOptions`. * \(2) Wraps around the result when overflow is detected. * \(3) Returns an ``Invalid`` :class:`Status` when overflow is detected. diff --git a/docs/source/cpp/dataset.rst b/docs/source/cpp/dataset.rst index 1f5d0476c2889..a64b73b61c05d 100644 --- a/docs/source/cpp/dataset.rst +++ b/docs/source/cpp/dataset.rst @@ -378,28 +378,28 @@ Partitioning performance considerations Partitioning datasets has two aspects that affect performance: it increases the number of files and it creates a directory structure around the files. Both of these have benefits -as well as costs. Depending on the configuration and the size of your dataset, the costs -can outweigh the benefits. +as well as costs. Depending on the configuration and the size of your dataset, the costs +can outweigh the benefits. -Because partitions split up the dataset into multiple files, partitioned datasets can be -read and written with parallelism. However, each additional file adds a little overhead in -processing for filesystem interaction. It also increases the overall dataset size since +Because partitions split up the dataset into multiple files, partitioned datasets can be +read and written with parallelism. However, each additional file adds a little overhead in +processing for filesystem interaction. It also increases the overall dataset size since each file has some shared metadata. For example, each parquet file contains the schema and -group-level statistics. The number of partitions is a floor for the number of files. If -you partition a dataset by date with a year of data, you will have at least 365 files. If -you further partition by another dimension with 1,000 unique values, you will have up to +group-level statistics. The number of partitions is a floor for the number of files. If +you partition a dataset by date with a year of data, you will have at least 365 files. If +you further partition by another dimension with 1,000 unique values, you will have up to 365,000 files. This fine of partitioning often leads to small files that mostly consist of metadata. -Partitioned datasets create nested folder structures, and those allow us to prune which +Partitioned datasets create nested folder structures, and those allow us to prune which files are loaded in a scan. However, this adds overhead to discovering files in the dataset, as we'll need to recursively "list directory" to find the data files. Too fine partitions can cause problems here: Partitioning a dataset by date for a years worth -of data will require 365 list calls to find all the files; adding another column with +of data will require 365 list calls to find all the files; adding another column with cardinality 1,000 will make that 365,365 calls. 
The most optimal partitioning layout will depend on your data, access patterns, and which -systems will be reading the data. Most systems, including Arrow, should work across a +systems will be reading the data. Most systems, including Arrow, should work across a range of file sizes and partitioning layouts, but there are extremes you should avoid. These guidelines can help avoid some known worst cases: diff --git a/docs/source/cpp/datatypes.rst b/docs/source/cpp/datatypes.rst index 4e1fe76b4d6f2..7eb70936f4e1d 100644 --- a/docs/source/cpp/datatypes.rst +++ b/docs/source/cpp/datatypes.rst @@ -72,8 +72,8 @@ To instantiate data types, it is recommended to call the provided Type Traits ----------- -Writing code that can handle concrete :class:`arrow::DataType` subclasses would -be verbose, if it weren't for type traits. Arrow's type traits map the Arrow +Writing code that can handle concrete :class:`arrow::DataType` subclasses would +be verbose, if it weren't for type traits. Arrow's type traits map the Arrow data types to the specialized array, scalar, builder, and other associated types. For example, the Boolean type has traits: @@ -96,7 +96,7 @@ For example, the Boolean type has traits: See the :ref:`type-traits` for an explanation of each of these fields. Using type traits, one can write template functions that can handle a variety -of Arrow types. For example, to write a function that creates an array of +of Arrow types. For example, to write a function that creates an array of Fibonacci values for any Arrow numeric type: .. code-block:: cpp @@ -128,7 +128,7 @@ For some common cases, there are type associations on the classes themselves. Us Similar to the type traits provided in `std::type_traits `_, -Arrow provides type predicates such as ``is_number_type`` as well as +Arrow provides type predicates such as ``is_number_type`` as well as corresponding templates that wrap ``std::enable_if_t`` such as ``enable_if_number``. These can constrain template functions to only compile for relevant types, which is useful if other overloads need to be implemented. 
For example, to write a sum @@ -176,20 +176,20 @@ here is how one might sum across columns of arbitrary numeric types: class TableSummation { double partial = 0.0; public: - + arrow::Result Compute(std::shared_ptr batch) { for (std::shared_ptr array : batch->columns()) { ARROW_RETURN_NOT_OK(arrow::VisitArrayInline(*array, this)); } return partial; } - + // Default implementation arrow::Status Visit(const arrow::Array& array) { return arrow::Status::NotImplemented("Cannot compute sum for array of type ", array.type()->ToString()); } - + template arrow::enable_if_number Visit(const ArrayType& array) { for (std::optional value : array) { diff --git a/docs/source/cpp/examples/compute_and_write_example.rst b/docs/source/cpp/examples/compute_and_write_example.rst index e66d3ced55d0c..a4b619f7ffff3 100644 --- a/docs/source/cpp/examples/compute_and_write_example.rst +++ b/docs/source/cpp/examples/compute_and_write_example.rst @@ -21,8 +21,8 @@ Compute and Write CSV Example ============================= -The file ``cpp/examples/arrow/compute_and_write_csv_example.cc`` located inside -the source tree contains an example of creating a table of two numerical columns -and then comparing the magnitudes of the entries in the columns and writing out to +The file ``cpp/examples/arrow/compute_and_write_csv_example.cc`` located inside +the source tree contains an example of creating a table of two numerical columns +and then comparing the magnitudes of the entries in the columns and writing out to a CSV file with the column entries and their comparisons. The code in the example is documented. diff --git a/docs/source/cpp/flight.rst b/docs/source/cpp/flight.rst index e07a84e91ee4f..a1e9420bfd34e 100644 --- a/docs/source/cpp/flight.rst +++ b/docs/source/cpp/flight.rst @@ -350,10 +350,10 @@ Closing unresponsive connections calls Cancel() on a timer, with the main thread resetting the timer every time an operation completes successfully. For a fully-worked out example, see the Cookbook. - + .. note:: There is a long standing ticket for a per-write/per-read timeout instead of a per call timeout (ARROW-6062_), but this is not (easily) - possible to implement with the blocking gRPC API. + possible to implement with the blocking gRPC API. .. _best gRPC practices: https://grpc.io/docs/guides/performance/#general .. _gRPC keys: https://grpc.github.io/grpc/cpp/group__grpc__arg__keys.html diff --git a/docs/source/cpp/gandiva.rst b/docs/source/cpp/gandiva.rst index 07b07bee7ac4e..f60d1fc8ac8d9 100644 --- a/docs/source/cpp/gandiva.rst +++ b/docs/source/cpp/gandiva.rst @@ -29,8 +29,8 @@ Gandiva only handles projections and filters; for other transformations, see :ref:`Compute Functions `. Gandiva was designed to take advantage of the Arrow memory format and modern -hardware. From the Arrow memory model, since Arrow arrays have separate buffers for values and -validity bitmaps, values and their null status can often be processed +hardware. From the Arrow memory model, since Arrow arrays have separate buffers for values and +validity bitmaps, values and their null status can often be processed independently, allowing for better instruction pipelining. On modern hardware, compiling expressions using LLVM allows the execution to be optimized to the local runtime environment and hardware, including available SIMD @@ -42,25 +42,25 @@ pre-compiled into LLVM IR (intermediate representation). 
Expression, Projector and Filter ================================ -To effectively utilize Gandiva, you will construct expression trees with ``TreeExprBuilder``, -including the creation of function nodes, if-else logic, and boolean expressions. +To effectively utilize Gandiva, you will construct expression trees with ``TreeExprBuilder``, +including the creation of function nodes, if-else logic, and boolean expressions. Subsequently, leverage ``Projector`` or ``Filter`` execution kernels to efficiently evaluate these expressions. -See :doc:`./gandiva/expr_projector_filter` for more details. +See :doc:`./gandiva/expr_projector_filter` for more details. External Functions Development ============================== -Gandiva offers the capability of integrating external functions, encompassing -both C functions and IR functions. This feature broadens the spectrum of -functions that can be applied within Gandiva expressions. For developers -looking to customize and enhance their computational solutions, -Gandiva provides the opportunity to develop and register their own external -functions, thus allowing for a more tailored and flexible use of the Gandiva +Gandiva offers the capability of integrating external functions, encompassing +both C functions and IR functions. This feature broadens the spectrum of +functions that can be applied within Gandiva expressions. For developers +looking to customize and enhance their computational solutions, +Gandiva provides the opportunity to develop and register their own external +functions, thus allowing for a more tailored and flexible use of the Gandiva environment. -See :doc:`./gandiva/external_func` for more details. +See :doc:`./gandiva/external_func` for more details. .. toctree:: :maxdepth: 2 gandiva/expr_projector_filter - gandiva/external_func \ No newline at end of file + gandiva/external_func diff --git a/docs/source/cpp/gandiva/expr_projector_filter.rst b/docs/source/cpp/gandiva/expr_projector_filter.rst index c960d1d869fe5..9d58b185032e3 100644 --- a/docs/source/cpp/gandiva/expr_projector_filter.rst +++ b/docs/source/cpp/gandiva/expr_projector_filter.rst @@ -30,7 +30,7 @@ literal values, created by :func:`TreeExprBuilder::MakeLiteral`. Nodes can be combined into more complex expression trees using: * :func:`TreeExprBuilder::MakeFunction` to create a function - node. (You can call :func:`GetRegisteredFunctionSignatures` to + node. (You can call :func:`GetRegisteredFunctionSignatures` to get a list of valid function signatures.) * :func:`TreeExprBuilder::MakeIf` to create if-else logic. * :func:`TreeExprBuilder::MakeAnd` and :func:`TreeExprBuilder::MakeOr` @@ -39,7 +39,7 @@ can be combined into more complex expression trees using: functions to create set membership tests. Each of these functions create new composite nodes, which contain the leaf nodes -(literals and field references) or other composite nodes as children. By +(literals and field references) or other composite nodes as children. By composing these, you can create arbitrarily complex expression trees. Once an expression tree is built, they are wrapped in either :class:`Expression` @@ -84,7 +84,7 @@ reused to process distinct record batches in parallel. Evaluating projections ---------------------- -Execution is performed with :func:`Projector::Evaluate`. This outputs +Execution is performed with :func:`Projector::Evaluate`. This outputs a vector of arrays, which can be passed along with the output schema to :func:`arrow::RecordBatch::Make()`. 
@@ -99,14 +99,14 @@ Evaluating filters :func:`Filter::Evaluate` produces :class:`SelectionVector`, a vector of row indices that matched the filter condition. The selection vector -is a wrapper around an arrow integer array, parameterized by bitwidth. When -creating the selection vector (you must initialize it *before* passing to -``Evaluate()``), you must choose the bitwidth, which determines the max index +is a wrapper around an arrow integer array, parameterized by bitwidth. When +creating the selection vector (you must initialize it *before* passing to +``Evaluate()``), you must choose the bitwidth, which determines the max index value it can hold, and the max number of slots, which determines how many indices -it may contain. In general, the max number of slots should be set to your batch -size and the bitwidth the smallest integer size that can represent all integers -less than the batch size. For example, if your batch size is 100k, set the -maximum number of slots to 100k and the bitwidth to 32 (since 2^16 = 64k which +it may contain. In general, the max number of slots should be set to your batch +size and the bitwidth the smallest integer size that can represent all integers +less than the batch size. For example, if your batch size is 100k, set the +maximum number of slots to 100k and the bitwidth to 32 (since 2^16 = 64k which would be too small). Once ``Evaluate()`` has been run and the :class:`SelectionVector` is @@ -123,10 +123,10 @@ output record batch. Evaluating projections and filters ---------------------------------- -Finally, you can also project while apply a selection vector, with +Finally, you can also project while apply a selection vector, with :func:`Projector::Evaluate()`. To do so, first make sure to initialize the :class:`Projector` with :func:`SelectionVector::GetMode()` so that the projector -compiles with the correct bitwidth. Then you can pass the +compiles with the correct bitwidth. Then you can pass the :class:`SelectionVector` into the :func:`Projector::Evaluate()` method. @@ -134,4 +134,4 @@ compiles with the correct bitwidth. Then you can pass the :language: cpp :start-after: (Doc section: Evaluate filter and projection) :end-before: (Doc section: Evaluate filter and projection) - :dedent: 2 \ No newline at end of file + :dedent: 2 diff --git a/docs/source/cpp/gandiva/external_func.rst b/docs/source/cpp/gandiva/external_func.rst index cdd8fc82e59db..f8bdde83d96e6 100644 --- a/docs/source/cpp/gandiva/external_func.rst +++ b/docs/source/cpp/gandiva/external_func.rst @@ -79,7 +79,7 @@ The ``NativeFunction`` class is used to define the metadata for an external func * ``ResultNullableType::kResultNullIfNull``: result validity is an intersection of the validity of the children. * ``ResultNullableType::kResultNullNever``: result is always valid. * ``ResultNullableType::kResultNullInternal``: result validity depends on some internal logic. -* ``pc_name``: The name of the corresponding precompiled function. +* ``pc_name``: The name of the corresponding precompiled function. * Typically, this name follows the convention ``{base_name}`` + ``_{param1_type}`` + ``{param2_type}`` + ... + ``{paramN_type}``. For example, if the base name is ``add`` and the function takes two ``int32`` parameters and returns an ``int32``, the precompiled function name would be ``add_int32_int32``, but this convention is not mandatory as long as you can guarantee its uniqueness. * ``flags``: Optional flags for additional function attributes (default is 0). 
Please check out ``NativeFunction::kNeedsContext``, ``NativeFunction::kNeedsFunctionHolder``, and ``NativeFunction::kCanReturnErrors`` for more details. @@ -153,10 +153,10 @@ Not all Arrow data types are supported in Gandiva. The following table lists the | utf8 (as return type) | int64_t context, | | | const char*, | | | uint32_t* | -| | [see next section]| +| | [see next section]| +-------------------------------------+-------------------+ -Handling arrow::StringType (utf8 type) and arrow::BinaryType +Handling arrow::StringType (utf8 type) and arrow::BinaryType ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Both ``arrow::StringType`` and ``arrow::BinaryType`` are variable-length types. And they are handled similarly in external functions. Since ``arrow::StringType`` (utf8 type) is more commonly used, we will use it below as the example to explain how to handle variable-length types in external functions. @@ -179,7 +179,7 @@ When ``arrow::StringType`` (``utf8`` type) is used as the return type in a funct 2. **Function Parameters:** * **Context Parameter**: The C function should begin with an additional parameter, ``int64_t context``. This parameter is crucial for context management within the function. * **String Length Output Parameter**: The function should also include a ``uint32_t*`` parameter at the end. This output parameter will store the length of the returned string data. -3. **Return Value**: The function should return a ``const char*`` pointer, pointing to the string data. +3. **Return Value**: The function should return a ``const char*`` pointer, pointing to the string data. 4. **Function Implementation:** * **Memory Allocation and Error Messaging:** Within the function's implementation, use ``gdv_fn_context_arena_malloc`` and ``gdv_fn_context_set_error_msg`` for memory allocation and error messaging, respectively. Both functions take ``int64_t context`` as their first parameter, facilitating efficient context utilization. @@ -200,10 +200,10 @@ You can use ``gandiva::FunctionRegistry``'s APIs to register external C function NativeFunction func, void* c_function_ptr, std::optional function_holder_maker = std::nullopt); -The above API allows you to register an external C function. +The above API allows you to register an external C function. -* The ``NativeFunction`` object describes the metadata of the external C function. -* The ``c_function_ptr`` is the function pointer to the external C function's implementation. +* The ``NativeFunction`` object describes the metadata of the external C function. +* The ``c_function_ptr`` is the function pointer to the external C function's implementation. * The optional ``function_holder_maker`` is used to create a function holder for the external C function if the external C function requires a function holder. Check out the ``gandiva::FunctionHolder`` class and its several sub-classes for more details. External IR functions diff --git a/docs/source/cpp/getting_started.rst b/docs/source/cpp/getting_started.rst index 89bd4559ef1e6..2cab5d1581c1c 100644 --- a/docs/source/cpp/getting_started.rst +++ b/docs/source/cpp/getting_started.rst @@ -24,17 +24,17 @@ Getting Started The following articles demonstrate installation, use, and a basic understanding of Arrow. These articles will get you setup quickly using Arrow and give you a taste of what the library is capable of. 
-Specifically, it contains: an installation and linking guide; documentation of conventions used -in the codebase and suggested for users; and tutorials, including: +Specifically, it contains: an installation and linking guide; documentation of conventions used +in the codebase and suggested for users; and tutorials, including: -* Building Arrow arrays and tabular structures +* Building Arrow arrays and tabular structures * Reading and writing Parquet, Arrow, and CSV files * Executing compute kernels on arrays * Reading and writing multi-file partitioned datasets Start here to gain a basic understanding of Arrow, and move on to the :doc:`/cpp/user_guide` to -explore more specific topics and underlying concepts, or the :doc:`/cpp/api` to explore Arrow's -API. +explore more specific topics and underlying concepts, or the :doc:`/cpp/api` to explore Arrow's +API. .. toctree:: @@ -44,5 +44,3 @@ API. tutorials/io_tutorial.rst tutorials/compute_tutorial.rst tutorials/datasets_tutorial.rst - - diff --git a/docs/source/cpp/memory.rst b/docs/source/cpp/memory.rst index ad8276e3728a2..33907b5580f61 100644 --- a/docs/source/cpp/memory.rst +++ b/docs/source/cpp/memory.rst @@ -205,7 +205,7 @@ simply do:: Memory Profiling ================ -On Linux, detailed profiles of memory allocations can be generated using +On Linux, detailed profiles of memory allocations can be generated using ``perf record``, without any need to modify the binaries. These profiles can show the traceback in addition to allocation size. This does require debug symbols, from either a debug build or a release with debug symbols build. @@ -234,14 +234,14 @@ recorded allocations, so we can correlate them with the call to free/de-allocate .. tab-set:: .. tab-item:: jemalloc - + .. code-block:: shell - perf probe -x libarrow.so je_arrow_mallocx '$params' - perf probe -x libarrow.so je_arrow_mallocx%return '$retval' - perf probe -x libarrow.so je_arrow_rallocx '$params' - perf probe -x libarrow.so je_arrow_rallocx%return '$retval' - perf probe -x libarrow.so je_arrow_dallocx '$params' + perf probe -x libarrow.so je_arrow_mallocx '$params' + perf probe -x libarrow.so je_arrow_mallocx%return '$retval' + perf probe -x libarrow.so je_arrow_rallocx '$params' + perf probe -x libarrow.so je_arrow_rallocx%return '$retval' + perf probe -x libarrow.so je_arrow_dallocx '$params' PROBE_ARGS="-e probe_libarrow:je_arrow_mallocx \ -e probe_libarrow:je_arrow_mallocx__return \ -e probe_libarrow:je_arrow_rallocx \ @@ -249,13 +249,13 @@ recorded allocations, so we can correlate them with the call to free/de-allocate -e probe_libarrow:je_arrow_dallocx" .. tab-item:: mimalloc - + .. code-block:: shell - perf probe -x libarrow.so mi_malloc_aligned '$params' - perf probe -x libarrow.so mi_malloc_aligned%return '$retval' - perf probe -x libarrow.so mi_realloc_aligned '$params' - perf probe -x libarrow.so mi_realloc_aligned%return '$retval' + perf probe -x libarrow.so mi_malloc_aligned '$params' + perf probe -x libarrow.so mi_malloc_aligned%return '$retval' + perf probe -x libarrow.so mi_realloc_aligned '$params' + perf probe -x libarrow.so mi_realloc_aligned%return '$retval' perf probe -x libarrow.so mi_free '$params' PROBE_ARGS="-e probe_libarrow:mi_malloc_aligned \ -e probe_libarrow:mi_malloc_aligned__return \ @@ -277,9 +277,9 @@ If you want to profile a running process, you can run ``perf record -p `` and it will record until you interrupt with CTRL+C. Alternatively, you can do ``perf record -P sleep 10`` to record for 10 seconds. 
-The resulting data can be processed with standard tools to work with perf or +The resulting data can be processed with standard tools to work with perf or ``perf script`` can be used to pipe a text format of the data to custom scripts. -The following script parses ``perf script`` output and prints the output in +The following script parses ``perf script`` output and prints the output in new lines delimited JSON for easier processing. .. code-block:: python @@ -354,7 +354,7 @@ Here's an example invocation of that script, with a preview of output data: From there one can answer a number of questions. For example, the following -script will find which allocations were never freed, and print the associated +script will find which allocations were never freed, and print the associated tracebacks along with the count of dangling allocations: .. code-block:: python diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst index 3e06352f5dde3..96897d139b351 100644 --- a/docs/source/cpp/parquet.rst +++ b/docs/source/cpp/parquet.rst @@ -51,8 +51,8 @@ FileReader ---------- To read Parquet data into Arrow structures, use :class:`arrow::FileReader`. -To construct, it requires a :class:`::arrow::io::RandomAccessFile` instance -representing the input file. To read the whole file at once, +To construct, it requires a :class:`::arrow::io::RandomAccessFile` instance +representing the input file. To read the whole file at once, use :func:`arrow::FileReader::ReadTable`: .. literalinclude:: ../../../cpp/examples/arrow/parquet_read_write.cc @@ -67,7 +67,7 @@ Finer-grained options are available through the and :class:`ArrowReaderProperties` classes. For reading as a stream of batches, use the :func:`arrow::FileReader::GetRecordBatchReader` -method to retrieve a :class:`arrow::RecordBatchReader`. It will use the batch +method to retrieve a :class:`arrow::RecordBatchReader`. It will use the batch size set in :class:`ArrowReaderProperties`. .. literalinclude:: ../../../cpp/examples/arrow/parquet_read_write.cc @@ -106,8 +106,8 @@ If memory efficiency is more important than performance, then: #. Turn on ``enable_buffered_stream`` in :class:`parquet::ReaderProperties`. In addition, if you know certain columns contain many repeated values, you can -read them as :term:`dictionary encoded` columns. This is -enabled with the ``set_read_dictionary`` setting on :class:`ArrowReaderProperties`. +read them as :term:`dictionary encoded` columns. This is +enabled with the ``set_read_dictionary`` setting on :class:`ArrowReaderProperties`. If the files were written with Arrow C++ and the ``store_schema`` was activated, then the original Arrow schema will be automatically read and will override this setting. @@ -174,7 +174,7 @@ The :func:`arrow::WriteTable` function writes an entire .. note:: - Column compression is off by default in C++. See :ref:`below ` + Column compression is off by default in C++. See :ref:`below ` for how to choose a compression codec in the writer properties. To write out data batch-by-batch, use :class:`arrow::FileWriter`. @@ -191,9 +191,9 @@ StreamWriter The :class:`StreamWriter` allows for Parquet files to be written using standard C++ output operators, similar to reading with the :class:`StreamReader` -class. This type-safe approach also ensures that rows are written without -omitting fields and allows for new row groups to be created automatically -(after certain volume of data) or explicitly by using the :type:`EndRowGroup` +class. 
This type-safe approach also ensures that rows are written without +omitting fields and allows for new row groups to be created automatically +(after certain volume of data) or explicitly by using the :type:`EndRowGroup` stream modifier. Exceptions are used to signal errors. A :class:`ParquetException` is @@ -266,20 +266,20 @@ group that takes precedent over the ``chunk_size`` passed in the write methods. You can set the version of Parquet to write with ``version``, which determines which logical types are available. In addition, you can set the data page version with ``data_page_version``. It's V1 by default; setting to V2 will allow more -optimal compression (skipping compressing pages where there isn't a space +optimal compression (skipping compressing pages where there isn't a space benefit), but not all readers support this data page version. -Compression is off by default, but to get the most out of Parquet, you should -also choose a compression codec. You can choose one for the whole file or +Compression is off by default, but to get the most out of Parquet, you should +also choose a compression codec. You can choose one for the whole file or choose one for individual columns. If you choose a mix, the file-level option -will apply to columns that don't have a specific compression codec. See +will apply to columns that don't have a specific compression codec. See :class:`::arrow::Compression` for options. -Column data encodings can likewise be applied at the file-level or at the -column level. By default, the writer will attempt to dictionary encode all +Column data encodings can likewise be applied at the file-level or at the +column level. By default, the writer will attempt to dictionary encode all supported columns, unless the dictionary grows too large. This behavior can be changed at file-level or at the column level with ``disable_dictionary()``. -When not using dictionary encoding, it will fallback to the encoding set for +When not using dictionary encoding, it will fallback to the encoding set for the column or the overall file; by default ``Encoding::PLAIN``, but this can be changed with ``encoding()``. @@ -559,7 +559,7 @@ Encryption Parquet C++ implements all features specified in the `encryption specification `__, -except for encryption of column index and bloom filter modules. +except for encryption of column index and bloom filter modules. More specifically, Parquet C++ supports: diff --git a/docs/source/cpp/tables.rst b/docs/source/cpp/tables.rst index b28a9fc1e13a5..d98a2acde6620 100644 --- a/docs/source/cpp/tables.rst +++ b/docs/source/cpp/tables.rst @@ -81,13 +81,13 @@ and computation functions, possibly incremental. :alt: A graphical representation of an Arrow Table and a Record Batch, with structure as described in text above. -Record batches can be sent between implementations, such as via +Record batches can be sent between implementations, such as via :ref:`IPC ` or -via the :doc:`C Data Interface <../format/CDataInterface>`. Tables and +via the :doc:`C Data Interface <../format/CDataInterface>`. Tables and chunked arrays, on the other hand, are concepts in the C++ implementation, not in the Arrow format itself, so they aren't directly portable. -However, a table can be converted to and built from a sequence of record +However, a table can be converted to and built from a sequence of record batches easily without needing to copy the underlying array buffers. 
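A minimal sketch of this round trip, using the :class:`arrow::TableBatchReader` introduced just below (the helper name ``RoundTrip`` is illustrative):

.. code-block:: cpp

   #include <memory>
   #include <vector>

   #include <arrow/api.h>

   // Stream a table out as record batches, then rebuild an equivalent
   // table from them; no array buffers are copied in either direction.
   arrow::Result<std::shared_ptr<arrow::Table>> RoundTrip(
       const std::shared_ptr<arrow::Table>& table) {
     arrow::TableBatchReader reader(*table);
     std::vector<std::shared_ptr<arrow::RecordBatch>> batches;
     std::shared_ptr<arrow::RecordBatch> batch;
     while (true) {
       ARROW_RETURN_NOT_OK(reader.ReadNext(&batch));
       if (batch == nullptr) break;  // End of stream.
       batches.push_back(batch);
     }
     return arrow::Table::FromRecordBatches(table->schema(), batches);
   }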
A table can be streamed as an arbitrary number of record batches using a :class:`arrow::TableBatchReader`. Conversely, a logical sequence of diff --git a/docs/source/cpp/threading.rst b/docs/source/cpp/threading.rst index 24ad25b5a028a..4a1a65ffe012d 100644 --- a/docs/source/cpp/threading.rst +++ b/docs/source/cpp/threading.rst @@ -99,4 +99,4 @@ Arrow C++ uses :class:`arrow::Future` to communicate results between threads. T an :class:`arrow::Future` will be created when an operation needs to perform some kind of long running task that will block for some period of time. :class:`arrow::Future` objects are mainly meant for internal use and any method that returns an -:class:`arrow::Future` will usually have a synchronous variant as well. \ No newline at end of file +:class:`arrow::Future` will usually have a synchronous variant as well. diff --git a/docs/source/cpp/tutorials/compute_tutorial.rst b/docs/source/cpp/tutorials/compute_tutorial.rst index bcb87e6a8f992..a650865d75ce4 100644 --- a/docs/source/cpp/tutorials/compute_tutorial.rst +++ b/docs/source/cpp/tutorials/compute_tutorial.rst @@ -34,7 +34,7 @@ functionality to: 3. Search for a value in a column -Pre-requisites +Pre-requisites --------------- Before continuing, make sure you have: @@ -49,16 +49,16 @@ Setup Before running some computations, we need to fill in a couple gaps: 1. We need to include necessary headers. - + 2. ``A main()`` is needed to glue things together. 3. We need data to play with. - + Includes ^^^^^^^^ -Before writing C++ code, we need some includes. We'll get ``iostream`` for output, then import Arrow's -compute functionality: +Before writing C++ code, we need some includes. We'll get ``iostream`` for output, then import Arrow's +compute functionality: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/compute_example.cc :language: cpp @@ -340,4 +340,4 @@ Refer to the below for a copy of the complete code: :start-after: (Doc section: Compute Example) :end-before: (Doc section: Compute Example) :linenos: - :lineno-match: \ No newline at end of file + :lineno-match: diff --git a/docs/source/cpp/tutorials/datasets_tutorial.rst b/docs/source/cpp/tutorials/datasets_tutorial.rst index 285fc24d8d599..f60e1e52170ae 100644 --- a/docs/source/cpp/tutorials/datasets_tutorial.rst +++ b/docs/source/cpp/tutorials/datasets_tutorial.rst @@ -33,7 +33,7 @@ file on disk. In this article, you will: 2. write out a partitioned dataset from a Table. -Pre-requisites +Pre-requisites --------------- Before continuing, make sure you have: @@ -50,7 +50,7 @@ Setup Before running some computations, we need to fill in a couple gaps: 1. We need to include necessary headers. - + 2. A ``main()`` is needed to glue things together. 3. We need data on disk to play with. @@ -58,8 +58,8 @@ Before running some computations, we need to fill in a couple gaps: Includes ^^^^^^^^ -Before writing C++ code, we need some includes. We'll get ``iostream`` for output, then import Arrow's -compute functionality for each file type we'll work with in this article: +Before writing C++ code, we need some includes. We'll get ``iostream`` for output, then import Arrow's +compute functionality for each file type we'll work with in this article: .. 
literalinclude:: ../../../../cpp/examples/tutorial_examples/dataset_example.cc :language: cpp @@ -206,7 +206,7 @@ Build Dataset using Factory ^^^^^^^^^^^^^^^^^^^^^^^^^^^ With a :class:`dataset::FileSystemDatasetFactory` set up, we can actually build our -:class:`dataset::Dataset` with :func:`dataset::FileSystemDatasetFactory::Finish`, just +:class:`dataset::Dataset` with :func:`dataset::FileSystemDatasetFactory::Finish`, just like with an :class:`ArrayBuilder` back in the basic tutorial: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/dataset_example.cc @@ -228,14 +228,14 @@ dataset, and print those out, along with some small info: Move Dataset into Table ^^^^^^^^^^^^^^^^^^^^^^^ -One way we can do something with :class:`Datasets ` is getting -them into a :class:`Table`, where we can do anything we’ve learned we can do to -:class:`Tables
` to that :class:`Table`. +One way we can do something with :class:`Datasets ` is getting +them into a :class:`Table`, where we can do anything we’ve learned we can do to +:class:`Tables
` to that :class:`Table`. .. seealso:: :doc:`/cpp/streaming_execution` for execution that avoids manifesting the entire dataset in memory. -In order to move a :class:`Dataset’s ` contents into a :class:`Table`, -we need a :class:`dataset::Scanner`, which scans the data and outputs it to the :class:`Table`. +In order to move a :class:`Dataset’s ` contents into a :class:`Table`, +we need a :class:`dataset::Scanner`, which scans the data and outputs it to the :class:`Table`. First, we get a :class:`dataset::ScannerBuilder` from the :class:`dataset::Dataset`: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/dataset_example.cc @@ -305,7 +305,7 @@ Create Scanner for Moving Table Data The process for writing a :class:`dataset::Dataset`, once a source of data is available, is similar to the reverse of reading it. Before, we used a :class:`dataset::Scanner` in order to scan into a :class:`Table` – now, we need one to read out of our -:class:`TableBatchReader`. To get that :class:`dataset::Scanner`, we’ll make a :class:`dataset::ScannerBuilder` +:class:`TableBatchReader`. To get that :class:`dataset::Scanner`, we’ll make a :class:`dataset::ScannerBuilder` based on our :class:`TableBatchReader`, then use that Builder to build a :class:`dataset::Scanner`: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/dataset_example.cc @@ -343,7 +343,7 @@ Arrow, so we’ll write back out to that: :start-after: (Doc section: Write Format) :end-before: (Doc section: Write Format) -Configure FileSystemDatasetWriteOptions +Configure FileSystemDatasetWriteOptions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In order to write to disk, we need some configuration. We’ll do so via @@ -435,11 +435,11 @@ tutorials. With that, you’ve read and written partitioned datasets! This method, with some configuration, will work for any supported dataset format. For an example of such a dataset, the NYC Taxi dataset is a well-known -one, which you can find `here `_. +one, which you can find `here `_. Now you can get larger-than-memory data mapped for use! Which means that now we have to be able to process this data without -pulling it all into memory at once. For this, try Acero. +pulling it all into memory at once. For this, try Acero. .. seealso:: :doc:`/cpp/streaming_execution` for more information on Acero. @@ -450,4 +450,4 @@ Refer to the below for a copy of the complete code: :start-after: (Doc section: Dataset Example) :end-before: (Doc section: Dataset Example) :linenos: - :lineno-match: \ No newline at end of file + :lineno-match: diff --git a/docs/source/cpp/tutorials/io_tutorial.rst b/docs/source/cpp/tutorials/io_tutorial.rst index f981c94b83e32..309f10a350aa3 100644 --- a/docs/source/cpp/tutorials/io_tutorial.rst +++ b/docs/source/cpp/tutorials/io_tutorial.rst @@ -33,7 +33,7 @@ the start to end of an application. In this article, you will: 3. Read a Parquet file into a :class:`Table` and write it back out afterwards -Pre-requisites +Pre-requisites --------------- Before continuing, make sure you have: @@ -50,7 +50,7 @@ Setup Before writing out some file I/O, we need to fill in a couple gaps: 1. We need to include necessary headers. - + 2. A ``main()`` is needed to glue things together. 3. We need files to play with. @@ -58,8 +58,8 @@ Before writing out some file I/O, we need to fill in a couple gaps: Includes ^^^^^^^^ -Before writing C++ code, we need some includes. 
We'll get ``iostream`` for output, then import Arrow's -I/O functionality for each file type we'll work with in this article: +Before writing C++ code, we need some includes. We'll get ``iostream`` for output, then import Arrow's +I/O functionality for each file type we'll work with in this article: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/file_access_example.cc :language: cpp @@ -156,8 +156,8 @@ Opening an Arrow file Reader ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ An :class:`io::ReadableFile` is too generic to offer all functionality to read an Arrow file. -We need to use it to get an :class:`ipc::RecordBatchFileReader` object. This object implements -all the logic needed to read an Arrow file with correct formatting. We get one through +We need to use it to get an :class:`ipc::RecordBatchFileReader` object. This object implements +all the logic needed to read an Arrow file with correct formatting. We get one through :func:`ipc::RecordBatchFileReader::Open`: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/file_access_example.cc @@ -294,8 +294,8 @@ Write a CSV File from Table CSV writing to :class:`Table` looks exactly like IPC writing to :class:`RecordBatch`, except with our :class:`Table`, and using :func:`ipc::RecordBatchWriter::WriteTable` instead of -:func:`ipc::RecordBatchWriter::WriteRecordBatch`. Note that the same writer class is used -- -we're writing with :func:`ipc::RecordBatchWriter::WriteTable` because we have a :class:`Table`. We’ll target +:func:`ipc::RecordBatchWriter::WriteRecordBatch`. Note that the same writer class is used -- +we're writing with :func:`ipc::RecordBatchWriter::WriteTable` because we have a :class:`Table`. We’ll target a file, use our :class:`Table’s
` :class:`Schema`, and then write the :class:`Table`: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/file_access_example.cc @@ -358,7 +358,7 @@ even though we used :func:`io::ReadableFile::Open`. Note that we pass our Reading a Parquet File to Table ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -With a prepared :class:`parquet::arrow::FileReader` in hand, we can read to a +With a prepared :class:`parquet::arrow::FileReader` in hand, we can read to a :class:`Table`, except we must pass the :class:`Table` by reference instead of outputting to it: .. literalinclude:: ../../../../cpp/examples/tutorial_examples/file_access_example.cc @@ -401,4 +401,4 @@ Refer to the below for a copy of the complete code: :start-after: (Doc section: File I/O) :end-before: (Doc section: File I/O) :linenos: - :lineno-match: \ No newline at end of file + :lineno-match: diff --git a/docs/source/developers/continuous_integration/index.rst b/docs/source/developers/continuous_integration/index.rst index f988b5ab69d50..cfca14e10e48c 100644 --- a/docs/source/developers/continuous_integration/index.rst +++ b/docs/source/developers/continuous_integration/index.rst @@ -27,4 +27,4 @@ Continuous Integration overview docker archery - crossbow \ No newline at end of file + crossbow diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 5fab745679e93..040a046c5153d 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -67,7 +67,7 @@ On Alpine Linux: gcc \ ninja \ make - + On Fedora Linux: .. code-block:: shell @@ -99,7 +99,7 @@ On macOS, you can use `Homebrew `_: With `vcpkg `_: .. code-block:: shell - + git clone https://github.com/apache/arrow.git cd arrow vcpkg install \ @@ -362,7 +362,7 @@ boolean flags to ``cmake``. * ``-DARROW_GCS=ON``: Build Arrow with GCS support (requires the GCloud SDK for C++) * ``-DARROW_HDFS=ON``: Arrow integration with libhdfs for accessing the Hadoop Filesystem -* ``-DARROW_JEMALLOC=ON``: Build the Arrow jemalloc-based allocator, on by default +* ``-DARROW_JEMALLOC=ON``: Build the Arrow jemalloc-based allocator, on by default * ``-DARROW_JSON=ON``: JSON reader module * ``-DARROW_MIMALLOC=ON``: Build the Arrow mimalloc-based allocator * ``-DARROW_ORC=ON``: Arrow integration with Apache ORC @@ -375,7 +375,7 @@ boolean flags to ``cmake``. instead. 
* ``-DARROW_S3=ON``: Support for Amazon S3-compatible filesystems * ``-DARROW_SUBSTRAIT=ON``: Build with support for Substrait -* ``-DARROW_WITH_RE2=ON``: Build with support for regular expressions using the re2 +* ``-DARROW_WITH_RE2=ON``: Build with support for regular expressions using the re2 library, on by default and used when ``ARROW_COMPUTE`` or ``ARROW_GANDIVA`` is ``ON`` * ``-DARROW_WITH_UTF8PROC=ON``: Build with support for Unicode properties using the utf8proc library, on by default and used when ``ARROW_COMPUTE`` or ``ARROW_GANDIVA`` @@ -472,7 +472,7 @@ The build system supports a number of third-party dependencies * ``c-ares``: a dependency of gRPC * ``gflags``: for command line utilities (formerly Googleflags) * ``GLOG``: for logging - * ``google_cloud_cpp_storage``: for Google Cloud Storage support, requires + * ``google_cloud_cpp_storage``: for Google Cloud Storage support, requires system cURL and can use the ``BUNDLED`` method described below * ``gRPC``: for remote procedure calls * ``GTest``: Googletest, for testing diff --git a/docs/source/developers/cpp/windows.rst b/docs/source/developers/cpp/windows.rst index 251a45325fe0b..60ac949e81663 100644 --- a/docs/source/developers/cpp/windows.rst +++ b/docs/source/developers/cpp/windows.rst @@ -379,9 +379,9 @@ Downloading the Timezone Database ================================= To run some of the compute unit tests on Windows, the IANA timezone database -and the Windows timezone mapping need to be downloaded first. See +and the Windows timezone mapping need to be downloaded first. See :ref:`download-timezone-database` for download instructions. To set a non-default -path for the timezone database while running the unit tests, set the +path for the timezone database while running the unit tests, set the ``ARROW_TIMEZONE_DATABASE`` environment variable. Replicating Appveyor Builds diff --git a/docs/source/developers/guide/architectural_overview.rst b/docs/source/developers/guide/architectural_overview.rst index 58e05c85f457e..085a814453c84 100644 --- a/docs/source/developers/guide/architectural_overview.rst +++ b/docs/source/developers/guide/architectural_overview.rst @@ -29,8 +29,8 @@ Architectural Overview ********************** -A general overview of Apache Arrow project can be found on the -`front page `_ and in the +A general overview of Apache Arrow project can be found on the +`front page `_ and in the `Apache Arrow Overview `_. You can also have a look at the `Frequently Asked Questions `_. diff --git a/docs/source/developers/guide/communication.rst b/docs/source/developers/guide/communication.rst index a8659f83ac04d..749c94f9419b2 100644 --- a/docs/source/developers/guide/communication.rst +++ b/docs/source/developers/guide/communication.rst @@ -27,7 +27,7 @@ .. _communication: ************* -Communication +Communication ************* **About the contributors** @@ -50,7 +50,7 @@ tags ([C++], [R], [Ruby] etc.) so it gets noticed by the right people. Where to get help 👋 ==================== -For any question you may have or problems you are facing you can write to +For any question you may have or problems you are facing you can write to user or development :ref:`mailing_list` or you can create an issue on :ref:`github`. Also use GitHub to search through the issues, report bugs and create feature requests or proposals. 
diff --git a/docs/source/developers/guide/documentation.rst b/docs/source/developers/guide/documentation.rst index 3bb3bebef5098..8f9d7311e765f 100644 --- a/docs/source/developers/guide/documentation.rst +++ b/docs/source/developers/guide/documentation.rst @@ -49,7 +49,7 @@ documentation itself, you can search for an issue in GitHub. Documentation improvements are also a great way to gain some experience with our submission and review process without -requiring a lot of local development environment setup. +requiring a lot of local development environment setup. .. note:: Many documentation-only changes can be made directly in the @@ -114,4 +114,3 @@ library. Source folder includes: **Cookbooks** have their own repository ``_ and can be separately cloned and built. - diff --git a/docs/source/developers/guide/index.rst b/docs/source/developers/guide/index.rst index 353c8332ff0b5..0ed27a0ddc54e 100644 --- a/docs/source/developers/guide/index.rst +++ b/docs/source/developers/guide/index.rst @@ -83,17 +83,17 @@ of adding a basic feature. the installation of third-party packages, depending on which build options and components you enable. The C++ build guide has suggestions for commonly encountered issues - you can find it - :ref:`here `. + :ref:`here `. Anytime you are stuck, feel free to reach out via appropriate :ref:`communication` channel. - See a short description about the building process of + See a short description about the building process of :ref:`PyArrow or the R package` or go straight to detailed instructions on how to build one of Arrow libraries in the `documentation `_ . - + #. **Run the tests** - + We should run the tests to check if everything is working correctly. For example, you can run the tests from a terminal for Python @@ -155,7 +155,7 @@ There are lots of ways to contribute to the project besides writing code! * Improving the **documentation** is a great way to start contributing! For more information visit :ref:`documentation` section of the guide. -* **Apache Arrow Cookbooks** are a collection of recipes for solving various problems +* **Apache Arrow Cookbooks** are a collection of recipes for solving various problems and completing different tasks using Apache Arrow. They are also a great way to start contributing. For more information visit `How to contribute to Apache Arrow Cookbook `_ diff --git a/docs/source/developers/guide/resources.rst b/docs/source/developers/guide/resources.rst index f350f469af403..b5905af65499b 100644 --- a/docs/source/developers/guide/resources.rst +++ b/docs/source/developers/guide/resources.rst @@ -78,7 +78,7 @@ Reproducible examples: - `Tidyverse: Make a reprex `_ - `Craft Minimal Bug Reports by Matthew Rocklin `_ -Recommended references +Recommended references ---------------------- - Slatkin, Brett, *Effective Python: 90 Specific Ways to Write Better Python*, Addison-Wesley Professional, 2019 diff --git a/docs/source/developers/guide/step_by_step/finding_issues.rst b/docs/source/developers/guide/step_by_step/finding_issues.rst index 390c56a81c73f..a76b15e917e9a 100644 --- a/docs/source/developers/guide/step_by_step/finding_issues.rst +++ b/docs/source/developers/guide/step_by_step/finding_issues.rst @@ -65,7 +65,7 @@ person who triaged the ticket expected it to be. Don't hesitate to write that in the comments. .. note:: - + When you find a GitHub issue you would like to work on, please mention your interest in the comment section of that issue; that way we will know you are working on it. 
diff --git a/docs/source/developers/guide/step_by_step/set_up.rst b/docs/source/developers/guide/step_by_step/set_up.rst index 60b472637badb..9a2177568d6f5 100644 --- a/docs/source/developers/guide/step_by_step/set_up.rst +++ b/docs/source/developers/guide/step_by_step/set_up.rst @@ -60,7 +60,7 @@ a username and password each time you execute a git command. RStudio project and will create a ``.Rproj`` file in the root directory. For this reason it is *highly recommended* to clone the repository using the command line or a Git client. - + Get the source code =================== diff --git a/docs/source/developers/guide/step_by_step/styling.rst b/docs/source/developers/guide/step_by_step/styling.rst index bb428b0b6ab40..c155acb389512 100644 --- a/docs/source/developers/guide/step_by_step/styling.rst +++ b/docs/source/developers/guide/step_by_step/styling.rst @@ -59,4 +59,4 @@ check your code and will stop the commit process, described in the following section, if there are any errors. - `Pre-commit installation instructions `_ -- `Pre-commit hooks `_ \ No newline at end of file +- `Pre-commit hooks `_ diff --git a/docs/source/developers/guide/tutorials/index.rst b/docs/source/developers/guide/tutorials/index.rst index dcefab23230f9..5f44231afc9c2 100644 --- a/docs/source/developers/guide/tutorials/index.rst +++ b/docs/source/developers/guide/tutorials/index.rst @@ -25,4 +25,4 @@ Tutorials :maxdepth: 1 python_tutorial - r_tutorial \ No newline at end of file + r_tutorial diff --git a/docs/source/developers/guide/tutorials/python_tutorial.rst b/docs/source/developers/guide/tutorials/python_tutorial.rst index 7f004160b0e75..c12c4489aee95 100644 --- a/docs/source/developers/guide/tutorials/python_tutorial.rst +++ b/docs/source/developers/guide/tutorials/python_tutorial.rst @@ -137,7 +137,7 @@ function is defined in the ``compute.py`` file. After examining the ``compute.py`` file we can see that together with ``_compute.pyx`` the functions from C++ get wrapped into Python. -We will define the new feature at the end of the ``compute.py`` file. +We will define the new feature at the end of the ``compute.py`` file. Lets run some code in the Python console from ``arrow/python`` directory in order to learn more about ``pc.min_max``. @@ -147,10 +147,10 @@ directory in order to learn more about ``pc.min_max``. $ cd python $ python - Python 3.9.7 (default, Oct 22 2021, 13:24:00) + Python 3.9.7 (default, Oct 22 2021, 13:24:00) [Clang 13.0.0 (clang-1300.0.29.3)] on darwin Type "help", "copyright", "credits" or "license" for more information. - + We have entered into the Python console from the shell and we can do some research: @@ -278,7 +278,7 @@ options for the ``pc.min_max`` function we can finish the work. return pa.scalar([('min-', min_t), ('max+', max_t)], type=ty) .. TODO seealso - .. For more information about the Arrow codebase visit + .. For more information about the Arrow codebase visit .. :ref:``. (link to working on the Arrow codebase section) Adding a test @@ -303,24 +303,24 @@ a specific unit test, pass in the test name to the ``-k`` parameter. .. 
code:: console $ cd python - $ python -m pytest pyarrow/tests/test_compute.py -k test_tutorial_min_max + $ python -m pytest pyarrow/tests/test_compute.py -k test_tutorial_min_max ======================== test session starts ========================== platform darwin -- Python 3.9.7, pytest-6.2.5, py-1.10.0, pluggy-1.0.0 rootdir: /Users/alenkafrim/repos/arrow/python, configfile: setup.cfg plugins: hypothesis-6.24.1, lazy-fixture-0.6.3 - collected 204 items / 203 deselected / 1 selected + collected 204 items / 203 deselected / 1 selected pyarrow/tests/test_compute.py . [100%] ======================== 1 passed, 203 deselected in 0.16s ============ - - $ python -m pytest pyarrow/tests/test_compute.py + + $ python -m pytest pyarrow/tests/test_compute.py ======================== test session starts =========================== platform darwin -- Python 3.9.7, pytest-6.2.5, py-1.10.0, pluggy-1.0.0 rootdir: /Users/alenkafrim/repos/arrow/python, configfile: setup.cfg plugins: hypothesis-6.24.1, lazy-fixture-0.6.3 - collected 204 items + collected 204 items pyarrow/tests/test_compute.py ................................... [ 46%] ................................................. [100%] @@ -339,7 +339,7 @@ utility called `Archery ` to check if code is in line with PEP 8 style guide. .. code:: console - + $ archery lint --python --fix INFO:archery:Running Python formatter (autopep8) INFO:archery:Running Python linter (flake8) @@ -430,7 +430,7 @@ to the branch history): $ git commit -am "Adding a new compute feature for tutorial purposes" [ARROW-14977 170ef85be] Adding a new compute feature for tutorial purposes 2 files changed, 51 insertions(+) - + We can use ``git log`` to check the history of commits: @@ -448,12 +448,12 @@ We can use ``git log`` to check the history of commits: Date: Sun Dec 5 15:19:46 2021 +0900 ARROW-14981: [CI][Docs] Upload built documents - + We can use this in release process instead of building on release manager's local environment. - + Closes #11856 from kou/ci-docs-upload - + Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei ... @@ -478,10 +478,10 @@ called ``origin``. Writing objects: 100% (7/7), 1.19 KiB | 1.19 MiB/s, done. Total 7 (delta 6), reused 0 (delta 0), pack-reused 0 remote: Resolving deltas: 100% (6/6), completed with 6 local objects. - remote: + remote: remote: Create a pull request for 'ARROW-14977' on GitHub by visiting: remote: https://github.com/AlenkaF/arrow/pull/new/ARROW-14977 - remote: + remote: To https://github.com/AlenkaF/arrow.git * [new branch] ARROW-14977 -> ARROW-14977 @@ -490,7 +490,7 @@ to create a Pull Request. On the GitHub Arrow page (main or forked) we will see a yellow notice bar with a note that we made recent pushes to the branch ARROW-14977. That’s great, now we can make the Pull Request -by clicking on **Compare & pull request**. +by clicking on **Compare & pull request**. .. figure:: ../../images/python_tutorial_github_pr_notice.jpeg :scale: 50 % @@ -527,5 +527,5 @@ the code, comment, resolve conversations and so on. The Pull Request we made can be viewed `here `_. .. seealso:: - + For more information about Pull Request workflow see :ref:`pr_lifecycle`. 
diff --git a/docs/source/developers/java/building.rst b/docs/source/developers/java/building.rst index c059ff676efb2..82053e901186c 100644 --- a/docs/source/developers/java/building.rst +++ b/docs/source/developers/java/building.rst @@ -350,7 +350,7 @@ Arrow repository, and update the following settings: * To enable debugging JNI-based modules like ``dataset``, activate specific profiles in the Maven tab under "Profiles". Ensure the profiles ``arrow-c-data``, ``arrow-jni``, ``generate-libs-cdata-all-os``, - ``generate-libs-jni-macos-linux``, and ``jdk11+`` are enabled, so that the + ``generate-libs-jni-macos-linux``, and ``jdk11+`` are enabled, so that the IDE can build them and enable debugging. You may not need to update all of these settings if you build/test with the diff --git a/docs/source/developers/overview.rst b/docs/source/developers/overview.rst index c7bc4273313bc..5a18b1e4eb8db 100644 --- a/docs/source/developers/overview.rst +++ b/docs/source/developers/overview.rst @@ -75,7 +75,7 @@ checklist for using ``git``: locally, for example if additional commits have been made by a colleague. By using ``--force-with-lease`` instead of ``--force``, you ensure those commits are not overwritten and can fetch those changes if desired. - + .. dropdown:: Setting rebase to be default :animate: fade-in-slide-down :class-container: sd-shadow-none @@ -202,4 +202,3 @@ Implementations that do not intend to implement cross endian support: For other libraries, a discussion to gather consensus on the mailing-list should be had before submitting PRs. - diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index e7431ce0fb7b9..0b3a83dc5aabe 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -80,10 +80,10 @@ Ensure local tags are removed, gpg-agent is set and JIRA tickets are correctly a # Delete the local tag for RC1 or later git tag -d apache-arrow- - + # Setup gpg agent for signing artifacts source dev/release/setup-gpg-agent.sh - + # Curate the release # The end of the generated report shows the JIRA tickets with wrong version number assigned. archery release curate @@ -180,7 +180,7 @@ Create the Release Candidate branch from the updated maintenance branch # Start from the updated maintenance branch. 
git checkout maint-X.Y.Z - + # The following script will create a branch for the Release Candidate, # place the necessary commits updating the version number and then create a git tag # on OSX use gnu-sed with homebrew: brew install gnu-sed (and export to $PATH) @@ -188,7 +188,7 @@ Create the Release Candidate branch from the updated maintenance branch # starts at 0 and increments every time the Release Candidate is burned # so for the first RC this would be: dev/release/01-prepare.sh 4.0.0 5.0.0 0 dev/release/01-prepare.sh - + # Push the release tag (for RC1 or later the --force flag is required) git push -u apache apache-arrow- # Push the release candidate branch in order to trigger verification jobs later @@ -201,23 +201,23 @@ Build source and binaries and submit them # Build the source release tarball and create Pull Request with verification tasks dev/release/02-source.sh - + # Submit binary tasks using crossbow, the command will output the crossbow build id dev/release/03-binary-submit.sh - + # Wait for the crossbow jobs to finish archery crossbow status - + # Download the produced binaries # This will download packages to a directory called packages/release--rc dev/release/04-binary-download.sh - + # Sign and upload the binaries # # On macOS the only way I could get this to work was running "echo "UPDATESTARTUPTTY" | gpg-connect-agent" before running this comment # otherwise I got errors referencing "ioctl" errors. dev/release/05-binary-upload.sh - + # Sign and upload the Java artifacts # # Note that you need to press the "Close" button manually by Web interface diff --git a/docs/source/developers/release_verification.rst b/docs/source/developers/release_verification.rst index 8c301b44a3c42..afd220db6010d 100644 --- a/docs/source/developers/release_verification.rst +++ b/docs/source/developers/release_verification.rst @@ -55,7 +55,7 @@ and test the result on their own platform in order to cast a +1 vote. # this will create and automatically clean up a temporary directory for the verification environment and will run the source verification TEST_DEFAULT=0 TEST_SOURCE=1 verify-release-candidate.sh $VERSION $RC_NUM - + # to verify only certain implementations use the TEST_DEFAULT=0 and TEST_* variables # here are a couple of examples, but see the source code for the available options TEST_DEFAULT=0 TEST_CPP=1 verify-release-candidate.sh $VERSION $RC_NUM # only C++ tests diff --git a/docs/source/developers/reviewing.rst b/docs/source/developers/reviewing.rst index b6e0c1f4023bd..1550d6aa7ce61 100644 --- a/docs/source/developers/reviewing.rst +++ b/docs/source/developers/reviewing.rst @@ -260,14 +260,14 @@ Social aspects Labelling ========= -While reviewing PRs, we should try to identify whether the corresponding issue +While reviewing PRs, we should try to identify whether the corresponding issue needs to be marked with one or both of the following issue labels: * **Critical Fix**: The change fixes either: (a) a security vulnerability; (b) a bug that causes incorrect or invalid data to be produced; or (c) a bug that causes a crash (while the API contract is upheld). This is intended to mark fixes to issues that may affect users without their - knowledge. For this reason, fixing bugs that cause errors don't count, since + knowledge. For this reason, fixing bugs that cause errors don't count, since those bugs are usually obvious. Bugs that cause crashes are considered critical because they are a possible vector of Denial-of-Service attacks. 
* **Breaking Change**: The change breaks backwards compatibility in a public API. @@ -275,7 +275,7 @@ needs to be marked with one or both of the following issue labels: compatibility, except for the few places where we do guarantee ABI compatibility (such as C Data Interface). Experimental APIs are *not* exempt from this; they are just more likely to be associated with this tag. - + Breaking changes and critical fixes are separate: breaking changes alter the API contract, while critical fixes make the implementation align with the existing API contract. For example, fixing a bug that caused a Parquet reader diff --git a/docs/source/format/CDataInterface/PyCapsuleInterface.rst b/docs/source/format/CDataInterface/PyCapsuleInterface.rst index 03095aa2e9356..67f77f53f012b 100644 --- a/docs/source/format/CDataInterface/PyCapsuleInterface.rst +++ b/docs/source/format/CDataInterface/PyCapsuleInterface.rst @@ -64,7 +64,7 @@ structures should be wrapped in capsules. Capsules avoid invalid access by attaching a name to the pointer and avoid memory leaks by attaching a destructor. Thus, they are much safer than passing pointers as integers. -`PyCapsule`_ allows for a ``name`` to be associated with the capsule, allowing +`PyCapsule`_ allows for a ``name`` to be associated with the capsule, allowing consumers to verify that the capsule contains the expected kind of data. To make sure Arrow structures are recognized, the following names must be used: @@ -133,8 +133,8 @@ Arrays and record batches (contiguous tables) can implement the method Export the object as a pair of ArrowSchema and ArrowArray structures. - :param requested_schema: A PyCapsule containing a C ArrowSchema representation - of a requested schema. Conversion to this schema is best-effort. See + :param requested_schema: A PyCapsule containing a C ArrowSchema representation + of a requested schema. Conversion to this schema is best-effort. See `Schema Requests`_. :type requested_schema: PyCapsule or None @@ -152,8 +152,8 @@ Tables / DataFrames and streams can implement the method ``__arrow_c_stream__``. Export the object as an ArrowArrayStream. - :param requested_schema: A PyCapsule containing a C ArrowSchema representation - of a requested schema. Conversion to this schema is best-effort. See + :param requested_schema: A PyCapsule containing a C ArrowSchema representation + of a requested schema. Conversion to this schema is best-effort. See `Schema Requests`_. :type requested_schema: PyCapsule or None @@ -192,7 +192,7 @@ schema transformations. Protocol Typehints ------------------ -The following typehints can be copied into your library to annotate that a +The following typehints can be copied into your library to annotate that a function accepts an object implementing one of these protocols. .. code-block:: python @@ -248,7 +248,7 @@ Below is the code to create a PyCapsule for an ``ArrowSchema``. The code for } free(schema); } - + PyObject* ExportArrowSchemaPyCapsule() { struct ArrowSchema* schema = (struct ArrowSchema*)malloc(sizeof(struct ArrowSchema)); @@ -270,9 +270,9 @@ Below is the code to create a PyCapsule for an ``ArrowSchema``. The code for ) if schema.release != NULL: schema.release(schema) - + free(schema) - + cdef object export_arrow_schema_py_capsule(): cdef ArrowSchema* schema = malloc(sizeof(ArrowSchema)) # It's recommended to immediately wrap the struct in a capsule, so @@ -305,7 +305,7 @@ code for ``ArrowArray`` and ``ArrowArrayStream`` is similar. .. 
code-block:: c #include - + // If the capsule is not an ArrowSchema, will return NULL and set an exception. struct ArrowSchema* GetArrowSchemaPyCapsule(PyObject* capsule) { return PyCapsule_GetPointer(capsule, "arrow_schema"); @@ -316,7 +316,7 @@ code for ``ArrowArray`` and ``ArrowArrayStream`` is similar. .. code-block:: cython cimport cpython - + cdef ArrowSchema* get_arrow_schema_py_capsule(object capsule) except NULL: return cpython.PyCapsule_GetPointer(capsule, 'arrow_schema') @@ -429,7 +429,7 @@ implementing the DataFrame Interchange Protocol. Comparison to ``__arrow_array__`` protocol ------------------------------------------ -The :ref:`arrow_array_protocol` protocol is a dunder method that +The :ref:`arrow_array_protocol` protocol is a dunder method that defines how PyArrow should import an object as an Arrow array. Unlike this protocol, it is specific to PyArrow and isn't used by other libraries. It is -also limited to arrays and does not support schemas, tabular structures, or streams. \ No newline at end of file +also limited to arrays and does not support schemas, tabular structures, or streams. diff --git a/docs/source/format/Glossary.rst b/docs/source/format/Glossary.rst index 3f2f118a95d6d..11c19c5fa70e9 100644 --- a/docs/source/format/Glossary.rst +++ b/docs/source/format/Glossary.rst @@ -211,7 +211,7 @@ Glossary its bindings, and Go). .. image:: ../cpp/tables-versus-record-batches.svg - :alt: A graphical representation of an Arrow Table and a + :alt: A graphical representation of an Arrow Table and a Record Batch, with structure as described in text above. .. seealso:: :term:`chunked array`, :term:`record batch` diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index 1a9b1b97f07ee..c800255687796 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -501,14 +501,14 @@ integration testing actually tests. There are two types of integration test cases: the ones populated on the fly by the data generator in the Archery utility, and *gold* files that exist -in the `arrow-testing ` +in the `arrow-testing ` repository. Data Generator Tests ~~~~~~~~~~~~~~~~~~~~ This is the high-level description of the cases which are generated and -tested using the ``archery integration`` command (see ``get_generated_json_files`` +tested using the ``archery integration`` command (see ``get_generated_json_files`` in ``datagen.py``): * Primitive Types @@ -549,7 +549,7 @@ Gold File Integration Tests Pre-generated json and arrow IPC files (both file and stream format) exist in the `arrow-testing `__ repository in the ``data/arrow-ipc-stream/integration`` directory. These serve as -*gold* files that are assumed to be correct for use in testing. They are +*gold* files that are assumed to be correct for use in testing. They are referenced by ``runner.py`` in the code for the :ref:`Archery ` utility. Below are the test cases which are covered by them: @@ -563,7 +563,7 @@ utility. Below are the test cases which are covered by them: + intervals + maps + nested types (list, struct) - + primitives + + primitives + primitive with no batches + primitive with zero length batches diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst index 316fd38fa0990..06ed32bd48cf7 100644 --- a/docs/source/java/algorithm.rst +++ b/docs/source/java/algorithm.rst @@ -20,12 +20,12 @@ Java Algorithms Arrow's Java library provides algorithms for some commonly-used functionalities. 
The algorithms are provided in the ``org.apache.arrow.algorithm`` -package of the ``algorithm`` module. +package of the ``algorithm`` module. Comparing Vector Elements ------------------------- -Comparing vector elements is the basic for many algorithms. Vector +Comparing vector elements is the basic for many algorithms. Vector elements can be compared in one of the two ways: 1. **Equality comparison**: there are two possible results for this type of comparisons: ``equal`` and ``unequal``. @@ -36,30 +36,30 @@ interface. and ``greater than``. This comparison is supported by the abstract class ``org.apache.arrow.algorithm.sort.VectorValueComparator``. We provide default implementations to compare vector elements. However, users can also define ways -for customized comparisons. +for customized comparisons. Vector Element Search --------------------- -A search algorithm tries to find a particular value in a vector. When successful, a vector index is +A search algorithm tries to find a particular value in a vector. When successful, a vector index is returned; otherwise, a ``-1`` is returned. The following search algorithms are provided: -1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is +1. **Linear search**: this algorithm simply traverses the vector from the beginning, until a match is found, or the end of the vector is reached. So it takes ``O(n)`` time, where ``n`` is the number of elements in the vector. This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#linearSearch``. -2. **Binary search**: this represents a more efficient search algorithm, as it runs in ``O(log(n))`` time. +2. **Binary search**: this represents a more efficient search algorithm, as it runs in ``O(log(n))`` time. However, it is only applicable to sorted vectors. To get a sorted vector, one can use one of our sorting algorithms, which will be discussed in the next section. This algorithm is implemented in ``org.apache.arrow.algorithm.search.VectorSearcher#binarySearch``. 3. **Parallel search**: when the vector is large, it takes a long time to traverse the elements to search -for a value. To make this process faster, one can split the vector into multiple partitions, and perform the +for a value. To make this process faster, one can split the vector into multiple partitions, and perform the search for each partition in parallel. This is supported by ``org.apache.arrow.algorithm.search.ParallelSearcher``. -4. **Range search**: for many scenarios, there can be multiple matching values in the vector. +4. **Range search**: for many scenarios, there can be multiple matching values in the vector. If the vector is sorted, the matching values reside in a contiguous region in the vector. The -range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. +range search algorithm tries to find the upper/lower bound of the region in ``O(log(n))`` time. An implementation is provided in ``org.apache.arrow.algorithm.search.VectorRangeSearcher``. Vector Sorting @@ -72,19 +72,19 @@ classified into the following categories: 1. **In-place sorter**: an in-place sorter performs the sorting by manipulating the original vector, without creating any new vector. So it just returns the original vector after the sorting operations. Currently, we have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter`` for in-place -sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. 
+sorting in ``O(nlog(n))`` time. As the name suggests, it only supports fixed width vectors. 2. **Out-of-place sorter**: an out-of-place sorter does not mutate the original vector. Instead, it copies vector elements to a new vector in sorted order, and returns the new vector. -We have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.FixedWidthOutOfPlaceVectorSorter`` +We have ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.FixedWidthOutOfPlaceVectorSorter`` and ``org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter.VariableWidthOutOfPlaceVectorSorter`` -for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. +for fixed width and variable width vectors, respectively. Both algorithms run in ``O(nlog(n))`` time. 3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer vector, which correspond to indices of vector elements in sorted order. With the index vector, one can easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th -smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, -which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. +smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, +which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. Other Algorithms ---------------- diff --git a/docs/source/java/flight.rst b/docs/source/java/flight.rst index e009998be4f4e..6d26583aeefa6 100644 --- a/docs/source/java/flight.rst +++ b/docs/source/java/flight.rst @@ -184,7 +184,7 @@ Handshake-based authentication can be enabled by implementing ``ServerAuthHandler``. Authentication consists of two parts: on initial client connection, the server and client authentication implementations can perform any negotiation needed. The client authentication -handler then provides a token that will be attached to future calls. +handler then provides a token that will be attached to future calls. The client send data to be validated through ``ClientAuthHandler.authenticate`` The server validate data received through ``ServerAuthHandler.authenticate``. diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index 0ace2185983a9..cc8822247b007 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -169,8 +169,8 @@ when using the JDBC Driver Manager to connect. When supplying using the Properties object, values should *not* be URI-encoded. Parameters specified by the URI supercede parameters supplied by the -Properties object. When calling the `user/password overload of -DriverManager#getConnection() +Properties object. When calling the `user/password overload of +DriverManager#getConnection() `_, the username and password supplied on the URI supercede the username and password arguments to the function call. diff --git a/docs/source/java/memory.rst b/docs/source/java/memory.rst index 036befa148692..8014a27444ac9 100644 --- a/docs/source/java/memory.rst +++ b/docs/source/java/memory.rst @@ -20,7 +20,7 @@ Memory Management ================= The memory modules contain all the functionality that Arrow uses to allocate and deallocate memory. This document is divided in two parts: -The first part, *Memory Basics*, provides a high-level introduction. 
The following section, *Arrow Memory In-Depth*, fills in the details. +The first part, *Memory Basics*, provides a high-level introduction. The following section, *Arrow Memory In-Depth*, fills in the details. .. contents:: @@ -39,7 +39,7 @@ Getting Started Arrow's memory management is built around the needs of the columnar format and using off-heap memory. Arrow Java has its own independent implementation. It does not wrap the C++ implementation, although the framework is flexible enough -to be used with memory allocated in C++ that is used by Java code. +to be used with memory allocated in C++ that is used by Java code. Arrow provides multiple modules: the core interfaces, and implementations of the interfaces. Users need the core interfaces, and exactly one of the implementations. @@ -67,9 +67,9 @@ Why Arrow Uses Direct Memory BufferAllocator --------------- -The `BufferAllocator`_ is primarily an arena or nursery used for accounting of buffers (ArrowBuf instances). -As the name suggests, it can allocate new buffers associated with itself, but it can also -handle the accounting for buffers allocated elsewhere. For example, it handles the Java-side accounting for +The `BufferAllocator`_ is primarily an arena or nursery used for accounting of buffers (ArrowBuf instances). +As the name suggests, it can allocate new buffers associated with itself, but it can also +handle the accounting for buffers allocated elsewhere. For example, it handles the Java-side accounting for memory allocated in C++ and shared with Java using the C-Data Interface. In the code below it performs an allocation: .. code-block:: Java @@ -100,21 +100,21 @@ memory from a child allocator, those allocations are also reflected in all paren effectively sets the program-wide memory limit, and serves as the master bookkeeper for all memory allocations. Child allocators are not strictly required, but can help better organize code. For instance, a lower memory limit can -be set for a particular section of code. The child allocator can be closed when that section completes, -at which point it checks that that section didn't leak any memory. +be set for a particular section of code. The child allocator can be closed when that section completes, +at which point it checks that that section didn't leak any memory. Child allocators can also be named, which makes it easier to tell where an ArrowBuf came from during debugging. Reference counting ------------------ -Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To managed shared buffers -deterministically, we use manual reference counting instead of the garbage collector. +Because direct memory is expensive to allocate and deallocate, allocators may share direct buffers. To managed shared buffers +deterministically, we use manual reference counting instead of the garbage collector. This simply means that each buffer has a counter keeping track of the number of references to the buffer, and the user is responsible for properly incrementing/decrementing the counter as the buffer is used. In Arrow, each ArrowBuf has an associated `ReferenceManager`_ that tracks the reference count. You can retrieve -it with ArrowBuf.getReferenceManager(). The reference count is updated using `ReferenceManager.release`_ to decrement the count, -and `ReferenceManager.retain`_ to increment it. +it with ArrowBuf.getReferenceManager(). 
The reference count is updated using `ReferenceManager.release`_ to decrement the count, +and `ReferenceManager.retain`_ to increment it. Of course, this is tedious and error-prone, so instead of directly working with buffers, we typically use higher-level APIs like ValueVector. Such classes generally implement Closeable/AutoCloseable and will automatically @@ -289,7 +289,7 @@ Finally, enabling the ``TRACE`` logging level will automatically provide this st | at (#8:1) Sometimes, explicitly passing allocators around is difficult. For example, it -can be hard to pass around extra state, like an allocator, through layers of +can be hard to pass around extra state, like an allocator, through layers of existing application or framework code. A global or singleton allocator instance can be useful here, though it should not be your first choice. @@ -370,7 +370,7 @@ Arrow’s memory model is based on the following basic concepts: leaks. - The same physical memory can be shared by multiple allocators and the allocator must provide an accounting paradigm for this purpose. - + Reserving Memory ---------------- @@ -384,17 +384,17 @@ Arrow provides two different ways to reserve memory: - ``AllocationReservation`` via BufferAllocator.newReservation(): Allows a short-term preallocation strategy so that a particular subsystem can ensure future memory is available to support a - particular request. - + particular request. + Reference Counting Details -------------------------- -Typically, the ReferenceManager implementation used is an instance of `BufferLedger`_. -A BufferLedger is a ReferenceManager that also maintains the relationship between an ``AllocationManager``, +Typically, the ReferenceManager implementation used is an instance of `BufferLedger`_. +A BufferLedger is a ReferenceManager that also maintains the relationship between an ``AllocationManager``, a ``BufferAllocator`` and one or more individual ``ArrowBuf``\ s -All ArrowBufs (direct or sliced) related to a single BufferLedger/BufferAllocator combination -share the same reference count and either all will be valid or all will be invalid. +All ArrowBufs (direct or sliced) related to a single BufferLedger/BufferAllocator combination +share the same reference count and either all will be valid or all will be invalid. For simplicity of accounting, we treat that memory as being used by one of the BufferAllocators associated with the memory. When that allocator releases its claim on that memory, the memory ownership is then moved to @@ -411,7 +411,7 @@ There are several Allocator types in Arrow Java: - ``ChildAllocator`` - A child allocator that derives from the root allocator Many BufferAllocators can reference the same piece of physical memory at the same -time. It is the AllocationManager’s responsibility to ensure that in this situation, +time. It is the AllocationManager’s responsibility to ensure that in this situation, all memory is accurately accounted for from the Root’s perspective and also to ensure that the memory is correctly released once all BufferAllocators have stopped using that memory. diff --git a/docs/source/java/quickstartguide.rst b/docs/source/java/quickstartguide.rst index e358681c57830..a71ddc5b5e55f 100644 --- a/docs/source/java/quickstartguide.rst +++ b/docs/source/java/quickstartguide.rst @@ -313,4 +313,4 @@ Example: Read the dataset from the previous example from an Arrow IPC file (rand More examples available at `Arrow Java Cookbook`_. -.. 
_`Arrow Java Cookbook`: https://arrow.apache.org/cookbook/java \ No newline at end of file +.. _`Arrow Java Cookbook`: https://arrow.apache.org/cookbook/java diff --git a/docs/source/java/vector.rst b/docs/source/java/vector.rst index abbbd1a236d6d..1c3e123cf50fb 100644 --- a/docs/source/java/vector.rst +++ b/docs/source/java/vector.rst @@ -226,7 +226,7 @@ A :class:`ListVector` is a vector that holds a list of values for each index. Wo For example, the code below shows how to build a :class:`ListVector` of int's using the writer :class:`UnionListWriter`. We build a vector from 0 to 9 and each index contains a list with values [[0, 0, 0, 0, 0], [0, 1, 2, 3, 4], [0, 2, 4, 6, 8], …, [0, 9, 18, 27, 36]]. List values can be added in any order so writing a list such as [3, 1, 2] would be just as valid. .. code-block:: Java - + try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE); ListVector listVector = ListVector.empty("vector", allocator)) { UnionListWriter writer = listVector.getWriter(); @@ -240,7 +240,7 @@ For example, the code below shows how to build a :class:`ListVector` of int's us writer.endList(); } listVector.setValueCount(10); - } + } :class:`ListVector` values can be accessed either through the get API or through the reader class :class:`UnionListReader`. To read all the values, first enumerate through the indexes, and then enumerate through the inner list values. diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index 928c607d139ce..ae48578a1bd61 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -52,10 +52,10 @@ Aggregations Cumulative Functions -------------------- -Cumulative functions are vector functions that perform a running accumulation on -their input using a given binary associative operation with an identity element -(a monoid) and output an array containing the corresponding intermediate running -values. The input is expected to be of numeric type. By default these functions +Cumulative functions are vector functions that perform a running accumulation on +their input using a given binary associative operation with an identity element +(a monoid) and output an array containing the corresponding intermediate running +values. The input is expected to be of numeric type. By default these functions do not detect overflow. They are also available in an overflow-checking variant, suffixed ``_checked``, which throws an ``ArrowInvalid`` exception when overflow is detected. diff --git a/docs/source/python/api/substrait.rst b/docs/source/python/api/substrait.rst index 66e88fcd279ae..1556be9dbd011 100644 --- a/docs/source/python/api/substrait.rst +++ b/docs/source/python/api/substrait.rst @@ -50,4 +50,4 @@ Utility .. autosummary:: :toctree: ../generated/ - get_supported_functions \ No newline at end of file + get_supported_functions diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index c02059a4f8faa..ce3dfabb0e689 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -23,7 +23,7 @@ Compute Functions ================= Arrow supports logical compute operations over inputs of possibly -varying types. +varying types. The standard compute operations are provided by the :mod:`pyarrow.compute` module and can be used directly:: @@ -91,7 +91,7 @@ Grouped Aggregations ==================== PyArrow supports grouped aggregations over :class:`pyarrow.Table` through the -:meth:`pyarrow.Table.group_by` method. +:meth:`pyarrow.Table.group_by` method. 
The method will return a grouping declaration to which the hash aggregation functions can be applied:: @@ -300,7 +300,7 @@ Filtering by Expressions :class:`.Table` and :class:`.Dataset` can both be filtered using a boolean :class:`.Expression`. -The expression can be built starting from a +The expression can be built starting from a :func:`pyarrow.compute.field`. Comparisons and transformations can then be applied to one or more fields to build the filter expression you care about. @@ -325,7 +325,7 @@ in column ``"nums"`` by the ``bit_wise_and`` operation equals ``0``. Only the numbers where the last bit was ``0`` will return a ``0`` as the result of ``num & 1`` and as all numbers where the last bit is ``0`` are multiples of ``2`` we will be filtering for the even numbers only. - + Once we have our filter, we can provide it to the :meth:`.Table.filter` method to filter our table only for the matching rows: @@ -392,7 +392,7 @@ User-Defined Functions PyArrow allows defining and registering custom compute functions. These functions can then be called from Python as well as C++ (and potentially any other implementation wrapping Arrow C++, such as the R ``arrow`` package) -using their registered function name. +using their registered function name. UDF support is limited to scalar functions. A scalar function is a function which executes elementwise operations on arrays or scalars. In general, the output of a @@ -441,7 +441,7 @@ output type need to be defined. Using :func:`pyarrow.compute.register_scalar_fun function_docs, input_types, output_type) - + The implementation of a user-defined function always takes a first *context* parameter (named ``ctx`` in the example above) which is an instance of @@ -497,9 +497,9 @@ the GCD of one column with the scalar value 30. We will be re-using the category: [["A","B","C","D"]] Note that ``ds.field('')._call(...)`` returns a :func:`pyarrow.compute.Expression`. -The arguments passed to this function call are expressions, not scalar values +The arguments passed to this function call are expressions, not scalar values (notice the difference between :func:`pyarrow.scalar` and :func:`pyarrow.compute.scalar`, -the latter produces an expression). +the latter produces an expression). This expression is evaluated when the projection operator executes it. Projection Expressions diff --git a/docs/source/python/dataset.rst b/docs/source/python/dataset.rst index daab36f9a7be9..00469fd57becf 100644 --- a/docs/source/python/dataset.rst +++ b/docs/source/python/dataset.rst @@ -575,28 +575,28 @@ Partitioning performance considerations Partitioning datasets has two aspects that affect performance: it increases the number of files and it creates a directory structure around the files. Both of these have benefits -as well as costs. Depending on the configuration and the size of your dataset, the costs -can outweigh the benefits. +as well as costs. Depending on the configuration and the size of your dataset, the costs +can outweigh the benefits. -Because partitions split up the dataset into multiple files, partitioned datasets can be -read and written with parallelism. However, each additional file adds a little overhead in -processing for filesystem interaction. It also increases the overall dataset size since +Because partitions split up the dataset into multiple files, partitioned datasets can be +read and written with parallelism. However, each additional file adds a little overhead in +processing for filesystem interaction. 
It also increases the overall dataset size since each file has some shared metadata. For example, each parquet file contains the schema and -group-level statistics. The number of partitions is a floor for the number of files. If -you partition a dataset by date with a year of data, you will have at least 365 files. If -you further partition by another dimension with 1,000 unique values, you will have up to +group-level statistics. The number of partitions is a floor for the number of files. If +you partition a dataset by date with a year of data, you will have at least 365 files. If +you further partition by another dimension with 1,000 unique values, you will have up to 365,000 files. This fine of partitioning often leads to small files that mostly consist of metadata. -Partitioned datasets create nested folder structures, and those allow us to prune which +Partitioned datasets create nested folder structures, and those allow us to prune which files are loaded in a scan. However, this adds overhead to discovering files in the dataset, as we'll need to recursively "list directory" to find the data files. Too fine partitions can cause problems here: Partitioning a dataset by date for a years worth -of data will require 365 list calls to find all the files; adding another column with +of data will require 365 list calls to find all the files; adding another column with cardinality 1,000 will make that 365,365 calls. The most optimal partitioning layout will depend on your data, access patterns, and which -systems will be reading the data. Most systems, including Arrow, should work across a +systems will be reading the data. Most systems, including Arrow, should work across a range of file sizes and partitioning layouts, but there are extremes you should avoid. These guidelines can help avoid some known worst cases: @@ -611,35 +611,35 @@ of file size. Arrow's file writer provides sensible defaults for group sizing in Configuring files open during a write ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When writing data to the disk, there are a few parameters that can be +When writing data to the disk, there are a few parameters that can be important to optimize the writes, such as the number of rows per file and the maximum number of open files allowed during the write. Set the maximum number of files opened with the ``max_open_files`` parameter of :meth:`write_dataset`. -If ``max_open_files`` is set greater than 0 then this will limit the maximum +If ``max_open_files`` is set greater than 0 then this will limit the maximum number of files that can be left open. This only applies to writing partitioned datasets, where rows are dispatched to the appropriate file depending on their partition values. If an attempt is made to open too many files then the least recently used file will be closed. If this setting is set too low you may end up fragmenting your data into many small files. -If your process is concurrently using other file handlers, either with a -dataset scanner or otherwise, you may hit a system file handler limit. For +If your process is concurrently using other file handlers, either with a +dataset scanner or otherwise, you may hit a system file handler limit. For example, if you are scanning a dataset with 300 files and writing out to 900 files, the total of 1200 files may be over a system limit. (On Linux, this might be a "Too Many Open Files" error.) You can either reduce this ``max_open_files`` setting or increase the file handler limit on your system. 
The default value is 900 which allows some number of files -to be open by the scanner before hitting the default Linux limit of 1024. +to be open by the scanner before hitting the default Linux limit of 1024. -Another important configuration used in :meth:`write_dataset` is ``max_rows_per_file``. +Another important configuration used in :meth:`write_dataset` is ``max_rows_per_file``. Set the maximum number of rows written in each file with the ``max_rows_per_files`` parameter of :meth:`write_dataset`. -If ``max_rows_per_file`` is set greater than 0 then this will limit how many +If ``max_rows_per_file`` is set greater than 0 then this will limit how many rows are placed in any single file. Otherwise there will be no limit and one file will be created in each output directory unless files need to be closed to respect ``max_open_files``. This setting is the primary way to control file size. @@ -653,22 +653,22 @@ Configuring rows per group during a write The volume of data written to the disk per each group can be configured. This configuration includes a lower and an upper bound. -The minimum number of rows required to form a row group is +The minimum number of rows required to form a row group is defined with the ``min_rows_per_group`` parameter of :meth:`write_dataset`. .. note:: - If ``min_rows_per_group`` is set greater than 0 then this will cause the - dataset writer to batch incoming data and only write the row groups to the - disk when sufficient rows have accumulated. The final row group size may be - less than this value if other options such as ``max_open_files`` or + If ``min_rows_per_group`` is set greater than 0 then this will cause the + dataset writer to batch incoming data and only write the row groups to the + disk when sufficient rows have accumulated. The final row group size may be + less than this value if other options such as ``max_open_files`` or ``max_rows_per_file`` force smaller row group sizes. The maximum number of rows allowed per group is defined with the ``max_rows_per_group`` parameter of :meth:`write_dataset`. -If ``max_rows_per_group`` is set greater than 0 then the dataset writer may split -up large incoming batches into multiple row groups. If this value is set then -``min_rows_per_group`` should also be set or else you may end up with very small +If ``max_rows_per_group`` is set greater than 0 then the dataset writer may split +up large incoming batches into multiple row groups. If this value is set then +``min_rows_per_group`` should also be set or else you may end up with very small row groups (e.g. if the incoming row group size is just barely larger than this value). Row groups are built into the Parquet and IPC/Feather formats but don't affect JSON or CSV. @@ -719,7 +719,7 @@ Customizing & inspecting written files By default the dataset API will create files named "part-i.format" where "i" is a integer generated during the write and "format" is the file format specified in the write_dataset call. For simple datasets it may be possible to know which files will be created but for -larger or partitioned datasets it is not so easy. The ``file_visitor`` keyword can be used +larger or partitioned datasets it is not so easy. The ``file_visitor`` keyword can be used to supply a visitor that will be called as each file is created: .. 
ipython:: python diff --git a/docs/source/python/dlpack.rst b/docs/source/python/dlpack.rst index f612ebabde5c9..024c2800e1107 100644 --- a/docs/source/python/dlpack.rst +++ b/docs/source/python/dlpack.rst @@ -90,4 +90,4 @@ Convert a PyArrow CPU array to PyTorch tensor: >>> import torch >>> torch.from_dlpack(array) - tensor([2, 0, 2, 4]) + tensor([2, 0, 2, 4]) diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst index 5309250351d8e..22f983a60c349 100644 --- a/docs/source/python/filesystems.rst +++ b/docs/source/python/filesystems.rst @@ -233,7 +233,7 @@ generate a credentials file in the default location:: To connect to a public bucket without using any credentials, you must pass ``anonymous=True`` to :class:`GcsFileSystem`. Otherwise, the filesystem -will report ``Couldn't resolve host name`` since there are different host +will report ``Couldn't resolve host name`` since there are different host names for authenticated and public access. Example showing how you can read contents from a GCS bucket:: @@ -314,7 +314,7 @@ For example:: # using this to read a partitioned dataset import pyarrow.dataset as ds ds.dataset("data/", filesystem=fs) - + Similarly for Azure Blob Storage:: import adlfs diff --git a/docs/source/python/getstarted.rst b/docs/source/python/getstarted.rst index d38fcadab288f..42e415c40b835 100644 --- a/docs/source/python/getstarted.rst +++ b/docs/source/python/getstarted.rst @@ -37,7 +37,7 @@ in tabular data. Arrow also provides support for various formats to get those tabular data in and out of disk and networks. Most commonly used formats are -Parquet (:ref:`parquet`) and the IPC format (:ref:`ipc`). +Parquet (:ref:`parquet`) and the IPC format (:ref:`ipc`). Creating Arrays and Tables -------------------------- @@ -63,7 +63,7 @@ in tabular data when attached to a column name birthdays_table = pa.table([days, months, years], names=["days", "months", "years"]) - + birthdays_table See :ref:`data` for more details. @@ -75,7 +75,7 @@ Once you have tabular data, Arrow provides out of the box the features to save and restore that data for common formats like Parquet: -.. ipython:: python +.. ipython:: python import pyarrow.parquet as pq @@ -92,14 +92,14 @@ data will be as quick as possible reloaded_birthdays Saving and loading back data in arrow is usually done through -:ref:`Parquet `, :ref:`IPC format ` (:ref:`feather`), +:ref:`Parquet `, :ref:`IPC format ` (:ref:`feather`), :ref:`CSV ` or :ref:`Line-Delimited JSON ` formats. Performing Computations ----------------------- Arrow ships with a bunch of compute functions that can be applied -to its arrays and tables, so through the compute functions +to its arrays and tables, so through the compute functions it's possible to apply transformations to the data .. ipython:: python @@ -122,7 +122,7 @@ smaller chunks import pyarrow.dataset as ds - ds.write_dataset(birthdays_table, "savedir", format="parquet", + ds.write_dataset(birthdays_table, "savedir", format="parquet", partitioning=ds.partitioning( pa.schema([birthdays_table.schema.field("years")]) )) @@ -151,8 +151,8 @@ how to project them, etc., refer to :ref:`dataset` documentation. 
Continuing from here -------------------- -For digging further into Arrow, you might want to read the -:doc:`PyArrow Documentation <./index>` itself or the +For digging further into Arrow, you might want to read the +:doc:`PyArrow Documentation <./index>` itself or the `Arrow Python Cookbook `_ diff --git a/docs/source/python/getting_involved.rst b/docs/source/python/getting_involved.rst index 7b3bcf2ac527a..9fda3c7c78488 100644 --- a/docs/source/python/getting_involved.rst +++ b/docs/source/python/getting_involved.rst @@ -54,7 +54,7 @@ used as foundations to build easier to use entities. exposed to the user are declared. In some cases, those files might directly import the entities from inner implementation if they want to expose it as is without modification. -* The ``lib.pyx`` file is where the majority of the core C++ libarrow +* The ``lib.pyx`` file is where the majority of the core C++ libarrow capabilities are exposed to Python. Most of the implementation of this module relies on included ``*.pxi`` files where the specific pieces are built. While being exposed to Python as ``pyarrow.lib`` its content @@ -73,4 +73,4 @@ used as foundations to build easier to use entities. PyArrow is also based on PyArrow C++, dedicated pieces of code that live in ``python/pyarrow/src/arrow/python`` directory and provide the low level code for capabilities like converting to and from numpy or pandas and the classes - that allow to use Python objects and callbacks in C++. \ No newline at end of file + that allow to use Python objects and callbacks in C++. diff --git a/docs/source/python/integration/python_r.rst b/docs/source/python/integration/python_r.rst index 20627c3782d3c..ec5dfc366fdf9 100644 --- a/docs/source/python/integration/python_r.rst +++ b/docs/source/python/integration/python_r.rst @@ -29,7 +29,7 @@ marshaling and unmarshaling data. The article takes for granted that you have a ``Python`` environment with ``pyarrow`` correctly installed and an ``R`` environment with - ``arrow`` library correctly installed. + ``arrow`` library correctly installed. See `Python Install Instructions `_ and `R Install instructions `_ for further details. @@ -52,7 +52,7 @@ We could save such a function in a ``addthree.R`` file so that we can make it available for reuse. Once the ``addthree.R`` file is created we can invoke any of its functions -from Python using the +from Python using the `rpy2 `_ library which enables a R runtime within the Python interpreter. @@ -91,12 +91,12 @@ to access the ``R`` function and print the expected result: .. code-block:: bash - $ python addthree.py + $ python addthree.py 6 If instead of passing around basic data types we want to pass around Arrow Arrays, we can do so relying on the -`rpy2-arrow `_ +`rpy2-arrow `_ module which implements ``rpy2`` support for Arrow types. ``rpy2-arrow`` can be installed through ``pip``: @@ -189,7 +189,7 @@ Invoking the ``addthree.R`` script will print the outcome of adding .. code-block:: bash - $ R --silent -f addthree.R + $ R --silent -f addthree.R Array [ @@ -219,7 +219,7 @@ necessary to import an Arrow Array in R from the C Data interface. That work will be done by the ``addthree_cdata`` function which invokes the ``addthree`` function once the Array is imported. -Our ``addthree.R`` will thus have both the ``addthree_cdata`` and the +Our ``addthree.R`` will thus have both the ``addthree_cdata`` and the ``addthree`` functions: .. 
code-block:: R @@ -261,7 +261,7 @@ Our ``addthree.py`` will thus become: # Import the pyarrow module that provides access to the C Data interface from pyarrow.cffi import ffi as arrow_c - # Allocate structures where we will export the Array data + # Allocate structures where we will export the Array data # and the Array schema. They will be released when we exit the with block. with arrow_c.new("struct ArrowArray*") as c_array, \ arrow_c.new("struct ArrowSchema*") as c_schema: @@ -274,7 +274,7 @@ Our ``addthree.py`` will thus become: array.type._export_to_c(c_schema_ptr) # Invoke the R addthree_cdata function passing the references - # to the array and schema C Data structures. + # to the array and schema C Data structures. # Those references are passed as strings as R doesn't have # native support for 64bit integers, so the integers are # converted to their string representation for R to convert it back. @@ -289,19 +289,19 @@ Our ``addthree.py`` will thus become: # Once the returned array is exported to a C Data infrastructure # we can import it back into pyarrow using Array._import_from_c py_array = pyarrow.Array._import_from_c(c_array_ptr, c_schema_ptr) - + print("RESULT", py_array) Running the newly changed ``addthree.py`` will now print the Array resulting -from adding ``3`` to all the elements of the original +from adding ``3`` to all the elements of the original ``pyarrow.array((1, 2, 3))`` array: .. code-block:: bash - $ python addthree.py + $ python addthree.py R[write to console]: Attaching package: ‘arrow’ RESULT [ 4, 5, 6 - ] \ No newline at end of file + ] diff --git a/docs/source/python/ipc.rst b/docs/source/python/ipc.rst index 27cd14a68853d..f55e8f8bc5dc3 100644 --- a/docs/source/python/ipc.rst +++ b/docs/source/python/ipc.rst @@ -76,12 +76,12 @@ this one can be created with :func:`~pyarrow.ipc.new_stream`: .. ipython:: python sink = pa.BufferOutputStream() - + with pa.ipc.new_stream(sink, batch.schema) as writer: for i in range(5): writer.write_batch(batch) -Here we used an in-memory Arrow buffer stream (``sink``), +Here we used an in-memory Arrow buffer stream (``sink``), but this could have been a socket or some other IO sink. When creating the ``StreamWriter``, we pass the schema, since the schema @@ -102,7 +102,7 @@ convenience function ``pyarrow.ipc.open_stream``: with pa.ipc.open_stream(buf) as reader: schema = reader.schema batches = [b for b in reader] - + schema len(batches) @@ -126,7 +126,7 @@ The :class:`~pyarrow.RecordBatchFileWriter` has the same API as .. ipython:: python sink = pa.BufferOutputStream() - + with pa.ipc.new_file(sink, batch.schema) as writer: for i in range(10): writer.write_batch(batch) @@ -164,7 +164,7 @@ DataFrame output: with pa.ipc.open_file(buf) as reader: df = reader.read_pandas() - + df[:5] Efficiently Writing and Reading Arrow Data diff --git a/docs/source/python/json.rst b/docs/source/python/json.rst index 99ecbc19a1230..eff6135d895a7 100644 --- a/docs/source/python/json.rst +++ b/docs/source/python/json.rst @@ -21,7 +21,7 @@ Reading JSON files ================== -Arrow supports reading columnar data from line-delimited JSON files. +Arrow supports reading columnar data from line-delimited JSON files. In this context, a JSON file consists of multiple JSON objects, one per line, representing individual data rows. 
For example, this file represents two rows of data with four columns "a", "b", "c", "d": diff --git a/docs/source/python/orc.rst b/docs/source/python/orc.rst index bfa68fc34d895..76c293d742010 100644 --- a/docs/source/python/orc.rst +++ b/docs/source/python/orc.rst @@ -112,7 +112,7 @@ control various settings when writing an ORC file. * ``file_version``, the ORC format version to use. ``'0.11'`` ensures compatibility with older readers, while ``'0.12'`` is the newer one. -* ``stripe_size``, to control the approximate size of data within a column +* ``stripe_size``, to control the approximate size of data within a column stripe. This currently defaults to 64MB. See the :func:`~pyarrow.orc.write_table()` docstring for more details. diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst index d4717897660b6..029ed4f1a3e15 100644 --- a/docs/source/python/parquet.rst +++ b/docs/source/python/parquet.rst @@ -32,7 +32,7 @@ performance data IO. Apache Arrow is an ideal in-memory transport layer for data that is being read or written with Parquet files. We have been concurrently developing the `C++ -implementation of +implementation of Apache Parquet `_, which includes a native, multithreaded C++ adapter to and from in-memory Arrow data. PyArrow includes Python bindings to this code, which thus enables reading diff --git a/docs/source/python/timestamps.rst b/docs/source/python/timestamps.rst index 64a2a354dddef..cecbd5b595bc7 100644 --- a/docs/source/python/timestamps.rst +++ b/docs/source/python/timestamps.rst @@ -51,8 +51,8 @@ This implies a few things when round-tripping timestamps: #. Timezone information is lost (all timestamps that result from converting from spark to arrow/pandas are "time zone naive"). #. Timestamps are truncated to microseconds. -#. The session time zone might have unintuitive impacts on - translation of timestamp values. +#. The session time zone might have unintuitive impacts on + translation of timestamp values. Spark to Pandas (through Apache Arrow) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -62,8 +62,8 @@ The following cases assume the Spark configuration :: - >>> pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)], - ... 'aware': [Timestamp(year=2019, month=1, day=1, + >>> pdf = pd.DataFrame({'naive': [datetime(2019, 1, 1, 0)], + ... 'aware': [Timestamp(year=2019, month=1, day=1, ... nanosecond=500, tz=timezone(timedelta(hours=-8)))]}) >>> pdf naive aware @@ -77,7 +77,7 @@ The following cases assume the Spark configuration +-------------------+-------------------+ |2019-01-01 00:00:00|2019-01-01 08:00:00| +-------------------+-------------------+ - + Note that conversion of the aware timestamp is shifted to reflect the time assuming UTC (it represents the same instant in time). For naive timestamps, Spark treats them as being in the system local @@ -129,7 +129,7 @@ session time zone is still PST: |2019-01-01 00:00:00|2019-01-01 00:00:00| +-------------------+-------------------+ - + >>> pst_df.toPandas() naive aware 0 2019-01-01 2019-01-01 @@ -141,7 +141,7 @@ session time zone is still PST: aware 1 non-null datetime64[ns] dtypes: datetime64[ns](2) memory usage: 96.0 bytes - + Notice that, in addition to being a "time zone naive" timestamp, the 'aware' value will now differ when converting to an epoch offset. 
Spark does the conversion by first converting to the session time zone (or system local time zone if @@ -158,9 +158,9 @@ time: >>> (pst_df.toPandas()['aware'][0].timestamp()-pdf['aware'][0].timestamp())/3600 -8.0 -The same type of conversion happens with the data frame converted while -the session time zone was UTC. In this case both naive and aware -represent different instants in time (the naive instant is due to +The same type of conversion happens with the data frame converted while +the session time zone was UTC. In this case both naive and aware +represent different instants in time (the naive instant is due to the change in session time zone between creating data frames): :: @@ -179,9 +179,9 @@ the change in session time zone between creating data frames): Note that the surprising shift for aware doesn't happen when the session time zone is UTC (but the timestamps still become "time zone naive"): - + :: - + >>> spark.conf.set("spark.sql.session.timeZone", "UTC") >>> pst_df.show() +-------------------+-------------------+ @@ -189,7 +189,7 @@ still become "time zone naive"): +-------------------+-------------------+ |2019-01-01 08:00:00|2019-01-01 08:00:00| +-------------------+-------------------+ - + >>> pst_df.toPandas()['aware'][0] Timestamp('2019-01-01 08:00:00') >>> pdf['aware'][0] From 250291500b6a7d5d934901acef708cef2eb1dc08 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 1 May 2024 14:39:35 +0800 Subject: [PATCH 033/261] GH-41463: [C++] Skip TestConcurrentFillFromScalar for platforms without threading support (#41461) ### Rationale for this change See #41463 and https://github.com/apache/arrow/pull/40237#issuecomment-2084577090 ### What changes are included in this PR? Skip test for platforms that have no threading support. ### Are these changes tested? Change is test. ### Are there any user-facing changes? None. * GitHub Issue: #41463 Authored-by: Ruoxi Sun Signed-off-by: Joris Van den Bossche --- cpp/src/arrow/array/array_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index af64908b59582..7e25ad61fa2ea 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -827,6 +827,9 @@ TEST_F(TestArray, TestFillFromScalar) { // GH-40069: Data-race when concurrent calling ArraySpan::FillFromScalar of the same // scalar instance. TEST_F(TestArray, TestConcurrentFillFromScalar) { +#ifndef ARROW_ENABLE_THREADING + GTEST_SKIP() << "Test requires threading support"; +#endif for (auto type : TestArrayUtilitiesAgainstTheseTypes()) { ARROW_SCOPED_TRACE("type = ", type->ToString()); for (auto seed : {0u, 0xdeadbeef, 42u}) { From 22f88fa4a8f5ac7250f1845aace5a78d20006ef2 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Wed, 1 May 2024 00:12:37 -0800 Subject: [PATCH 034/261] GH-41410: [C++][FS][Azure][Docs] Add AzureFileSystem to Filesystems API reference (#41411) ### Rationale for this change See https://github.com/apache/arrow/issues/41410. ### What changes are included in this PR? Just changes to filesystem.rst. ### Are these changes tested? Yes, locally. ### Are there any user-facing changes? These are those changes. 
* GitHub Issue: #41410 Authored-by: Bryce Mecum Signed-off-by: Sutou Kouhei --- cpp/src/arrow/filesystem/azurefs.h | 17 ++++++++--------- docs/source/cpp/api/filesystem.rst | 9 +++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index 667b4e372ae59..b71a5ae73b2e9 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -141,18 +141,14 @@ struct ARROW_EXPORT AzureOptions { /// /// 1. abfs[s]://[:\@]\.blob.core.windows.net /// [/\[/\]] - /// 2. abfs[s]://\[:\]@\.dfs.core.windows.net - /// [/path] + /// 2. abfs[s]://\[:\]\@\.dfs.core.windows.net[/path] /// 3. abfs[s]://[\]@]\[\<:port\>] /// [/\[/path]] /// 4. abfs[s]://[\]@]\[/path] /// - /// 1. and 2. are compatible with the Azure Data Lake Storage Gen2 URIs: - /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri - /// - /// 3. is for Azure Blob Storage compatible service including Azurite. - /// - /// 4. is a shorter version of 1. and 2. + /// (1) and (2) are compatible with the Azure Data Lake Storage Gen2 URIs + /// [1], (3) is for Azure Blob Storage compatible service including Azurite, + /// and (4) is a shorter version of (1) and (2). /// /// Note that there is no difference between abfs and abfss. HTTPS is /// used with abfs by default. You can force to use HTTP by specifying @@ -178,6 +174,9 @@ struct ARROW_EXPORT AzureOptions { /// AzureOptions::ConfigureClientSecretCredential() is called. /// * client_secret: You must specify "tenant_id" and "client_id" /// too. AzureOptions::ConfigureClientSecretCredential() is called. + /// + /// [1]: + /// https://learn.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction-abfs-uri static Result FromUri(const Uri& uri, std::string* out_path); static Result FromUri(const std::string& uri, std::string* out_path); @@ -226,7 +225,7 @@ struct ARROW_EXPORT AzureOptions { /// overwriting. /// - When you use the ListBlobs operation without specifying a delimiter, the results /// include both directories and blobs. If you choose to use a delimiter, use only a -/// forward slash (/) -- the only supported delimiter. +/// forward slash (/) \--- the only supported delimiter. /// - If you use the DeleteBlob API to delete a directory, that directory is deleted only /// if it's empty. This means that you can't use the Blob API delete directories /// recursively. diff --git a/docs/source/cpp/api/filesystem.rst b/docs/source/cpp/api/filesystem.rst index 02b12668327f2..599e9fedb60f9 100644 --- a/docs/source/cpp/api/filesystem.rst +++ b/docs/source/cpp/api/filesystem.rst @@ -97,3 +97,12 @@ Google Cloud Storage filesystem .. doxygenclass:: arrow::fs::GcsFileSystem :members: + +Azure filesystem +---------------- + +.. doxygenstruct:: arrow::fs::AzureOptions + :members: + +.. doxygenclass:: arrow::fs::AzureFileSystem + :members: From 281122c018df86601ca675f3941751ddc3a89b3d Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Thu, 2 May 2024 00:18:12 +0800 Subject: [PATCH 035/261] GH-41306: [C++] Check to avoid copying when NullBitmapBuffer is Null (#41452) ### Rationale for this change This PR addresses a bug with the `FixedSizeBinary` type where it does not cast to a `Binary` type after being sliced. When slicing occurs, the offset is modified. If the resulting sliced data structure does not contain any `null` values, the Null Bitmap Buffer may be set to `null`. 
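To make that setup concrete, here is a minimal, hypothetical Python sketch (illustrative only, not part of this PR) that produces such a sliced array:

```python
import pyarrow as pa

# A FixedSizeBinary(3) array with no nulls.
arr = pa.array([b"foo", b"bar", b"baz", b"quu"], type=pa.binary(3))

# Slicing moves the offset; since the slice holds no nulls, its
# null bitmap buffer is absent (null).
sliced = arr.slice(1, 3)

# Before this fix, casting the slice to a variable-size binary type
# could crash (the EXC_BAD_ACCESS described below); with the fix,
# the three sliced values survive the cast.
print(sliced.cast(pa.binary()))
```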
Currently, when a `Cast` operation is attempted on such a data structure, the code erroneously tries to access the Null Bitmap Buffer even when it is `null`. This leads to an `EXC_BAD_ACCESS` error. This PR implements a fix to prevent this erroneous behavior by adding checks before accessing the Null Bitmap Buffer. ### What changes are included in this PR? - Add a null check for the Null Bitmap Buffer when casting from `FixedSizeBinary` to `Binary` to prevent access violations if the buffer is null. ### Are these changes tested? Yes ### Are there any user-facing changes? Yes (Pyarrow side) * GitHub Issue: #41306 Authored-by: Hyunseok Seo Signed-off-by: Weston Pace --- .../compute/kernels/scalar_cast_string.cc | 13 ++++++--- .../arrow/compute/kernels/scalar_cast_test.cc | 27 +++++++++++++++++++ 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index 3a8352a9b870f..dc3fe29a3dfae 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -340,10 +340,15 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou if (input.offset == output->offset) { output->buffers[0] = input.GetBuffer(0); } else { - ARROW_ASSIGN_OR_RAISE( - output->buffers[0], - arrow::internal::CopyBitmap(ctx->memory_pool(), input.buffers[0].data, - input.offset, input.length)); + // When the offsets are different (e.g., due to slice operation), we need to check if + // the null bitmap buffer is not null before copying it. The null bitmap buffer can be + // null if the input array value does not contain any null value. + if (input.buffers[0].data != NULLPTR) { + ARROW_ASSIGN_OR_RAISE( + output->buffers[0], + arrow::internal::CopyBitmap(ctx->memory_pool(), input.buffers[0].data, + input.offset, input.length)); + } } // This buffer is preallocated diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index af62b4da2caa5..a6d7f6097b59b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2196,6 +2196,33 @@ TEST(Cast, BinaryOrStringToFixedSizeBinary) { } } +TEST(Cast, FixedSizeBinaryToBinaryOrString) { + for (auto out_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])"); + + CheckCast(valid_input, ArrayFromJSON(out_type, R"(["foo", null, "bar", "baz", + "quu"])")); + + auto empty_input = ArrayFromJSON(fixed_size_binary(3), "[]"); + CheckCast(empty_input, ArrayFromJSON(out_type, "[]")); + } +} + +TEST(Cast, FixedSizeBinaryToBinaryOrStringWithSlice) { + for (auto out_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])"); + auto sliced = valid_input->Slice(1, 3); + CheckCast(sliced, ArrayFromJSON(out_type, R"([null, "bar", "baz"])")); + + auto valid_input_without_null = ArrayFromJSON(fixed_size_binary(3), R"(["foo", "bar", + "baz", "quu"])"); + auto sliced_without_null = valid_input_without_null->Slice(1, 3); + CheckCast(sliced_without_null, ArrayFromJSON(out_type, R"(["bar", "baz", "quu"])")); + } +} + TEST(Cast, IntToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast(ArrayFromJSON(int8(), "[0, 1, 127, -128, null]"), From cc78c7a9bf17ceba7d538b30ddda008daeb1db85 Mon Sep 
17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 May 2024 05:54:20 +0900 Subject: [PATCH 036/261] MINOR: [JS] Bump memfs from 4.8.2 to 4.9.2 in /js (#41482) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [memfs](https://github.com/streamich/memfs) from 4.8.2 to 4.9.2.
Release notes (sourced from memfs's releases and changelog):

4.9.2 (2024-04-30)
  Bug Fixes

4.9.1 (2024-04-27)
  Bug Fixes
  • 🐛 use latest json-pack implementation (de54ab5)

4.9.0 (2024-04-27)
  Features
  • 🎸 define .scan() CRUD method (921e05d)
  • 🎸 implement .scan() in Node.js CRUD (3d973b7)
  • 🎸 implement .scan() method for FSA CRUD (a148fb8)

Commits
  • 0cc081d chore(release): 4.9.2 [skip ci]
  • a474a47 Merge pull request #1031 from streamich/bump-utils
  • 32cc4da fix: 🐛 bump json-pack
  • eea3b42 fix: 🐛 bump @jsonjoy.com/util package
  • 7a38617 chore(deps): update peaceiris/actions-gh-pages action to v4 (#1027)
  • b198f40 ci: 🎡 add mirror to Gitlab workflow
  • 4619f16 chore(release): 4.9.1 [skip ci]
  • e5461ae Merge pull request #1028 from streamich/dependencies
  • 0dfd7bb docs: ✏️ describe memfs() helper in docs
  • 0509f15 chore: 🤖 remove /src/json-joy/ folder
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 53 +++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/js/package.json b/js/package.json index fee6b342dbd13..7def7986490d6 100644 --- a/js/package.json +++ b/js/package.json @@ -99,7 +99,7 @@ "ix": "5.0.0", "jest": "29.7.0", "jest-silent-reporter": "0.5.0", - "memfs": "4.8.2", + "memfs": "4.9.2", "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", diff --git a/js/yarn.lock b/js/yarn.lock index b74e4543d9d4e..9daed1af9dd69 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -936,6 +936,26 @@ "@jridgewell/resolve-uri" "^3.1.0" "@jridgewell/sourcemap-codec" "^1.4.14" +"@jsonjoy.com/base64@^1.1.1": + version "1.1.1" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/base64/-/base64-1.1.1.tgz#a717fd8840f7bad49c7fe66cc65db8bcfc4c4dc5" + integrity sha512-LnFjVChaGY8cZVMwAIMjvA1XwQjZ/zIXHyh28IyJkyNkzof4Dkm1+KN9UIm3lHhREH4vs7XwZ0NpkZKnwOtEfg== + +"@jsonjoy.com/json-pack@^1.0.3": + version "1.0.3" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/json-pack/-/json-pack-1.0.3.tgz#a68cbe3ccfd85d26cd763e4175fe90c9ee383d33" + integrity sha512-Q0SPAdmK6s5Fe3e1kcNvwNyk6e2+CxM8XZdGbf4abZG7nUO05KSie3/iX29loTBuY+75uVP6RixDSPVpotfzmQ== + dependencies: + "@jsonjoy.com/base64" "^1.1.1" + "@jsonjoy.com/util" "^1.1.2" + hyperdyperid "^1.2.0" + thingies "^1.20.0" + +"@jsonjoy.com/util@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.1.2.tgz#5072c27ecdb16d1ed7a2d125a1d0ed8aba01d652" + integrity sha512-HOGa9wtE6LEz2I5mMQ2pMSjth85PmD71kPbsecs02nEUq3/Kw0wRK3gmZn5BCEB8mFLXByqPxjHgApoMwIPMKQ== + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -4018,6 +4038,11 @@ human-signals@^2.1.0: resolved "https://registry.yarnpkg.com/human-signals/-/human-signals-2.1.0.tgz#dc91fcba42e4d06e4abaed33b3e7a3c02f514ea0" integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== +hyperdyperid@^1.2.0: + version "1.2.0" + resolved "https://registry.yarnpkg.com/hyperdyperid/-/hyperdyperid-1.2.0.tgz#59668d323ada92228d2a869d3e474d5a33b69e6b" + integrity sha512-Y93lCzHYgGWdrJ66yIktxiaGULYc6oGiABxhcO5AufBeOyoIdZF7bIfLaOrbM0iGIOXQQgxxRrFEnb+Y6w1n4A== + ignore@^5.2.0, ignore@^5.2.4, ignore@^5.3.1: version "5.3.1" resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.1.tgz#5073e554cd42c5b33b394375f538b8593e34d4ef" @@ -5165,11 +5190,14 @@ matchdep@^2.0.0: resolve "^1.4.0" stack-trace "0.0.10" -memfs@4.8.2: - version "4.8.2" - resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.8.2.tgz#9bb7c3e43647348451082557f05fb170b7442949" - integrity sha512-j4WKth315edViMBGkHW6NTF0QBjsTrcRDmYNcGsPq+ozMEyCCCIlX2d2mJ5wuh6iHvJ3FevUrr48v58YRqVdYg== +memfs@4.9.2: + version "4.9.2" + resolved "https://registry.yarnpkg.com/memfs/-/memfs-4.9.2.tgz#42e7b48207268dad8c9c48ea5d4952c5d3840433" + integrity sha512-f16coDZlTG1jskq3mxarwB+fGRrd0uXWt+o1WIhRfOwbXQZqUDsTVxQBFK9JjRQHblg8eAG2JSbprDXKjc7ijQ== dependencies: + "@jsonjoy.com/json-pack" "^1.0.3" + "@jsonjoy.com/util" "^1.1.2" + sonic-forest "^1.0.0" tslib "^2.0.0" memoizee@0.4.X: @@ -6386,6 +6414,13 @@ snapdragon@^0.8.1: source-map-resolve "^0.5.0" use "^3.1.0" +sonic-forest@^1.0.0: + version "1.0.2" + resolved 
"https://registry.yarnpkg.com/sonic-forest/-/sonic-forest-1.0.2.tgz#d80aa621d1cffe75a606ca44789ccff30f5b9ce6" + integrity sha512-2rICdwIJi5kVlehMUVtJeHn3ohh5YZV4pDv0P0c1M11cRz/gXNViItpM94HQwfvnXuzybpqK0LZJgTa3lEwtAw== + dependencies: + tree-dump "^1.0.0" + source-map-resolve@^0.5.0: version "0.5.3" resolved "https://registry.yarnpkg.com/source-map-resolve/-/source-map-resolve-0.5.3.tgz#190866bece7553e1f8f267a2ee82c606b5509a1a" @@ -6755,6 +6790,11 @@ textextensions@^3.2.0: resolved "https://registry.yarnpkg.com/textextensions/-/textextensions-3.3.0.tgz#03530d5287b86773c08b77458589148870cc71d3" integrity sha512-mk82dS8eRABNbeVJrEiN5/UMSCliINAuz8mkUwH4SwslkNP//gbEzlWNS5au0z5Dpx40SQxzqZevZkn+WYJ9Dw== +thingies@^1.20.0: + version "1.21.0" + resolved "https://registry.yarnpkg.com/thingies/-/thingies-1.21.0.tgz#e80fbe58fd6fdaaab8fad9b67bd0a5c943c445c1" + integrity sha512-hsqsJsFMsV+aD4s3CWKk85ep/3I9XzYV/IXaSouJMYIoDlgyi11cBhsqYe9/geRfB0YIikBQg6raRaM+nIMP9g== + through2-filter@^3.0.0: version "3.0.0" resolved "https://registry.yarnpkg.com/through2-filter/-/through2-filter-3.0.0.tgz#700e786df2367c2c88cd8aa5be4cf9c1e7831254" @@ -6866,6 +6906,11 @@ totalist@^3.0.0: resolved "https://registry.yarnpkg.com/totalist/-/totalist-3.0.1.tgz#ba3a3d600c915b1a97872348f79c127475f6acf8" integrity sha512-sf4i37nQ2LBx4m3wB74y+ubopq6W/dIzXg0FDGjsYnZHVa1Da8FH853wlL2gtUhg+xJXjfk3kUZS3BRoQeoQBQ== +tree-dump@^1.0.0: + version "1.0.1" + resolved "https://registry.yarnpkg.com/tree-dump/-/tree-dump-1.0.1.tgz#b448758da7495580e6b7830d6b7834fca4c45b96" + integrity sha512-WCkcRBVPSlHHq1dc/px9iOfqklvzCbdRwvlNfxGZsrHqf6aZttfPrd7DJTt6oR10dwUfpFFQeVTkPbBIZxX/YA== + trim-newlines@^4.0.2: version "4.1.1" resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.1.1.tgz#28c88deb50ed10c7ba6dc2474421904a00139125" From 9ce7ab10fbb3937cdcb4800a791c06591523240b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 2 May 2024 05:55:57 +0900 Subject: [PATCH 037/261] MINOR: [JS] Bump rollup from 4.14.3 to 4.17.2 in /js (#41484) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [rollup](https://github.com/rollup/rollup) from 4.14.3 to 4.17.2.
Release notes (sourced from rollup's releases and changelog):

4.17.2 (2024-04-30)
  Bug Fixes
  • Fix tree-shaking problems when using spread arguments (#5503)

4.17.1 (2024-04-29)
  Bug Fixes
  • Prevent infinite recursions for certain constructor invocations (#5500)

4.17.0 (2024-04-27)
  Features
  • Track function call arguments to optimize functions only called once or with the same literal values (re-release from 4.16.0) (#5483)
  Bug Fixes
  • Reduce browser WASM size to a fraction by changing optimization settings (#5494)

4.16.4 (2024-04-23) and earlier entries: ... (truncated)

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 198 ++++++++++++++++++++++++------------------------ 2 files changed, 100 insertions(+), 100 deletions(-) diff --git a/js/package.json b/js/package.json index 7def7986490d6..8cab229521f79 100644 --- a/js/package.json +++ b/js/package.json @@ -103,7 +103,7 @@ "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", - "rollup": "4.14.3", + "rollup": "4.17.2", "rxjs": "7.8.1", "ts-jest": "29.1.2", "ts-node": "10.9.2", diff --git a/js/yarn.lock b/js/yarn.lock index 9daed1af9dd69..fe483ab8aca36 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1020,85 +1020,85 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@rollup/rollup-android-arm-eabi@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.14.3.tgz#bddf05c3387d02fac04b6b86b3a779337edfed75" - integrity sha512-X9alQ3XM6I9IlSlmC8ddAvMSyG1WuHk5oUnXGw+yUBs3BFoTizmG1La/Gr8fVJvDWAq+zlYTZ9DBgrlKRVY06g== - -"@rollup/rollup-android-arm64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.14.3.tgz#b26bd09de58704c0a45e3375b76796f6eda825e4" - integrity sha512-eQK5JIi+POhFpzk+LnjKIy4Ks+pwJ+NXmPxOCSvOKSNRPONzKuUvWE+P9JxGZVxrtzm6BAYMaL50FFuPe0oWMQ== - -"@rollup/rollup-darwin-arm64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.14.3.tgz#c5f3fd1aa285b6d33dda6e3f3ca395f8c37fd5ca" - integrity sha512-Od4vE6f6CTT53yM1jgcLqNfItTsLt5zE46fdPaEmeFHvPs5SjZYlLpHrSiHEKR1+HdRfxuzXHjDOIxQyC3ptBA== - -"@rollup/rollup-darwin-x64@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.14.3.tgz#8e4673734d7dc9d68f6d48e81246055cda0e840f" - integrity sha512-0IMAO21axJeNIrvS9lSe/PGthc8ZUS+zC53O0VhF5gMxfmcKAP4ESkKOCwEi6u2asUrt4mQv2rjY8QseIEb1aw== - -"@rollup/rollup-linux-arm-gnueabihf@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.14.3.tgz#53ed38eb13b58ababdb55a7f66f0538a7f85dcba" - integrity sha512-ge2DC7tHRHa3caVEoSbPRJpq7azhG+xYsd6u2MEnJ6XzPSzQsTKyXvh6iWjXRf7Rt9ykIUWHtl0Uz3T6yXPpKw== - -"@rollup/rollup-linux-arm-musleabihf@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.14.3.tgz#0706ee38330e267a5c9326956820f009cfb21fcd" - integrity sha512-ljcuiDI4V3ySuc7eSk4lQ9wU8J8r8KrOUvB2U+TtK0TiW6OFDmJ+DdIjjwZHIw9CNxzbmXY39wwpzYuFDwNXuw== - -"@rollup/rollup-linux-arm64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.14.3.tgz#426fce7b8b242ac5abd48a10a5020f5a468c6cb4" - integrity sha512-Eci2us9VTHm1eSyn5/eEpaC7eP/mp5n46gTRB3Aar3BgSvDQGJZuicyq6TsH4HngNBgVqC5sDYxOzTExSU+NjA== - -"@rollup/rollup-linux-arm64-musl@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.14.3.tgz#65bf944530d759b50d7ffd00dfbdf4125a43406f" - integrity sha512-UrBoMLCq4E92/LCqlh+blpqMz5h1tJttPIniwUgOFJyjWI1qrtrDhhpHPuFxULlUmjFHfloWdixtDhSxJt5iKw== - -"@rollup/rollup-linux-powerpc64le-gnu@4.14.3": - version "4.14.3" - resolved 
"https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.14.3.tgz#494ba3b31095e9a45df9c3f646d21400fb631a95" - integrity sha512-5aRjvsS8q1nWN8AoRfrq5+9IflC3P1leMoy4r2WjXyFqf3qcqsxRCfxtZIV58tCxd+Yv7WELPcO9mY9aeQyAmw== - -"@rollup/rollup-linux-riscv64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.14.3.tgz#8b88ed0a40724cce04aa15374ebe5ba4092d679f" - integrity sha512-sk/Qh1j2/RJSX7FhEpJn8n0ndxy/uf0kI/9Zc4b1ELhqULVdTfN6HL31CDaTChiBAOgLcsJ1sgVZjWv8XNEsAQ== - -"@rollup/rollup-linux-s390x-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.14.3.tgz#09c9e5ec57a0f6ec3551272c860bb9a04b96d70f" - integrity sha512-jOO/PEaDitOmY9TgkxF/TQIjXySQe5KVYB57H/8LRP/ux0ZoO8cSHCX17asMSv3ruwslXW/TLBcxyaUzGRHcqg== - -"@rollup/rollup-linux-x64-gnu@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.14.3.tgz#197f27fd481ad9c861021d5cbbf21793922a631c" - integrity sha512-8ybV4Xjy59xLMyWo3GCfEGqtKV5M5gCSrZlxkPGvEPCGDLNla7v48S662HSGwRd6/2cSneMQWiv+QzcttLrrOA== - -"@rollup/rollup-linux-x64-musl@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.14.3.tgz#5cc0522f4942f2df625e9bfb6fb02c6580ffbce6" - integrity sha512-s+xf1I46trOY10OqAtZ5Rm6lzHre/UiLA1J2uOhCFXWkbZrJRkYBPO6FhvGfHmdtQ3Bx793MNa7LvoWFAm93bg== - -"@rollup/rollup-win32-arm64-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.14.3.tgz#a648122389d23a7543b261fba082e65fefefe4f6" - integrity sha512-+4h2WrGOYsOumDQ5S2sYNyhVfrue+9tc9XcLWLh+Kw3UOxAvrfOrSMFon60KspcDdytkNDh7K2Vs6eMaYImAZg== - -"@rollup/rollup-win32-ia32-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.14.3.tgz#34727b5c7953c35fc6e1ae4f770ad3a2025f8e03" - integrity sha512-T1l7y/bCeL/kUwh9OD4PQT4aM7Bq43vX05htPJJ46RTI4r5KNt6qJRzAfNfM+OYMNEVBWQzR2Gyk+FXLZfogGw== - -"@rollup/rollup-win32-x64-msvc@4.14.3": - version "4.14.3" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.14.3.tgz#5b2fb4d8cd44c05deef8a7b0e6deb9ccb8939d18" - integrity sha512-/BypzV0H1y1HzgYpxqRaXGBRqfodgoBBCcsrujT6QRcakDQdfU+Lq9PENPh5jB4I44YWq+0C2eHsHya+nZY1sA== +"@rollup/rollup-android-arm-eabi@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.17.2.tgz#1a32112822660ee104c5dd3a7c595e26100d4c2d" + integrity sha512-NM0jFxY8bB8QLkoKxIQeObCaDlJKewVlIEkuyYKm5An1tdVZ966w2+MPQ2l8LBZLjR+SgyV+nRkTIunzOYBMLQ== + +"@rollup/rollup-android-arm64@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.17.2.tgz#5aeef206d65ff4db423f3a93f71af91b28662c5b" + integrity sha512-yeX/Usk7daNIVwkq2uGoq2BYJKZY1JfyLTaHO/jaiSwi/lsf8fTFoQW/n6IdAsx5tx+iotu2zCJwz8MxI6D/Bw== + +"@rollup/rollup-darwin-arm64@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.17.2.tgz#6b66aaf003c70454c292cd5f0236ebdc6ffbdf1a" + integrity sha512-kcMLpE6uCwls023+kknm71ug7MZOrtXo+y5p/tsg6jltpDtgQY1Eq5sGfHcQfb+lfuKwhBmEURDga9N0ol4YPw== + +"@rollup/rollup-darwin-x64@4.17.2": + version "4.17.2" + resolved 
"https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.17.2.tgz#f64fc51ed12b19f883131ccbcea59fc68cbd6c0b" + integrity sha512-AtKwD0VEx0zWkL0ZjixEkp5tbNLzX+FCqGG1SvOu993HnSz4qDI6S4kGzubrEJAljpVkhRSlg5bzpV//E6ysTQ== + +"@rollup/rollup-linux-arm-gnueabihf@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.17.2.tgz#1a7641111be67c10111f7122d1e375d1226cbf14" + integrity sha512-3reX2fUHqN7sffBNqmEyMQVj/CKhIHZd4y631duy0hZqI8Qoqf6lTtmAKvJFYa6bhU95B1D0WgzHkmTg33In0A== + +"@rollup/rollup-linux-arm-musleabihf@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.17.2.tgz#c93fd632923e0fee25aacd2ae414288d0b7455bb" + integrity sha512-uSqpsp91mheRgw96xtyAGP9FW5ChctTFEoXP0r5FAzj/3ZRv3Uxjtc7taRQSaQM/q85KEKjKsZuiZM3GyUivRg== + +"@rollup/rollup-linux-arm64-gnu@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.17.2.tgz#fa531425dd21d058a630947527b4612d9d0b4a4a" + integrity sha512-EMMPHkiCRtE8Wdk3Qhtciq6BndLtstqZIroHiiGzB3C5LDJmIZcSzVtLRbwuXuUft1Cnv+9fxuDtDxz3k3EW2A== + +"@rollup/rollup-linux-arm64-musl@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.17.2.tgz#8acc16f095ceea5854caf7b07e73f7d1802ac5af" + integrity sha512-NMPylUUZ1i0z/xJUIx6VUhISZDRT+uTWpBcjdv0/zkp7b/bQDF+NfnfdzuTiB1G6HTodgoFa93hp0O1xl+/UbA== + +"@rollup/rollup-linux-powerpc64le-gnu@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.17.2.tgz#94e69a8499b5cf368911b83a44bb230782aeb571" + integrity sha512-T19My13y8uYXPw/L/k0JYaX1fJKFT/PWdXiHr8mTbXWxjVF1t+8Xl31DgBBvEKclw+1b00Chg0hxE2O7bTG7GQ== + +"@rollup/rollup-linux-riscv64-gnu@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.17.2.tgz#7ef1c781c7e59e85a6ce261cc95d7f1e0b56db0f" + integrity sha512-BOaNfthf3X3fOWAB+IJ9kxTgPmMqPPH5f5k2DcCsRrBIbWnaJCgX2ll77dV1TdSy9SaXTR5iDXRL8n7AnoP5cg== + +"@rollup/rollup-linux-s390x-gnu@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.17.2.tgz#f15775841c3232fca9b78cd25a7a0512c694b354" + integrity sha512-W0UP/x7bnn3xN2eYMql2T/+wpASLE5SjObXILTMPUBDB/Fg/FxC+gX4nvCfPBCbNhz51C+HcqQp2qQ4u25ok6g== + +"@rollup/rollup-linux-x64-gnu@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.17.2.tgz#b521d271798d037ad70c9f85dd97d25f8a52e811" + integrity sha512-Hy7pLwByUOuyaFC6mAr7m+oMC+V7qyifzs/nW2OJfC8H4hbCzOX07Ov0VFk/zP3kBsELWNFi7rJtgbKYsav9QQ== + +"@rollup/rollup-linux-x64-musl@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.17.2.tgz#9254019cc4baac35800991315d133cc9fd1bf385" + integrity sha512-h1+yTWeYbRdAyJ/jMiVw0l6fOOm/0D1vNLui9iPuqgRGnXA0u21gAqOyB5iHjlM9MMfNOm9RHCQ7zLIzT0x11Q== + +"@rollup/rollup-win32-arm64-msvc@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.17.2.tgz#27f65a89f6f52ee9426ec11e3571038e4671790f" + integrity sha512-tmdtXMfKAjy5+IQsVtDiCfqbynAQE/TQRpWdVataHmhMb9DCoJxp9vLcCBjEQWMiUYxO1QprH/HbY9ragCEFLA== + +"@rollup/rollup-win32-ia32-msvc@4.17.2": + version 
"4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.17.2.tgz#a2fbf8246ed0bb014f078ca34ae6b377a90cb411" + integrity sha512-7II/QCSTAHuE5vdZaQEwJq2ZACkBpQDOmQsE6D6XUbnBHW8IAhm4eTufL6msLJorzrHDFv3CF8oCA/hSIRuZeQ== + +"@rollup/rollup-win32-x64-msvc@4.17.2": + version "4.17.2" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.17.2.tgz#5a2d08b81e8064b34242d5cc9973ef8dd1e60503" + integrity sha512-TGGO7v7qOq4CYmSBVEYpI1Y5xDuCEnbVC5Vth8mOsW0gDSzxNrVERPc790IGHsrT2dQSimgMr9Ub3Y1Jci5/8w== "@rollup/stream@3.0.1": version "3.0.1" @@ -6191,29 +6191,29 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@4.14.3: - version "4.14.3" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.14.3.tgz#bcbb7784b35826d3164346fa6d5aac95190d8ba9" - integrity sha512-ag5tTQKYsj1bhrFC9+OEWqb5O6VYgtQDO9hPDBMmIbePwhfSr+ExlcU741t8Dhw5DkPCQf6noz0jb36D6W9/hw== +rollup@4.17.2: + version "4.17.2" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.17.2.tgz#26d1785d0144122277fdb20ab3a24729ae68301f" + integrity sha512-/9ClTJPByC0U4zNLowV1tMBe8yMEAxewtR3cUNX5BoEpGH3dQEWpJLr6CLp0fPdYRF/fzVOgvDb1zXuakwF5kQ== dependencies: "@types/estree" "1.0.5" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.14.3" - "@rollup/rollup-android-arm64" "4.14.3" - "@rollup/rollup-darwin-arm64" "4.14.3" - "@rollup/rollup-darwin-x64" "4.14.3" - "@rollup/rollup-linux-arm-gnueabihf" "4.14.3" - "@rollup/rollup-linux-arm-musleabihf" "4.14.3" - "@rollup/rollup-linux-arm64-gnu" "4.14.3" - "@rollup/rollup-linux-arm64-musl" "4.14.3" - "@rollup/rollup-linux-powerpc64le-gnu" "4.14.3" - "@rollup/rollup-linux-riscv64-gnu" "4.14.3" - "@rollup/rollup-linux-s390x-gnu" "4.14.3" - "@rollup/rollup-linux-x64-gnu" "4.14.3" - "@rollup/rollup-linux-x64-musl" "4.14.3" - "@rollup/rollup-win32-arm64-msvc" "4.14.3" - "@rollup/rollup-win32-ia32-msvc" "4.14.3" - "@rollup/rollup-win32-x64-msvc" "4.14.3" + "@rollup/rollup-android-arm-eabi" "4.17.2" + "@rollup/rollup-android-arm64" "4.17.2" + "@rollup/rollup-darwin-arm64" "4.17.2" + "@rollup/rollup-darwin-x64" "4.17.2" + "@rollup/rollup-linux-arm-gnueabihf" "4.17.2" + "@rollup/rollup-linux-arm-musleabihf" "4.17.2" + "@rollup/rollup-linux-arm64-gnu" "4.17.2" + "@rollup/rollup-linux-arm64-musl" "4.17.2" + "@rollup/rollup-linux-powerpc64le-gnu" "4.17.2" + "@rollup/rollup-linux-riscv64-gnu" "4.17.2" + "@rollup/rollup-linux-s390x-gnu" "4.17.2" + "@rollup/rollup-linux-x64-gnu" "4.17.2" + "@rollup/rollup-linux-x64-musl" "4.17.2" + "@rollup/rollup-win32-arm64-msvc" "4.17.2" + "@rollup/rollup-win32-ia32-msvc" "4.17.2" + "@rollup/rollup-win32-x64-msvc" "4.17.2" fsevents "~2.3.2" run-parallel@^1.1.9: From 14c54bbfb7d9305e79a2c2d016c34a655773e5cb Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Wed, 1 May 2024 19:56:20 -0400 Subject: [PATCH 038/261] GH-41470: [C++] Reuse deduplication logic for direct registration (#41466) ### Rationale for this change As observed in https://github.com/apache/arrow/pull/41309 a crossbow job on mac is failing due to duplicate registration of a factory for the file:// scheme ### What changes are included in this PR? Deduplication of registered filesystem factories is applied to direct registration as well as when merging registries. ### Are these changes tested? No, we just need to verify that the problematic crossbow job is repaired. ### Are there any user-facing changes? 
No * GitHub Issue: #41470 Lead-authored-by: Benjamin Kietzman Co-authored-by: David Li Signed-off-by: David Li --- cpp/src/arrow/filesystem/filesystem.cc | 4 ++-- cpp/src/arrow/filesystem/localfs_test.cc | 7 ++++--- dev/tasks/java-jars/github.yml | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/filesystem/filesystem.cc b/cpp/src/arrow/filesystem/filesystem.cc index b79af08385c0c..284be685fa800 100644 --- a/cpp/src/arrow/filesystem/filesystem.cc +++ b/cpp/src/arrow/filesystem/filesystem.cc @@ -761,8 +761,8 @@ class FileSystemFactoryRegistry { RETURN_NOT_OK(CheckValid()); auto [it, success] = scheme_to_factory_.emplace( - std::move(scheme), Registered{std::move(factory), std::move(finalizer)}); - if (success) { + std::move(scheme), Registered{factory, std::move(finalizer)}); + if (success || (it->second.ok() && it->second->factory == factory)) { return Status::OK(); } diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index 1a20e44bc36e2..d68c992dff863 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -154,15 +154,16 @@ TEST(FileSystemFromUri, RuntimeRegisteredFactory) { EXPECT_THAT(FileSystemFromUri("slowfile2:///hey/yo", &path), Raises(StatusCode::Invalid)); - EXPECT_THAT(RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, "", 0}), - Ok()); + EXPECT_THAT( + RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, __FILE__, __LINE__}), + Ok()); ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFromUri("slowfile2:///hey/yo", &path)); EXPECT_EQ(path, "/hey/yo"); EXPECT_EQ(fs->type_name(), "slow"); EXPECT_THAT( - RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, "", 0}), + RegisterFileSystemFactory("slowfile2", {SlowFileSystemFactory, __FILE__, __LINE__}), Raises(StatusCode::KeyError, testing::HasSubstr("Attempted to register factory for scheme 'slowfile2' " "but that scheme is already registered"))); diff --git a/dev/tasks/java-jars/github.yml b/dev/tasks/java-jars/github.yml index 0437ee7864979..eb9478ebaa6ef 100644 --- a/dev/tasks/java-jars/github.yml +++ b/dev/tasks/java-jars/github.yml @@ -80,7 +80,7 @@ jobs: fail-fast: false matrix: platform: - - { runs_on: ["macos-latest"], arch: "x86_64"} + - { runs_on: ["macos-13"], arch: "x86_64"} - { runs_on: ["macos-14"], arch: "aarch_64" } env: MACOSX_DEPLOYMENT_TARGET: "10.15" From 3c67091f93223f2d12f5a73d3e5bc51e7b389a00 Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Thu, 2 May 2024 08:18:21 -0400 Subject: [PATCH 039/261] GH-41491: [Python] remove special methods related to buffers in python <2.6 (#41492) ### Rationale for this change These methods are not actually used and will be removed from Cython in an upcoming release. Closes #41491 ### What changes are included in this PR? ### Are these changes tested? Trust CI ### Are there any user-facing changes? No, this code should never be actually used. 
* GitHub Issue: #41491 Authored-by: Thomas A Caswell Signed-off-by: Joris Van den Bossche --- python/pyarrow/io.pxi | 47 ++++++++++++------------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 7890bf4b2dd76..9e8026deb435c 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1446,27 +1446,6 @@ cdef class Buffer(_Weakrefable): buffer.strides = self.strides buffer.suboffsets = NULL - def __getsegcount__(self, Py_ssize_t *len_out): - if len_out != NULL: - len_out[0] = self.size - return 1 - - def __getreadbuffer__(self, Py_ssize_t idx, void **p): - if idx != 0: - raise SystemError("accessing nonexistent buffer segment") - if p != NULL: - p[0] = self.buffer.get().data() - return self.size - - def __getwritebuffer__(self, Py_ssize_t idx, void **p): - if not self.buffer.get().is_mutable(): - raise SystemError("trying to write an immutable buffer") - if idx != 0: - raise SystemError("accessing nonexistent buffer segment") - if p != NULL: - p[0] = self.buffer.get().data() - return self.size - cdef class ResizableBuffer(Buffer): """ @@ -2142,21 +2121,21 @@ cdef class CacheOptions(_Weakrefable): Parameters ---------- hole_size_limit : int, default 8KiB - The maximum distance in bytes between two consecutive ranges; beyond + The maximum distance in bytes between two consecutive ranges; beyond this value, ranges are not combined. range_size_limit : int, default 32MiB - The maximum size in bytes of a combined range; if combining two - consecutive ranges would produce a range of a size greater than this, + The maximum size in bytes of a combined range; if combining two + consecutive ranges would produce a range of a size greater than this, they are not combined lazy : bool, default True lazy = false: request all byte ranges when PreBuffer or WillNeed is called. - lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader - needs them. - lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the + lazy = True, prefetch_limit = 0: request merged byte ranges only after the reader + needs them. + lazy = True, prefetch_limit = k: prefetch up to k merged byte ranges ahead of the range that is currently being read. prefetch_limit : int, default 0 - The maximum number of ranges to be prefetched. This is only used for - lazy cache to asynchronously read some ranges after reading the target + The maximum number of ranges to be prefetched. This is only used for + lazy cache to asynchronously read some ranges after reading the target range. """ @@ -2227,19 +2206,19 @@ cdef class CacheOptions(_Weakrefable): """ Create suiteable CacheOptions based on provided network metrics. - Typically this will be used with object storage solutions like Amazon S3, + Typically this will be used with object storage solutions like Amazon S3, Google Cloud Storage and Azure Blob Storage. Parameters ---------- time_to_first_byte_millis : int - Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call - setup latency of a new read request. The value is a positive integer. + Seek-time or Time-To-First-Byte (TTFB) in milliseconds, also called call + setup latency of a new read request. The value is a positive integer. transfer_bandwidth_mib_per_sec : int - Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive + Data transfer Bandwidth (BW) in MiB/sec (per connection). The value is a positive integer. 
ideal_bandwidth_utilization_frac : int, default 0.9 - Transfer bandwidth utilization fraction (per connection) to maximize the net + Transfer bandwidth utilization fraction (per connection) to maximize the net data load. The value is a positive float less than 1. max_ideal_request_size_mib : int, default 64 The maximum single data request size (in MiB) to maximize the net data load. From 49bf3d9bf2ca266fcf63dca1e57bdb83c9559b72 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 3 May 2024 06:10:20 +0900 Subject: [PATCH 040/261] GH-41467: [CI][Release] Don't push conda-verify-rc image (#41468) ### Rationale for this change Because it uses ubuntu:20.04 image directly. We don't build our image for it. ### What changes are included in this PR? Don't push an image for `conda-verify-rc`. ### Are these changes tested? No. ### Are there any user-facing changes? No. * GitHub Issue: #41467 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/tasks/verify-rc/github.linux.amd64.docker.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/verify-rc/github.linux.amd64.docker.yml b/dev/tasks/verify-rc/github.linux.amd64.docker.yml index 65b30b5c8d4df..7a28ba705dd50 100644 --- a/dev/tasks/verify-rc/github.linux.amd64.docker.yml +++ b/dev/tasks/verify-rc/github.linux.amd64.docker.yml @@ -43,7 +43,7 @@ jobs: -e TEST_{{ target|upper }}=1 \ {{ distro }}-verify-rc - {% if arrow.is_default_branch() %} + {% if arrow.is_default_branch() and distro != "conda" %} {{ macros.github_login_dockerhub()|indent }} - name: Push Docker Image shell: bash From 71e38fc8a9fdf102a5136793b738d7650ca053fa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 06:50:42 +0900 Subject: [PATCH 041/261] MINOR: [JS] Bump @typescript-eslint/eslint-plugin from 7.7.0 to 7.8.0 in /js (#41485) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@ typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 7.7.0 to 7.8.0.
Release notes (sourced from @typescript-eslint/eslint-plugin's releases):

7.8.0 (2024-04-29)

Features:
- rule-tester: assert suggestion messages are unique (#8995)
- typescript-estree: add maximumDefaultProjectFileMatchCount and wide allowDefaultProjectForFiles glob restrictions (#8925)

Fixes:
- eslint-plugin: [no-unsafe-argument] handle tagged templates (#8746)
- eslint-plugin: [prefer-optional-chain] suggests optional chaining during strict null equality check (#8717)
- eslint-plugin: [consistent-type-assertions] handle tagged templates (#8993)
- eslint-plugin: [no-unsafe-return] handle union types (#9001)
- eslint-plugin: [no-unused-vars] clear error report range (#8640)
- utils: export ESLint backwards-compat functions (#8976)

7.7.1 (2024-04-22)

Fixes:
- eslint-plugin: [no-unsafe-assignment] handle shorthand property assignment (#8800)
- eslint-plugin: [explicit-function-return-type] fix checking wrong ancestor's return type (#8809)
- eslint-plugin: [prefer-optional-chain] only look at left operand for requireNullish (#8559)
- eslint-plugin: [no-for-in-array] refine report location (#8874)
- eslint-plugin: [no-unnecessary-type-assertion] allow non-null assertion for void type (#8912)
Commits:
- ee677f6 chore(release): publish 7.8.0
- 8127873 fix(eslint-plugin): [no-unused-vars] clear error report range (#8640)
- 216d1b0 fix(eslint-plugin): [no-unsafe-return] handle union types (#9001)
- 51d2193 fix(eslint-plugin): [consistent-type-assertions] handle tagged templates (#8993)
- 4bed24d fix(eslint-plugin): [prefer-optional-chain] suggests optional chaining during...
- b0f7aa4 fix(eslint-plugin): [no-unsafe-argument] handle tagged templates (#8746)
- 219b841 chore: resolve lint issues on main branch (#8966)
- 3e19436 chore(release): publish 7.7.1
- b2552ca fix(eslint-plugin): [no-unnecessary-type-assertion] allow non-null assertion ...
- fdeba42 fix(eslint-plugin): [no-for-in-array] refine report location (#8874)
- Additional commits viewable in the compare view.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 77 +++++++++++++++++++++++++++++++++++-------------- 2 files changed, 57 insertions(+), 22 deletions(-) diff --git a/js/package.json b/js/package.json index 8cab229521f79..e9590a188820f 100644 --- a/js/package.json +++ b/js/package.json @@ -72,7 +72,7 @@ "@types/glob": "8.1.0", "@types/jest": "29.5.12", "@types/multistream": "4.1.3", - "@typescript-eslint/eslint-plugin": "7.7.0", + "@typescript-eslint/eslint-plugin": "7.8.0", "@typescript-eslint/parser": "7.7.0", "async-done": "2.0.0", "benny": "3.7.1", diff --git a/js/yarn.lock b/js/yarn.lock index fe483ab8aca36..ab092675b4806 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1436,16 +1436,16 @@ dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.7.0.tgz#bf34a02f221811505b8bf2f31060c8560c1bb0a3" - integrity sha512-GJWR0YnfrKnsRoluVO3PRb9r5aMZriiMMM/RHj5nnTrBy1/wIgk76XCtCKcnXGjpZQJQRFtGV9/0JJ6n30uwpQ== +"@typescript-eslint/eslint-plugin@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.8.0.tgz#c78e309fe967cb4de05b85cdc876fb95f8e01b6f" + integrity sha512-gFTT+ezJmkwutUPmB0skOj3GZJtlEGnlssems4AjkVweUPGj7jRwwqg0Hhg7++kPGJqKtTYx+R05Ftww372aIg== dependencies: "@eslint-community/regexpp" "^4.10.0" - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/type-utils" "7.7.0" - "@typescript-eslint/utils" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" + "@typescript-eslint/scope-manager" "7.8.0" + "@typescript-eslint/type-utils" "7.8.0" + "@typescript-eslint/utils" "7.8.0" + "@typescript-eslint/visitor-keys" "7.8.0" debug "^4.3.4" graphemer "^1.4.0" ignore "^5.3.1" @@ -1480,13 +1480,21 @@ "@typescript-eslint/types" "7.7.0" "@typescript-eslint/visitor-keys" "7.7.0" -"@typescript-eslint/type-utils@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.7.0.tgz#36792ff4209a781b058de61631a48df17bdefbc5" - integrity sha512-bOp3ejoRYrhAlnT/bozNQi3nio9tIgv3U5C0mVDdZC7cpcQEDZXvq8inrHYghLVwuNABRqrMW5tzAv88Vy77Sg== +"@typescript-eslint/scope-manager@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.8.0.tgz#bb19096d11ec6b87fb6640d921df19b813e02047" + integrity sha512-viEmZ1LmwsGcnr85gIq+FCYI7nO90DVbE37/ll51hjv9aG+YZMb4WDE2fyWpUR4O/UrhGRpYXK/XajcGTk2B8g== dependencies: - "@typescript-eslint/typescript-estree" "7.7.0" - "@typescript-eslint/utils" "7.7.0" + "@typescript-eslint/types" "7.8.0" + "@typescript-eslint/visitor-keys" "7.8.0" + +"@typescript-eslint/type-utils@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.8.0.tgz#9de166f182a6e4d1c5da76e94880e91831e3e26f" + integrity sha512-H70R3AefQDQpz9mGv13Uhi121FNMh+WEaRqcXTX09YEDky21km4dV1ZXJIp8QjXc4ZaVkXVdohvWDzbnbHDS+A== + dependencies: + "@typescript-eslint/typescript-estree" "7.8.0" + "@typescript-eslint/utils" "7.8.0" debug "^4.3.4" ts-api-utils "^1.3.0" @@ -1500,6 +1508,11 @@ resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.7.0.tgz#23af4d24bf9ce15d8d301236e3e3014143604f27" integrity sha512-G01YPZ1Bd2hn+KPpIbrAhEWOn5lQBrjxkzHkWvP6NucMXFtfXoevK82hzQdpfuQYuhkvFDeQYbzXCjR1z9Z03w== +"@typescript-eslint/types@7.8.0": + version "7.8.0" + 
resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.8.0.tgz#1fd2577b3ad883b769546e2d1ef379f929a7091d" + integrity sha512-wf0peJ+ZGlcH+2ZS23aJbOv+ztjeeP8uQ9GgwMJGVLx/Nj9CJt17GWgWWoSmoRVKAX2X+7fzEnAjxdvK2gqCLw== + "@typescript-eslint/typescript-estree@5.62.0": version "5.62.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.62.0.tgz#7d17794b77fabcac615d6a48fb143330d962eb9b" @@ -1527,17 +1540,31 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/utils@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.7.0.tgz#3d2b6606a60ac34f3c625facfb3b3ab7e126f58d" - integrity sha512-LKGAXMPQs8U/zMRFXDZOzmMKgFv3COlxUQ+2NMPhbqgVm6R1w+nU1i4836Pmxu9jZAuIeyySNrN/6Rc657ggig== +"@typescript-eslint/typescript-estree@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.8.0.tgz#b028a9226860b66e623c1ee55cc2464b95d2987c" + integrity sha512-5pfUCOwK5yjPaJQNy44prjCwtr981dO8Qo9J9PwYXZ0MosgAbfEMB008dJ5sNo3+/BN6ytBPuSvXUg9SAqB0dg== + dependencies: + "@typescript-eslint/types" "7.8.0" + "@typescript-eslint/visitor-keys" "7.8.0" + debug "^4.3.4" + globby "^11.1.0" + is-glob "^4.0.3" + minimatch "^9.0.4" + semver "^7.6.0" + ts-api-utils "^1.3.0" + +"@typescript-eslint/utils@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.8.0.tgz#57a79f9c0c0740ead2f622e444cfaeeb9fd047cd" + integrity sha512-L0yFqOCflVqXxiZyXrDr80lnahQfSOfc9ELAAZ75sqicqp2i36kEZZGuUymHNFoYOqxRT05up760b4iGsl02nQ== dependencies: "@eslint-community/eslint-utils" "^4.4.0" "@types/json-schema" "^7.0.15" "@types/semver" "^7.5.8" - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/typescript-estree" "7.7.0" + "@typescript-eslint/scope-manager" "7.8.0" + "@typescript-eslint/types" "7.8.0" + "@typescript-eslint/typescript-estree" "7.8.0" semver "^7.6.0" "@typescript-eslint/utils@^5.10.0": @@ -1570,6 +1597,14 @@ "@typescript-eslint/types" "7.7.0" eslint-visitor-keys "^3.4.3" +"@typescript-eslint/visitor-keys@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.8.0.tgz#7285aab991da8bee411a42edbd5db760d22fdd91" + integrity sha512-q4/gibTNBQNA0lGyYQCmWRS5D15n8rXh4QjK3KV+MBPlTYHpfBUT3D3PaPR/HeNiI9W6R7FvlkcGhNyAoP+caA== + dependencies: + "@typescript-eslint/types" "7.8.0" + eslint-visitor-keys "^3.4.3" + "@ungap/structured-clone@^1.2.0": version "1.2.0" resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" From 9749d7d653e1b106d0662624b22b2982b3ad0516 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 2 May 2024 22:02:00 -0300 Subject: [PATCH 042/261] GH-39798: [C++] Optimize Take for fixed-size types including nested fixed-size lists (#41297) ### Rationale for this change Introduce utilities for dealing with fixed-width types (including fixed-size lists of fixed-width types) generically. And use it for initial optimizations of `Take` and `Filter`. ### What changes are included in this PR? 
- [x] Introduce utilities for dealing with fixed-width types generically - [x] Use faster `Take` kernel on small power-of-2 byte widths of fixed-width types - [x] from `FSLTakeExec` (including FSLs of FSBs) - [x] from `FSBTakeExec` (done before this PR) - [x] ~Take on any fixed-width type~ (as a separate issue #41301) - [x] Use faster `Filter` kernel on both primitive and fixed-width types of any length - [x] from `FSLFilterExec` (including FSLs of FSBs) - [x] from `FSBFilterExec` (done before this PR) ### Are these changes tested? By existing and new tests. ### Are there any user-facing changes? Some functions added to the `arrow::util` namespace and documented inline. * GitHub Issue: #39798 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/CMakeLists.txt | 1 + .../vector_selection_filter_internal.cc | 30 +- .../kernels/vector_selection_internal.cc | 56 +++- .../kernels/vector_selection_internal.h | 7 +- .../kernels/vector_selection_take_internal.cc | 39 ++- .../compute/kernels/vector_selection_test.cc | 156 +++++++-- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/fixed_width_internal.cc | 226 +++++++++++++ cpp/src/arrow/util/fixed_width_internal.h | 307 ++++++++++++++++++ cpp/src/arrow/util/fixed_width_test.cc | 217 +++++++++++++ cpp/src/arrow/util/fixed_width_test_util.h | 203 ++++++++++++ 11 files changed, 1171 insertions(+), 72 deletions(-) create mode 100644 cpp/src/arrow/util/fixed_width_internal.cc create mode 100644 cpp/src/arrow/util/fixed_width_internal.h create mode 100644 cpp/src/arrow/util/fixed_width_test.cc create mode 100644 cpp/src/arrow/util/fixed_width_test_util.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 2ef82dd614f84..5d61112518f5e 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -501,6 +501,7 @@ set(ARROW_UTIL_SRCS util/decimal.cc util/delimiting.cc util/dict_util.cc + util/fixed_width_internal.cc util/float16.cc util/formatting.cc util/future.cc diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 8825d697fdf77..d5e5e5ad289ac 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -40,6 +40,7 @@ #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" #include "arrow/util/bitmap_ops.h" +#include "arrow/util/fixed_width_internal.h" namespace arrow { @@ -158,9 +159,11 @@ class PrimitiveFilterImpl { PrimitiveFilterImpl(const ArraySpan& values, const ArraySpan& filter, FilterOptions::NullSelectionBehavior null_selection, ArrayData* out_arr) - : byte_width_(values.type->byte_width()), + : byte_width_(util::FixedWidthInBytes(*values.type)), values_is_valid_(values.buffers[0].data), - values_data_(values.buffers[1].data), + // No offset applied for boolean because it's a bitmap + values_data_(kIsBoolean ? 
values.buffers[1].data + : util::OffsetPointerOfFixedWidthValues(values)), values_null_count_(values.null_count), values_offset_(values.offset), values_length_(values.length), @@ -169,17 +172,13 @@ class PrimitiveFilterImpl { if constexpr (kByteWidth >= 0 && !kIsBoolean) { DCHECK_EQ(kByteWidth, byte_width_); } - if constexpr (!kIsBoolean) { - // No offset applied for boolean because it's a bitmap - values_data_ += values.offset * byte_width(); - } + DCHECK_EQ(out_arr->offset, 0); if (out_arr->buffers[0] != nullptr) { // May be unallocated if neither filter nor values contain nulls out_is_valid_ = out_arr->buffers[0]->mutable_data(); } - out_data_ = out_arr->buffers[1]->mutable_data(); - DCHECK_EQ(out_arr->offset, 0); + out_data_ = util::MutableFixedWidthValuesPointer(out_arr); out_length_ = out_arr->length; out_position_ = 0; } @@ -416,7 +415,7 @@ class PrimitiveFilterImpl { out_position_ += length; } - constexpr int32_t byte_width() const { + constexpr int64_t byte_width() const { if constexpr (kByteWidth >= 0) { return kByteWidth; } else { @@ -425,7 +424,7 @@ class PrimitiveFilterImpl { } private: - int32_t byte_width_; + int64_t byte_width_; const uint8_t* values_is_valid_; const uint8_t* values_data_; int64_t values_null_count_; @@ -439,6 +438,8 @@ class PrimitiveFilterImpl { int64_t out_position_; }; +} // namespace + Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const ArraySpan& values = batch[0].array; const ArraySpan& filter = batch[1].array; @@ -468,9 +469,10 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult // validity bitmap. const bool allocate_validity = values.null_count != 0 || !filter_null_count_is_zero; - const int bit_width = values.type->bit_width(); - RETURN_NOT_OK(PreallocatePrimitiveArrayData(ctx, output_length, bit_width, - allocate_validity, out_arr)); + DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false)); + const int64_t bit_width = util::FixedWidthInBits(*values.type); + RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( + ctx, output_length, /*source=*/values, allocate_validity, out_arr)); switch (bit_width) { case 1: @@ -505,6 +507,8 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult return Status::OK(); } +namespace { + // ---------------------------------------------------------------------- // Optimized filter for base binary types (32-bit and 64-bit) diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index a0fe2808e3e4e..93cd5060348db 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -37,6 +37,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" +#include "arrow/util/fixed_width_internal.h" #include "arrow/util/int_util.h" #include "arrow/util/logging.h" #include "arrow/util/ree_util.h" @@ -65,24 +66,6 @@ void RegisterSelectionFunction(const std::string& name, FunctionDoc doc, DCHECK_OK(registry->AddFunction(std::move(func))); } -Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit_width, - bool allocate_validity, ArrayData* out) { - // Preallocate memory - out->length = length; - out->buffers.resize(2); - - if (allocate_validity) { - ARROW_ASSIGN_OR_RAISE(out->buffers[0], ctx->AllocateBitmap(length)); - } - if (bit_width == 1) { - 
ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length)); - } else { - ARROW_ASSIGN_OR_RAISE(out->buffers[1], - ctx->Allocate(bit_util::BytesForBits(length * bit_width))); - } - return Status::OK(); -} - namespace { /// \brief Iterate over a REE filter, emitting ranges of a plain values array that @@ -909,6 +892,20 @@ Status LargeListFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult } Status FSLFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& values = batch[0].array; + + // If a FixedSizeList wraps a fixed-width type we can, in some cases, use + // PrimitiveFilterExec for a fixed-size list array. + if (util::IsFixedWidthLike(values, + /*force_null_count=*/true, + /*exclude_dictionary=*/true)) { + const auto byte_width = util::FixedWidthInBytes(*values.type); + // 0 is a valid byte width for FixedSizeList, but PrimitiveFilterExec + // might not handle it correctly. + if (byte_width > 0) { + return PrimitiveFilterExec(ctx, batch, out); + } + } return FilterExec(ctx, batch, out); } @@ -968,6 +965,29 @@ Status LargeListTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* } Status FSLTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const ArraySpan& values = batch[0].array; + + // If a FixedSizeList wraps a fixed-width type we can, in some cases, use + // PrimitiveTakeExec for a fixed-size list array. + if (util::IsFixedWidthLike(values, + /*force_null_count=*/true, + /*exclude_dictionary=*/true)) { + const auto byte_width = util::FixedWidthInBytes(*values.type); + // Additionally, PrimitiveTakeExec is only implemented for specific byte widths. + // TODO(GH-41301): Extend PrimitiveTakeExec for any fixed-width type. + switch (byte_width) { + case 1: + case 2: + case 4: + case 8: + case 16: + case 32: + return PrimitiveTakeExec(ctx, batch, out); + default: + break; // fallback to TakeExec + } + } + return TakeExec(ctx, batch, out); } diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.h b/cpp/src/arrow/compute/kernels/vector_selection_internal.h index 95f3e51cd67e3..a169f4b38a2b8 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.h +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.h @@ -45,12 +45,6 @@ void RegisterSelectionFunction(const std::string& name, FunctionDoc doc, const FunctionOptions* default_options, FunctionRegistry* registry); -/// \brief Allocate an ArrayData for a primitive array with a given length and bit width -/// -/// \param[in] bit_width 1 or a multiple of 8 -Status PreallocatePrimitiveArrayData(KernelContext* ctx, int64_t length, int bit_width, - bool allocate_validity, ArrayData* out); - /// \brief Callback type for VisitPlainxREEFilterOutputSegments. /// /// position is the logical position in the values array relative to its offset. 
@@ -70,6 +64,7 @@ void VisitPlainxREEFilterOutputSegments( FilterOptions::NullSelectionBehavior null_selection, const EmitREEFilterSegment& emit_segment); +Status PrimitiveFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status ListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status LargeListFilterExec(KernelContext*, const ExecSpan&, ExecResult*); Status FSLFilterExec(KernelContext*, const ExecSpan&, ExecResult*); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 5cd3710828485..48a2de9936cd4 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -37,6 +37,7 @@ #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bit_util.h" +#include "arrow/util/fixed_width_internal.h" #include "arrow/util/int_util.h" #include "arrow/util/ree_util.h" @@ -323,7 +324,7 @@ namespace { using TakeState = OptionsWrapper; // ---------------------------------------------------------------------- -// Implement optimized take for primitive types from boolean to 1/2/4/8-byte +// Implement optimized take for primitive types from boolean to 1/2/4/8/16/32-byte // C-type based types. Use common implementation for every byte width and only // generate code for unsigned integer indices, since after boundschecking to // check for negative numbers in the indices we can safely reinterpret_cast @@ -333,16 +334,20 @@ using TakeState = OptionsWrapper; /// use the logical Arrow type but rather the physical C type. This way we /// only generate one take function for each byte width. /// -/// This function assumes that the indices have been boundschecked. +/// Also note that this function can also handle fixed-size-list arrays if +/// they fit the criteria described in fixed_width_internal.h, so use the +/// function defined in that file to access values and destination pointers +/// and DO NOT ASSUME `values.type()` is a primitive type. +/// +/// \pre the indices have been boundschecked template struct PrimitiveTakeImpl { static constexpr int kValueWidth = ValueWidthConstant::value; static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { - DCHECK_EQ(values.type->byte_width(), kValueWidth); - const auto* values_data = - values.GetValues(1, 0) + kValueWidth * values.offset; + DCHECK_EQ(util::FixedWidthInBytes(*values.type), kValueWidth); + const auto* values_data = util::OffsetPointerOfFixedWidthValues(values); const uint8_t* values_is_valid = values.buffers[0].data; auto values_offset = values.offset; @@ -350,16 +355,15 @@ struct PrimitiveTakeImpl { const uint8_t* indices_is_valid = indices.buffers[0].data; auto indices_offset = indices.offset; - auto out = out_arr->GetMutableValues(1, 0) + kValueWidth * out_arr->offset; + DCHECK_EQ(out_arr->offset, 0); + auto* out = util::MutableFixedWidthValuesPointer(out_arr); auto out_is_valid = out_arr->buffers[0]->mutable_data(); - auto out_offset = out_arr->offset; - DCHECK_EQ(out_offset, 0); // If either the values or indices have nulls, we preemptively zero out the // out validity bitmap so that we don't have to use ClearBit in each // iteration for nulls. 
if (values.null_count != 0 || indices.null_count != 0) { - bit_util::SetBitsTo(out_is_valid, out_offset, indices.length, false); + bit_util::SetBitsTo(out_is_valid, 0, indices.length, false); } auto WriteValue = [&](int64_t position) { @@ -386,7 +390,7 @@ struct PrimitiveTakeImpl { valid_count += block.popcount; if (block.popcount == block.length) { // Fastest path: neither values nor index nulls - bit_util::SetBitsTo(out_is_valid, out_offset + position, block.length, true); + bit_util::SetBitsTo(out_is_valid, position, block.length, true); for (int64_t i = 0; i < block.length; ++i) { WriteValue(position); ++position; @@ -396,7 +400,7 @@ struct PrimitiveTakeImpl { for (int64_t i = 0; i < block.length; ++i) { if (bit_util::GetBit(indices_is_valid, indices_offset + position)) { // index is not null - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); WriteValue(position); } else { WriteZero(position); @@ -416,7 +420,7 @@ struct PrimitiveTakeImpl { values_offset + indices_data[position])) { // value is not null WriteValue(position); - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); ++valid_count; } else { WriteZero(position); @@ -433,7 +437,7 @@ struct PrimitiveTakeImpl { values_offset + indices_data[position])) { // index is not null && value is not null WriteValue(position); - bit_util::SetBit(out_is_valid, out_offset + position); + bit_util::SetBit(out_is_valid, position); ++valid_count; } else { WriteZero(position); @@ -584,14 +588,17 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ArrayData* out_arr = out->array_data().get(); - const int bit_width = values.type->bit_width(); + DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false, + /*exclude_dictionary=*/true)); + const int64_t bit_width = util::FixedWidthInBits(*values.type); // TODO: When neither values nor indices contain nulls, we can skip // allocating the validity bitmap altogether and save time and space. A // streamlined PrimitiveTakeImpl would need to be written that skips all // interactions with the output validity bitmap, though. 
- RETURN_NOT_OK(PreallocatePrimitiveArrayData(ctx, indices.length, bit_width, - /*allocate_validity=*/true, out_arr)); + RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( + ctx, indices.length, /*source=*/values, + /*allocate_validity=*/true, out_arr)); switch (bit_width) { case 1: TakeIndexDispatch(values, indices, out_arr); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index ec94b328ea361..4c7d85b103f36 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/array/builder_nested.h" #include "arrow/array/concatenate.h" #include "arrow/chunked_array.h" #include "arrow/compute/api.h" @@ -32,6 +33,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/fixed_width_test_util.h" #include "arrow/util/logging.h" namespace arrow { @@ -726,7 +728,37 @@ TEST_F(TestFilterKernelWithLargeList, FilterListInt32) { "[[1,2], null, null]"); } -class TestFilterKernelWithFixedSizeList : public TestFilterKernel {}; +class TestFilterKernelWithFixedSizeList : public TestFilterKernel { + protected: + std::vector> five_length_filters_ = { + ArrayFromJSON(boolean(), "[false, false, false, false, false]"), + ArrayFromJSON(boolean(), "[true, true, true, true, true]"), + ArrayFromJSON(boolean(), "[false, true, true, false, true]"), + ArrayFromJSON(boolean(), "[null, true, null, false, true]"), + }; + + void AssertFilterOnNestedLists(const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + using NLG = ::arrow::util::internal::NestedListGenerator; + constexpr int64_t kLength = 5; + // Create two equivalent lists: one as a FixedSizeList and another as a List. + ASSERT_OK_AND_ASSIGN(auto fsl_list, + NLG::NestedFSLArray(inner_type, list_sizes, kLength)); + ASSERT_OK_AND_ASSIGN(auto list, + NLG::NestedListArray(inner_type, list_sizes, kLength)); + + ARROW_SCOPED_TRACE("CheckTakeOnNestedLists of type `", *fsl_list->type(), "`"); + + for (auto& filter : five_length_filters_) { + // Use the Filter on ListType as the reference implementation. 
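+      // (Editorial note, inferred from the test setup above: both arrays hold
+      // the same logical values, so filtering the List and casting the result
+      // back to the FixedSizeList type must equal filtering the FixedSizeList
+      // directly.)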
+ ASSERT_OK_AND_ASSIGN(auto expected_list, + Filter(*list, *filter, /*options=*/emit_null_)); + ASSERT_OK_AND_ASSIGN(auto expected_fsl, Cast(expected_list, fsl_list->type())); + auto expected_fsl_array = expected_fsl.make_array(); + this->AssertFilter(fsl_list, filter, expected_fsl_array); + } + } +}; TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListInt32) { std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]"; @@ -740,6 +772,33 @@ TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListInt32) { "[[1, null, 3], [7, 8, null]]"); } +TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListVarWidth) { + std::string list_json = + R"([["zero", "one", ""], ["two", "", "three"], ["four", "five", "six"], ["seven", "eight", ""]])"; + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 0, 0, 0]", "[]"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 1, 1, null]", + R"([["two", "", "three"], ["four", "five", "six"], null])"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 0, 1, null]", + R"([["four", "five", "six"], null])"); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[1, 1, 1, 1]", list_json); + this->AssertFilter(fixed_size_list(utf8(), 3), list_json, "[0, 1, 0, 1]", + R"([["two", "", "three"], ["seven", "eight", ""]])"); +} + +TEST_F(TestFilterKernelWithFixedSizeList, FilterFixedSizeListModuloNesting) { + using NLG = ::arrow::util::internal::NestedListGenerator; + const std::vector> value_types = { + int16(), + int32(), + int64(), + }; + NLG::VisitAllNestedListConfigurations( + value_types, [this](const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + this->AssertFilterOnNestedLists(inner_type, list_sizes); + }); +} + class TestFilterKernelWithMap : public TestFilterKernel {}; TEST_F(TestFilterKernelWithMap, FilterMapStringToInt32) { @@ -1034,29 +1093,34 @@ Status TakeJSON(const std::shared_ptr& type, const std::string& values .Value(out); } +void DoCheckTake(const std::shared_ptr& values, + const std::shared_ptr& indices, + const std::shared_ptr& expected) { + AssertTakeArrays(values, indices, expected); + + // Check sliced values + ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(values->type(), 2)); + ASSERT_OK_AND_ASSIGN(auto values_sliced, + Concatenate({values_filler, values, values_filler})); + values_sliced = values_sliced->Slice(2, values->length()); + AssertTakeArrays(values_sliced, indices, expected); + + // Check sliced indices + ASSERT_OK_AND_ASSIGN(auto zero, MakeScalar(indices->type(), int8_t{0})); + ASSERT_OK_AND_ASSIGN(auto indices_filler, MakeArrayFromScalar(*zero, 3)); + ASSERT_OK_AND_ASSIGN(auto indices_sliced, + Concatenate({indices_filler, indices, indices_filler})); + indices_sliced = indices_sliced->Slice(3, indices->length()); + AssertTakeArrays(values, indices_sliced, expected); +} + void CheckTake(const std::shared_ptr& type, const std::string& values_json, const std::string& indices_json, const std::string& expected_json) { auto values = ArrayFromJSON(type, values_json); auto expected = ArrayFromJSON(type, expected_json); - for (auto index_type : {int8(), uint32()}) { auto indices = ArrayFromJSON(index_type, indices_json); - AssertTakeArrays(values, indices, expected); - - // Check sliced values - ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(type, 2)); - ASSERT_OK_AND_ASSIGN(auto values_sliced, - Concatenate({values_filler, values, values_filler})); - values_sliced = values_sliced->Slice(2, values->length()); - 
AssertTakeArrays(values_sliced, indices, expected); - - // Check sliced indices - ASSERT_OK_AND_ASSIGN(auto zero, MakeScalar(index_type, int8_t{0})); - ASSERT_OK_AND_ASSIGN(auto indices_filler, MakeArrayFromScalar(*zero, 3)); - ASSERT_OK_AND_ASSIGN(auto indices_sliced, - Concatenate({indices_filler, indices, indices_filler})); - indices_sliced = indices_sliced->Slice(3, indices->length()); - AssertTakeArrays(values, indices_sliced, expected); + DoCheckTake(values, indices, expected); } } @@ -1427,7 +1491,25 @@ TEST_F(TestTakeKernelWithLargeList, TakeLargeListInt32) { CheckTake(large_list(int32()), list_json, "[null, 1, 2, 0]", "[null, [1,2], null, []]"); } -class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped {}; +class TestTakeKernelWithFixedSizeList : public TestTakeKernelTyped { + protected: + void CheckTakeOnNestedLists(const std::shared_ptr& inner_type, + const std::vector& list_sizes, int64_t length) { + using NLG = ::arrow::util::internal::NestedListGenerator; + // Create two equivalent lists: one as a FixedSizeList and another as a List. + ASSERT_OK_AND_ASSIGN(auto fsl_list, + NLG::NestedFSLArray(inner_type, list_sizes, length)); + ASSERT_OK_AND_ASSIGN(auto list, NLG::NestedListArray(inner_type, list_sizes, length)); + + ARROW_SCOPED_TRACE("CheckTakeOnNestedLists of type `", *fsl_list->type(), "`"); + + auto indices = ArrayFromJSON(int64(), "[1, 2, 4]"); + // Use the Take on ListType as the reference implementation. + ASSERT_OK_AND_ASSIGN(auto expected_list, Take(*list, *indices)); + ASSERT_OK_AND_ASSIGN(auto expected_fsl, Cast(*expected_list, fsl_list->type())); + DoCheckTake(fsl_list, indices, expected_fsl); + } +}; TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { std::string list_json = "[null, [1, null, 3], [4, 5, 6], [7, 8, null]]"; @@ -1449,6 +1531,42 @@ TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListInt32) { "[0, 1, 0]"); } +TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListVarWidth) { + std::string list_json = + R"([["zero", "one", ""], ["two", "", "three"], ["four", "five", "six"], ["seven", "eight", ""]])"; + CheckTake(fixed_size_list(utf8(), 3), list_json, "[]", "[]"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[3, 2, 1]", + R"([["seven", "eight", ""], ["four", "five", "six"], ["two", "", "three"]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[null, 2, 0]", + R"([null, ["four", "five", "six"], ["zero", "one", ""]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, R"([null, null])", "[null, null]"); + CheckTake( + fixed_size_list(utf8(), 3), list_json, "[3, 0, 0,3]", + R"([["seven", "eight", ""], ["zero", "one", ""], ["zero", "one", ""], ["seven", "eight", ""]])"); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[0, 1, 2, 3]", list_json); + CheckTake(fixed_size_list(utf8(), 3), list_json, "[2, 2, 2, 2, 2, 2, 1]", + R"([ + ["four", "five", "six"], ["four", "five", "six"], + ["four", "five", "six"], ["four", "five", "six"], + ["four", "five", "six"], ["four", "five", "six"], + ["two", "", "three"] + ])"); +} + +TEST_F(TestTakeKernelWithFixedSizeList, TakeFixedSizeListModuloNesting) { + using NLG = ::arrow::util::internal::NestedListGenerator; + const std::vector> value_types = { + int16(), + int32(), + int64(), + }; + NLG::VisitAllNestedListConfigurations( + value_types, [this](const std::shared_ptr& inner_type, + const std::vector& list_sizes) { + this->CheckTakeOnNestedLists(inner_type, list_sizes, /*length=*/5); + }); +} + class TestTakeKernelWithMap : public TestTakeKernelTyped {}; 
TEST_F(TestTakeKernelWithMap, TakeMapStringToInt32) { diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index e26efba28594b..087e4e3879e56 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -56,6 +56,7 @@ add_arrow_test(utility-test compression_test.cc decimal_test.cc float16_test.cc + fixed_width_test.cc formatting_util_test.cc key_value_metadata_test.cc hashing_test.cc diff --git a/cpp/src/arrow/util/fixed_width_internal.cc b/cpp/src/arrow/util/fixed_width_internal.cc new file mode 100644 index 0000000000000..164af3cff66b3 --- /dev/null +++ b/cpp/src/arrow/util/fixed_width_internal.cc @@ -0,0 +1,226 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/array/data.h" +#include "arrow/compute/kernel.h" +#include "arrow/result.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" +#include "arrow/util/fixed_width_internal.h" +#include "arrow/util/logging.h" +#include "arrow/util/small_vector.h" + +namespace arrow::util { + +using ::arrow::internal::checked_cast; + +bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count, + bool exclude_dictionary) { + return IsFixedWidthLike(source, force_null_count, + [exclude_dictionary](const DataType& type) { + return !exclude_dictionary || type.id() != Type::DICTIONARY; + }); +} + +static int64_t FixedWidthInBytesFallback(const FixedSizeListType& fixed_size_list_type) { + auto* fsl = &fixed_size_list_type; + int64_t list_size = fsl->list_size(); + for (auto type = fsl->value_type().get();;) { + if (type->id() == Type::FIXED_SIZE_LIST) { + fsl = checked_cast(type); + list_size *= fsl->list_size(); + type = fsl->value_type().get(); + continue; + } + if (type->id() != Type::BOOL && is_fixed_width(type->id())) { + const int64_t flat_byte_width = list_size * type->byte_width(); + DCHECK_GE(flat_byte_width, 0); + return flat_byte_width; + } + break; + } + return -1; +} + +int64_t FixedWidthInBytes(const DataType& type) { + auto type_id = type.id(); + if (is_fixed_width(type_id)) { + const int32_t num_bits = type.bit_width(); + return (type_id == Type::BOOL) ? 
-1 : num_bits / 8; + } + if (type_id == Type::FIXED_SIZE_LIST) { + auto& fsl = ::arrow::internal::checked_cast(type); + return FixedWidthInBytesFallback(fsl); + } + return -1; +} + +int64_t FixedWidthInBits(const DataType& type) { + auto type_id = type.id(); + if (is_fixed_width(type_id)) { + return type.bit_width(); + } + const int64_t byte_width = FixedWidthInBytes(type); + if (ARROW_PREDICT_FALSE(byte_width < 0)) { + return -1; + } + return byte_width * 8; +} + +namespace internal { + +Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, + int64_t length, const ArraySpan& source, + bool allocate_validity, ArrayData* out) { + DCHECK(!source.MayHaveNulls() || allocate_validity) + << "allocate_validity cannot be false if source may have nulls"; + DCHECK_EQ(source.type->id(), out->type->id()); + auto* type = source.type; + out->length = length; + if (type->id() == Type::FIXED_SIZE_LIST) { + out->buffers.resize(1); + out->child_data = {std::make_shared()}; + } else { + out->buffers.resize(2); + } + if (allocate_validity) { + ARROW_ASSIGN_OR_RAISE(out->buffers[0], ctx->AllocateBitmap(length)); + } + + if (type->id() == Type::BOOL) { + ARROW_ASSIGN_OR_RAISE(out->buffers[1], ctx->AllocateBitmap(length)); + return Status::OK(); + } + if (is_fixed_width(type->id())) { + if (type->id() == Type::DICTIONARY) { + return Status::NotImplemented( + "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type); + } + ARROW_ASSIGN_OR_RAISE(out->buffers[1], + ctx->Allocate(length * source.type->byte_width())); + return Status::OK(); + } + if (type->id() == Type::FIXED_SIZE_LIST) { + auto& fsl_type = checked_cast(*type); + auto& value_type = fsl_type.value_type(); + if (ARROW_PREDICT_FALSE(value_type->id() == Type::BOOL)) { + return Status::Invalid("PreallocateFixedWidthArrayData: Invalid type: ", fsl_type); + } + if (ARROW_PREDICT_FALSE(value_type->id() == Type::DICTIONARY)) { + return Status::NotImplemented( + "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type); + } + if (source.child_data[0].MayHaveNulls()) { + return Status::Invalid( + "PreallocateFixedWidthArrayData: " + "FixedSizeList may have null values in child array: ", + fsl_type); + } + auto* child_values = out->child_data[0].get(); + child_values->type = value_type; + return PreallocateFixedWidthArrayData(ctx, length * fsl_type.list_size(), + /*source=*/source.child_data[0], + /*allocate_validity=*/false, + /*out=*/child_values); + } + return Status::Invalid("PreallocateFixedWidthArrayData: Invalid type: ", *type); +} + +} // namespace internal + +/// \pre same as OffsetPointerOfFixedWidthValues +/// \pre source.type->id() != Type::BOOL +static const uint8_t* OffsetPointerOfFixedWidthValuesFallback(const ArraySpan& source) { + using OffsetAndListSize = std::pair; + auto get_offset = [](auto pair) { return pair.first; }; + auto get_list_size = [](auto pair) { return pair.second; }; + ::arrow::internal::SmallVector stack; + + DCHECK_NE(source.type->id(), Type::BOOL); + + int64_t list_size = 1; + auto* array = &source; + while (array->type->id() == Type::FIXED_SIZE_LIST) { + list_size *= checked_cast(array->type)->list_size(); + stack.emplace_back(array->offset, list_size); + array = &array->child_data[0]; + } + // Now that innermost values were reached, pop the stack and calculate the offset + // in bytes of the innermost values buffer by considering the offset at each + // level of nesting. 
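+  // (Editorial illustration of the arithmetic below, consistent with the
+  // worked example documented in fixed_width_internal.h: with nesting
+  // fixed_size_list<fixed_size_list<int32, 2>, 3> and offsets 2, 5 and 7 at
+  // the three levels, the innermost values pointer is advanced by
+  //   ((2 * 3 + 5) * 2 + 7) * sizeof(int32) = 29 * 4 = 116 bytes.)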
+ DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); + DCHECK(array == &source || !array->MayHaveNulls()) + << "OffsetPointerOfFixedWidthValues: array is expected to be flat or have no " + "nulls in the arrays nested by FIXED_SIZE_LIST."; + int64_t value_width = array->type->byte_width(); + int64_t offset_in_bytes = array->offset * value_width; + for (auto it = stack.rbegin(); it != stack.rend(); ++it) { + value_width *= get_list_size(*it); + offset_in_bytes += get_offset(*it) * value_width; + } + return value_width < 0 ? nullptr : array->GetValues(1, offset_in_bytes); +} + +const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source) { + auto type_id = source.type->id(); + if (is_fixed_width(type_id)) { + if (ARROW_PREDICT_FALSE(type_id == Type::BOOL)) { + // BOOL arrays are bit-packed, thus a byte-aligned pointer cannot be produced in the + // general case. Returning something for BOOL arrays that happen to byte-align + // because offset=0 would create too much confusion. + return nullptr; + } + return source.GetValues(1, 0) + source.offset * source.type->byte_width(); + } + return OffsetPointerOfFixedWidthValuesFallback(source); +} + +/// \brief Get the mutable pointer to the fixed-width values of an array +/// allocated by PreallocateFixedWidthArrayData. +/// +/// \pre mutable_array->offset and the offset of child array (if it's a +/// FixedSizeList) MUST be 0 (recursively). +/// \pre IsFixedWidthLike(ArraySpan(mutable_array)) or the more restrictive +/// is_fixed_width(*mutable_array->type) MUST be true +/// \return The mutable pointer to the fixed-width byte blocks of the array. If +/// pre-conditions are not satisfied, the return values is undefined. +uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array) { + auto type_id = mutable_array->type->id(); + if (type_id == Type::FIXED_SIZE_LIST) { + auto* array = mutable_array; + do { + DCHECK_EQ(array->offset, 0); + DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of " + << mutable_array->type->ToString(true); + array = array->child_data[0].get(); + } while (array->type->id() == Type::FIXED_SIZE_LIST); + DCHECK_EQ(array->offset, 0); + DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); + return array->GetMutableValues(1, 0); + } + DCHECK_EQ(mutable_array->offset, 0); + // BOOL is allowed here only because the offset is expected to be 0, + // so the byte-aligned pointer also points to the first *bit* of the buffer. + DCHECK(is_fixed_width(type_id)); + return mutable_array->GetMutableValues(1, 0); +} + +} // namespace arrow::util diff --git a/cpp/src/arrow/util/fixed_width_internal.h b/cpp/src/arrow/util/fixed_width_internal.h new file mode 100644 index 0000000000000..f6959485fbd01 --- /dev/null +++ b/cpp/src/arrow/util/fixed_width_internal.h @@ -0,0 +1,307 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+
+#include "arrow/array/data.h"
+#include "arrow/type.h"
+#include "arrow/type_fwd.h"
+#include "arrow/type_traits.h"
+
+namespace arrow::compute {
+// XXX: remove dependency on compute::KernelContext
+class KernelContext;
+}  // namespace arrow::compute
+
+namespace arrow::util {
+
+/// \brief Checks if the given array has a fixed-width type or if it's an array of
+/// fixed-size list that can be flattened to an array of fixed-width values.
+///
+/// Fixed-width types are the ones defined by the is_fixed_width() predicate in
+/// type_traits.h. They are all the types that pass any of the following
+/// predicates:
+///
+/// - is_primitive()
+/// - is_fixed_size_binary()
+/// - is_dictionary()
+///
+/// At least 3 types in this set require special care:
+/// - `Type::BOOL` is fixed-width, but it's a 1-bit type and pointers to the first bit
+///   in boolean buffers are not always aligned to byte boundaries.
+/// - `Type::DICTIONARY` is fixed-width because the indices are fixed-width, but the
+///   dictionary values are not necessarily fixed-width and have to be managed
+///   by separate operations.
+/// - `Type::FIXED_SIZE_BINARY`: unlike other fixed-width types, fixed-size binary
+///   values are defined by a size attribute that is not known at compile time.
+///   The other types have power-of-2 byte widths, while fixed-size binary can
+///   have any byte width including 0.
+///
+/// Additionally, we say that a type is "fixed-width like" if it's fixed-width as
+/// defined above, or if it's a fixed-size list (or nested fixed-size lists) and
+/// the innermost type is fixed-width and the following restrictions also apply:
+/// - The value type of the innermost fixed-size list is not BOOL (it has to be excluded
+///   because a 1-bit type doesn't byte-align)
+/// - Only the top-level array may have nulls; all the inner arrays have to be completely
+///   free of nulls so we don't need to manage internal validity bitmaps.
+///
+/// Take the following `fixed_size_list<fixed_size_list<int32, 2>, 3>` array as an
+/// example:
+///
+///     [
+///       [[1, 2], [3, 4], [ 5,  6]],
+///       null,
+///       [[7, 8], [9, 10], [11, 12]]
+///     ]
+///
+/// in memory, it would look like:
+///
+///     {
+///       type: fixed_size_list<fixed_size_list<int32, 2>, 3>,
+///       length: 3,
+///       null_count: 1,
+///       offset: 0,
+///       buffers: [
+///         0: [0b00000101]
+///       ],
+///       child_data: [
+///         0: {
+///           type: fixed_size_list<int32, 2>,
+///           length: 9,
+///           null_count: 0,
+///           offset: 0,
+///           buffers: [0: NULL],
+///           child_data: [
+///             0: {
+///               type: int32,
+///               length: 18,
+///               null_count: 0,
+///               offset: 0,
+///               buffers: [
+///                 0: NULL,
+///                 1: [ 1, 2, 3,  4,  5,  6,
+///                      0, 0, 0,  0,  0,  0,
+///                      7, 8, 9, 10, 11, 12 ]
+///               ],
+///               child_data: []
+///             }
+///           ]
+///         }
+///       ]
+///     }
+///
+/// This layout fits the fixed-width like definition because the innermost type
+/// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't
+/// have nulls. The validity bitmap is only needed at the top-level array.
+///
+/// Writing to this array can be done in the same way writing to a flat fixed-width
+/// array is done, by:
+/// 1. Updating the validity bitmap at the top-level array if nulls are present.
+/// 2. Updating a contiguous fixed-width block of memory through a single pointer.
+///
+/// The length of this block of memory is the product of the list sizes in the
+/// `FixedSizeList` types and the byte width of the innermost fixed-width type:
+///
+///     3 * 2 * 4 = 24 bytes
+///
+/// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by
+/// simply setting the validity bit to 1 and writing the 24-byte sequence of
+/// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`.
+///
+/// The length of the top-level array fully defines the lengths that all the nested
+/// arrays must have, which makes defining all the lengths as easy as defining the
+/// length of the top-level array.
+///
+///     length = 3
+///     child_data[0].length == 3 * 3 == 9
+///     child_data[0].child_data[0].length == 3 * 3 * 2 == 18
+///
+///     child_data[0].child_data[0].buffers[1].size() >=
+///       (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72)
+///
+/// Dealing with offsets is a bit involved. Let's say the array described above has
+/// the offsets 2, 5, and 7:
+///
+///     {
+///       type: fixed_size_list<fixed_size_list<int32, 2>, 3>,
+///       offset: 2,
+///       ...
+///       child_data: [
+///         0: {
+///           type: fixed_size_list<int32, 2>,
+///           offset: 5,
+///           ...
+///           child_data: [
+///             0: {
+///               type: int32,
+///               offset: 7,
+///               buffers: [
+///                 0: NULL,
+///                 1: [ 1, 1, 1, 1, 1, 1, 1,      // 7 values skipped
+///                      0,1, 0,1, 0,1, 0,1, 0,1,  // 5 [x,x] values skipped
+///
+///                      0,0,0,0,0,1,  //
+///                      0,0,0,0,0,1,  // 2 [[x,x], [x,x], [x,x]] values skipped
+///
+///                      1, 2, 3,  4,  5,  6,  //
+///                      0, 0, 0,  0,  0,  0,  // the actual values
+///                      7, 8, 9, 10, 11, 12   //
+///                    ]
+///               ],
+///             }
+///           ]
+///         }
+///       ]
+///     }
+///
+/// The offset of the innermost values buffer, in bytes, is calculated as:
+///
+///     ((2 * 3 * 2) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes
+///
+/// In general, the formula to calculate the offset of the innermost values buffer is:
+///
+///     ((off_0 * fsl_size_0) + (off_1 * fsl_size_1) + ... + innermost_off)
+///       * sizeof(innermost_type)
+///
+/// `OffsetPointerOfFixedWidthValues()` can calculate this byte offset and return the
+/// pointer to the first relevant byte of the innermost values buffer.
+///
+/// \param source The array to check
+/// \param force_null_count If true, GetNullCount() is used instead of null_count
+/// \param exclude_dictionary If true, DICTIONARY is excluded from the
+///        is_fixed_width() types. Default: false.
+ARROW_EXPORT bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count = false,
+                                   bool exclude_dictionary = false);
+
+/// \brief Checks if the given array has a fixed-width type or if it's an array of
+/// fixed-size list that can be flattened to an array of fixed-width values.
+///
+/// This function is a more general version of
+/// `IsFixedWidthLike(const ArraySpan&, bool)` that allows the caller to further
+/// restrict the inner value types that should be considered fixed-width.
+///
+/// \param source The array to check
+/// \param force_null_count If true, GetNullCount() is used instead of null_count
+/// \param extra_predicate A DataType predicate that can be used to further
+///        restrict the types that are considered fixed-width
+template <class ExtraPred>
+inline bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count,
+                             ExtraPred extra_predicate) {
+  const auto* type = source.type;
+  // BOOL is considered fixed-width if not nested under FIXED_SIZE_LIST.
+ if (is_fixed_width(type->id()) && extra_predicate(*type)) { + return true; + } + if (type->id() == Type::FIXED_SIZE_LIST) { + // All the inner arrays must not contain any nulls. + const auto* values = &source.child_data[0]; + while ((force_null_count ? values->GetNullCount() : values->null_count) == 0) { + type = values->type; + if (type->id() == Type::FIXED_SIZE_LIST) { + values = &values->child_data[0]; + continue; + } + // BOOL has to be excluded because it's not byte-aligned. + return type->id() != Type::BOOL && is_fixed_width(type->id()) && + extra_predicate(*type); + } + } + return false; +} + +/// \brief Get the fixed-width in bytes of a type if it is a fixed-width like +/// type, but not BOOL. +/// +/// If the array is a FixedSizeList (of any level of nesting), the byte width of +/// the values is the product of all fixed-list sizes and the byte width of the +/// innermost fixed-width value type. +/// +/// IsFixedWidthLike(array) performs more checks than this function and should +/// be used to guarantee that, if type is not BOOL, this function will not return -1. +/// +/// NOTE: this function translates `DataType::bit_width()` to bytes differently from +/// `DataType::byte_width()`. `DataType::byte_width()` will return 0 for +/// BOOL, while this function will return `-1`. This is done because 0 is +/// a valid return value for FIXED_SIZE_LIST with size 0 or `FIXED_SIZE_BINARY` with +/// size 0. +/// +/// \pre The instance of the array where this type is from must pass +/// `IsFixedWidthLike(array)` and should not be BOOL. +/// \return The fixed-byte width of the values or -1 if the type is BOOL or not +/// fixed-width like. 0 is a valid return value as fixed-size-lists +/// and fixed-size-binary with size 0 are allowed. +ARROW_EXPORT int64_t FixedWidthInBytes(const DataType& type); + +/// \brief Get the fixed-width in bits of a type if it is a fixed-width like +/// type. +/// +/// \return The bit-width of the values or -1 +/// \see FixedWidthInBytes +ARROW_EXPORT int64_t FixedWidthInBits(const DataType& type); + +namespace internal { + +/// \brief Allocate an ArrayData for a type that is fixed-width like. +/// +/// This function performs the same checks performed by +/// `IsFixedWidthLike(source, false)`. If `source.type` is not a simple +/// fixed-width type, caller should make sure it passes the +/// `IsFixedWidthLike(source)` checks. That guarantees that it's possible to +/// allocate an array that can serve as a destination for a kernel that writes values +/// through a single pointer to fixed-width byte blocks. +/// +/// \param[in] length The length of the array to allocate (unrelated to the length of +/// the source array) +/// \param[in] source The source array that carries the type information and the +/// validity bitmaps that are relevant for the type validation +/// when the source is a FixedSizeList. +/// \see IsFixedWidthLike +ARROW_EXPORT Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, + int64_t length, + const ArraySpan& source, + bool allocate_validity, + ArrayData* out); + +} // namespace internal + +/// \brief Get the pointer to the fixed-width values of a fixed-width like array. +/// +/// This function might return NULLPTR if the type of the array is BOOL or +/// if the pre-conditions listed are not satisfied. The converse is not true +/// (i.e. not getting NULLPTR doesn't guarantee that source is a fixed-width +/// like array). 
+///
+/// \pre `IsFixedWidthLike(source)` or the more restrictive
+/// is_fixed_width(*source.type) SHOULD be true
+/// \return The pointer to the fixed-width values of an array or NULLPTR
+///         if pre-conditions are not satisfied.
+ARROW_EXPORT const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source);
+
+/// \brief Get the mutable pointer to the fixed-width values of an array
+/// allocated by PreallocateFixedWidthArrayData.
+///
+/// \pre mutable_array->offset and the offset of child array (if it's a
+/// FixedSizeList) MUST be 0 (recursively).
+/// \pre IsFixedWidthLike(ArraySpan(mutable_array)) or the more restrictive
+/// is_fixed_width(*mutable_array->type) MUST be true
+/// \return The mutable pointer to the fixed-width byte blocks of the array. If
+///         pre-conditions are not satisfied, the return value is undefined.
+ARROW_EXPORT uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array);
+
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/util/fixed_width_test.cc b/cpp/src/arrow/util/fixed_width_test.cc
new file mode 100644
index 0000000000000..2f05221ed6535
--- /dev/null
+++ b/cpp/src/arrow/util/fixed_width_test.cc
@@ -0,0 +1,217 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// #include
+// #include
+
+#include <gtest/gtest.h>
+
+#include "arrow/array/array_base.h"
+#include "arrow/array/data.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/type.h"
+#include "arrow/util/fixed_width_internal.h"
+
+namespace arrow::util {
+
+namespace {
+bool NotBool(const DataType& type) { return type.id() != Type::BOOL; }
+bool NotInt32(const DataType& type) { return type.id() != Type::INT32; }
+}  // namespace
+
+class TestFixedWidth : public ::testing::Test {
+ protected:
+  std::shared_ptr<Array> bool_array_array_;
+  std::shared_ptr<Array> int_array_array_;
+  std::shared_ptr<Array> fsl_bool_array_;
+  std::shared_ptr<Array> fsl_int_array_;
+  std::shared_ptr<Array> fsl_int_nulls_array_;
+  std::shared_ptr<Array> fsl_int_inner_nulls_array_;
+  std::shared_ptr<Array> dict_string_array_;
+
+  std::shared_ptr<DataType> fsl(int32_t list_size,
+                                const std::shared_ptr<DataType>& value_type) {
+    return fixed_size_list(value_type, list_size);
+  }
+
+ public:
+  void SetUp() override {
+    bool_array_array_ = ArrayFromJSON(boolean(), "[true, false, null]");
+    int_array_array_ = ArrayFromJSON(int32(), "[1, 0, null]");
+    fsl_bool_array_ = ArrayFromJSON(fsl(2, boolean()), "[[true, false]]");
+    fsl_int_array_ = ArrayFromJSON(fsl(2, int32()), "[[1, 0], [2, 3]]");
+    fsl_int_nulls_array_ = ArrayFromJSON(fsl(2, int32()), "[[1, 0], null, [1, 2]]");
+    fsl_int_inner_nulls_array_ =
+        ArrayFromJSON(fsl(2, int32()), "[[1, 0], [2, 3], [null, 2]]");
+    dict_string_array_ =
+        ArrayFromJSON(dictionary(int32(), utf8()), R"(["Alice", "Bob", "Alice"])");
+  }
+};
+
+TEST_F(TestFixedWidth, IsFixedWidth) {
+  auto arr = ArraySpan{*bool_array_array_->data()};
+  // force_null_count doesn't matter because nulls at the top-level
+  // of the array are allowed by IsFixedWidthLike.
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotInt32));
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotBool));
+
+  arr = ArraySpan{*int_array_array_->data()};
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, NotBool));
+}
+
+TEST_F(TestFixedWidth, IsFixedWidthLike) {
+  auto arr = ArraySpan{*fsl_bool_array_->data()};
+  // bools wrapped by fixed-size-list are not fixed-width because the
+  // innermost data buffer is a bitmap and won't byte-align.
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+
+  arr = ArraySpan{*fsl_int_array_->data()};
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  arr.null_count = kUnknownNullCount;
+  // force_null_count=true isn't necessary because nulls at the top-level
+  // of the array are allowed by IsFixedWidthLike.
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+
+  arr.child_data[0].null_count = kUnknownNullCount;
+  // inner nulls are not allowed by IsFixedWidthLike...
+  ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false));
+  // ...but forcing null counting on every internal array increases
+  // the chances of IsFixedWidthLike returning true.
+  ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/true));
+  // Excluding INT32 from the internal array checks.
+ ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true, NotInt32)); + + arr = ArraySpan{*fsl_int_nulls_array_->data()}; + // Nulls at the top-level of the array are allowed by IsFixedWidthLike. + // + // TODO(GH-10157): ArrayFromJSON uses FixedSizeListBuilder which currently + // produces nulls on the child data if one of the list-typed elements is null. + // ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); + + arr = ArraySpan{*fsl_int_inner_nulls_array_->data()}; + // Inner nulls are not allowed by IsFixedWidthLike. + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true)); + + arr = ArraySpan{*dict_string_array_->data()}; + // Dictionaries are considered fixed-width by is_fixed_width(), but excluded + // by IsFixedWidthLike if exclude_dictionary=true. + ASSERT_TRUE(IsFixedWidthLike(arr)); + ASSERT_TRUE( + IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/false)); + ASSERT_FALSE( + IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/true)); +} + +TEST_F(TestFixedWidth, MeasureWidthInBytes) { + auto b = boolean(); + auto i8 = int8(); + auto i32 = int32(); + auto fsb = fixed_size_binary(3); + auto dict = dictionary(int32(), utf8()); + auto varlen = utf8(); + ASSERT_EQ(FixedWidthInBytes(*b), -1); + ASSERT_EQ(FixedWidthInBytes(*i8), 1); + ASSERT_EQ(FixedWidthInBytes(*i32), 4); + ASSERT_EQ(FixedWidthInBytes(*fsb), 3); + ASSERT_EQ(FixedWidthInBytes(*dict), 4); + + ASSERT_EQ(FixedWidthInBytes(*varlen), -1); + ASSERT_EQ(FixedWidthInBytes(*varlen), -1); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, b)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, b)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, b)), -1); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, i8)), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, i8)), 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, i8)), 5); + ASSERT_EQ(FixedWidthInBytes(*fsl(0, i32)), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(3, i32)), 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, i32)), 5 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, fsb)), 5 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(5, dict)), 5 * 4); + + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, i8))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, i8))), 2 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, i8))), 2 * 5); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, i32))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, i32))), 2 * 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, i32))), 2 * 5 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, fsb))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, fsb))), 2 * 3 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, fsb))), 2 * 5 * 3); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(0, dict))), 0); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(3, dict))), 2 * 3 * 4); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, fsl(5, dict))), 2 * 5 * 4); + + ASSERT_EQ(FixedWidthInBytes(*fsl(0, varlen)), -1); + ASSERT_EQ(FixedWidthInBytes(*fsl(2, varlen)), -1); +} + +TEST_F(TestFixedWidth, MeasureWidthInBits) { + auto b = boolean(); + auto i8 = int8(); + auto i32 = int32(); + auto fsb = fixed_size_binary(3); + auto dict = dictionary(int32(), utf8()); + auto varlen = utf8(); + ASSERT_EQ(FixedWidthInBits(*b), 1); + ASSERT_EQ(FixedWidthInBits(*i8), 8); + ASSERT_EQ(FixedWidthInBits(*i32), 4 * 8); + ASSERT_EQ(FixedWidthInBits(*fsb), 3 * 8); + ASSERT_EQ(FixedWidthInBits(*dict), 4 * 8); + + ASSERT_EQ(FixedWidthInBits(*varlen), -1); + ASSERT_EQ(FixedWidthInBits(*varlen), -1); + + ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), -1); + ASSERT_EQ(FixedWidthInBits(*fsl(3, 
b)), -1);
+  ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), -1);
+
+  ASSERT_EQ(FixedWidthInBits(*fsl(0, i8)), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(3, i8)), 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(5, i8)), 5 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(0, i32)), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(3, i32)), 4 * 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(5, i32)), 4 * 5 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(5, fsb)), 5 * 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(5, dict)), 5 * 4 * 8);
+
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, i8))), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, i8))), 2 * 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, i8))), 2 * 5 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, i32))), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, i32))), 2 * 3 * 4 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, i32))), 2 * 5 * 4 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, fsb))), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, fsb))), 2 * 3 * 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, fsb))), 2 * 5 * 3 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(0, dict))), 0);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(3, dict))), 2 * 3 * 4 * 8);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, fsl(5, dict))), 2 * 5 * 4 * 8);
+
+  ASSERT_EQ(FixedWidthInBits(*fsl(0, varlen)), -1);
+  ASSERT_EQ(FixedWidthInBits(*fsl(2, varlen)), -1);
+}
+
+}  // namespace arrow::util
diff --git a/cpp/src/arrow/util/fixed_width_test_util.h b/cpp/src/arrow/util/fixed_width_test_util.h
new file mode 100644
index 0000000000000..ca141b7ca2c4d
--- /dev/null
+++ b/cpp/src/arrow/util/fixed_width_test_util.h
@@ -0,0 +1,203 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <limits>
+#include <memory>
+#include <vector>
+
+#include "arrow/array/builder_primitive.h"
+#include "arrow/builder.h"
+#include "arrow/type.h"
+#include "arrow/util/checked_cast.h"
+
+namespace arrow::util::internal {
+
+class NestedListGenerator {
+ public:
+  /// \brief Create a nested FixedSizeListType.
+  ///
+  /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])`
+  static std::shared_ptr<DataType> NestedFSLType(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& sizes) {
+    auto type = inner_type;
+    for (auto it = sizes.rbegin(); it != sizes.rend(); it++) {
+      type = fixed_size_list(std::move(type), *it);
+    }
+    return type;
+  }
+
+  /// \brief Create a nested ListType.
+  ///
+  /// \return `list(list(...))`
+  static std::shared_ptr<DataType> NestedListType(
+      const std::shared_ptr<DataType>& inner_type, size_t depth) {
+    auto list_type = list(inner_type);
+    for (size_t i = 1; i < depth; i++) {
+      list_type = list(std::move(list_type));
+    }
+    return list_type;
+  }
+
+ private:
+  template <typename ArrowType>
+  static Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) {
+    using NumericBuilder = ::arrow::NumericBuilder<ArrowType>;
+    using value_type = typename NumericBuilder::value_type;
+    auto* numeric_builder = ::arrow::internal::checked_cast<NumericBuilder*>(builder);
+    auto cast_next_value =
+        static_cast<value_type>(*next_value % std::numeric_limits<value_type>::max());
+    RETURN_NOT_OK(numeric_builder->Append(cast_next_value));
+    *next_value += 1;
+    return Status::OK();
+  }
+
+  // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...])
+  static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes,
+                                 int64_t* next_inner_value) {
+    using ::arrow::internal::checked_cast;
+    ArrayBuilder* builder = nested_builder;
+    auto type = builder->type();
+    if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) {
+      const int list_size = *list_sizes;
+      if (type->id() == Type::FIXED_SIZE_LIST) {
+        auto* fsl_builder = checked_cast<FixedSizeListBuilder*>(builder);
+        assert(list_size == checked_cast<const FixedSizeListType&>(*type).list_size());
+        RETURN_NOT_OK(fsl_builder->Append());
+        builder = fsl_builder->value_builder();
+      } else {  // type->id() == Type::LIST
+        auto* list_builder = checked_cast<ListBuilder*>(builder);
+        RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size));
+        builder = list_builder->value_builder();
+      }
+      list_sizes++;
+      for (int i = 0; i < list_size; i++) {
+        RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value));
+      }
+    } else {
+      switch (type->id()) {
+        case Type::INT8:
+          RETURN_NOT_OK(AppendNumeric<Int8Type>(builder, next_inner_value));
+          break;
+        case Type::INT16:
+          RETURN_NOT_OK(AppendNumeric<Int16Type>(builder, next_inner_value));
+          break;
+        case Type::INT32:
+          RETURN_NOT_OK(AppendNumeric<Int32Type>(builder, next_inner_value));
+          break;
+        case Type::INT64:
+          RETURN_NOT_OK(AppendNumeric<Int64Type>(builder, next_inner_value));
+          break;
+        default:
+          return Status::NotImplemented("Unsupported type: ", *type);
+      }
+    }
+    return Status::OK();
+  }
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      ArrayBuilder* nested_builder, const std::vector<int>& list_sizes, int64_t length) {
+    int64_t next_inner_value = 0;
+    for (int64_t i = 0; i < length; i++) {
+      RETURN_NOT_OK(
+          AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value));
+    }
+    return nested_builder->Finish();
+  }
+
+ public:
+  static Result<std::shared_ptr<Array>> NestedFSLArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& list_sizes,
+      int64_t length) {
+    auto nested_type = NestedFSLType(inner_type, list_sizes);
+    ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
+    return NestedListArray(builder.get(), list_sizes, length);
+  }
+
+  static Result<std::shared_ptr<Array>> NestedListArray(
+      const std::shared_ptr<DataType>& inner_type, const std::vector<int>& list_sizes,
+      int64_t length) {
+    auto nested_type = NestedListType(inner_type, list_sizes.size());
+    ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type));
+    return NestedListArray(builder.get(), list_sizes, length);
+  }
+
+  /// \brief Generate all possible nested list configurations of depth 1 to max_depth.
+  ///
+  /// Each configuration consists of a single inner value type and a list of sizes.
+  /// Both can be used with NestedFSLArray and NestedListArray to generate test data.
+  ///
+  /// The product of the list sizes and the size of the inner value type is always
+  /// a power of 2 no greater than max_power_of_2_size. For max_depth=3 and
+  /// max_power_of_2_size=32, this generates 108 configurations.
+  ///
+  /// \tparam Visit a function type with signature
+  ///     void(const std::shared_ptr<DataType>& inner_type,
+  ///          const std::vector<int>& list_sizes)
+  template <typename Visit>
+  static void VisitAllNestedListConfigurations(
+      const std::vector<std::shared_ptr<DataType>>& inner_value_types, Visit&& visit,
+      int max_depth = 3, int max_power_of_2_size = 32) {
+    for (int depth = 1; depth <= max_depth; depth++) {
+      for (auto& type : inner_value_types) {
+        assert(is_fixed_width(*type));
+        int value_width = type->byte_width();
+
+        std::vector<int> list_sizes;  // stack of list sizes
+        auto pop = [&]() {  // pop the list_sizes stack
+          assert(!list_sizes.empty());
+          value_width /= list_sizes.back();
+          list_sizes.pop_back();
+        };
+        auto next = [&]() {  // double the top of the stack
+          assert(!list_sizes.empty());
+          value_width *= 2;
+          list_sizes.back() *= 2;
+          return value_width;
+        };
+        auto push_1s = [&]() {  // fill the stack with 1s
+          while (list_sizes.size() < static_cast<size_t>(depth)) {
+            list_sizes.push_back(1);
+          }
+        };
+
+        // Loop invariants:
+        //   value_width == product(list_sizes) * type->byte_width()
+        //   value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32)
+        push_1s();
+        do {
+          // for (auto x : list_sizes) printf("%d * ", x);
+          // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(),
+          //        value_width);
+          visit(type, list_sizes);
+          // Advance to the next test case
+          while (!list_sizes.empty()) {
+            if (next() <= max_power_of_2_size) {
+              push_1s();
+              break;
+            }
+            pop();
+          }
+        } while (!list_sizes.empty());
+      }
+    }
+  }
+};
+
+}  // namespace arrow::util::internal

From 0d8b3791cda042224427ccbb4fc2fc3ec0f27b61 Mon Sep 17 00:00:00 2001
From: Jacek Stania <38670505+janosik47@users.noreply.github.com>
Date: Fri, 3 May 2024 07:24:06 +0100
Subject: [PATCH 043/261] GH-35888: [Java] Add
 FlightStatusCode.RESOURCE_EXHAUSTED (#41508)

### Rationale for this change

Related to https://github.com/apache/arrow/issues/35888

Currently the gRPC Status.RESOURCE_EXHAUSTED exception/code is translated by the Java FlightServer into FlightStatusCode.INVALID_ARGUMENT and thrown to the client as a gRPC INVALID_ARGUMENT exception. That may mislead the other party, as INVALID_ARGUMENT indicates an input-parameter problem, when in reality the backend server's intention was rather "back off and try later".

### What changes are included in this PR?

Add the FlightStatusCode.RESOURCE_EXHAUSTED code and make sure it is translated from/to the gRPC Status.RESOURCE_EXHAUSTED.

### Are these changes tested?

Unit tests are included to validate the RESOURCE_EXHAUSTED translation between Flight and gRPC codes.

### Are there any user-facing changes?

No. Users may start seeing RESOURCE_EXHAUSTED instead of the INVALID_ARGUMENT code. In both cases this is an exception seen on the client side, so I am considering this a _not breaking change to any public API_. It may, however, influence client-side flows that react conditionally on the exception status code.
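For illustration, a minimal sketch of how both sides might use the new code (the `QuotaExample` class, its method names, and the `quotaExceeded` flag are hypothetical; `CallStatus`, `FlightStatusCode`, and `FlightRuntimeException` are existing Flight classes):

```java
import org.apache.arrow.flight.CallStatus;
import org.apache.arrow.flight.FlightRuntimeException;
import org.apache.arrow.flight.FlightStatusCode;

class QuotaExample {
  // Server side: signal "back off and try later" instead of an
  // input-parameter problem when a quota or capacity limit is hit.
  static void checkCapacity(boolean quotaExceeded) {
    if (quotaExceeded) {
      throw CallStatus.RESOURCE_EXHAUSTED
          .withDescription("per-user quota exceeded, retry later")
          .toRuntimeException();
    }
  }

  // Client side: react conditionally on the status code.
  static void handle(FlightRuntimeException e) throws InterruptedException {
    if (e.status().code() == FlightStatusCode.RESOURCE_EXHAUSTED) {
      Thread.sleep(1000);  // back off, then retry the call
    } else {
      throw e;  // other codes still indicate real errors
    }
  }
}
```

With this change, the server-side exception above reaches the client as gRPC RESOURCE_EXHAUSTED rather than INVALID_ARGUMENT, which is what makes the client-side branch possible.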
* GitHub Issue: #35888 Authored-by: Jacek Stania Signed-off-by: David Li --- .../org/apache/arrow/flight/CallStatus.java | 1 + .../apache/arrow/flight/FlightStatusCode.java | 5 +++++ .../apache/arrow/flight/grpc/StatusUtils.java | 4 +++- .../arrow/flight/grpc/TestStatusUtils.java | 22 +++++++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java index 991d0ed6a043b..8fc2002207e24 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/CallStatus.java @@ -49,6 +49,7 @@ public class CallStatus { public static final CallStatus UNAUTHORIZED = FlightStatusCode.UNAUTHORIZED.toStatus(); public static final CallStatus UNIMPLEMENTED = FlightStatusCode.UNIMPLEMENTED.toStatus(); public static final CallStatus UNAVAILABLE = FlightStatusCode.UNAVAILABLE.toStatus(); + public static final CallStatus RESOURCE_EXHAUSTED = FlightStatusCode.RESOURCE_EXHAUSTED.toStatus(); /** * Create a new status. diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java index 3d96877ba02de..09a2c7afda106 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/FlightStatusCode.java @@ -71,6 +71,11 @@ public enum FlightStatusCode { * should send this code only if it has not done any work. */ UNAVAILABLE, + /** + * Some resource has been exhausted, perhaps a per-user quota, or perhaps the entire file system is out of space. 
+   * (see: https://grpc.github.io/grpc/core/md_doc_statuscodes.html)
+   */
+  RESOURCE_EXHAUSTED
   ;
 
   /**
diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
index 7f0dcf2da3f0d..a2d9a85aaa442 100644
--- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
+++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/grpc/StatusUtils.java
@@ -74,6 +74,8 @@ public static Status.Code toGrpcStatusCode(FlightStatusCode code) {
         return Code.UNIMPLEMENTED;
       case UNAVAILABLE:
         return Code.UNAVAILABLE;
+      case RESOURCE_EXHAUSTED:
+        return Code.RESOURCE_EXHAUSTED;
       default:
         return Code.UNKNOWN;
     }
@@ -101,7 +103,7 @@ public static FlightStatusCode fromGrpcStatusCode(Status.Code code) {
       case PERMISSION_DENIED:
         return FlightStatusCode.UNAUTHORIZED;
       case RESOURCE_EXHAUSTED:
-        return FlightStatusCode.INVALID_ARGUMENT;
+        return FlightStatusCode.RESOURCE_EXHAUSTED;
       case FAILED_PRECONDITION:
         return FlightStatusCode.INVALID_ARGUMENT;
       case ABORTED:
diff --git a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java
index 9912a26ea340a..730ea30a2f598 100644
--- a/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java
+++ b/java/flight/flight-core/src/test/java/org/apache/arrow/flight/grpc/TestStatusUtils.java
@@ -48,4 +48,26 @@ public void testParseTrailers() {
     Assertions.assertTrue(callStatus.metadata().containsKey("content-type"));
     Assertions.assertEquals("text/html", callStatus.metadata().get("content-type"));
   }
+
+  @Test
+  public void testGrpcResourceExhaustedTranslatedToFlightStatus() {
+    Status status = Status.RESOURCE_EXHAUSTED;
+
+    CallStatus callStatus = StatusUtils.fromGrpcStatus(status);
+    Assertions.assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, callStatus.code());
+
+    FlightStatusCode flightStatusCode = StatusUtils.fromGrpcStatusCode(status.getCode());
+    Assertions.assertEquals(FlightStatusCode.RESOURCE_EXHAUSTED, flightStatusCode);
+  }
+
+  @Test
+  public void testFlightResourceExhaustedTranslatedToGrpcStatus() {
+    CallStatus callStatus = CallStatus.RESOURCE_EXHAUSTED;
+
+    Status.Code grpcStatusCode = StatusUtils.toGrpcStatusCode(callStatus.code());
+    Assertions.assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatusCode);
+
+    Status grpcStatus = StatusUtils.toGrpcStatus(callStatus);
+    Assertions.assertEquals(Status.RESOURCE_EXHAUSTED.getCode(), grpcStatus.getCode());
+  }
 }

From 2eb47efc04707145edbf6a2ad8a750138a585682 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?JB=20Onofr=C3=A9?=
Date: Fri, 3 May 2024 08:44:41 +0200
Subject: [PATCH 044/261] MINOR: Increase the open-pull-requests-limit for
 dependabot (#41499)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

By default, dependabot opens a maximum of five pull requests for version updates. Once there are five open pull requests from dependabot, dependabot will not open any new requests until some of those open requests are merged or closed. With this change, dependabot can open up to 10 pull requests per package ecosystem.

### What changes are included in this PR?

Update configuration for dependabot.

### Are these changes tested?

Tested on other ASF projects 😄

### Are there any user-facing changes?
No

Authored-by: JB Onofré
Signed-off-by: David Li
---
 .github/dependabot.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/dependabot.yml b/.github/dependabot.yml
index e96cb8d2eb1e3..7d9ff2f42e887 100644
--- a/.github/dependabot.yml
+++ b/.github/dependabot.yml
@@ -23,30 +23,35 @@ updates:
       interval: "weekly"
     commit-message:
       prefix: "MINOR: [CI] "
+    open-pull-requests-limit: 10
   - package-ecosystem: "gomod"
     directory: "/go/"
     schedule:
       interval: "weekly"
     commit-message:
       prefix: "MINOR: [Go] "
+    open-pull-requests-limit: 10
   - package-ecosystem: "maven"
     directory: "/java/"
     schedule:
       interval: "weekly"
     commit-message:
       prefix: "MINOR: [Java] "
+    open-pull-requests-limit: 10
   - package-ecosystem: "npm"
     directory: "/js/"
     schedule:
       interval: "monthly"
     commit-message:
       prefix: "MINOR: [JS] "
+    open-pull-requests-limit: 10
   - package-ecosystem: "nuget"
     directory: "/csharp/"
     schedule:
       interval: "weekly"
     commit-message:
       prefix: "MINOR: [C#] "
+    open-pull-requests-limit: 10
     ignore:
     - dependency-name: "Microsoft.Extensions.*"
       update-types:

From c0aade5f624e2ec64d5d8743df9b95c7eee50117 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?=
Date: Fri, 3 May 2024 12:16:02 +0200
Subject: [PATCH 045/261] GH-41462: [CI] Temporary pin azurite to v3.29.0
 (#41501)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

install_azurite.sh is failing to install the latest version of Azurite, and the Azure tests were failing.

### What changes are included in this PR?

Temporarily pin azurite to v3.29.0 to unblock the 16.1.0 release. A follow-up issue is tracked here: https://github.com/apache/arrow/issues/41505

### Are these changes tested?

Yes, via archery.

### Are there any user-facing changes?

No

* GitHub Issue: #41462

Authored-by: Raúl Cumplido
Signed-off-by: Raúl Cumplido
---
 ci/scripts/install_azurite.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ci/scripts/install_azurite.sh b/ci/scripts/install_azurite.sh
index 2e7008360fdc3..dda5e99405b7f 100755
--- a/ci/scripts/install_azurite.sh
+++ b/ci/scripts/install_azurite.sh
@@ -19,17 +19,18 @@
 
 set -e
 
+# Pin azurite to 3.29.0 due to https://github.com/apache/arrow/issues/41505
 case "$(uname)" in
   Darwin)
-    npm install -g azurite
+    npm install -g azurite@v3.29.0
     which azurite
     ;;
   MINGW*)
     choco install nodejs.install
-    npm install -g azurite
+    npm install -g azurite@v3.29.0
    ;;
   Linux)
-    npm install -g azurite
+    npm install -g azurite@v3.29.0
     which azurite
     ;;
 esac

From c8cf61c569886ff18c88e29c447a98ab6dedbd92 Mon Sep 17 00:00:00 2001
From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com>
Date: Fri, 3 May 2024 09:42:28 -0400
Subject: [PATCH 046/261] GH-41385: [CI][MATLAB][Packaging] Add support for
 MATLAB `R2024a` in CI and crossbow packaging workflows (#41504)

### Rationale for this change

MATLAB `R2024a` is the latest available version of MATLAB as of April 2024. We are currently building against MATLAB `R2023a` in CI and for the crossbow packaging workflow. We should update the version of MATLAB we support to the latest available version.

We previously created an issue to use `R2023b` (https://github.com/apache/arrow/issues/37809). However, `R2024a` has become publicly available since then.

### What changes are included in this PR?

1. Changed the `release` argument supplied to `matlab-actions/setup-matlab@v2` to `R2024a` from `R2023a` in `.github/workflows/matlab.yml` and `dev/tasks/matlab/github.yml`.
2. Updated the script used to package the MLTBX file (`arrow/matlab/tools/packageMatlabInterface.m`) to specify the version of MATLAB currently running as the only compatible version of MATLAB for the interface.
3. Updated display tests to react to a change in how class names are formatted when an object is displayed in the Command Window.

### Are these changes tested?

Yes. Existing tests used.

### Are there any user-facing changes?

There are no changes to the MATLAB Arrow Interface. However, the MATLAB release used to build, test, and package the interface has been upgraded.

### Future Work

1. We may want to consider building and packaging against multiple versions of MATLAB in parallel, rather than just the latest. This would require some more thought on how many releases back to support over time, however.
2. #41435

* GitHub Issue: #41385

Lead-authored-by: Sarah Gilmore
Co-authored-by: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com>
Co-authored-by: Sutou Kouhei
Signed-off-by: Sarah Gilmore
---
 .github/workflows/matlab.yml                      |  6 +++---
 dev/tasks/matlab/github.yml                       |  8 ++++----
 .../+internal/+test/+display/makeLinkString.m     |  4 ++--
 .../+tabular/+internal/+display/getSchemaString.m |  2 +-
 matlab/tools/packageMatlabInterface.m             | 15 ++++++++++++---
 5 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml
index aa3692e587961..8a0de8a365661 100644
--- a/.github/workflows/matlab.yml
+++ b/.github/workflows/matlab.yml
@@ -70,7 +70,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v2
         with:
-          release: R2023a
+          release: R2024a
       - name: Install ccache
         run: sudo apt-get install ccache
       - name: Setup ccache
@@ -110,7 +110,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v2
         with:
-          release: R2023a
+          release: R2024a
       - name: Install ccache
         run: brew install ccache
       - name: Setup ccache
@@ -148,7 +148,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v2
         with:
-          release: R2023a
+          release: R2024a
       - name: Download Timezone Database
         shell: bash
         run: ci/scripts/download_tz_database.sh
diff --git a/dev/tasks/matlab/github.yml b/dev/tasks/matlab/github.yml
index 13fa36b501125..7840fd176705c 100644
--- a/dev/tasks/matlab/github.yml
+++ b/dev/tasks/matlab/github.yml
@@ -31,7 +31,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v1
         with:
-          release: R2023a
+          release: R2024a
       - name: Build MATLAB Interface
         env:
 {{ macros.github_set_sccache_envvars()|indent(8) }}
@@ -68,7 +68,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v1
         with:
-          release: R2023a
+          release: R2024a
       - name: Build MATLAB Interface
         env:
 {{ macros.github_set_sccache_envvars()|indent(8) }}
@@ -103,7 +103,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v1
         with:
-          release: R2023a
+          release: R2024a
       - name: Install sccache
         shell: bash
         run: arrow/ci/scripts/install_sccache.sh pc-windows-msvc $(pwd)/sccache
@@ -149,7 +149,7 @@ jobs:
       - name: Install MATLAB
         uses: matlab-actions/setup-matlab@v1
         with:
-          release: R2023a
+          release: R2024a
       - name: Run commands
         env:
           MATLABPATH: arrow/matlab/tools
diff --git a/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m b/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m
index 79065ba1c8cfd..e99dd7d78488d 100644
--- a/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m
+++ b/matlab/src/matlab/+arrow/+internal/+test/+display/makeLinkString.m
@@ -26,11 +26,11 @@
 end
 
 if opts.BoldFont
-        link = compose("%s", ...
+        link = compose("%s", ...
             opts.FullClassName, opts.ClassName);
     else
-        link = compose("%s", ...
+        link = compose("%s", ...
             opts.FullClassName, opts.ClassName);
     end
 end
\ No newline at end of file
diff --git a/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m b/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m
index 7da945ca993ef..724b4873c92e1 100644
--- a/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m
+++ b/matlab/src/matlab/+arrow/+tabular/+internal/+display/getSchemaString.m
@@ -43,7 +43,7 @@
         classNameAndIDs = strings([1 numel(typeIDs) * 2]);
         classNameAndIDs(1:2:end-1) = classNames;
         classNameAndIDs(2:2:end) = typeIDs;
-        typeIDs = compose("%s", classNameAndIDs);
+        typeIDs = compose("%s", classNameAndIDs);
     end
 
     text = names + ": " + typeIDs;
diff --git a/matlab/tools/packageMatlabInterface.m b/matlab/tools/packageMatlabInterface.m
index 55b4d4241a569..3d970002614ab 100644
--- a/matlab/tools/packageMatlabInterface.m
+++ b/matlab/tools/packageMatlabInterface.m
@@ -55,9 +55,18 @@
 opts.SupportedPlatforms.Glnxa64 = true;
 opts.SupportedPlatforms.MatlabOnline = true;
 
-% Interface is only qualified against R2023a at the moment
-opts.MinimumMatlabRelease = "R2023a";
-opts.MaximumMatlabRelease = "R2023a";
+% MEX files use run-time libraries shipped with MATLAB (e.g. libmx, libmex,
+% etc.). MEX files linked against earlier versions of MATLAB run-time libraries
+% will most likely work on newer versions of MATLAB. However, this may not
+% always be the case.
+%
+% For now, set the earliest and latest compatible releases of MATLAB to
+% the release of MATLAB used to build and package the MATLAB Arrow Interface.
+%
+% See: https://www.mathworks.com/help/matlab/matlab_external/version-compatibility.html
+currentRelease = matlabRelease.Release;
+opts.MinimumMatlabRelease = currentRelease;
+opts.MaximumMatlabRelease = currentRelease;
 
 opts.OutputFile = fullfile(outputFolder, compose("matlab-arrow-%s.mltbx", toolboxVersionRaw));
 disp("Output File: " + opts.OutputFile);

From 32916f8355858ccea91df402f67696953f9dd298 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 3 May 2024 23:06:36 +0900
Subject: [PATCH 047/261] MINOR: [JS] Bump @swc/core from 1.4.14 to 1.4.17 in
 /js (#41519)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [@swc/core](https://github.com/swc-project/swc) from 1.4.14 to 1.4.17.
Changelog

Sourced from @swc/core's changelog:

[1.4.17] - 2024-04-23

Bug Fixes

- (es) Ignore sourceMappingURL in string literals (#8879) (d7188cd)
- (es/codegen) Use `Str.raw` for es5 (#8873) (c7a06b1)
- (es/compat) Fix async generator (#8881) (063eabd)
- (es/resolver) Prioritize jsc.paths by length in tsc resolver (#8875) (e22c368)
- (html/codegen) Expand elements before which body isn't elided (#8877) (5419a94)

[1.4.16] - 2024-04-18

Bug Fixes

- (es/helpers) Fix resolving of usingCtx helper (#8874) (6e9d1a4)

[1.4.15] - 2024-04-17

Bug Fixes

- (es/codegen) Fix ascii_only for identifiers (#8866) (2075a23)
- (es/minifier) Remove raw of strings after modification (#8865) (740c0bb)
- (es/parser) Fix span of BindingIdent (#8859) (fbd32fb)
- (es/proposal) Update explicit resource management to match spec (#8860) (6d24076)

Features

- (es/transforms) Allocate stacks dynamically (#8867) (a1c5415)

... (truncated)

Commits

- 3311da7 chore: Publish 1.4.17 with swc_core v0.90.37
- b1c22d5 chore: Improve publish script
- fedf06f chore: Publish 1.4.17-nightly-20240423.3
- 2c1e959 chore: Publish 1.4.17-nightly-20240423.2
- 7b08d38 chore: Update bindings
- 2fbb864 chore: Publish 1.4.17-nightly-20240423.1
- 6d3c41d chore: Bump crates
- c7a06b1 fix(es/codegen): Use `Str.raw` for es5 (#8873)
- f5e50c2 chore: Bump crates
- 063eabd fix(es/compat): Fix async generator (#8881)
- Additional commits viewable in the compare view
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 128 ++++++++++++++++++++++++------------------------ 2 files changed, 65 insertions(+), 65 deletions(-) diff --git a/js/package.json b/js/package.json index e9590a188820f..08f2fe83d13e8 100644 --- a/js/package.json +++ b/js/package.json @@ -67,7 +67,7 @@ "@rollup/plugin-alias": "5.1.0", "@rollup/plugin-node-resolve": "15.2.3", "@rollup/stream": "3.0.1", - "@swc/core": "1.4.14", + "@swc/core": "1.4.17", "@types/benchmark": "2.1.5", "@types/glob": "8.1.0", "@types/jest": "29.5.12", diff --git a/js/yarn.lock b/js/yarn.lock index ab092675b4806..47674bd8b2168 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1124,74 +1124,74 @@ dependencies: "@sinonjs/commons" "^3.0.0" -"@swc/core-darwin-arm64@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.4.14.tgz#de570252c3f155f55536f0d6bb8bafaec2e99616" - integrity sha512-8iPfLhYNspBl836YYsfv6ErXwDUqJ7IMieddV3Ey/t/97JAEAdNDUdtTKDtbyP0j/Ebyqyn+fKcqwSq7rAof0g== - -"@swc/core-darwin-x64@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.4.14.tgz#4eefbe129e416f4c400656742ab7f30e01aff02e" - integrity sha512-9CqSj8uRZ92cnlgAlVaWMaJJBdxtNvCzJxaGj5KuIseeG6Q0l1g+qk8JcU7h9dAsH9saHTNwNFBVGKQo0W0ujg== - -"@swc/core-linux-arm-gnueabihf@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.4.14.tgz#bea4b94c32bb25de2816126dac299655529ba7f3" - integrity sha512-mfd5JArPITTzMjcezH4DwMw+BdjBV1y25Khp8itEIpdih9ei+fvxOOrDYTN08b466NuE2dF2XuhKtRLA7fXArQ== - -"@swc/core-linux-arm64-gnu@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.4.14.tgz#52063214f4a14d6a0c3c6059ed9e7ba1062f6b46" - integrity sha512-3Lqlhlmy8MVRS9xTShMaPAp0oyUt0KFhDs4ixJsjdxKecE0NJSV/MInuDmrkij1C8/RQ2wySRlV9np5jK86oWw== - -"@swc/core-linux-arm64-musl@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.4.14.tgz#7e7deea7b1b3d0c9944cc8e9ba948fcc785158ea" - integrity sha512-n0YoCa64TUcJrbcXIHIHDWQjdUPdaXeMHNEu7yyBtOpm01oMGTKP3frsUXIABLBmAVWtKvqit4/W1KVKn5gJzg== - -"@swc/core-linux-x64-gnu@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.4.14.tgz#301133ea3ee347568886f2489837e991e96d44db" - integrity sha512-CGmlwLWbfG1dB4jZBJnp2IWlK5xBMNLjN7AR5kKA3sEpionoccEnChOEvfux1UdVJQjLRKuHNV9yGyqGBTpxfQ== - -"@swc/core-linux-x64-musl@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.4.14.tgz#86b8e987a814209cd0dd0f21cbc1134305dfffd5" - integrity sha512-xq4npk8YKYmNwmr8fbvF2KP3kUVdZYfXZMQnW425gP3/sn+yFQO8Nd0bGH40vOVQn41kEesSe0Z5O/JDor2TgQ== - -"@swc/core-win32-arm64-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.4.14.tgz#eb56b8977e3542665929c3963bd7dc18fe5b2556" - integrity sha512-imq0X+gU9uUe6FqzOQot5gpKoaC00aCUiN58NOzwp0QXEupn8CDuZpdBN93HiZswfLruu5jA1tsc15x6v9p0Yg== - -"@swc/core-win32-ia32-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.4.14.tgz#72e119038b9d8743b13bb933b8e192acd9f501f9" - integrity 
sha512-cH6QpXMw5D3t+lpx6SkErHrxN0yFzmQ0lgNAJxoDRiaAdDbqA6Col8UqUJwUS++Ul6aCWgNhCdiEYehPaoyDPA== - -"@swc/core-win32-x64-msvc@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.4.14.tgz#f5a3b1a241708b0628a07458e5bedbf67a1b9595" - integrity sha512-FmZ4Tby4wW65K/36BKzmuu7mlq7cW5XOxzvufaSNVvQ5PN4OodAlqPjToe029oma4Av+ykJiif64scMttyNAzg== - -"@swc/core@1.4.14": - version "1.4.14" - resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.4.14.tgz#8bad316c0119f626bb1b181ba7a988ef9d14e9cc" - integrity sha512-tHXg6OxboUsqa/L7DpsCcFnxhLkqN/ht5pCwav1HnvfthbiNIJypr86rNx4cUnQDJepETviSqBTIjxa7pSpGDQ== +"@swc/core-darwin-arm64@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.4.17.tgz#e62fa7f247bdd1c0c50a3f99722da4dd098c7c67" + integrity sha512-HVl+W4LezoqHBAYg2JCqR+s9ife9yPfgWSj37iIawLWzOmuuJ7jVdIB7Ee2B75bEisSEKyxRlTl6Y1Oq3owBgw== + +"@swc/core-darwin-x64@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.4.17.tgz#1145cbb7575e317204ed3a7d0274bd26fe9ffab6" + integrity sha512-WYRO9Fdzq4S/he8zjW5I95G1zcvyd9yyD3Tgi4/ic84P5XDlSMpBDpBLbr/dCPjmSg7aUXxNQqKqGkl6dQxYlA== + +"@swc/core-linux-arm-gnueabihf@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.4.17.tgz#7145b3ada5cf9b748eaacbc9a7c7037ba0fb26bb" + integrity sha512-cgbvpWOvtMH0XFjvwppUCR+Y+nf6QPaGu6AQ5hqCP+5Lv2zO5PG0RfasC4zBIjF53xgwEaaWmGP5/361P30X8Q== + +"@swc/core-linux-arm64-gnu@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.4.17.tgz#5c0833ef132af17bd3cbdf2253f35b57c0cf62bb" + integrity sha512-l7zHgaIY24cF9dyQ/FOWbmZDsEj2a9gRFbmgx2u19e3FzOPuOnaopFj0fRYXXKCmtdx+anD750iBIYnTR+pq/Q== + +"@swc/core-linux-arm64-musl@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.4.17.tgz#5bfe81eb23c905f04b669a7d2b060a147a263483" + integrity sha512-qhH4gr9gAlVk8MBtzXbzTP3BJyqbAfUOATGkyUtohh85fPXQYuzVlbExix3FZXTwFHNidGHY8C+ocscI7uDaYw== + +"@swc/core-linux-x64-gnu@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.4.17.tgz#a0c19bc9635e86ebd1c7f8e9e026503d1a1bf83d" + integrity sha512-vRDFATL1oN5oZMImkwbgSHEkp8xG1ofEASBypze01W1Tqto8t+yo6gsp69wzCZBlxldsvPpvFZW55Jq0Rn+UnA== + +"@swc/core-linux-x64-musl@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.4.17.tgz#2179b9536235a3b02a46997ddb1c178dfadf1667" + integrity sha512-zQNPXAXn3nmPqv54JVEN8k2JMEcMTQ6veVuU0p5O+A7KscJq+AGle/7ZQXzpXSfUCXlLMX4wvd+rwfGhh3J4cw== + +"@swc/core-win32-arm64-msvc@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.4.17.tgz#3004a431c836c6b16b4660ea2425dde467a8ee36" + integrity sha512-z86n7EhOwyzxwm+DLE5NoLkxCTme2lq7QZlDjbQyfCxOt6isWz8rkW5QowTX8w9Rdmk34ncrjSLvnHOeLY17+w== + +"@swc/core-win32-ia32-msvc@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.4.17.tgz#59155485d5307fb2a267e5acb215e0f440b6f48f" + integrity sha512-JBwuSTJIgiJJX6wtr4wmXbfvOswHFj223AumUrK544QV69k60FJ9q2adPW9Csk+a8wm1hLxq4HKa2K334UHJ/g== + +"@swc/core-win32-x64-msvc@1.4.17": + version "1.4.17" + resolved 
"https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.4.17.tgz#b98f25fc277fb0e319f25f9fd00a82023662716b" + integrity sha512-jFkOnGQamtVDBm3MF5Kq1lgW8vx4Rm1UvJWRUfg+0gx7Uc3Jp3QMFeMNw/rDNQYRDYPG3yunCC+2463ycd5+dg== + +"@swc/core@1.4.17": + version "1.4.17" + resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.4.17.tgz#3ea4180fa5c54282b284006a6de1263ef1cf887f" + integrity sha512-tq+mdWvodMBNBBZbwFIMTVGYHe9N7zvEaycVVjfvAx20k1XozHbHhRv+9pEVFJjwRxLdXmtvFZd3QZHRAOpoNQ== dependencies: "@swc/counter" "^0.1.2" "@swc/types" "^0.1.5" optionalDependencies: - "@swc/core-darwin-arm64" "1.4.14" - "@swc/core-darwin-x64" "1.4.14" - "@swc/core-linux-arm-gnueabihf" "1.4.14" - "@swc/core-linux-arm64-gnu" "1.4.14" - "@swc/core-linux-arm64-musl" "1.4.14" - "@swc/core-linux-x64-gnu" "1.4.14" - "@swc/core-linux-x64-musl" "1.4.14" - "@swc/core-win32-arm64-msvc" "1.4.14" - "@swc/core-win32-ia32-msvc" "1.4.14" - "@swc/core-win32-x64-msvc" "1.4.14" + "@swc/core-darwin-arm64" "1.4.17" + "@swc/core-darwin-x64" "1.4.17" + "@swc/core-linux-arm-gnueabihf" "1.4.17" + "@swc/core-linux-arm64-gnu" "1.4.17" + "@swc/core-linux-arm64-musl" "1.4.17" + "@swc/core-linux-x64-gnu" "1.4.17" + "@swc/core-linux-x64-musl" "1.4.17" + "@swc/core-win32-arm64-msvc" "1.4.17" + "@swc/core-win32-ia32-msvc" "1.4.17" + "@swc/core-win32-x64-msvc" "1.4.17" "@swc/counter@^0.1.2", "@swc/counter@^0.1.3": version "0.1.3" From b102aebb0a9eb05e94544fd1f15f85f660fc89be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 23:17:41 +0900 Subject: [PATCH 048/261] MINOR: [JS] Bump @typescript-eslint/parser from 7.7.0 to 7.8.0 in /js (#41522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@ typescript-eslint/parser](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/parser) from 7.7.0 to 7.8.0.
Release notes

Sourced from @​typescript-eslint/parser's releases.

v7.8.0

7.8.0 (2024-04-29)

🚀 Features

  • rule-tester: assert suggestion messages are unique (#8995)
  • typescript-estree: add maximumDefaultProjectFileMatchCount and wide allowDefaultProjectForFiles glob restrictions (#8925)

🩹 Fixes

  • eslint-plugin: [no-unsafe-argument] handle tagged templates (#8746)
  • eslint-plugin: [prefer-optional-chain] suggests optional chaining during strict null equality check (#8717)
  • eslint-plugin: [consistent-type-assertions] handle tagged templates (#8993)
  • eslint-plugin: [no-unsafe-return] handle union types (#9001)
  • eslint-plugin: [no-unused-vars] clear error report range (#8640)
  • utils: export ESLint backwards-compat functions (#8976)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

v7.7.1

7.7.1 (2024-04-22)

🩹 Fixes

  • eslint-plugin: [no-unsafe-assignment] handle shorthand property assignment (#8800)
  • eslint-plugin: [explicit-function-return-type] fix checking wrong ancestor's return type (#8809)
  • eslint-plugin: [prefer-optional-chain] only look at left operand for requireNullish (#8559)
  • eslint-plugin: [no-for-in-array] refine report location (#8874)
  • eslint-plugin: [no-unnecessary-type-assertion] allow non-null assertion for void type (#8912)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

Changelog

Sourced from @​typescript-eslint/parser's changelog.

7.8.0 (2024-04-29)

This was a version bump only for parser to align it with other projects; there were no code changes.

You can read about our versioning strategy and releases on our website.

7.7.1 (2024-04-22)

This was a version bump only for parser to align it with other projects; there were no code changes.

You can read about our versioning strategy and releases on our website.

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ typescript-eslint/parser&package-manager=npm_and_yarn&previous-version=7.7.0&new-version=7.8.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 53 +++++++++---------------------------------------- 2 files changed, 10 insertions(+), 45 deletions(-) diff --git a/js/package.json b/js/package.json index 08f2fe83d13e8..f6d3c70fca6aa 100644 --- a/js/package.json +++ b/js/package.json @@ -73,7 +73,7 @@ "@types/jest": "29.5.12", "@types/multistream": "4.1.3", "@typescript-eslint/eslint-plugin": "7.8.0", - "@typescript-eslint/parser": "7.7.0", + "@typescript-eslint/parser": "7.8.0", "async-done": "2.0.0", "benny": "3.7.1", "cross-env": "7.0.3", diff --git a/js/yarn.lock b/js/yarn.lock index 47674bd8b2168..9885be2ba0643 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1453,15 +1453,15 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/parser@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.7.0.tgz#6b1b3ce76c5de002c43af8ae933613b0f2b4bcc6" - integrity sha512-fNcDm3wSwVM8QYL4HKVBggdIPAy9Q41vcvC/GtDobw3c4ndVT3K6cqudUmjHPw8EAp4ufax0o58/xvWaP2FmTg== - dependencies: - "@typescript-eslint/scope-manager" "7.7.0" - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/typescript-estree" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" +"@typescript-eslint/parser@7.8.0": + version "7.8.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.8.0.tgz#1e1db30c8ab832caffee5f37e677dbcb9357ddc8" + integrity sha512-KgKQly1pv0l4ltcftP59uQZCi4HUYswCLbTqVZEJu7uLX8CTLyswqMLqLN+2QFz4jCptqWVV4SB7vdxcH2+0kQ== + dependencies: + "@typescript-eslint/scope-manager" "7.8.0" + "@typescript-eslint/types" "7.8.0" + "@typescript-eslint/typescript-estree" "7.8.0" + "@typescript-eslint/visitor-keys" "7.8.0" debug "^4.3.4" "@typescript-eslint/scope-manager@5.62.0": @@ -1472,14 +1472,6 @@ "@typescript-eslint/types" "5.62.0" "@typescript-eslint/visitor-keys" "5.62.0" -"@typescript-eslint/scope-manager@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.7.0.tgz#3f0db079b275bb8b0cb5be7613fb3130cfb5de77" - integrity sha512-/8INDn0YLInbe9Wt7dK4cXLDYp0fNHP5xKLHvZl3mOT5X17rK/YShXaiNmorl+/U4VKCVIjJnx4Ri5b0y+HClw== - dependencies: - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" - "@typescript-eslint/scope-manager@7.8.0": version "7.8.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.8.0.tgz#bb19096d11ec6b87fb6640d921df19b813e02047" @@ -1503,11 +1495,6 @@ resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.62.0.tgz#258607e60effa309f067608931c3df6fed41fd2f" integrity sha512-87NVngcbVXUahrRTqIK27gD2t5Cu1yuCXxbLcFtCzZGlfyVWWh8mLHkoxzjsB6DDNnvdL+fW8MiwPEJyGJQDgQ== -"@typescript-eslint/types@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.7.0.tgz#23af4d24bf9ce15d8d301236e3e3014143604f27" - integrity sha512-G01YPZ1Bd2hn+KPpIbrAhEWOn5lQBrjxkzHkWvP6NucMXFtfXoevK82hzQdpfuQYuhkvFDeQYbzXCjR1z9Z03w== - "@typescript-eslint/types@7.8.0": version "7.8.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.8.0.tgz#1fd2577b3ad883b769546e2d1ef379f929a7091d" @@ -1526,20 +1513,6 @@ semver "^7.3.7" tsutils "^3.21.0" -"@typescript-eslint/typescript-estree@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.7.0.tgz#b5dd6383b4c6a852d7b256a37af971e8982be97f" - 
integrity sha512-8p71HQPE6CbxIBy2kWHqM1KGrC07pk6RJn40n0DSc6bMOBBREZxSDJ+BmRzc8B5OdaMh1ty3mkuWRg4sCFiDQQ== - dependencies: - "@typescript-eslint/types" "7.7.0" - "@typescript-eslint/visitor-keys" "7.7.0" - debug "^4.3.4" - globby "^11.1.0" - is-glob "^4.0.3" - minimatch "^9.0.4" - semver "^7.6.0" - ts-api-utils "^1.3.0" - "@typescript-eslint/typescript-estree@7.8.0": version "7.8.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.8.0.tgz#b028a9226860b66e623c1ee55cc2464b95d2987c" @@ -1589,14 +1562,6 @@ "@typescript-eslint/types" "5.62.0" eslint-visitor-keys "^3.3.0" -"@typescript-eslint/visitor-keys@7.7.0": - version "7.7.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.7.0.tgz#950148cf1ac11562a2d903fdf7acf76714a2dc9e" - integrity sha512-h0WHOj8MhdhY8YWkzIF30R379y0NqyOHExI9N9KCzvmu05EgG4FumeYa3ccfKUSphyWkWQE1ybVrgz/Pbam6YA== - dependencies: - "@typescript-eslint/types" "7.7.0" - eslint-visitor-keys "^3.4.3" - "@typescript-eslint/visitor-keys@7.8.0": version "7.8.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.8.0.tgz#7285aab991da8bee411a42edbd5db760d22fdd91" From 5959024e9c1094c096dee569c0a5016496b24b58 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 23:18:13 +0900 Subject: [PATCH 049/261] MINOR: [JS] Bump eslint-plugin-jest from 27.9.0 to 28.4.0 in /js (#41524) Bumps [eslint-plugin-jest](https://github.com/jest-community/eslint-plugin-jest) from 27.9.0 to 28.4.0.
Release notes

Sourced from eslint-plugin-jest's releases.

v28.4.0

28.4.0 (2024-05-03)

Features

  • valid-expect: supporting automatically fixing missing await in some cases (#1574) (a407098)

v28.3.0

28.3.0 (2024-04-27)

Features

  • prefer importing jest globals for specific types (#1568) (c464ae3)

v28.2.0

28.2.0 (2024-04-06)

Features

  • support providing aliases for @jest/globals package (#1543) (744d4f6)

v28.1.1

28.1.1 (2024-04-06)

Bug Fixes

  • max-expects: properly reset counter when exiting a test case (#1550) (b4b7cbc)

v28.1.0

28.1.0 (2024-04-06)

Features

v28.0.0

28.0.0 (2024-04-06)

Bug Fixes

  • allow ESLint 9 as peer dependency (#1547) (3c5e167)
  • drop support for Node 19 (#1548) (c87e388)
  • no-large-snapshots: avoid instanceof RegExp check for ESLint v9 compatibility (#1542) (af4a9c9)

... (truncated)

Changelog

Sourced from eslint-plugin-jest's changelog.

28.4.0 (2024-05-03)

Features

  • valid-expect: supporting automatically fixing missing await in some cases (#1574) (a407098)

28.3.0 (2024-04-27)

Features

  • prefer importing jest globals for specific types (#1568) (c464ae3)

28.2.0 (2024-04-06)

Features

  • support providing aliases for @jest/globals package (#1543) (744d4f6)

28.1.1 (2024-04-06)

Bug Fixes

  • max-expects: properly reset counter when exiting a test case (#1550) (b4b7cbc)

28.1.0 (2024-04-06)

Features

28.0.0 (2024-04-06)

Bug Fixes

  • allow ESLint 9 as peer dependency (#1547) (3c5e167)
  • drop support for Node 19 (#1548) (c87e388)
  • no-large-snapshots: avoid instanceof RegExp check for ESLint v9 compatibility (#1542) (af4a9c9)

Features

... (truncated)

Commits
  • 27f7e74 chore(release): 28.4.0 [skip ci]
  • a407098 feat(valid-expect): supporting automatically fixing missing await in some c...
  • f47cc3c refactor: remove unneeded `as const`s (#1578)
  • 6c1f921 refactor(prefer-lowercase-title): remove unneeded cast (#1577)
  • aac5f03 refactor(prefer-importing-jest-globals): use AST_NODE_TYPES constant instea...
  • df3202f chore(deps): update yarn to v3.8.2 (#1575)
  • 8001fe7 chore(deps): lock file maintenance
  • bd6b918 chore(release): 28.3.0 [skip ci]
  • c464ae3 feat: prefer importing jest globals for specific types (#1568)
  • 2f21f33 refactor(expect-expect): remove unneeded array (#1571)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=eslint-plugin-jest&package-manager=npm_and_yarn&previous-version=27.9.0&new-version=28.4.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 115 +++++++++++++++++++++++------------------------- 2 files changed, 56 insertions(+), 61 deletions(-) diff --git a/js/package.json b/js/package.json index f6d3c70fca6aa..7ed0daddfada0 100644 --- a/js/package.json +++ b/js/package.json @@ -82,7 +82,7 @@ "esbuild": "0.20.2", "esbuild-plugin-alias": "0.2.1", "eslint": "8.57.0", - "eslint-plugin-jest": "27.9.0", + "eslint-plugin-jest": "28.4.0", "eslint-plugin-unicorn": "52.0.0", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", "gulp": "4.0.2", diff --git a/js/yarn.lock b/js/yarn.lock index 9885be2ba0643..dbf79115d6412 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1348,7 +1348,7 @@ expect "^29.0.0" pretty-format "^29.0.0" -"@types/json-schema@*", "@types/json-schema@^7.0.15", "@types/json-schema@^7.0.8", "@types/json-schema@^7.0.9": +"@types/json-schema@*", "@types/json-schema@^7.0.12", "@types/json-schema@^7.0.15", "@types/json-schema@^7.0.8": version "7.0.15" resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== @@ -1399,7 +1399,7 @@ resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" integrity sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q== -"@types/semver@^7.3.12", "@types/semver@^7.5.8": +"@types/semver@^7.5.0", "@types/semver@^7.5.8": version "7.5.8" resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e" integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ== @@ -1464,13 +1464,13 @@ "@typescript-eslint/visitor-keys" "7.8.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-5.62.0.tgz#d9457ccc6a0b8d6b37d0eb252a23022478c5460c" - integrity sha512-VXuvVvZeQCQb5Zgf4HAxc04q5j+WrNAtNh9OwCsCgpKqESMTu3tF/jhZ3xG6T4NZwWl65Bg8KuS2uEvhSfLl0w== +"@typescript-eslint/scope-manager@6.21.0": + version "6.21.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-6.21.0.tgz#ea8a9bfc8f1504a6ac5d59a6df308d3a0630a2b1" + integrity sha512-OwLUIWZJry80O99zvqXVEioyniJMa+d2GrqpUTqi5/v5D5rOrppJVBPa0yKCblcigC0/aYAzxxqQ1B+DS2RYsg== dependencies: - "@typescript-eslint/types" "5.62.0" - "@typescript-eslint/visitor-keys" "5.62.0" + "@typescript-eslint/types" "6.21.0" + "@typescript-eslint/visitor-keys" "6.21.0" "@typescript-eslint/scope-manager@7.8.0": version "7.8.0" @@ -1490,28 +1490,29 @@ debug "^4.3.4" ts-api-utils "^1.3.0" -"@typescript-eslint/types@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-5.62.0.tgz#258607e60effa309f067608931c3df6fed41fd2f" - integrity sha512-87NVngcbVXUahrRTqIK27gD2t5Cu1yuCXxbLcFtCzZGlfyVWWh8mLHkoxzjsB6DDNnvdL+fW8MiwPEJyGJQDgQ== +"@typescript-eslint/types@6.21.0": + version "6.21.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-6.21.0.tgz#205724c5123a8fef7ecd195075fa6e85bac3436d" + integrity sha512-1kFmZ1rOm5epu9NZEZm1kckCDGj5UJEf7P1kliH4LKu/RkwpsfqqGmY2OOcUs18lSlQBKLDYBOGxRVtrMN5lpg== "@typescript-eslint/types@7.8.0": 
version "7.8.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.8.0.tgz#1fd2577b3ad883b769546e2d1ef379f929a7091d" integrity sha512-wf0peJ+ZGlcH+2ZS23aJbOv+ztjeeP8uQ9GgwMJGVLx/Nj9CJt17GWgWWoSmoRVKAX2X+7fzEnAjxdvK2gqCLw== -"@typescript-eslint/typescript-estree@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-5.62.0.tgz#7d17794b77fabcac615d6a48fb143330d962eb9b" - integrity sha512-CmcQ6uY7b9y694lKdRB8FEel7JbU/40iSAPomu++SjLMntB+2Leay2LO6i8VnJk58MtE9/nQSFIH6jpyRWyYzA== +"@typescript-eslint/typescript-estree@6.21.0": + version "6.21.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-6.21.0.tgz#c47ae7901db3b8bddc3ecd73daff2d0895688c46" + integrity sha512-6npJTkZcO+y2/kr+z0hc4HwNfrrP4kNYh57ek7yCNlrBjWQ1Y0OS7jiZTkgumrvkX5HkEKXFZkkdFNkaW2wmUQ== dependencies: - "@typescript-eslint/types" "5.62.0" - "@typescript-eslint/visitor-keys" "5.62.0" + "@typescript-eslint/types" "6.21.0" + "@typescript-eslint/visitor-keys" "6.21.0" debug "^4.3.4" globby "^11.1.0" is-glob "^4.0.3" - semver "^7.3.7" - tsutils "^3.21.0" + minimatch "9.0.3" + semver "^7.5.4" + ts-api-utils "^1.0.1" "@typescript-eslint/typescript-estree@7.8.0": version "7.8.0" @@ -1540,27 +1541,26 @@ "@typescript-eslint/typescript-estree" "7.8.0" semver "^7.6.0" -"@typescript-eslint/utils@^5.10.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-5.62.0.tgz#141e809c71636e4a75daa39faed2fb5f4b10df86" - integrity sha512-n8oxjeb5aIbPFEtmQxQYOLI0i9n5ySBEY/ZEHHZqKQSFnxio1rv6dthascc9dLuwrL0RC5mPCxB7vnAVGAYWAQ== +"@typescript-eslint/utils@^6.0.0": + version "6.21.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-6.21.0.tgz#4714e7a6b39e773c1c8e97ec587f520840cd8134" + integrity sha512-NfWVaC8HP9T8cbKQxHcsJBY5YE1O33+jpMwN45qzWWaPDZgLIbo12toGMWnmhvCpd3sIxkpDw3Wv1B3dYrbDQQ== dependencies: - "@eslint-community/eslint-utils" "^4.2.0" - "@types/json-schema" "^7.0.9" - "@types/semver" "^7.3.12" - "@typescript-eslint/scope-manager" "5.62.0" - "@typescript-eslint/types" "5.62.0" - "@typescript-eslint/typescript-estree" "5.62.0" - eslint-scope "^5.1.1" - semver "^7.3.7" - -"@typescript-eslint/visitor-keys@5.62.0": - version "5.62.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-5.62.0.tgz#2174011917ce582875954ffe2f6912d5931e353e" - integrity sha512-07ny+LHRzQXepkGg6w0mFY41fVUNBrL2Roj/++7V1txKugfjm/Ci/qSND03r2RhlJhJYMcTn9AhhSSqQp0Ysyw== - dependencies: - "@typescript-eslint/types" "5.62.0" - eslint-visitor-keys "^3.3.0" + "@eslint-community/eslint-utils" "^4.4.0" + "@types/json-schema" "^7.0.12" + "@types/semver" "^7.5.0" + "@typescript-eslint/scope-manager" "6.21.0" + "@typescript-eslint/types" "6.21.0" + "@typescript-eslint/typescript-estree" "6.21.0" + semver "^7.5.4" + +"@typescript-eslint/visitor-keys@6.21.0": + version "6.21.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-6.21.0.tgz#87a99d077aa507e20e238b11d56cc26ade45fe47" + integrity sha512-JJtkDduxLi9bivAB+cYOVMtbkqdPOhZ+ZI5LC47MIRrDV4Yn2o+ZnW10Nkmr28xRpSpdJ6Sm42Hjf2+REYXm0A== + dependencies: + "@typescript-eslint/types" "6.21.0" + eslint-visitor-keys "^3.4.1" "@typescript-eslint/visitor-keys@7.8.0": version "7.8.0" @@ -3050,12 +3050,12 @@ escape-string-regexp@^4.0.0: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" 
integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== -eslint-plugin-jest@27.9.0: - version "27.9.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-27.9.0.tgz#7c98a33605e1d8b8442ace092b60e9919730000b" - integrity sha512-QIT7FH7fNmd9n4se7FFKHbsLKGQiw885Ds6Y/sxKgCZ6natwCsXdgPOADnYVxN2QrRweF0FZWbJ6S7Rsn7llug== +eslint-plugin-jest@28.4.0: + version "28.4.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-28.4.0.tgz#213be88f799a35ca9d63ce1a30081bb32b8da765" + integrity sha512-ORVHiFPC8RQxHLyQJ37MxNilK9k+cPzjHz65T8gAbpYZunGutXvKqwfM3WXBCvFDF1QBeYJJu9LB/i5cuXBs+g== dependencies: - "@typescript-eslint/utils" "^5.10.0" + "@typescript-eslint/utils" "^6.0.0" eslint-plugin-unicorn@52.0.0: version "52.0.0" @@ -3079,7 +3079,7 @@ eslint-plugin-unicorn@52.0.0: semver "^7.5.4" strip-indent "^3.0.0" -eslint-scope@5.1.1, eslint-scope@^5.1.1: +eslint-scope@5.1.1: version "5.1.1" resolved "https://registry.yarnpkg.com/eslint-scope/-/eslint-scope-5.1.1.tgz#e786e59a66cb92b3f6c1fb0d508aab174848f48c" integrity sha512-2NxwbF/hZ0KpepYN0cNbo+FN6XoK7GaHlQhgx/hIZl6Va0bF45RQOOwhLIy8lQDbuCiadSLCBnH2CFYquit5bw== @@ -5299,6 +5299,13 @@ min-indent@^1.0.0, min-indent@^1.0.1: resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== +minimatch@9.0.3: + version "9.0.3" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.3.tgz#a6e00c3de44c3a542bfaae70abfc22420a6da825" + integrity sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg== + dependencies: + brace-expansion "^2.0.1" + minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" @@ -6278,7 +6285,7 @@ semver@^6.3.0, semver@^6.3.1: resolved "https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== -semver@^7.3.4, semver@^7.3.7, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0: +semver@^7.3.4, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0: version "7.6.0" resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.0.tgz#1a46a4db4bffcccd97b743b5005c8325f23d4e2d" integrity sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg== @@ -6916,7 +6923,7 @@ trim-newlines@^4.0.2: resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.1.1.tgz#28c88deb50ed10c7ba6dc2474421904a00139125" integrity sha512-jRKj0n0jXWo6kh62nA5TEh3+4igKDXLvzBJcPpiizP7oOolUrYIxmVBG9TOtHYFHoddUk6YvAkGeGoSVTXfQXQ== -ts-api-utils@^1.3.0: +ts-api-utils@^1.0.1, ts-api-utils@^1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/ts-api-utils/-/ts-api-utils-1.3.0.tgz#4b490e27129f1e8e686b45cc4ab63714dc60eea1" integrity sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ== @@ -6954,23 +6961,11 @@ ts-node@10.9.2: v8-compile-cache-lib "^3.0.1" yn "3.1.1" -tslib@^1.8.1: - version "1.14.1" - resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" - integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== - 
tslib@^2.0.0, tslib@^2.1.0, tslib@^2.3.0, tslib@^2.4.0, tslib@^2.6.2: version "2.6.2" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== -tsutils@^3.21.0: - version "3.21.0" - resolved "https://registry.yarnpkg.com/tsutils/-/tsutils-3.21.0.tgz#b48717d394cea6c1e096983eed58e9d61715b623" - integrity sha512-mHKK3iUXL+3UF6xL5k0PEhKRUBKPBCv/+RkEOpjRWxxx27KKRBmmA60A9pgOUvMi8GKhRMPEmjBRPzs2W7O1OA== - dependencies: - tslib "^1.8.1" - type-check@^0.4.0, type-check@~0.4.0: version "0.4.0" resolved "https://registry.yarnpkg.com/type-check/-/type-check-0.4.0.tgz#07b8203bfa7056c0657050e3ccd2c37730bab8f1" From c8bf9753d7423c3020ac66b97caf2099cda35110 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 23:18:46 +0900 Subject: [PATCH 050/261] MINOR: [JS] Bump @types/node from 20.12.7 to 20.12.8 in /js (#41526) Bumps [@ types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) from 20.12.7 to 20.12.8.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ types/node&package-manager=npm_and_yarn&previous-version=20.12.7&new-version=20.12.8)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index dbf79115d6412..abde21f603382 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1370,10 +1370,10 @@ dependencies: "@types/node" "*" -"@types/node@*": - version "20.12.3" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.3.tgz#d6658c2c7776c1cad93534bb45428195ed840c65" - integrity sha512-sD+ia2ubTeWrOu+YMF+MTAB7E+O7qsMqAbMfW7DG3K1URwhZ5hN1pLlRVGbf4wDFzSfikL05M17EyorS86jShw== +"@types/node@*", "@types/node@^20.12.7": + version "20.12.8" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.8.tgz#35897bf2bfe3469847ab04634636de09552e8256" + integrity sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w== dependencies: undici-types "~5.26.4" @@ -1382,13 +1382,6 @@ resolved "https://registry.yarnpkg.com/@types/node/-/node-13.13.52.tgz#03c13be70b9031baaed79481c0c0cfb0045e53f7" integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ== -"@types/node@^20.12.7": - version "20.12.7" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.7.tgz#04080362fa3dd6c5822061aa3124f5c152cff384" - integrity sha512-wq0cICSkRLVaf3UGLMGItu/PtdY7oaXaI/RVU+xliKVOtRna3PRY57ZDfztpDL0n11vfymMUnXv8QwYCO7L1wg== - dependencies: - undici-types "~5.26.4" - "@types/normalize-package-data@^2.4.0": version "2.4.4" resolved "https://registry.yarnpkg.com/@types/normalize-package-data/-/normalize-package-data-2.4.4.tgz#56e2cc26c397c038fab0e3a917a12d5c5909e901" From cc9e65fb80db2e2d706b1776c52a88bb1c983533 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 07:58:31 -0700 Subject: [PATCH 051/261] MINOR: [C#] Bump Grpc.Tools from 2.62.0 to 2.63.0 in /csharp (#41523) Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.62.0 to 2.63.0.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Grpc.Tools&package-manager=nuget&previous-version=2.62.0&new-version=2.63.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj | 2 +- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index 9a3cf190cc376..7314b8207fef6 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 04b8a7dc734f0..780da3ad39081 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -7,7 +7,7 @@ - + From 56437409d1f99852a6b9486b1620c3ed12ff3e5c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 12:36:02 -0400 Subject: [PATCH 052/261] MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.20.1 to 2.21.1 in /go (#41512) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/hamba/avro/v2](https://github.com/hamba/avro) from 2.20.1 to 2.21.1.
Release notes

Sourced from github.com/hamba/avro/v2's releases.

v2.21.1

What's Changed

New Contributors

Full Changelog: https://github.com/hamba/avro/compare/v2.21.0...v2.21.1

v2.21.0

What's Changed

New Contributors

Full Changelog: https://github.com/hamba/avro/compare/v2.20.1...v2.21.0

Commits
  • 5dde47b fix: support 32bit builds (#390)
  • ad836ba chore: bump golangci/golangci-lint-action from 4 to 5 in the all group (#388)
  • e42dea1 fix: Union Decoder uses readInt (#387)
  • 2461d45 fix: reader int/long setting head > tail (#385)
  • 84f9b10 fix: readByte returns errors on unexpected EOF (#383)
  • 141e857 fix: reader returns errors on unexpected EOF (#382)
  • f138d7f fix: handle short read errors on arrays and maps (#379)
  • b43fe48 feat: add max slice alloc size config (#376)
  • 0b21284 Check for max allocation (#374)
  • 7a2eb5f feat: support slices for nullable unions (#372)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/hamba/avro/v2&package-manager=go_modules&previous-version=2.20.1&new-version=2.21.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 79c3cc3981231..547127a470a61 100644 --- a/go/go.mod +++ b/go/go.mod @@ -47,7 +47,7 @@ require ( require ( github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.20.1 + github.com/hamba/avro/v2 v2.21.1 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 ) diff --git a/go/go.sum b/go/go.sum index e8c2fde15181a..29490b9da2ca3 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.20.1 h1:3WByQiVn7wT7d27WQq6pvBRC00FVOrniP6u67FLA/2E= -github.com/hamba/avro/v2 v2.20.1/go.mod h1:xHiKXbISpb3Ovc809XdzWow+XGTn+Oyf/F9aZbTLAig= +github.com/hamba/avro/v2 v2.21.1 h1:400/jTdLWQ3ib58y83VXlTJKijRouYQszY1SO0cMGt4= +github.com/hamba/avro/v2 v2.21.1/go.mod h1:ouJ4PkiAEP49u0lAtQyd5Gv04MehKj+7lXwD3zpLpY0= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= From 2b0647230536ffb2fd2d59af11acdb4674ed44c3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 3 May 2024 12:36:25 -0400 Subject: [PATCH 053/261] MINOR: [Go] Bump google.golang.org/protobuf from 1.33.0 to 1.34.0 in /go (#41513) Bumps google.golang.org/protobuf from 1.33.0 to 1.34.0. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=google.golang.org/protobuf&package-manager=go_modules&previous-version=1.33.0&new-version=1.34.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 547127a470a61..35fd9b9915c0b 100644 --- a/go/go.mod +++ b/go/go.mod @@ -41,7 +41,7 @@ require ( golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.33.0 + google.golang.org/protobuf v1.34.0 modernc.org/sqlite v1.29.6 ) diff --git a/go/go.sum b/go/go.sum index 29490b9da2ca3..bf33fed6c4c97 100644 --- a/go/go.sum +++ b/go/go.sum @@ -138,8 +138,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= -google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= +google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 493d456d8c6eff21659dc87a2dee32abd0be5ffd Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 3 May 2024 14:43:51 -0400 Subject: [PATCH 054/261] GH-41507: [MATLAB][CI] Pass `strict: true` to `matlab-actions/run-tests@v2` (#41530) ### Rationale for this change The MATLAB CI jobs should fail if any one of the unit tests issues a `warning`. Currently, the MATLAB CI jobs only fail if there is a verification failure. Passing the argument `strict: true` to `matlab-actions/run-tests@ v2` will ensure MATLAB jobs will fail if a test warning is issued. See the [`matlab-actions/run-tests@ v2` documentation](https://github.com/matlab-actions/run-tests/?tab=readme-ov-file#run-matlab-tests) for more details. ### What changes are included in this PR? 1. Pass `strict: true` argument to `matlab-actions/setup-matlab@ v2` ### Are these changes tested? N/A (relying on existing tests). ### Are there any user-facing changes? No. 
* GitHub Issue: #41507 Authored-by: Sarah Gilmore Signed-off-by: Sarah Gilmore --- .github/workflows/matlab.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 8a0de8a365661..2ae33d1e8d6c6 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -96,6 +96,7 @@ jobs: uses: matlab-actions/run-tests@v2 with: select-by-folder: matlab/test + strict: true macos: name: AMD64 macOS 12 MATLAB runs-on: macos-12 @@ -135,7 +136,8 @@ jobs: MATLABPATH: matlab/install/arrow_matlab uses: matlab-actions/run-tests@v2 with: - select-by-folder: matlab/test + select-by-folder: matlab/test + strict: true windows: name: AMD64 Windows 2022 MATLAB runs-on: windows-2022 @@ -181,4 +183,5 @@ jobs: MATLABPATH: matlab/install/arrow_matlab uses: matlab-actions/run-tests@v2 with: - select-by-folder: matlab/test + select-by-folder: matlab/test + strict: true From e7f5f810ac1235ee835a6ec62fb25d2f05e5d542 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Fri, 3 May 2024 14:46:01 -0400 Subject: [PATCH 055/261] GH-41531: [MATLAB][Packaging] Bump `matlab-actions/setup-matlab` and `matlab-actions/run-command` from `v1` to `v2` in the `crossbow` job (#41532) ### Rationale for this change I noticed `dev/tasks/matlab/github.yml` is still using `matlab-actions/setup-matlab@ v1`, which is no longer supported. See this [log](https://github.com/ursacomputing/crossbow/actions/runs/8928507510/job/24524230971#step:4:11) file. We should use `matlab-actions/setup-matlab@ v2` instead. We should also use `matlab-actions/run-command@ v2` instead of `matlab-actions/run-command@ v1`. ### What changes are included in this PR? 1. Replaced `matlab-actions/setup-matlab@ v1` with `matlab-actions/setup-matlab@ v2` in `dev/tasks/matlab/github.yml` 2. Replaced `matlab-actions/run-command@ v1` with `matlab-actions/run-command@ v2` in `dev/tasks/matlab/github.yml` ### Are these changes tested? N/A (I will trigger a crossbow job to verify the packaging workflow works as expected still). ### Are there any user-facing changes? No. 
* GitHub Issue: #41531 Authored-by: Sarah Gilmore Signed-off-by: Sarah Gilmore --- dev/tasks/matlab/github.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/dev/tasks/matlab/github.yml b/dev/tasks/matlab/github.yml index 7840fd176705c..963c85f6e11bf 100644 --- a/dev/tasks/matlab/github.yml +++ b/dev/tasks/matlab/github.yml @@ -29,7 +29,7 @@ jobs: - name: Install ninja-build run: sudo apt-get update && sudo apt-get install ninja-build - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2024a - name: Build MATLAB Interface @@ -66,7 +66,7 @@ jobs: - name: Install ninja-build run: brew install ninja - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2024a - name: Build MATLAB Interface @@ -101,7 +101,7 @@ jobs: steps: {{ macros.github_checkout_arrow()|indent }} - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2024a - name: Install sccache @@ -147,7 +147,7 @@ jobs: cp arrow/LICENSE.txt arrow/matlab/install/arrow_matlab/LICENSE.txt cp arrow/NOTICE.txt arrow/matlab/install/arrow_matlab/NOTICE.txt - name: Install MATLAB - uses: matlab-actions/setup-matlab@v1 + uses: matlab-actions/setup-matlab@v2 with: release: R2024a - name: Run commands @@ -156,7 +156,7 @@ jobs: ARROW_MATLAB_TOOLBOX_FOLDER: arrow/matlab/install/arrow_matlab ARROW_MATLAB_TOOLBOX_OUTPUT_FOLDER: artifacts/matlab-dist ARROW_MATLAB_TOOLBOX_VERSION: {{ arrow.no_rc_version }} - uses: matlab-actions/run-command@v1 + uses: matlab-actions/run-command@v2 with: command: packageMatlabInterface {{ macros.github_upload_releases(["artifacts/matlab-dist/*.mltbx"])|indent }} From 7cd9c6fbd313c9afa0673d85415a84fd199951c9 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Fri, 3 May 2024 19:17:47 -0400 Subject: [PATCH 056/261] GH-41534: [Go] Fix mem leak importing 0 length C Array (#41535) ### What changes are included in this PR? If the `imp.alloc.bufCount` is 0, indicating we did not import any buffers from the provided C ArrowArray object, then we are free to not only call the release callback (which we already do) but also we need to free the temp ArrowArray we allocated to move the source to. This was uncovered by https://github.com/apache/arrow-adbc/pull/1808 * GitHub Issue: #41534 Authored-by: Matt Topol Signed-off-by: David Li --- go/arrow/cdata/cdata.go | 1 + 1 file changed, 1 insertion(+) diff --git a/go/arrow/cdata/cdata.go b/go/arrow/cdata/cdata.go index b86898277bf47..00d1f351eaf11 100644 --- a/go/arrow/cdata/cdata.go +++ b/go/arrow/cdata/cdata.go @@ -448,6 +448,7 @@ func (imp *cimporter) doImportArr(src *CArrowArray) error { defer func() { if imp.alloc.bufCount == 0 { C.ArrowArrayRelease(imp.arr) + C.free(unsafe.Pointer(imp.arr)) } }() From 2ba129f0a28e0ec93c7f383a64c54bef24e26f59 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 6 May 2024 06:56:08 +0900 Subject: [PATCH 057/261] MINOR: [JS] Bump @swc/helpers from 0.5.10 to 0.5.11 in /js (#41486) Bumps [@ swc/helpers](https://github.com/swc-project/swc) from 0.5.10 to 0.5.11.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ swc/helpers&package-manager=npm_and_yarn&previous-version=0.5.10&new-version=0.5.11)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index abde21f603382..eb7ed33520f0a 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1199,9 +1199,9 @@ integrity sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ== "@swc/helpers@^0.5.10": - version "0.5.10" - resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.10.tgz#5720082d007197cd85743dd599198097126a3f6e" - integrity sha512-CU+RF9FySljn7HVSkkjiB84hWkvTaI3rtLvF433+jRSBL2hMu3zX5bGhHS8C80SM++h4xy8hBSnUHFQHmRXSBw== + version "0.5.11" + resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7" + integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A== dependencies: tslib "^2.4.0" From 4cf44b4bc3ab053b03c937d3327d43c105790462 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?JB=20Onofr=C3=A9?= Date: Mon, 6 May 2024 05:39:34 +0200 Subject: [PATCH 058/261] MINOR: add jbonofre in collaborators list (#41528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Add `jbonofre` to the ASF collaborators list. ### What changes are included in this PR? Update `.asf.yaml`. ### Are these changes tested? NA ### Are there any user-facing changes? No Authored-by: JB Onofré Signed-off-by: Jacob Wujciak-Jens --- .asf.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.asf.yaml b/.asf.yaml index 40b961dc6e885..760a830ef98c7 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -23,6 +23,7 @@ github: - benibus - danepitkin - davisusanibar + - jbonofre - js8544 - vibhatha From d10ebf055a393c94a693097db1dca08ff86745bd Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Mon, 6 May 2024 09:28:22 -0400 Subject: [PATCH 059/261] MINOR: [R] fix no visible global function definition: left_join (#41542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Followup to #41350, fixes a check NOTE that caused. ### What changes are included in this PR? `dplyr::` in two places. ### Are these changes tested? Check will be clean. ### Are there any user-facing changes? 
🙅 --- r/R/dplyr-mutate.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 880f7799e6316..72882b6afd964 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -84,12 +84,12 @@ mutate.arrow_dplyr_query <- function(.data, agg_query$aggregations <- mask$.aggregations agg_query <- collapse.arrow_dplyr_query(agg_query) if (length(grv)) { - out <- left_join(out, agg_query, by = grv) + out <- dplyr::left_join(out, agg_query, by = grv) } else { # If there are no group_by vars, add a scalar column to both and join on that agg_query$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) out$selected_columns[["..tempjoin"]] <- Expression$scalar(1L) - out <- left_join(out, agg_query, by = "..tempjoin") + out <- dplyr::left_join(out, agg_query, by = "..tempjoin") } } From 3a54e68c69b028afe6d5fbb58eb0c4520dca1308 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 7 May 2024 04:40:51 +0800 Subject: [PATCH 060/261] MINOR: [Dev] Add zanmato1984 and ZhangHuiGui in collaborators list (#41544) ### Rationale for this change Recently zanmato1984 and ZhangHuiGui is active on arrow-compute and arrow-acero module, which lacks maintainer. The contributions can be seem below: * https://github.com/apache/arrow/commits?author=zanmato1984 * https://github.com/apache/arrow/commits?author=ZhangHuiGui I promote them as collaborators ### What changes are included in this PR? Changes `.asf.yaml` ### Are these changes tested? No ### Are there any user-facing changes? No Authored-by: mwish Signed-off-by: Sutou Kouhei --- .asf.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index 760a830ef98c7..1eb019fea9af1 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -26,6 +26,8 @@ github: - jbonofre - js8544 - vibhatha + - zanmato1984 + - ZhangHuiGui notifications: commits: commits@arrow.apache.org From 52321377cc9fbcb8678577f10232aea984a235f5 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Tue, 7 May 2024 05:13:44 -0400 Subject: [PATCH 061/261] GH-40997: [C++] Get null_bit_id according to are_cols_in_encoding_order in NullUpdateColumnToRow_avx2 (#40998) ### Rationale for this change Recently, we find that the compare internal's avx2 function NullUpdateColumnToRowImp_avx2 lost the are_cols_in_encoding_order check when get null_bit_id. It may cause grouper's compare result wrong(are_cols_in_encoding_order = true in grouper). ### What changes are included in this PR? Get `null_bit_id` according to `are_cols_in_encoding_order` in NullUpdateColumnToRow_avx2. ### Are there any user-facing changes? 
No Co-authored-by laotan332 Co-authored-by ZhangHuiGui <2689496754@ qq.com> * GitHub Issue: #40997 Lead-authored-by: ZhangHuiGui Co-authored-by: ZhangHuiGui <2689496754@qq.com> Signed-off-by: Antoine Pitrou --- cpp/src/arrow/compute/CMakeLists.txt | 3 +- cpp/src/arrow/compute/row/compare_internal.cc | 41 +++++------ cpp/src/arrow/compute/row/compare_internal.h | 25 ++++--- .../compute/row/compare_internal_avx2.cc | 20 +++--- cpp/src/arrow/compute/row/grouper_test.cc | 68 +++++++++++++++++++ cpp/src/arrow/compute/row/row_internal.cc | 3 +- 6 files changed, 116 insertions(+), 44 deletions(-) create mode 100644 cpp/src/arrow/compute/row/grouper_test.cc diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index badcf4f2f26ac..fb778be113029 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -90,7 +90,8 @@ add_arrow_test(internals_test light_array_test.cc registry_test.cc key_hash_test.cc - row/compare_test.cc) + row/compare_test.cc + row/grouper_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) diff --git a/cpp/src/arrow/compute/row/compare_internal.cc b/cpp/src/arrow/compute/row/compare_internal.cc index 078a8287c71c0..98aea9011266c 100644 --- a/cpp/src/arrow/compute/row/compare_internal.cc +++ b/cpp/src/arrow/compute/row/compare_internal.cc @@ -36,22 +36,22 @@ void KeyCompare::NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_com const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector, - bool are_cols_in_encoding_order) { + bool are_cols_in_encoding_order, + uint8_t* match_bytevector) { if (!rows.has_any_nulls(ctx) && !col.data(0)) { return; } uint32_t num_processed = 0; #if defined(ARROW_HAVE_RUNTIME_AVX2) if (ctx->has_avx2()) { - num_processed = NullUpdateColumnToRow_avx2(use_selection, id_col, num_rows_to_compare, - sel_left_maybe_null, left_to_right_map, - ctx, col, rows, match_bytevector); + num_processed = NullUpdateColumnToRow_avx2( + use_selection, id_col, num_rows_to_compare, sel_left_maybe_null, + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, match_bytevector); } #endif - uint32_t null_bit_id = - are_cols_in_encoding_order ? id_col : rows.metadata().pos_after_encoding(id_col); + const uint32_t null_bit_id = + ColIdInEncodingOrder(rows, id_col, are_cols_in_encoding_order); if (!col.data(0)) { // Remove rows from the result for which the column value is a null @@ -363,10 +363,9 @@ void KeyCompare::CompareColumnsToRows( continue; } - uint32_t offset_within_row = rows.metadata().encoded_field_offset( - are_cols_in_encoding_order - ? static_cast(icol) - : rows.metadata().pos_after_encoding(static_cast(icol))); + uint32_t offset_within_row = + rows.metadata().encoded_field_offset(ColIdInEncodingOrder( + rows, static_cast(icol), are_cols_in_encoding_order)); if (col.metadata().is_fixed_length) { if (sel_left_maybe_null) { CompareBinaryColumnToRow( @@ -375,9 +374,8 @@ void KeyCompare::CompareColumnsToRows( is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? 
match_bytevector_A : match_bytevector_B); } else { // Version without using selection vector CompareBinaryColumnToRow( @@ -386,9 +384,8 @@ void KeyCompare::CompareColumnsToRows( is_first_column ? match_bytevector_A : match_bytevector_B); NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); @@ -414,9 +411,8 @@ void KeyCompare::CompareColumnsToRows( } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } else { if (ivarbinary == 0) { CompareVarBinaryColumnToRow( @@ -429,9 +425,8 @@ void KeyCompare::CompareColumnsToRows( } NullUpdateColumnToRow( static_cast(icol), num_rows_to_compare, sel_left_maybe_null, - left_to_right_map, ctx, col, rows, - is_first_column ? match_bytevector_A : match_bytevector_B, - are_cols_in_encoding_order); + left_to_right_map, ctx, col, rows, are_cols_in_encoding_order, + is_first_column ? match_bytevector_A : match_bytevector_B); } if (!is_first_column) { AndByteVectors(ctx, num_rows_to_compare, match_bytevector_A, match_bytevector_B); diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index b039ca97ff978..16002ee5184e9 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -43,13 +43,19 @@ class ARROW_EXPORT KeyCompare { uint8_t* out_match_bitvector_maybe_null = NULLPTR); private: + static uint32_t ColIdInEncodingOrder(const RowTableImpl& rows, uint32_t id_col, + bool are_cols_in_encoding_order) { + return are_cols_in_encoding_order ? 
id_col + : rows.metadata().pos_after_encoding(id_col); + } + template static void NullUpdateColumnToRow(uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector, - bool are_cols_in_encoding_order); + bool are_cols_in_encoding_order, + uint8_t* match_bytevector); template static void CompareBinaryColumnToRowHelper( @@ -92,7 +98,8 @@ class ARROW_EXPORT KeyCompare { static uint32_t NullUpdateColumnToRowImp_avx2( uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, uint8_t* match_bytevector); + const RowTableImpl& rows, bool are_cols_in_encoding_order, + uint8_t* match_bytevector); template static uint32_t CompareBinaryColumnToRowHelper_avx2( @@ -118,13 +125,11 @@ class ARROW_EXPORT KeyCompare { static uint32_t AndByteVectors_avx2(uint32_t num_elements, uint8_t* bytevector_A, const uint8_t* bytevector_B); - static uint32_t NullUpdateColumnToRow_avx2(bool use_selection, uint32_t id_col, - uint32_t num_rows_to_compare, - const uint16_t* sel_left_maybe_null, - const uint32_t* left_to_right_map, - LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, - uint8_t* match_bytevector); + static uint32_t NullUpdateColumnToRow_avx2( + bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare, + const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, + LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, + bool are_cols_in_encoding_order, uint8_t* match_bytevector); static uint32_t CompareBinaryColumnToRow_avx2( bool use_selection, uint32_t offset_within_row, uint32_t num_rows_to_compare, diff --git a/cpp/src/arrow/compute/row/compare_internal_avx2.cc b/cpp/src/arrow/compute/row/compare_internal_avx2.cc index ff407c51b83cb..18f656a2e458d 100644 --- a/cpp/src/arrow/compute/row/compare_internal_avx2.cc +++ b/cpp/src/arrow/compute/row/compare_internal_avx2.cc @@ -39,12 +39,14 @@ template uint32_t KeyCompare::NullUpdateColumnToRowImp_avx2( uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, - const RowTableImpl& rows, uint8_t* match_bytevector) { + const RowTableImpl& rows, bool are_cols_in_encoding_order, + uint8_t* match_bytevector) { if (!rows.has_any_nulls(ctx) && !col.data(0)) { return num_rows_to_compare; } - uint32_t null_bit_id = rows.metadata().pos_after_encoding(id_col); + const uint32_t null_bit_id = + ColIdInEncodingOrder(rows, id_col, are_cols_in_encoding_order); if (!col.data(0)) { // Remove rows from the result for which the column value is a null @@ -569,7 +571,7 @@ uint32_t KeyCompare::NullUpdateColumnToRow_avx2( bool use_selection, uint32_t id_col, uint32_t num_rows_to_compare, const uint16_t* sel_left_maybe_null, const uint32_t* left_to_right_map, LightContext* ctx, const KeyColumnArray& col, const RowTableImpl& rows, - uint8_t* match_bytevector) { + bool are_cols_in_encoding_order, uint8_t* match_bytevector) { int64_t num_rows_safe = TailSkipForSIMD::FixBitAccess(sizeof(uint32_t), col.length(), col.bit_offset(0)); if (sel_left_maybe_null) { @@ -580,13 +582,13 @@ uint32_t KeyCompare::NullUpdateColumnToRow_avx2( } if (use_selection) { - return NullUpdateColumnToRowImp_avx2(id_col, num_rows_to_compare, - sel_left_maybe_null, 
left_to_right_map, - ctx, col, rows, match_bytevector); + return NullUpdateColumnToRowImp_avx2( + id_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, + rows, are_cols_in_encoding_order, match_bytevector); } else { - return NullUpdateColumnToRowImp_avx2(id_col, num_rows_to_compare, - sel_left_maybe_null, left_to_right_map, - ctx, col, rows, match_bytevector); + return NullUpdateColumnToRowImp_avx2( + id_col, num_rows_to_compare, sel_left_maybe_null, left_to_right_map, ctx, col, + rows, are_cols_in_encoding_order, match_bytevector); } } diff --git a/cpp/src/arrow/compute/row/grouper_test.cc b/cpp/src/arrow/compute/row/grouper_test.cc new file mode 100644 index 0000000000000..1e853be5e4af7 --- /dev/null +++ b/cpp/src/arrow/compute/row/grouper_test.cc @@ -0,0 +1,68 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/compute/exec.h" +#include "arrow/compute/row/grouper.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" + +namespace arrow { +namespace compute { + +// Specialized case for GH-40997 +TEST(Grouper, ResortedColumnsWithLargeNullRows) { + const uint64_t num_rows = 1024; + + // construct random array with plenty of null values + const int32_t kSeed = 42; + const int32_t min = 0; + const int32_t max = 100; + const double null_probability = 0.3; + const double true_probability = 0.5; + auto rng = random::RandomArrayGenerator(kSeed); + auto b_arr = rng.Boolean(num_rows, true_probability, null_probability); + auto i32_arr = rng.Int32(num_rows, min, max, null_probability); + auto i64_arr = rng.Int64(num_rows, min, max * 10, null_probability); + + // construct batches with columns which will be resorted in the grouper make + std::vector exec_batches = {ExecBatch({i64_arr, i32_arr, b_arr}, num_rows), + ExecBatch({i32_arr, i64_arr, b_arr}, num_rows), + ExecBatch({i64_arr, b_arr, i32_arr}, num_rows), + ExecBatch({i32_arr, b_arr, i64_arr}, num_rows), + ExecBatch({b_arr, i32_arr, i64_arr}, num_rows), + ExecBatch({b_arr, i64_arr, i32_arr}, num_rows)}; + + const int num_batches = static_cast(exec_batches.size()); + std::vector group_num_vec; + group_num_vec.reserve(num_batches); + + for (const auto& exec_batch : exec_batches) { + ExecSpan span(exec_batch); + ASSERT_OK_AND_ASSIGN(auto grouper, Grouper::Make(span.GetTypes())); + ASSERT_OK_AND_ASSIGN(Datum group_ids, grouper->Consume(span)); + group_num_vec.emplace_back(grouper->num_groups()); + } + + for (int i = 1; i < num_batches; i++) { + ASSERT_EQ(group_num_vec[i - 1], group_num_vec[i]); + } +} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/row/row_internal.cc b/cpp/src/arrow/compute/row/row_internal.cc index f6a62c09fcf24..469205e9b008d 100644 --- 
a/cpp/src/arrow/compute/row/row_internal.cc +++ b/cpp/src/arrow/compute/row/row_internal.cc @@ -66,7 +66,8 @@ void RowTableMetadata::FromColumnMetadataVector( // // Columns are sorted based on the size in bytes of their fixed-length part. // For the varying-length column, the fixed-length part is the 32-bit field storing - // cumulative length of varying-length fields. + // cumulative length of varying-length fields. This is to make the memory access of + // each individual column within the encoded row alignment-friendly. // // The rules are: // From b719408f4abd9921f35935fe0de771f3b856efd1 Mon Sep 17 00:00:00 2001 From: hemidark <67875833+hemidark@users.noreply.github.com> Date: Tue, 7 May 2024 02:44:48 -0700 Subject: [PATCH 062/261] GH-40560: [Python] RunEndEncodedArray.from_arrays: bugfix for Array arguments (#40560) (#41093) ### Rationale for this change The documentation suggests that `RunEndEncodedArray.from_arrays` takes two `Array` parameters, as would be expected of a `from_arrays` method. However, if given an `Array` instance for the `run_ends` parameter, it errors because `Array.__getitem__` returns a pyarrow scalar instead of a native Python integer. ### What changes are included in this PR? * Handle `Array` parameters for `run_ends` by unconditionally coercing the logical length to a pyarrow scalar, then to a Python native value. ### Are these change tested? Yes. Augmented the existing unit tests to test with `Array` as well as Python lists, and check that the data types of the `Array` instances correctly carry over to the data type of the `RunEndEncodedArray`. ### Are there any user-facing changes? Not apart from the bugfix; this was the minimum necessary change to make `Array` parameters work. `RunEndEncodedArray.from_arrays` continues to support e.g. python lists as before. 
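For illustration, a minimal sketch of the call pattern this fixes, mirroring the augmented unit test (the array contents here are illustrative, not part of the patch):

```python
import pyarrow as pa

# Typed pyarrow Arrays instead of plain Python lists; before this fix,
# run_ends[-1] returned a pyarrow scalar and the length computation failed.
run_ends = pa.array([3, 5, 10, 19], pa.int32())
values = pa.array([1, 2, 1, 3], pa.int64())

ree = pa.RunEndEncodedArray.from_arrays(run_ends, values)
assert ree.type == pa.run_end_encoded(pa.int32(), pa.int64())
assert len(ree) == 19  # logical length is taken from the last run end
```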
* GitHub Issue: #40560 Authored-by: Hemidark Signed-off-by: Joris Van den Bossche --- python/pyarrow/array.pxi | 2 +- python/pyarrow/tests/test_array.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 6a11b19ffcdf5..946c82b258241 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3984,7 +3984,7 @@ cdef class RunEndEncodedArray(Array): ------- RunEndEncodedArray """ - logical_length = run_ends[-1] if len(run_ends) > 0 else 0 + logical_length = scalar(run_ends[-1]).as_py() if len(run_ends) > 0 else 0 return RunEndEncodedArray._from_arrays(type, True, logical_length, run_ends, values, 0) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 6a190957879d3..3754daeb9b4bd 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3578,12 +3578,23 @@ def check_run_end_encoded_from_arrays_with_type(ree_type=None): check_run_end_encoded(ree_array, run_ends, values, 19, 4, 0) +def check_run_end_encoded_from_typed_arrays(ree_type): + run_ends = [3, 5, 10, 19] + values = [1, 2, 1, 3] + typed_run_ends = pa.array(run_ends, ree_type.run_end_type) + typed_values = pa.array(values, ree_type.value_type) + ree_array = pa.RunEndEncodedArray.from_arrays(typed_run_ends, typed_values) + assert ree_array.type == ree_type + check_run_end_encoded(ree_array, run_ends, values, 19, 4, 0) + + def test_run_end_encoded_from_arrays(): check_run_end_encoded_from_arrays_with_type() for run_end_type in [pa.int16(), pa.int32(), pa.int64()]: for value_type in [pa.uint32(), pa.int32(), pa.uint64(), pa.int64()]: ree_type = pa.run_end_encoded(run_end_type, value_type) check_run_end_encoded_from_arrays_with_type(ree_type) + check_run_end_encoded_from_typed_arrays(ree_type) def test_run_end_encoded_from_buffers(): From c79b6a593e21c10dc65e06a2717809ab83fd31db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 7 May 2024 14:54:10 +0200 Subject: [PATCH 063/261] GH-41566: [CI][Packaging] macOS wheel for Catalina fails to build on macOS arm64 (#41567) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Wheels for macOS catalina are failing ### What changes are included in this PR? Use macos-13 instead of (latest) ARM ### Are these changes tested? Yes, via archery ### Are there any user-facing changes? No * GitHub Issue: #41566 Authored-by: Raúl Cumplido Signed-off-by: Raúl Cumplido --- dev/tasks/tasks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 52a235c688eda..126b0fcb6f76a 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -409,7 +409,7 @@ tasks: arrow_jemalloc: "ON" python_version: "{{ python_version }}" macos_deployment_target: "{{ macos_version }}" - runs_on: "macos-latest" + runs_on: "macos-13" vcpkg_arch: "amd64" artifacts: - pyarrow-{no_rc_version}-{{ python_tag }}-{{ abi_tag }}-{{ platform_tag }}.whl From 03f8ae754ede16f118ccdba0abb593b1461024aa Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Tue, 7 May 2024 09:42:55 -0400 Subject: [PATCH 064/261] GH-41540: [R] Simplify arrow_eval() logic and bindings environments (#41537) ### Rationale for this change NSE is hard enough. I wanted to see if I could remove some layers of complexity. ### What changes are included in this PR? * There no longer are separate collections of `agg_funcs` and `nse_funcs`. 
Now that the aggregation functions return Expressions (https://github.com/apache/arrow/pull/41223), there's no reason to treat them separately. All bindings return Expressions now. * Both are removed and functions are just stored in `.cache$functions`. There was a note wondering why both `nse_funcs` and that needed to exist. They don't. * `arrow_mask()` no longer has an `aggregations` argument: agg functions are always present. * Because agg functions are always present, `filter` and `arrange` now have to check for whether the expressions passed to them contain aggregations--this is supported in regular dplyr but we have deferred supporting it here for now (see https://github.com/apache/arrow/pull/41350). If we decide we want to support it later, these checks are the entry points where we'd drop in the `left_join()` as in `mutate()`. * The logic of evaluating expresssions in `filter()` has been simplified. * Assorted other cleanups: `register_binding()` has two fewer arguments, for example, and the duplicate functions for referencing agg_funcs are gone. There is one more refactor I intend to pursue, and that's to rework abandon_ship and how arrow_eval does error handling, but I ~may~ will defer that to a followup. ### Are these changes tested? Yes, though I'll add some more for filter/aggregate in the followup since I'm reworking things there. ### Are there any user-facing changes? There are a couple of edge cases where the error message will change subtly. For example, if you supplied a comma-separated list of filter expressions, and more than one of them did not evaluate, previously you would be informed of all of the failures; now, we error on the first one. I don't think this is concerning. * GitHub Issue: #41540 --- r/R/dplyr-arrange.R | 8 ++ r/R/dplyr-eval.R | 17 +-- r/R/dplyr-filter.R | 54 +++------ r/R/dplyr-funcs-agg.R | 26 ++--- r/R/dplyr-funcs.R | 119 ++++---------------- r/R/dplyr-mutate.R | 2 +- r/R/dplyr-summarize.R | 2 +- r/R/udf.R | 7 +- r/man/register_binding.Rd | 45 +------- r/tests/testthat/test-dataset-dplyr.R | 2 +- r/tests/testthat/test-dplyr-filter.R | 9 +- r/tests/testthat/test-dplyr-funcs.R | 30 ++--- r/tests/testthat/test-dplyr-summarize.R | 28 ++--- r/tests/testthat/test-udf.R | 14 +-- r/vignettes/developers/writing_bindings.Rmd | 7 +- 15 files changed, 109 insertions(+), 261 deletions(-) diff --git a/r/R/dplyr-arrange.R b/r/R/dplyr-arrange.R index f91cd14211e0f..c8594c77df000 100644 --- a/r/R/dplyr-arrange.R +++ b/r/R/dplyr-arrange.R @@ -47,6 +47,14 @@ arrange.arrow_dplyr_query <- function(.data, ..., .by_group = FALSE) { msg <- paste("Expression", names(sorts)[i], "not supported in Arrow") return(abandon_ship(call, .data, msg)) } + if (length(mask$.aggregations)) { + # dplyr lets you arrange on e.g. x < mean(x), but we haven't implemented it. + # But we could, the same way it works in mutate() via join, if someone asks. + # Until then, just error. 
+ # TODO: add a test for this + msg <- paste("Expression", format_expr(expr), "not supported in arrange() in Arrow") + return(abandon_ship(call, .data, msg)) + } descs[i] <- x[["desc"]] } .data$arrange_vars <- c(sorts, .data$arrange_vars) diff --git a/r/R/dplyr-eval.R b/r/R/dplyr-eval.R index ff1619ce944d0..211c26cecce8c 100644 --- a/r/R/dplyr-eval.R +++ b/r/R/dplyr-eval.R @@ -121,24 +121,9 @@ arrow_not_supported <- function(msg) { } # Create a data mask for evaluating a dplyr expression -arrow_mask <- function(.data, aggregation = FALSE) { +arrow_mask <- function(.data) { f_env <- new_environment(.cache$functions) - if (aggregation) { - # Add the aggregation functions to the environment. - for (f in names(agg_funcs)) { - f_env[[f]] <- agg_funcs[[f]] - } - } else { - # Add functions that need to error hard and clear. - # Some R functions will still try to evaluate on an Expression - # and return NA with a warning :exploding_head: - fail <- function(...) stop("Not implemented") - for (f in c("mean", "sd")) { - f_env[[f]] <- fail - } - } - # Assign the schema to the expressions schema <- .data$.data$schema walk(.data$selected_columns, ~ (.$schema <- schema)) diff --git a/r/R/dplyr-filter.R b/r/R/dplyr-filter.R index d85fa16af2e71..69decbd76655f 100644 --- a/r/R/dplyr-filter.R +++ b/r/R/dplyr-filter.R @@ -35,48 +35,24 @@ filter.arrow_dplyr_query <- function(.data, ..., .by = NULL, .preserve = FALSE) } # tidy-eval the filter expressions inside an Arrow data_mask - filters <- lapply(expanded_filters, arrow_eval, arrow_mask(out)) - bad_filters <- map_lgl(filters, ~ inherits(., "try-error")) - if (any(bad_filters)) { - # This is similar to abandon_ship() except that the filter eval is - # vectorized, and we apply filters that _did_ work before abandoning ship - # with the rest - expr_labs <- map_chr(expanded_filters[bad_filters], format_expr) - if (query_on_dataset(out)) { - # Abort. We don't want to auto-collect if this is a Dataset because that - # could blow up, too big. - stop( - "Filter expression not supported for Arrow Datasets: ", - oxford_paste(expr_labs, quote = FALSE), - "\nCall collect() first to pull data into R.", - call. = FALSE - ) - } else { - arrow_errors <- map2_chr( - filters[bad_filters], expr_labs, - handle_arrow_not_supported - ) - if (length(arrow_errors) == 1) { - msg <- paste0(arrow_errors, "; ") - } else { - msg <- paste0("* ", arrow_errors, "\n", collapse = "") - } - warning( - msg, "pulling data into R", - immediate. = TRUE, - call. = FALSE - ) - # Set any valid filters first, then collect and then apply the invalid ones in R - out <- dplyr::collect(set_filters(out, filters[!bad_filters])) - if (by$from_by) { - out <- dplyr::ungroup(out) - } - return(dplyr::filter(out, !!!expanded_filters[bad_filters], .by = {{ .by }})) + mask <- arrow_mask(out) + for (expr in expanded_filters) { + filt <- arrow_eval(expr, mask) + if (inherits(filt, "try-error")) { + msg <- handle_arrow_not_supported(filt, format_expr(expr)) + return(abandon_ship(match.call(), .data, msg)) + } + if (length(mask$.aggregations)) { + # dplyr lets you filter on e.g. x < mean(x), but we haven't implemented it. + # But we could, the same way it works in mutate() via join, if someone asks. + # Until then, just error. 
+ # TODO: add a test for this + msg <- paste("Expression", format_expr(expr), "not supported in filter() in Arrow") + return(abandon_ship(match.call(), .data, msg)) } + out <- set_filters(out, filt) } - out <- set_filters(out, filters) - if (by$from_by) { out$group_by_vars <- character() } diff --git a/r/R/dplyr-funcs-agg.R b/r/R/dplyr-funcs-agg.R index 9411ce5ce6faf..c0c4eb3089425 100644 --- a/r/R/dplyr-funcs-agg.R +++ b/r/R/dplyr-funcs-agg.R @@ -29,56 +29,56 @@ # you can use list_compute_functions("^hash_") register_bindings_aggregate <- function() { - register_binding_agg("base::sum", function(..., na.rm = FALSE) { + register_binding("base::sum", function(..., na.rm = FALSE) { set_agg( fun = "sum", data = ensure_one_arg(list2(...), "sum"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::prod", function(..., na.rm = FALSE) { + register_binding("base::prod", function(..., na.rm = FALSE) { set_agg( fun = "product", data = ensure_one_arg(list2(...), "prod"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::any", function(..., na.rm = FALSE) { + register_binding("base::any", function(..., na.rm = FALSE) { set_agg( fun = "any", data = ensure_one_arg(list2(...), "any"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::all", function(..., na.rm = FALSE) { + register_binding("base::all", function(..., na.rm = FALSE) { set_agg( fun = "all", data = ensure_one_arg(list2(...), "all"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::mean", function(x, na.rm = FALSE) { + register_binding("base::mean", function(x, na.rm = FALSE) { set_agg( fun = "mean", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("stats::sd", function(x, na.rm = FALSE, ddof = 1) { + register_binding("stats::sd", function(x, na.rm = FALSE, ddof = 1) { set_agg( fun = "stddev", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg("stats::var", function(x, na.rm = FALSE, ddof = 1) { + register_binding("stats::var", function(x, na.rm = FALSE, ddof = 1) { set_agg( fun = "variance", data = list(x), options = list(skip_nulls = na.rm, min_count = 0L, ddof = ddof) ) }) - register_binding_agg( + register_binding( "stats::quantile", function(x, probs, na.rm = FALSE) { if (length(probs) != 1) { @@ -103,7 +103,7 @@ register_bindings_aggregate <- function() { "approximate quantile (t-digest) is computed" ) ) - register_binding_agg( + register_binding( "stats::median", function(x, na.rm = FALSE) { # TODO: Bind to the Arrow function that returns an exact median and remove @@ -122,28 +122,28 @@ register_bindings_aggregate <- function() { }, notes = "approximate median (t-digest) is computed" ) - register_binding_agg("dplyr::n_distinct", function(..., na.rm = FALSE) { + register_binding("dplyr::n_distinct", function(..., na.rm = FALSE) { set_agg( fun = "count_distinct", data = ensure_one_arg(list2(...), "n_distinct"), options = list(na.rm = na.rm) ) }) - register_binding_agg("dplyr::n", function() { + register_binding("dplyr::n", function() { set_agg( fun = "count_all", data = list(), options = list() ) }) - register_binding_agg("base::min", function(..., na.rm = FALSE) { + register_binding("base::min", function(..., na.rm = FALSE) { set_agg( fun = "min", data = ensure_one_arg(list2(...), "min"), options = list(skip_nulls = na.rm, min_count = 0L) ) }) - register_binding_agg("base::max", function(..., 
na.rm = FALSE) { + register_binding("base::max", function(..., na.rm = FALSE) { set_agg( fun = "max", data = ensure_one_arg(list2(...), "max"), diff --git a/r/R/dplyr-funcs.R b/r/R/dplyr-funcs.R index abf2362d0107f..c0eb47e428b7f 100644 --- a/r/R/dplyr-funcs.R +++ b/r/R/dplyr-funcs.R @@ -22,8 +22,8 @@ NULL #' Register compute bindings #' -#' The `register_binding()` and `register_binding_agg()` functions -#' are used to populate a list of functions that operate on (and return) +#' `register_binding()` is used to populate a list of functions that operate on +#' (and return) #' Expressions. These are the basis for the `.data` mask inside dplyr methods. #' #' @section Writing bindings: @@ -40,26 +40,10 @@ NULL #' * Inside your function, you can call any other binding with `call_binding()`. #' #' @param fun_name A string containing a function name in the form `"function"` or -#' `"package::function"`. The package name is currently not used but -#' may be used in the future to allow these types of function calls. -#' @param fun A function or `NULL` to un-register a previous function. +#' `"package::function"`. +#' @param fun A function, or `NULL` to un-register a previous function. #' This function must accept `Expression` objects as arguments and return #' `Expression` objects instead of regular R objects. -#' @param agg_fun An aggregate function or `NULL` to un-register a previous -#' aggregate function. This function must accept `Expression` objects as -#' arguments and return a `list()` with components: -#' - `fun`: string function name -#' - `data`: list of 0 or more `Expression`s -#' - `options`: list of function options, as passed to call_function -#' @param update_cache Update .cache$functions at the time of registration. -#' the default is FALSE because the majority of usage is to register -#' bindings at package load, after which we create the cache once. The -#' reason why .cache$functions is needed in addition to nse_funcs for -#' non-aggregate functions could be revisited...it is currently used -#' as the data mask in mutate, filter, and aggregate (but not -#' summarise) because the data mask has to be a list. -#' @param registry An environment in which the functions should be -#' assigned. #' @param notes string for the docs: note any limitations or differences in #' behavior between the Arrow version and the R function. 
#' @return The previously registered binding or `NULL` if no previously @@ -67,12 +51,10 @@ NULL #' @keywords internal register_binding <- function(fun_name, fun, - registry = nse_funcs, - update_cache = FALSE, notes = character(0)) { unqualified_name <- sub("^.*?:{+}", "", fun_name) - previous_fun <- registry[[unqualified_name]] + previous_fun <- .cache$functions[[unqualified_name]] # if the unqualified name exists in the registry, warn if (!is.null(previous_fun) && !identical(fun, previous_fun)) { @@ -87,58 +69,25 @@ register_binding <- function(fun_name, # register both as `pkg::fun` and as `fun` if `qualified_name` is prefixed # unqualified_name and fun_name will be the same if not prefixed - registry[[unqualified_name]] <- fun - registry[[fun_name]] <- fun - + .cache$functions[[unqualified_name]] <- fun + .cache$functions[[fun_name]] <- fun .cache$docs[[fun_name]] <- notes - - if (update_cache) { - fun_cache <- .cache$functions - fun_cache[[unqualified_name]] <- fun - fun_cache[[fun_name]] <- fun - .cache$functions <- fun_cache - } - invisible(previous_fun) } -unregister_binding <- function(fun_name, registry = nse_funcs, - update_cache = FALSE) { +unregister_binding <- function(fun_name) { unqualified_name <- sub("^.*?:{+}", "", fun_name) - previous_fun <- registry[[unqualified_name]] + previous_fun <- .cache$functions[[unqualified_name]] - rm( - list = unique(c(fun_name, unqualified_name)), - envir = registry, - inherits = FALSE - ) - - if (update_cache) { - fun_cache <- .cache$functions - fun_cache[[unqualified_name]] <- NULL - fun_cache[[fun_name]] <- NULL - .cache$functions <- fun_cache - } + .cache$functions[[unqualified_name]] <- NULL + .cache$functions[[fun_name]] <- NULL invisible(previous_fun) } -#' @rdname register_binding -#' @keywords internal -register_binding_agg <- function(fun_name, - agg_fun, - registry = agg_funcs, - notes = character(0)) { - register_binding(fun_name, agg_fun, registry = registry, notes = notes) -} - # Supports functions and tests that call previously-defined bindings call_binding <- function(fun_name, ...) { - nse_funcs[[fun_name]](...) -} - -call_binding_agg <- function(fun_name, ...) { - agg_funcs[[fun_name]](...) + .cache$functions[[fun_name]](...) } create_binding_cache <- function() { @@ -147,7 +96,7 @@ create_binding_cache <- function() { # Register all available Arrow Compute functions, namespaced as arrow_fun. all_arrow_funs <- list_compute_functions() - arrow_funcs <- set_names( + .cache$functions <- set_names( lapply(all_arrow_funs, function(fun) { force(fun) function(...) Expression$create(fun, ...) 
@@ -155,7 +104,7 @@ create_binding_cache <- function() { paste0("arrow_", all_arrow_funs) ) - # Register bindings into nse_funcs and agg_funcs + # Register bindings into the cache register_bindings_array_function_map() register_bindings_aggregate() register_bindings_conditional() @@ -165,37 +114,17 @@ create_binding_cache <- function() { register_bindings_type() register_bindings_augmented() - # We only create the cache for nse_funcs and not agg_funcs - .cache$functions <- c(as.list(nse_funcs), arrow_funcs) -} - -# environments in the arrow namespace used in the above functions -nse_funcs <- new.env(parent = emptyenv()) -agg_funcs <- new.env(parent = emptyenv()) -.cache <- new.env(parent = emptyenv()) - -# we register 2 versions of the "::" binding - one for use with nse_funcs -# and another one for use with agg_funcs (registered in dplyr-funcs-agg.R) -nse_funcs[["::"]] <- function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) + .cache$functions[["::"]] <- function(lhs, rhs) { + lhs_name <- as.character(substitute(lhs)) + rhs_name <- as.character(substitute(rhs)) - fun_name <- paste0(lhs_name, "::", rhs_name) + fun_name <- paste0(lhs_name, "::", rhs_name) - # if we do not have a binding for pkg::fun, then fall back on to the - # regular pkg::fun function - nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + # if we do not have a binding for pkg::fun, then fall back on to the + # regular pkg::fun function + .cache$functions[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] + } } -agg_funcs[["::"]] <- function(lhs, rhs) { - lhs_name <- as.character(substitute(lhs)) - rhs_name <- as.character(substitute(rhs)) - - fun_name <- paste0(lhs_name, "::", rhs_name) - - # if we do not have a binding for pkg::fun, then fall back on to the - # nse_funcs (useful when we have a regular function inside an aggregating one) - # and then, if searching nse_funcs fails too, fall back to the - # regular `pkg::fun()` function - agg_funcs[[fun_name]] %||% nse_funcs[[fun_name]] %||% asNamespace(lhs_name)[[rhs_name]] -} +# environment in the arrow namespace used in the above functions +.cache <- new.env(parent = emptyenv()) diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R index 72882b6afd964..f0a8c005676df 100644 --- a/r/R/dplyr-mutate.R +++ b/r/R/dplyr-mutate.R @@ -48,7 +48,7 @@ mutate.arrow_dplyr_query <- function(.data, # Create a mask with aggregation functions in it # If there are any aggregations, we will need to compute them and # and join the results back in, for "window functions" like x - mean(x) - mask <- arrow_mask(out, aggregation = TRUE) + mask <- arrow_mask(out) # Evaluate the mutate expressions results <- list() for (i in seq_along(exprs)) { diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R index 56de14db6dd44..58ca849152a75 100644 --- a/r/R/dplyr-summarize.R +++ b/r/R/dplyr-summarize.R @@ -84,7 +84,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) { # and the aggregation functions will pull out those terms and insert into # that list. # nolint end - mask <- arrow_mask(.data, aggregation = TRUE) + mask <- arrow_mask(.data) # We'll collect any transformations after the aggregation here. 
# summarize_eval() returns NULL when the outer expression is an aggregation, diff --git a/r/R/udf.R b/r/R/udf.R index 922095cceba6a..0415fbac3c9fc 100644 --- a/r/R/udf.R +++ b/r/R/udf.R @@ -95,12 +95,7 @@ register_scalar_function <- function(name, fun, in_type, out_type, body(binding_fun) <- expr_substitute(body(binding_fun), sym("name"), name) environment(binding_fun) <- asNamespace("arrow") - register_binding( - name, - binding_fun, - update_cache = TRUE - ) - + register_binding(name, binding_fun) invisible(NULL) } diff --git a/r/man/register_binding.Rd b/r/man/register_binding.Rd index d10cd733bbe9d..b84cde3b8993a 100644 --- a/r/man/register_binding.Rd +++ b/r/man/register_binding.Rd @@ -2,63 +2,28 @@ % Please edit documentation in R/dplyr-funcs.R \name{register_binding} \alias{register_binding} -\alias{register_binding_agg} \title{Register compute bindings} \usage{ -register_binding( - fun_name, - fun, - registry = nse_funcs, - update_cache = FALSE, - notes = character(0) -) - -register_binding_agg( - fun_name, - agg_fun, - registry = agg_funcs, - notes = character(0) -) +register_binding(fun_name, fun, notes = character(0)) } \arguments{ \item{fun_name}{A string containing a function name in the form \code{"function"} or -\code{"package::function"}. The package name is currently not used but -may be used in the future to allow these types of function calls.} +\code{"package::function"}.} -\item{fun}{A function or \code{NULL} to un-register a previous function. +\item{fun}{A function, or \code{NULL} to un-register a previous function. This function must accept \code{Expression} objects as arguments and return \code{Expression} objects instead of regular R objects.} -\item{registry}{An environment in which the functions should be -assigned.} - -\item{update_cache}{Update .cache$functions at the time of registration. -the default is FALSE because the majority of usage is to register -bindings at package load, after which we create the cache once. The -reason why .cache$functions is needed in addition to nse_funcs for -non-aggregate functions could be revisited...it is currently used -as the data mask in mutate, filter, and aggregate (but not -summarise) because the data mask has to be a list.} - \item{notes}{string for the docs: note any limitations or differences in behavior between the Arrow version and the R function.} - -\item{agg_fun}{An aggregate function or \code{NULL} to un-register a previous -aggregate function. This function must accept \code{Expression} objects as -arguments and return a \code{list()} with components: -\itemize{ -\item \code{fun}: string function name -\item \code{data}: list of 0 or more \code{Expression}s -\item \code{options}: list of function options, as passed to call_function -}} } \value{ The previously registered binding or \code{NULL} if no previously registered function existed. } \description{ -The \code{register_binding()} and \code{register_binding_agg()} functions -are used to populate a list of functions that operate on (and return) +\code{register_binding()} is used to populate a list of functions that operate on +(and return) Expressions. These are the basis for the \code{.data} mask inside dplyr methods. 
} \section{Writing bindings}{ diff --git a/r/tests/testthat/test-dataset-dplyr.R b/r/tests/testthat/test-dataset-dplyr.R index 1e36ea8bd4966..493eac328e5cd 100644 --- a/r/tests/testthat/test-dataset-dplyr.R +++ b/r/tests/testthat/test-dataset-dplyr.R @@ -325,7 +325,7 @@ test_that("dplyr method not implemented messages", { # This one is more nuanced expect_error( ds %>% filter(int > 6, dbl > max(dbl)), - "Filter expression not supported for Arrow Datasets: dbl > max(dbl)\nCall collect() first to pull data into R.", + "Expression dbl > max(dbl) not supported in filter() in Arrow\nCall collect() first to pull data into R.", fixed = TRUE ) }) diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R index bf23685362a82..535bcb70c4cab 100644 --- a/r/tests/testthat/test-dplyr-filter.R +++ b/r/tests/testthat/test-dplyr-filter.R @@ -324,13 +324,14 @@ test_that("Filtering with unsupported functions", { filter( nchar(chr, type = "bytes", allowNA = TRUE) == 1, # bad, Arrow msg int > 2, # good - pnorm(dbl) > .99 # bad, opaque + pnorm(dbl) > .99 # bad, opaque, but we'll error on the first one before we get here ) %>% collect(), tbl, - warning = '\\* In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1, allowNA = TRUE not supported in Arrow -\\* Expression pnorm\\(dbl\\) > 0.99 not supported in Arrow -pulling data into R' + warning = paste( + 'In nchar\\(chr, type = "bytes", allowNA = TRUE\\) == 1,', + "allowNA = TRUE not supported in Arrow; pulling data into R" + ) ) }) diff --git a/r/tests/testthat/test-dplyr-funcs.R b/r/tests/testthat/test-dplyr-funcs.R index 039604a85ee0c..48c5d730f8493 100644 --- a/r/tests/testthat/test-dplyr-funcs.R +++ b/r/tests/testthat/test-dplyr-funcs.R @@ -19,35 +19,25 @@ skip_on_cran() test_that("register_binding()/unregister_binding() works", { - fake_registry <- new.env(parent = emptyenv()) fun1 <- function() NULL fun2 <- function() "Hello" - expect_null(register_binding("some.pkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$`some.pkg::some_fun`, fun1) + expect_null(register_binding("some.pkg::some_fun", fun1)) + expect_identical(.cache$functions$some_fun, fun1) + expect_identical(.cache$functions$`some.pkg::some_fun`, fun1) - expect_identical(unregister_binding("some.pkg::some_fun", fake_registry), fun1) - expect_false("some.pkg::some_fun" %in% names(fake_registry)) - expect_false("some_fun" %in% names(fake_registry)) + expect_identical(unregister_binding("some.pkg::some_fun"), fun1) + expect_false("some.pkg::some_fun" %in% names(.cache$functions)) + expect_false("some_fun" %in% names(.cache$functions)) - expect_null(register_binding("somePkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) + expect_null(register_binding("somePkg::some_fun", fun1)) + expect_identical(.cache$functions$some_fun, fun1) expect_warning( - register_binding("some.pkg2::some_fun", fun2, fake_registry), + register_binding("some.pkg2::some_fun", fun2), "A \"some_fun\" binding already exists in the registry and will be overwritten." 
) # No warning when an identical function is re-registered - expect_silent(register_binding("some.pkg2::some_fun", fun2, fake_registry)) -}) - -test_that("register_binding_agg() works", { - fake_registry <- new.env(parent = emptyenv()) - fun1 <- function() NULL - - expect_null(register_binding_agg("somePkg::some_fun", fun1, fake_registry)) - expect_identical(fake_registry$some_fun, fun1) - expect_identical(fake_registry$`somePkg::some_fun`, fun1) + expect_silent(register_binding("some.pkg2::some_fun", fun2)) }) diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R index 87bb5e5fac959..a61ef95bee73d 100644 --- a/r/tests/testthat/test-dplyr-summarize.R +++ b/r/tests/testthat/test-dplyr-summarize.R @@ -337,20 +337,20 @@ test_that("Functions that take ... but we only accept a single arg", { ) # Now that we've demonstrated that the whole machinery works, let's test - # the agg_funcs directly - expect_error(call_binding_agg("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("sum"), "sum() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("prod"), "prod() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("any"), "any() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("all"), "all() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("min"), "min() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("max"), "max() with 0 arguments", fixed = TRUE) - expect_error(call_binding_agg("n_distinct", 1, 2), "Multiple arguments to n_distinct()") - expect_error(call_binding_agg("sum", 1, 2), "Multiple arguments to sum") - expect_error(call_binding_agg("any", 1, 2), "Multiple arguments to any()") - expect_error(call_binding_agg("all", 1, 2), "Multiple arguments to all()") - expect_error(call_binding_agg("min", 1, 2), "Multiple arguments to min()") - expect_error(call_binding_agg("max", 1, 2), "Multiple arguments to max()") + # the agg funcs directly + expect_error(call_binding("n_distinct"), "n_distinct() with 0 arguments", fixed = TRUE) + expect_error(call_binding("sum"), "sum() with 0 arguments", fixed = TRUE) + expect_error(call_binding("prod"), "prod() with 0 arguments", fixed = TRUE) + expect_error(call_binding("any"), "any() with 0 arguments", fixed = TRUE) + expect_error(call_binding("all"), "all() with 0 arguments", fixed = TRUE) + expect_error(call_binding("min"), "min() with 0 arguments", fixed = TRUE) + expect_error(call_binding("max"), "max() with 0 arguments", fixed = TRUE) + expect_error(call_binding("n_distinct", 1, 2), "Multiple arguments to n_distinct()") + expect_error(call_binding("sum", 1, 2), "Multiple arguments to sum") + expect_error(call_binding("any", 1, 2), "Multiple arguments to any()") + expect_error(call_binding("all", 1, 2), "Multiple arguments to all()") + expect_error(call_binding("min", 1, 2), "Multiple arguments to min()") + expect_error(call_binding("max", 1, 2), "Multiple arguments to max()") }) test_that("median()", { diff --git a/r/tests/testthat/test-udf.R b/r/tests/testthat/test-udf.R index 0eb75b1dde6e5..8604dc610a435 100644 --- a/r/tests/testthat/test-udf.R +++ b/r/tests/testthat/test-udf.R @@ -90,7 +90,7 @@ test_that("register_scalar_function() adds a compute function to the registry", int32(), float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) expect_true("times_32" %in% names(asNamespace("arrow")$.cache$functions)) 
expect_true("times_32" %in% list_compute_functions()) @@ -124,7 +124,7 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit(unregister_binding("times_32_bad_return_type_array", update_cache = TRUE)) + on.exit(unregister_binding("times_32_bad_return_type_array")) expect_error( call_function("times_32_bad_return_type_array", Array$create(1L)), @@ -137,7 +137,7 @@ test_that("arrow_scalar_function() with bad return type errors", { int32(), float64() ) - on.exit(unregister_binding("times_32_bad_return_type_scalar", update_cache = TRUE)) + on.exit(unregister_binding("times_32_bad_return_type_scalar")) expect_error( call_function("times_32_bad_return_type_scalar", Array$create(1L)), @@ -155,7 +155,7 @@ test_that("register_scalar_function() can register multiple kernels", { out_type = function(in_types) in_types[[1]], auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) expect_equal( call_function("times_32", Scalar$create(1L, int32())), @@ -238,7 +238,7 @@ test_that("user-defined functions work during multi-threaded execution", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) # check a regular collect() result <- open_dataset(tf_dataset) %>% @@ -271,7 +271,7 @@ test_that("nested exec plans can contain user-defined functions", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) stream_plan_with_udf <- function() { record_batch(a = 1:1000) %>% @@ -310,7 +310,7 @@ test_that("head() on exec plan containing user-defined functions", { float64(), auto_convert = TRUE ) - on.exit(unregister_binding("times_32", update_cache = TRUE)) + on.exit(unregister_binding("times_32")) result <- record_batch(a = 1:1000) %>% dplyr::mutate(b = times_32(a)) %>% diff --git a/r/vignettes/developers/writing_bindings.Rmd b/r/vignettes/developers/writing_bindings.Rmd index 443211b3c2b5e..e1ed92105dbc3 100644 --- a/r/vignettes/developers/writing_bindings.Rmd +++ b/r/vignettes/developers/writing_bindings.Rmd @@ -145,11 +145,10 @@ test_that("startsWith behaves identically in dplyr and Arrow", { df <- tibble(x = c("Foo", "bar", "baz", "qux")) compare_dplyr_binding( .input %>% - filter(startsWith(x, "b")) %>% - collect(), + filter(startsWith(x, "b")) %>% + collect(), df ) - }) ``` @@ -197,7 +196,7 @@ As `startsWith()` requires options, direct mapping is not appropriate. If the function cannot be mapped directly, some extra work may be needed to ensure that calling the arrow version of the function results in the same result as calling the R version of the function. In this case, the function will need -adding to the `nse_funcs` function registry. Here is how this might look for +adding to the `.cache$functions` function registry. Here is how this might look for `startsWith()`: ```{r, eval = FALSE} From 9cf0ee722ba048f3f305b38e536fa726eff9c813 Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 7 May 2024 23:56:02 +0800 Subject: [PATCH 065/261] GH-41562: [C++][Parquet] Decoding: Fix num_value handling in ByteStreamSplitDecoder (#41565) ### Rationale for this change This problem is raised from https://github.com/apache/arrow/pull/40094 . Original bug fixed here: https://github.com/apache/arrow/pull/34140 , but this is corrupt in https://github.com/apache/arrow/pull/40094 . ### What changes are included in this PR? 
Refine checking ### Are these changes tested? * [x] Will add ### Are there any user-facing changes? Bugfix * GitHub Issue: #41562 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/encoding.cc | 22 +++++++++++++++++----- cpp/src/parquet/encoding.h | 5 +++++ cpp/src/parquet/encoding_test.cc | 4 ++-- 3 files changed, 24 insertions(+), 7 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 3da5c64ace5dd..05221568c8fa0 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -3694,12 +3694,24 @@ class ByteStreamSplitDecoderBase : public DecoderImpl, ByteStreamSplitDecoderBase(const ColumnDescriptor* descr, int byte_width) : DecoderImpl(descr, Encoding::BYTE_STREAM_SPLIT), byte_width_(byte_width) {} - void SetData(int num_values, const uint8_t* data, int len) override { - if (static_cast(num_values) * byte_width_ != len) { - throw ParquetException("Data size (" + std::to_string(len) + - ") does not match number of values in BYTE_STREAM_SPLIT (" + - std::to_string(num_values) + ")"); + void SetData(int num_values, const uint8_t* data, int len) final { + // Check that the data size is consistent with the number of values + // The spec requires that the data size is a multiple of the number of values, + // see: https://github.com/apache/parquet-format/pull/192 . + // GH-41562: passed in `num_values` may include nulls, so we need to check and + // adjust the number of values. + if (static_cast(num_values) * byte_width_ < len) { + throw ParquetException( + "Data size (" + std::to_string(len) + + ") is too small for the number of values in in BYTE_STREAM_SPLIT (" + + std::to_string(num_values) + ")"); + } + if (len % byte_width_ != 0) { + throw ParquetException("ByteStreamSplit data size " + std::to_string(len) + + " not aligned with type " + TypeToString(DType::type_num) + + " and byte_width: " + std::to_string(byte_width_)); } + num_values = len / byte_width_; DecoderImpl::SetData(num_values, data, len); stride_ = num_values_; } diff --git a/cpp/src/parquet/encoding.h b/cpp/src/parquet/encoding.h index 602009189595e..493c4044ddc1c 100644 --- a/cpp/src/parquet/encoding.h +++ b/cpp/src/parquet/encoding.h @@ -255,6 +255,11 @@ class Decoder { // Sets the data for a new page. This will be called multiple times on the same // decoder and should reset all internal state. + // + // `num_values` comes from the data page header, and may be greater than the number of + // physical values in the data buffer if there are some omitted (null) values. + // `len`, on the other hand, is the size in bytes of the data buffer and + // directly relates to the number of physical values. virtual void SetData(int num_values, const uint8_t* data, int len) = 0; // Returns the number of values left (for the last call to SetData()). 
This is diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index b91fcb0839cba..3c20b917f6994 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -1383,7 +1383,7 @@ class TestByteStreamSplitEncoding : public TestEncodingBase { encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); encode_buffer_ = encoder->FlushValues(); ASSERT_EQ(encode_buffer_->size(), physical_byte_width() * (num_values_ - null_count)); - decoder->SetData(num_values_ - null_count, encode_buffer_->data(), + decoder->SetData(num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, valid_bits, valid_bits_offset); @@ -1717,7 +1717,7 @@ class TestDeltaBitPackEncoding : public TestEncodingBase { for (size_t i = 0; i < kNumRoundTrips; ++i) { encoder->PutSpaced(draws_, num_values_, valid_bits, valid_bits_offset); encode_buffer_ = encoder->FlushValues(); - decoder->SetData(num_values_ - null_count, encode_buffer_->data(), + decoder->SetData(num_values_, encode_buffer_->data(), static_cast(encode_buffer_->size())); auto values_decoded = decoder->DecodeSpaced(decode_buf_, num_values_, null_count, valid_bits, valid_bits_offset); From 51689a040cbe3dee8702cd899a33fa62e0616bf1 Mon Sep 17 00:00:00 2001 From: mwish Date: Wed, 8 May 2024 00:14:22 +0800 Subject: [PATCH 066/261] GH-41545: [C++][Parquet] Fix DeltaLengthByteArrayEncoder::EstimatedDataEncodedSize (#41546) ### Rationale for this change `DeltaLengthByteArrayEncoder::EstimatedDataEncodedSize` would return an wrong estimate when `Put(const Array&)` was called. ### What changes are included in this PR? Remove `encoded_size_` and uses `sink_.length()` as `encoded_size_`. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #41545 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/parquet/encoding.cc | 18 ++++++++++-------- cpp/src/parquet/encoding_test.cc | 9 +++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 05221568c8fa0..004cb746b3a89 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -2740,13 +2740,12 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, : EncoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY, pool = ::arrow::default_memory_pool()), sink_(pool), - length_encoder_(nullptr, pool), - encoded_size_{0} {} + length_encoder_(nullptr, pool) {} std::shared_ptr FlushValues() override; int64_t EstimatedDataEncodedSize() override { - return encoded_size_ + length_encoder_.EstimatedDataEncodedSize(); + return sink_.length() + length_encoder_.EstimatedDataEncodedSize(); } using TypedEncoder::Put; @@ -2768,6 +2767,11 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, return Status::Invalid( "Parquet cannot store strings with size 2GB or more, got: ", view.size()); } + if (ARROW_PREDICT_FALSE( + view.size() + sink_.length() > + static_cast(std::numeric_limits::max()))) { + return Status::Invalid("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); + } length_encoder_.Put({static_cast(view.length())}, 1); PARQUET_THROW_NOT_OK(sink_.Append(view.data(), view.length())); return Status::OK(); @@ -2777,7 +2781,6 @@ class DeltaLengthByteArrayEncoder : public EncoderImpl, ::arrow::BufferBuilder sink_; DeltaBitPackEncoder length_encoder_; - uint32_t encoded_size_; }; template @@ -2803,15 +2806,15 @@ void DeltaLengthByteArrayEncoder::Put(const T* src, int num_values) { const int batch_size = std::min(kBatchSize, num_values - idx); for (int j = 0; j < batch_size; ++j) { const int32_t len = src[idx + j].len; - if (AddWithOverflow(total_increment_size, len, &total_increment_size)) { + if (ARROW_PREDICT_FALSE( + AddWithOverflow(total_increment_size, len, &total_increment_size))) { throw ParquetException("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); } lengths[j] = len; } length_encoder_.Put(lengths.data(), batch_size); } - - if (AddWithOverflow(encoded_size_, total_increment_size, &encoded_size_)) { + if (sink_.length() + total_increment_size > std::numeric_limits::max()) { throw ParquetException("excess expansion in DELTA_LENGTH_BYTE_ARRAY"); } PARQUET_THROW_NOT_OK(sink_.Reserve(total_increment_size)); @@ -2850,7 +2853,6 @@ std::shared_ptr DeltaLengthByteArrayEncoder::FlushValues() { std::shared_ptr buffer; PARQUET_THROW_NOT_OK(sink_.Finish(&buffer, true)); - encoded_size_ = 0; return buffer; } diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc index 3c20b917f6994..78bf26587e3fb 100644 --- a/cpp/src/parquet/encoding_test.cc +++ b/cpp/src/parquet/encoding_test.cc @@ -577,6 +577,11 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) { auto decoder = MakeTypedDecoder(Encoding::PLAIN); ASSERT_NO_THROW(encoder->Put(*values)); + // For Plain encoding, the estimated size should be at least the total byte size + auto& string_array = dynamic_cast(*values); + EXPECT_GE(encoder->EstimatedDataEncodedSize(), string_array.total_values_length()) + << "Estimated size should be at least the total byte size"; + auto buf = encoder->FlushValues(); int num_values = static_cast(values->length() - values->null_count()); @@ -2160,6 +2165,10 @@ TEST(DeltaLengthByteArrayEncodingAdHoc, ArrowBinaryDirectPut) { auto CheckSeed = [&](std::shared_ptr<::arrow::Array> 
values) { ASSERT_NO_THROW(encoder->Put(*values)); + auto* binary_array = checked_cast(values.get()); + // For DeltaLength encoding, the estimated size should be at least the total byte size + EXPECT_GE(encoder->EstimatedDataEncodedSize(), binary_array->total_values_length()) + << "Estimated size should be at least the total byte size"; auto buf = encoder->FlushValues(); int num_values = static_cast(values->length() - values->null_count()); From 53859262ea988f31ce33a469305251064b5a53b8 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Wed, 8 May 2024 09:52:57 +0800 Subject: [PATCH 067/261] GH-41431: [C++][Parquet][Dataset] Fix repeated scan on encrypted dataset (#41550) ### Rationale for this change When parquet dataset is reused to create multiple scanners, `FileMetaData` objects are cached to avoid parsing them again. However, these caused issues on encrypted files since internal file decryptors were no longer created by cached `FileMetaData` objects. ### What changes are included in this PR? Expose file_decryptor from FileMetaData and set it properly. ### Are these changes tested? Yes, modify the test to reproduce the issue and assure fixed. ### Are there any user-facing changes? No. * GitHub Issue: #41431 Authored-by: Gang Wu Signed-off-by: Gang Wu --- .../dataset/file_parquet_encryption_test.cc | 25 +++--- cpp/src/parquet/file_reader.cc | 83 ++++++++++--------- cpp/src/parquet/metadata.cc | 8 ++ cpp/src/parquet/metadata.h | 2 + 4 files changed, 70 insertions(+), 48 deletions(-) diff --git a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc index 307017fd67e06..0287d593d12d3 100644 --- a/cpp/src/arrow/dataset/file_parquet_encryption_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_encryption_test.cc @@ -148,17 +148,22 @@ class DatasetEncryptionTestBase : public ::testing::Test { FileSystemDatasetFactory::Make(file_system_, selector, file_format, factory_options)); - // Read dataset into table + // Create the dataset ASSERT_OK_AND_ASSIGN(auto dataset, dataset_factory->Finish()); - ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan()); - ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish()); - ASSERT_OK_AND_ASSIGN(auto read_table, scanner->ToTable()); - - // Verify the data was read correctly - ASSERT_OK_AND_ASSIGN(auto combined_table, read_table->CombineChunks()); - // Validate the table - ASSERT_OK(combined_table->ValidateFull()); - AssertTablesEqual(*combined_table, *table_); + + // Reuse the dataset above to scan it twice to make sure decryption works correctly. 
+ for (size_t i = 0; i < 2; ++i) { + // Read dataset into table + ASSERT_OK_AND_ASSIGN(auto scanner_builder, dataset->NewScan()); + ASSERT_OK_AND_ASSIGN(auto scanner, scanner_builder->Finish()); + ASSERT_OK_AND_ASSIGN(auto read_table, scanner->ToTable()); + + // Verify the data was read correctly + ASSERT_OK_AND_ASSIGN(auto combined_table, read_table->CombineChunks()); + // Validate the table + ASSERT_OK(combined_table->ValidateFull()); + AssertTablesEqual(*combined_table, *table_); + } } protected: diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index b3dd1d6054ac8..8fcb0870ce4b6 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -215,16 +215,14 @@ class SerializedRowGroup : public RowGroupReader::Contents { std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source, int64_t source_size, FileMetaData* file_metadata, int row_group_number, ReaderProperties props, - std::shared_ptr prebuffered_column_chunks_bitmap, - std::shared_ptr file_decryptor = nullptr) + std::shared_ptr prebuffered_column_chunks_bitmap) : source_(std::move(source)), cached_source_(std::move(cached_source)), source_size_(source_size), file_metadata_(file_metadata), properties_(std::move(props)), row_group_ordinal_(row_group_number), - prebuffered_column_chunks_bitmap_(std::move(prebuffered_column_chunks_bitmap)), - file_decryptor_(std::move(file_decryptor)) { + prebuffered_column_chunks_bitmap_(std::move(prebuffered_column_chunks_bitmap)) { row_group_metadata_ = file_metadata->RowGroup(row_group_number); } @@ -263,10 +261,10 @@ class SerializedRowGroup : public RowGroupReader::Contents { } // The column is encrypted - std::shared_ptr meta_decryptor = - GetColumnMetaDecryptor(crypto_metadata.get(), file_decryptor_.get()); - std::shared_ptr data_decryptor = - GetColumnDataDecryptor(crypto_metadata.get(), file_decryptor_.get()); + std::shared_ptr meta_decryptor = GetColumnMetaDecryptor( + crypto_metadata.get(), file_metadata_->file_decryptor().get()); + std::shared_ptr data_decryptor = GetColumnDataDecryptor( + crypto_metadata.get(), file_metadata_->file_decryptor().get()); ARROW_DCHECK_NE(meta_decryptor, nullptr); ARROW_DCHECK_NE(data_decryptor, nullptr); @@ -291,7 +289,6 @@ class SerializedRowGroup : public RowGroupReader::Contents { ReaderProperties properties_; int row_group_ordinal_; const std::shared_ptr prebuffered_column_chunks_bitmap_; - std::shared_ptr file_decryptor_; }; // ---------------------------------------------------------------------- @@ -316,7 +313,9 @@ class SerializedFile : public ParquetFileReader::Contents { } void Close() override { - if (file_decryptor_) file_decryptor_->WipeOutDecryptionKeys(); + if (file_metadata_ && file_metadata_->file_decryptor()) { + file_metadata_->file_decryptor()->WipeOutDecryptionKeys(); + } } std::shared_ptr GetRowGroup(int i) override { @@ -330,7 +329,7 @@ class SerializedFile : public ParquetFileReader::Contents { std::unique_ptr contents = std::make_unique( source_, cached_source_, source_size_, file_metadata_.get(), i, properties_, - std::move(prebuffered_column_chunks_bitmap), file_decryptor_); + std::move(prebuffered_column_chunks_bitmap)); return std::make_shared(std::move(contents)); } @@ -346,8 +345,9 @@ class SerializedFile : public ParquetFileReader::Contents { "forget to call ParquetFileReader::Open() first?"); } if (!page_index_reader_) { - page_index_reader_ = PageIndexReader::Make(source_.get(), file_metadata_, - properties_, file_decryptor_.get()); + page_index_reader_ = + 
PageIndexReader::Make(source_.get(), file_metadata_, properties_, + file_metadata_->file_decryptor().get()); } return page_index_reader_; } @@ -362,8 +362,8 @@ class SerializedFile : public ParquetFileReader::Contents { "forget to call ParquetFileReader::Open() first?"); } if (!bloom_filter_reader_) { - bloom_filter_reader_ = - BloomFilterReader::Make(source_, file_metadata_, properties_, file_decryptor_); + bloom_filter_reader_ = BloomFilterReader::Make(source_, file_metadata_, properties_, + file_metadata_->file_decryptor()); if (bloom_filter_reader_ == nullptr) { throw ParquetException("Cannot create BloomFilterReader"); } @@ -441,10 +441,12 @@ class SerializedFile : public ParquetFileReader::Contents { // Parse the footer depending on encryption type const bool is_encrypted_footer = memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0; + std::shared_ptr file_decryptor; if (is_encrypted_footer) { // Encrypted file with Encrypted footer. const std::pair read_size = - ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len); + ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len, + &file_decryptor); // Read the actual footer metadata_start = read_size.first; metadata_len = read_size.second; @@ -453,8 +455,8 @@ class SerializedFile : public ParquetFileReader::Contents { // Fall through } - const uint32_t read_metadata_len = - ParseUnencryptedFileMetadata(metadata_buffer, metadata_len); + const uint32_t read_metadata_len = ParseUnencryptedFileMetadata( + metadata_buffer, metadata_len, std::move(file_decryptor)); auto file_decryption_properties = properties_.file_decryption_properties().get(); if (is_encrypted_footer) { // Nothing else to do here. @@ -550,34 +552,37 @@ class SerializedFile : public ParquetFileReader::Contents { // Parse the footer depending on encryption type const bool is_encrypted_footer = memcmp(footer_buffer->data() + footer_read_size - 4, kParquetEMagic, 4) == 0; + std::shared_ptr file_decryptor; if (is_encrypted_footer) { // Encrypted file with Encrypted footer. 
std::pair read_size; BEGIN_PARQUET_CATCH_EXCEPTIONS - read_size = - ParseMetaDataOfEncryptedFileWithEncryptedFooter(metadata_buffer, metadata_len); + read_size = ParseMetaDataOfEncryptedFileWithEncryptedFooter( + metadata_buffer, metadata_len, &file_decryptor); END_PARQUET_CATCH_EXCEPTIONS // Read the actual footer int64_t metadata_start = read_size.first; metadata_len = read_size.second; return source_->ReadAsync(metadata_start, metadata_len) - .Then([this, metadata_len, is_encrypted_footer]( + .Then([this, metadata_len, is_encrypted_footer, file_decryptor]( const std::shared_ptr<::arrow::Buffer>& metadata_buffer) { // Continue and read the file footer - return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer); + return ParseMetaDataFinal(metadata_buffer, metadata_len, is_encrypted_footer, + file_decryptor); }); } return ParseMetaDataFinal(std::move(metadata_buffer), metadata_len, - is_encrypted_footer); + is_encrypted_footer, std::move(file_decryptor)); } // Continuation - ::arrow::Status ParseMetaDataFinal(std::shared_ptr<::arrow::Buffer> metadata_buffer, - uint32_t metadata_len, - const bool is_encrypted_footer) { + ::arrow::Status ParseMetaDataFinal( + std::shared_ptr<::arrow::Buffer> metadata_buffer, uint32_t metadata_len, + const bool is_encrypted_footer, + std::shared_ptr file_decryptor) { BEGIN_PARQUET_CATCH_EXCEPTIONS - const uint32_t read_metadata_len = - ParseUnencryptedFileMetadata(metadata_buffer, metadata_len); + const uint32_t read_metadata_len = ParseUnencryptedFileMetadata( + metadata_buffer, metadata_len, std::move(file_decryptor)); auto file_decryption_properties = properties_.file_decryption_properties().get(); if (is_encrypted_footer) { // Nothing else to do here. @@ -608,11 +613,11 @@ class SerializedFile : public ParquetFileReader::Contents { // Maps row group ordinal and prebuffer status of its column chunks in the form of a // bitmap buffer. 
std::unordered_map> prebuffered_column_chunks_; - std::shared_ptr file_decryptor_; // \return The true length of the metadata in bytes - uint32_t ParseUnencryptedFileMetadata(const std::shared_ptr& footer_buffer, - const uint32_t metadata_len); + uint32_t ParseUnencryptedFileMetadata( + const std::shared_ptr& footer_buffer, const uint32_t metadata_len, + std::shared_ptr file_decryptor); std::string HandleAadPrefix(FileDecryptionProperties* file_decryption_properties, EncryptionAlgorithm& algo); @@ -624,11 +629,13 @@ class SerializedFile : public ParquetFileReader::Contents { // \return The position and size of the actual footer std::pair ParseMetaDataOfEncryptedFileWithEncryptedFooter( - const std::shared_ptr& crypto_metadata_buffer, uint32_t footer_len); + const std::shared_ptr& crypto_metadata_buffer, uint32_t footer_len, + std::shared_ptr* file_decryptor); }; uint32_t SerializedFile::ParseUnencryptedFileMetadata( - const std::shared_ptr& metadata_buffer, const uint32_t metadata_len) { + const std::shared_ptr& metadata_buffer, const uint32_t metadata_len, + std::shared_ptr file_decryptor) { if (metadata_buffer->size() != metadata_len) { throw ParquetException("Failed reading metadata buffer (requested " + std::to_string(metadata_len) + " bytes but got " + @@ -637,7 +644,7 @@ uint32_t SerializedFile::ParseUnencryptedFileMetadata( uint32_t read_metadata_len = metadata_len; // The encrypted read path falls through to here, so pass in the decryptor file_metadata_ = FileMetaData::Make(metadata_buffer->data(), &read_metadata_len, - properties_, file_decryptor_); + properties_, std::move(file_decryptor)); return read_metadata_len; } @@ -645,7 +652,7 @@ std::pair SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter( const std::shared_ptr<::arrow::Buffer>& crypto_metadata_buffer, // both metadata & crypto metadata length - const uint32_t footer_len) { + const uint32_t footer_len, std::shared_ptr* file_decryptor) { // encryption with encrypted footer // Check if the footer_buffer contains the entire metadata if (crypto_metadata_buffer->size() != footer_len) { @@ -664,7 +671,7 @@ SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter( // Handle AAD prefix EncryptionAlgorithm algo = file_crypto_metadata->encryption_algorithm(); std::string file_aad = HandleAadPrefix(file_decryption_properties, algo); - file_decryptor_ = std::make_shared( + *file_decryptor = std::make_shared( file_decryption_properties, file_aad, algo.algorithm, file_crypto_metadata->key_metadata(), properties_.memory_pool()); @@ -683,12 +690,12 @@ void SerializedFile::ParseMetaDataOfEncryptedFileWithPlaintextFooter( EncryptionAlgorithm algo = file_metadata_->encryption_algorithm(); // Handle AAD prefix std::string file_aad = HandleAadPrefix(file_decryption_properties, algo); - file_decryptor_ = std::make_shared( + auto file_decryptor = std::make_shared( file_decryption_properties, file_aad, algo.algorithm, file_metadata_->footer_signing_key_metadata(), properties_.memory_pool()); // set the InternalFileDecryptor in the metadata as well, as it's used // for signature verification and for ColumnChunkMetaData creation. 
- file_metadata_->set_file_decryptor(file_decryptor_); + file_metadata_->set_file_decryptor(std::move(file_decryptor)); if (file_decryption_properties->check_plaintext_footer_integrity()) { if (metadata_len - read_metadata_len != diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc index 3f101b5ae3ac6..b24883cdc160b 100644 --- a/cpp/src/parquet/metadata.cc +++ b/cpp/src/parquet/metadata.cc @@ -826,6 +826,10 @@ class FileMetaData::FileMetaDataImpl { file_decryptor_ = std::move(file_decryptor); } + const std::shared_ptr& file_decryptor() const { + return file_decryptor_; + } + private: friend FileMetaDataBuilder; uint32_t metadata_len_ = 0; @@ -947,6 +951,10 @@ void FileMetaData::set_file_decryptor( impl_->set_file_decryptor(std::move(file_decryptor)); } +const std::shared_ptr& FileMetaData::file_decryptor() const { + return impl_->file_decryptor(); +} + ParquetVersion::type FileMetaData::version() const { switch (impl_->version()) { case 1: diff --git a/cpp/src/parquet/metadata.h b/cpp/src/parquet/metadata.h index 640b898024346..9fc30df58e0d3 100644 --- a/cpp/src/parquet/metadata.h +++ b/cpp/src/parquet/metadata.h @@ -399,12 +399,14 @@ class PARQUET_EXPORT FileMetaData { private: friend FileMetaDataBuilder; friend class SerializedFile; + friend class SerializedRowGroup; explicit FileMetaData(const void* serialized_metadata, uint32_t* metadata_len, const ReaderProperties& properties, std::shared_ptr file_decryptor = NULLPTR); void set_file_decryptor(std::shared_ptr file_decryptor); + const std::shared_ptr& file_decryptor() const; // PIMPL Idiom FileMetaData(); From d83af8f749ee560c0b04d986ba2912e696e1cd68 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 8 May 2024 12:57:10 +0200 Subject: [PATCH 068/261] GH-38770: [C++][Python] RecordBatch.filter() segfaults if passed a ChunkedArray (#40971) ### Rationale for this change Filtering a record batch with a boolean mask in the form of a `ChunkedArray` results in a segmentation fault. ### What changes are included in this PR? If a chunked array is passed as a mask when filtering a record batch, the `pa.Table.filter()` code path is taken, resulting in a filtered table. ### Are these changes tested? Yes. ### Are there any user-facing changes? No.
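For illustration, here is a minimal pyarrow sketch of the fixed behavior, mirroring the regression test added below (the batch contents are illustrative):

```python
import pyarrow as pa

batch = pa.record_batch([pa.array(["a", "b", "c", "d", "e"])], names=["a'"])

# A boolean mask split across several chunks; before this fix, passing a
# ChunkedArray mask to RecordBatch.filter() crashed the process.
chunked_mask = pa.chunked_array([[True, False], [None], [False, True]])

# Nulls in the mask are dropped by default, so only rows 0 and 4 remain.
result = batch.filter(chunked_mask)
assert result.equals(pa.record_batch([pa.array(["a", "e"])], names=["a'"]))
```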
* GitHub Issue: #38770 Authored-by: AlenkaF Signed-off-by: AlenkaF --- .../vector_selection_filter_internal.cc | 26 ++++++++++++++----- python/pyarrow/tests/test_compute.py | 5 ++++ 2 files changed, 25 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index d5e5e5ad289ac..8d43c65668d4b 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -22,6 +22,7 @@ #include #include +#include "arrow/array/concatenate.h" #include "arrow/array/data.h" #include "arrow/buffer_builder.h" #include "arrow/chunked_array.h" @@ -928,12 +929,26 @@ Result> FilterRecordBatch(const RecordBatch& batch, return Status::Invalid("Filter inputs must all be the same length"); } - // Convert filter to selection vector/indices and use Take + // Fetch filter const auto& filter_opts = *static_cast(options); - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr indices, - GetTakeIndices(*filter.array(), filter_opts.null_selection_behavior, - ctx->memory_pool())); + ArrayData filter_array; + switch (filter.kind()) { + case Datum::ARRAY: + filter_array = *filter.array(); + break; + case Datum::CHUNKED_ARRAY: { + ARROW_ASSIGN_OR_RAISE(auto combined, Concatenate(filter.chunked_array()->chunks())); + filter_array = *combined->data(); + break; + } + default: + return Status::TypeError("Filter should be array-like"); + } + + // Convert filter to selection vector/indices and use Take + ARROW_ASSIGN_OR_RAISE(std::shared_ptr indices, + GetTakeIndices(filter_array, filter_opts.null_selection_behavior, + ctx->memory_pool())); std::vector> columns(batch.num_columns()); for (int i = 0; i < batch.num_columns(); ++i) { ARROW_ASSIGN_OR_RAISE(Datum out, Take(batch.column(i)->data(), Datum(indices), @@ -1042,7 +1057,6 @@ class FilterMetaFunction : public MetaFunction { } if (args[0].kind() == Datum::RECORD_BATCH) { - auto values_batch = args[0].record_batch(); ARROW_ASSIGN_OR_RAISE( std::shared_ptr out_batch, FilterRecordBatch(*args[0].record_batch(), args[1], options, ctx)); diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py index 17cc546f834ca..d7dee1ad05e93 100644 --- a/python/pyarrow/tests/test_compute.py +++ b/python/pyarrow/tests/test_compute.py @@ -1345,6 +1345,11 @@ def test_filter_record_batch(): expected = pa.record_batch([pa.array(["a", "e"])], names=["a'"]) assert result.equals(expected) + # GH-38770: mask is chunked array + chunked_mask = pa.chunked_array([[True, False], [None], [False, True]]) + result = batch.filter(chunked_mask) + assert result.equals(expected) + result = batch.filter(mask, null_selection_behavior="emit_null") expected = pa.record_batch([pa.array(["a", None, "e"])], names=["a'"]) assert result.equals(expected) From e21952f969cd9d0906a86898f561088606447359 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 8 May 2024 13:47:21 +0200 Subject: [PATCH 069/261] GH-40750: [C++][Python] Map child Array constructed from keys and items shouldn't have offset (#40871) ### Rationale for this change When a `MapArray` is constructed from `keys` and `items` arrays, the offset of the list offsets is passed down to the struct child array, which is not correct. ### What changes are included in this PR? This PR fixes this issue. ### Are these changes tested? Yes. ### Are there any user-facing changes? Shouldn't be.
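A minimal pyarrow sketch of the fix, patterned on the test added below (the key and item values here are illustrative):

```python
import pyarrow as pa

keys = pa.array(["a", "b", "c", "d", "e"])
items = pa.array([1, 2, 3, 4, 5])
offsets = pa.array([0, 1, 3, 5], pa.int32())

# Slicing the offsets yields an offsets array with a non-zero offset;
# before this fix, that offset also leaked into the struct child of the map.
result = pa.MapArray.from_arrays(offsets.slice(1), keys, items)
expected = pa.MapArray.from_arrays([1, 3, 5], keys, items)

assert result.equals(expected)
assert result.offset == 1 and expected.offset == 0
```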
* GitHub Issue: #40750 Authored-by: AlenkaF Signed-off-by: AlenkaF --- cpp/src/arrow/array/array_list_test.cc | 16 +++++++++++++++- cpp/src/arrow/array/array_nested.cc | 2 +- python/pyarrow/tests/test_array.py | 24 ++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 18afcc90d71f8..e79ce6fe172b2 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1287,7 +1287,7 @@ TEST_F(TestMapArray, ValidateErrorNullKey) { } TEST_F(TestMapArray, FromArrays) { - std::shared_ptr offsets1, offsets2, offsets3, offsets4, keys, items; + std::shared_ptr offsets1, offsets2, offsets3, offsets4, offsets5, keys, items; std::vector offsets_is_valid3 = {true, false, true, true}; std::vector offsets_is_valid4 = {true, true, false, true}; @@ -1342,6 +1342,20 @@ TEST_F(TestMapArray, FromArrays) { // Zero-length offsets ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1->Slice(0, 0), keys, items, pool_)); + // Offseted offsets + ASSERT_OK_AND_ASSIGN(auto map5, + MapArray::FromArrays(offsets1->Slice(1), keys, items, pool_)); + ASSERT_OK(map5->Validate()); + + AssertArraysEqual(*expected1.Slice(1), *map5); + + std::vector offset5_values = {2, 2, 6}; + ArrayFromVector(offset5_values, &offsets5); + ASSERT_OK_AND_ASSIGN(auto map6, MapArray::FromArrays(offsets5, keys, items, pool_)); + ASSERT_OK(map6->Validate()); + + AssertArraysEqual(*map5, *map6); + // Offsets not the right type ASSERT_RAISES(TypeError, MapArray::FromArrays(keys, offsets1, items, pool_)); diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 24e0dfb7081ac..1be771d8228d9 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -790,7 +790,7 @@ MapArray::MapArray(const std::shared_ptr& type, int64_t length, const std::shared_ptr& items, int64_t null_count, int64_t offset) { auto pair_data = ArrayData::Make(type->fields()[0]->type(), keys->data()->length, - {nullptr}, {keys->data(), items->data()}, 0, offset); + {nullptr}, {keys->data(), items->data()}, 0); auto map_data = ArrayData::Make(type, length, std::move(buffers), {pair_data}, null_count, offset); SetData(map_data); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 3754daeb9b4bd..dbe29c5730758 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1099,6 +1099,30 @@ def test_map_from_arrays(): with pytest.raises(ValueError): pa.MapArray.from_arrays(offsets, keys_with_null, items) + # Check if offset in offsets > 0 + offsets = pa.array(offsets, pa.int32()) + result = pa.MapArray.from_arrays(offsets.slice(1), keys, items) + expected = pa.MapArray.from_arrays([1, 3, 5], keys, items) + + assert result.equals(expected) + assert result.offset == 1 + assert expected.offset == 0 + + offsets = pa.array([0, 0, 0, 0, 0, 0], pa.int32()) + result = pa.MapArray.from_arrays( + offsets.slice(1), + pa.array([], pa.string()), + pa.array([], pa.string()), + ) + expected = pa.MapArray.from_arrays( + [0, 0, 0, 0, 0], + pa.array([], pa.string()), + pa.array([], pa.string()), + ) + assert result.equals(expected) + assert result.offset == 1 + assert expected.offset == 0 + def test_fixed_size_list_from_arrays(): values = pa.array(range(12), pa.int64()) From f462ec7e6b85aef3d84b777bc577441f4e10b214 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: 
Wed, 8 May 2024 10:54:12 -0400 Subject: [PATCH 070/261] MINOR: [Go] Bump golang.org/x/sys from 0.19.0 to 0.20.0 in /go (#41554) Bumps [golang.org/x/sys](https://github.com/golang/sys) from 0.19.0 to 0.20.0.
Commits
  • 7d69d98 unix: extend support for z/OS
  • 7758090 cpu: add support for sve2 detection
  • 9a28524 windows: drop go version tags for unsupported versions
  • 27dc90b unix: update to Linux kernel 6.4
  • See full diff in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=golang.org/x/sys&package-manager=go_modules&previous-version=0.19.0&new-version=0.20.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 35fd9b9915c0b..188e5c6180ff1 100644 --- a/go/go.mod +++ b/go/go.mod @@ -36,7 +36,7 @@ require ( github.com/zeebo/xxh3 v1.0.2 golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 - golang.org/x/sys v0.19.0 + golang.org/x/sys v0.20.0 golang.org/x/tools v0.20.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 diff --git a/go/go.sum b/go/go.sum index bf33fed6c4c97..998b3cd8bbcc5 100644 --- a/go/go.sum +++ b/go/go.sum @@ -124,8 +124,8 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.19.0 h1:q5f1RH2jigJ1MoAWp2KTp3gm5zAGFUTarQZ5U386+4o= -golang.org/x/sys v0.19.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= From f6720276543844ad53dece91a9350b0a821e52d3 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 8 May 2024 10:54:32 -0400 Subject: [PATCH 071/261] MINOR: [Go] Bump google.golang.org/protobuf from 1.34.0 to 1.34.1 in /go (#41553) Bumps google.golang.org/protobuf from 1.34.0 to 1.34.1. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=google.golang.org/protobuf&package-manager=go_modules&previous-version=1.34.0&new-version=1.34.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 188e5c6180ff1..7c14ddcf9e216 100644 --- a/go/go.mod +++ b/go/go.mod @@ -41,7 +41,7 @@ require ( golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 - google.golang.org/protobuf v1.34.0 + google.golang.org/protobuf v1.34.1 modernc.org/sqlite v1.29.6 ) diff --git a/go/go.sum b/go/go.sum index 998b3cd8bbcc5..70e3a533d03f3 100644 --- a/go/go.sum +++ b/go/go.sum @@ -138,8 +138,8 @@ google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de h1: google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de/go.mod h1:H4O17MA/PE9BsGx3w+a+W2VOLLD1Qf7oJneAoU6WktY= google.golang.org/grpc v1.63.2 h1:MUeiw1B2maTVZthpU5xvASfTh3LDbxHd6IJ6QQVU+xM= google.golang.org/grpc v1.63.2/go.mod h1:WAX/8DgncnokcFUldAxq7GeB5DXHDbMF+lLvDomNkRA= -google.golang.org/protobuf v1.34.0 h1:Qo/qEd2RZPCf2nKuorzksSknv0d3ERwp1vFG38gSmH4= -google.golang.org/protobuf v1.34.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= +google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= From 304650145689291eb87db5dd58f7b9776bdfaacf Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Wed, 8 May 2024 11:42:41 -0400 Subject: [PATCH 072/261] GH-41435: [CI][MATLAB] Add job to build and test MATLAB Interface on `macos-14` (#41592) ### Rationale for this change Currently, the MATLAB interface is built and tested on `macos-12` - not `macos-14` - because the version of `mathworks/libmexclass` that the MATLAB interface depends on used to not support `macos-14`. However, now that https://github.com/apache/arrow/issues/41400 is closed, the version of `mathworks/libmexclass` the MATLAB interface depends on works on `macos-14`, so we will be able to build and test the MATLAB interface on `macos-14`. **Note**: When adding support for ARM-based macOS builds, we discovered an issue with the way in which we package the MLTBX files for the MATLAB Interface to Arrow. Currently, we bundle all shared libraries for all platforms (.dll, .dylib, and .so) into one large "monolithic" MLTBX file. Unfortunately, putting all platform-specific files into one MLTBX file poses an issue when we support multiple ISAs (e.g. x86 and ARM) because builds for the same operating system with different ISAs will have the same shared library file names. In other words, we will have a library named libarrowproxy.dylib for both ARM and x86 macOS builds. Therefore, we are going to hold off on adding ARM-based macOS builds to the crossbow packaging workflow for now until we have a chance to properly explore alternative packaging approaches. For example, we may want to consider having platform-specific MLTBX files. However, we still think it is worthwhile to add CI support for `macos-14` in the meantime. ### What changes are included in this PR? 1.
Added workflow to build and test the MATLAB interface on `macos-14` as well as `macos-12`. ### Are these changes tested? N/A. ### Are there any user-facing changes? No. ### Future Directions 1. Add crossbow packaging workflow on `macos-14` once we determine how to package the interface for both ARM-based and Intel-based mac ISAs. * GitHub Issue: #41435 Authored-by: Sarah Gilmore Signed-off-by: Sarah Gilmore --- .github/workflows/matlab.yml | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/matlab.yml b/.github/workflows/matlab.yml index 2ae33d1e8d6c6..ca8280927f4a5 100644 --- a/.github/workflows/matlab.yml +++ b/.github/workflows/matlab.yml @@ -98,9 +98,16 @@ jobs: select-by-folder: matlab/test strict: true macos: - name: AMD64 macOS 12 MATLAB - runs-on: macos-12 + name: ${{ matrix.architecture }} macOS ${{ matrix.macos-version }} MATLAB + runs-on: macos-${{ matrix.macos-version }} if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + strategy: + matrix: + include: + - architecture: AMD64 + macos-version: "12" + - architecture: ARM64 + macos-version: "14" steps: - name: Check out repository uses: actions/checkout@v4 From 5252c6ce13694fa31dbcb2623d1629cd8fe53a47 Mon Sep 17 00:00:00 2001 From: Alex Shcherbakov Date: Wed, 8 May 2024 22:46:45 +0300 Subject: [PATCH 073/261] GH-41594: [Go] Support reading `date64` type & properly validate list-like types (#41595) This PR includes 2 fixes: 1. support reading `date64` columns (writing is already supported) 2. properly validate list-like data types (a list of an unsupported type is itself unsupported) ### Rationale for this change See #41594 ### What changes are included in this PR? 1. Added `date64` reading & conversion funcs similar to `date32` 2. Refactored data type validation ### Are these changes tested? a55cd5324d2c47932410b0c7a9c46075386645d2 ### Are there any user-facing changes? No.
* GitHub Issue: #41594 Authored-by: candiduslynx Signed-off-by: Matt Topol --- go/arrow/csv/common.go | 40 ++++++++++------- go/arrow/csv/reader.go | 74 +++++++++++++------------------- go/arrow/csv/reader_test.go | 8 ++++ go/arrow/csv/testdata/header.csv | 8 ++-- go/arrow/csv/testdata/types.csv | 8 ++-- go/arrow/csv/transformer.go | 69 +++++++---------------------- 6 files changed, 86 insertions(+), 121 deletions(-) diff --git a/go/arrow/csv/common.go b/go/arrow/csv/common.go index 4455c8b782167..06fed69a77fe5 100644 --- a/go/arrow/csv/common.go +++ b/go/arrow/csv/common.go @@ -239,21 +239,31 @@ func WithStringsReplacer(replacer *strings.Replacer) Option { func validate(schema *arrow.Schema) { for i, f := range schema.Fields() { - switch ft := f.Type.(type) { - case *arrow.BooleanType: - case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: - case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: - case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: - case *arrow.StringType, *arrow.LargeStringType: - case *arrow.TimestampType: - case *arrow.Date32Type, *arrow.Date64Type: - case *arrow.Decimal128Type, *arrow.Decimal256Type: - case *arrow.ListType, *arrow.LargeListType, *arrow.FixedSizeListType: - case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: - case arrow.ExtensionType: - case *arrow.NullType: - default: - panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, ft)) + if !typeSupported(f.Type) { + panic(fmt.Errorf("arrow/csv: field %d (%s) has invalid data type %T", i, f.Name, f.Type)) } } } + +func typeSupported(dt arrow.DataType) bool { + switch dt := dt.(type) { + case *arrow.BooleanType: + case *arrow.Int8Type, *arrow.Int16Type, *arrow.Int32Type, *arrow.Int64Type: + case *arrow.Uint8Type, *arrow.Uint16Type, *arrow.Uint32Type, *arrow.Uint64Type: + case *arrow.Float16Type, *arrow.Float32Type, *arrow.Float64Type: + case *arrow.StringType, *arrow.LargeStringType: + case *arrow.TimestampType: + case *arrow.Date32Type, *arrow.Date64Type: + case *arrow.Decimal128Type, *arrow.Decimal256Type: + case *arrow.MapType: + return false + case arrow.ListLikeType: + return typeSupported(dt.Elem()) + case *arrow.BinaryType, *arrow.LargeBinaryType, *arrow.FixedSizeBinaryType: + case arrow.ExtensionType: + case *arrow.NullType: + default: + return false + } + return true +} diff --git a/go/arrow/csv/reader.go b/go/arrow/csv/reader.go index 18f1083e6a9dc..46591a9a5adee 100644 --- a/go/arrow/csv/reader.go +++ b/go/arrow/csv/reader.go @@ -474,6 +474,10 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDate32(bldr, str) } + case *arrow.Date64Type: + return func(str string) { + r.parseDate64(bldr, str) + } case *arrow.Time32Type: return func(str string) { r.parseTime32(bldr, str, dt.Unit) @@ -486,17 +490,13 @@ func (r *Reader) initFieldConverter(bldr array.Builder) func(string) { return func(str string) { r.parseDecimal256(bldr, str, dt.Precision, dt.Scale) } - case *arrow.ListType: - return func(s string) { - r.parseList(bldr, s) - } - case *arrow.LargeListType: + case *arrow.FixedSizeListType: return func(s string) { - r.parseLargeList(bldr, s) + r.parseFixedSizeList(bldr.(*array.FixedSizeListBuilder), s, int(dt.Len())) } - case *arrow.FixedSizeListType: + case arrow.ListLikeType: return func(s string) { - r.parseFixedSizeList(bldr, s, int(dt.Len())) + r.parseListLike(bldr.(array.ListLikeBuilder), s) } case *arrow.BinaryType: return func(s 
string) { @@ -740,81 +740,67 @@ func (r *Reader) parseDate32(field array.Builder, str string) { field.(*array.Date32Builder).Append(arrow.Date32FromTime(tm)) } -func (r *Reader) parseTime32(field array.Builder, str string, unit arrow.TimeUnit) { +func (r *Reader) parseDate64(field array.Builder, str string) { if r.isNull(str) { field.AppendNull() return } - val, err := arrow.Time32FromString(str, unit) + tm, err := time.Parse("2006-01-02", str) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Time32Builder).Append(val) + field.(*array.Date64Builder).Append(arrow.Date64FromTime(tm)) } -func (r *Reader) parseDecimal128(field array.Builder, str string, prec, scale int32) { +func (r *Reader) parseTime32(field array.Builder, str string, unit arrow.TimeUnit) { if r.isNull(str) { field.AppendNull() return } - val, err := decimal128.FromString(str, prec, scale) + val, err := arrow.Time32FromString(str, unit) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Decimal128Builder).Append(val) + field.(*array.Time32Builder).Append(val) } -func (r *Reader) parseDecimal256(field array.Builder, str string, prec, scale int32) { +func (r *Reader) parseDecimal128(field array.Builder, str string, prec, scale int32) { if r.isNull(str) { field.AppendNull() return } - val, err := decimal256.FromString(str, prec, scale) + val, err := decimal128.FromString(str, prec, scale) if err != nil && r.err == nil { r.err = err field.AppendNull() return } - field.(*array.Decimal256Builder).Append(val) + field.(*array.Decimal128Builder).Append(val) } -func (r *Reader) parseList(field array.Builder, str string) { +func (r *Reader) parseDecimal256(field array.Builder, str string, prec, scale int32) { if r.isNull(str) { field.AppendNull() return } - if !(strings.HasPrefix(str, "{") && strings.HasSuffix(str, "}")) { - r.err = errors.New("invalid list format. 
should start with '{' and end with '}'") - return - } - str = strings.Trim(str, "{}") - listBldr := field.(*array.ListBuilder) - listBldr.Append(true) - if len(str) == 0 { - // we don't want to create the csv reader if we already know the - // string is empty - return - } - valueBldr := listBldr.ValueBuilder() - reader := csv.NewReader(strings.NewReader(str)) - items, err := reader.Read() - if err != nil { + + val, err := decimal256.FromString(str, prec, scale) + if err != nil && r.err == nil { r.err = err + field.AppendNull() return } - for _, str := range items { - r.initFieldConverter(valueBldr)(str) - } + field.(*array.Decimal256Builder).Append(val) } -func (r *Reader) parseLargeList(field array.Builder, str string) { +func (r *Reader) parseListLike(field array.ListLikeBuilder, str string) { if r.isNull(str) { field.AppendNull() return @@ -824,14 +810,13 @@ func (r *Reader) parseLargeList(field array.Builder, str string) { return } str = strings.Trim(str, "{}") - largeListBldr := field.(*array.LargeListBuilder) - largeListBldr.Append(true) + field.Append(true) if len(str) == 0 { // we don't want to create the csv reader if we already know the // string is empty return } - valueBldr := largeListBldr.ValueBuilder() + valueBldr := field.ValueBuilder() reader := csv.NewReader(strings.NewReader(str)) items, err := reader.Read() if err != nil { @@ -843,7 +828,7 @@ func (r *Reader) parseLargeList(field array.Builder, str string) { } } -func (r *Reader) parseFixedSizeList(field array.Builder, str string, n int) { +func (r *Reader) parseFixedSizeList(field *array.FixedSizeListBuilder, str string, n int) { if r.isNull(str) { field.AppendNull() return @@ -853,14 +838,13 @@ func (r *Reader) parseFixedSizeList(field array.Builder, str string, n int) { return } str = strings.Trim(str, "{}") - fixedSizeListBldr := field.(*array.FixedSizeListBuilder) - fixedSizeListBldr.Append(true) + field.Append(true) if len(str) == 0 { // we don't want to create the csv reader if we already know the // string is empty return } - valueBldr := fixedSizeListBldr.ValueBuilder() + valueBldr := field.ValueBuilder() reader := csv.NewReader(strings.NewReader(str)) items, err := reader.Read() if err != nil { diff --git a/go/arrow/csv/reader_test.go b/go/arrow/csv/reader_test.go index b6654dd1984ea..65453db015a7e 100644 --- a/go/arrow/csv/reader_test.go +++ b/go/arrow/csv/reader_test.go @@ -357,6 +357,8 @@ func testCSVReader(t *testing.T, filepath string, withHeader bool, stringsCanBeN {Name: "large_binary", Type: arrow.BinaryTypes.LargeBinary}, {Name: "fixed_size_binary", Type: &arrow.FixedSizeBinaryType{ByteWidth: 3}}, {Name: "uuid", Type: types.NewUUIDType()}, + {Name: "date32", Type: arrow.PrimitiveTypes.Date32}, + {Name: "date64", Type: arrow.PrimitiveTypes.Date64}, }, nil, ) @@ -420,6 +422,8 @@ rec[0]["binary"]: ["\x00\x01\x02"] rec[0]["large_binary"]: ["\x00\x01\x02"] rec[0]["fixed_size_binary"]: ["\x00\x01\x02"] rec[0]["uuid"]: ["00000000-0000-0000-0000-000000000001"] +rec[0]["date32"]: [19121] +rec[0]["date64"]: [1652054400000] rec[1]["bool"]: [false] rec[1]["i8"]: [-2] rec[1]["i16"]: [-2] @@ -442,6 +446,8 @@ rec[1]["binary"]: [(null)] rec[1]["large_binary"]: [(null)] rec[1]["fixed_size_binary"]: [(null)] rec[1]["uuid"]: ["00000000-0000-0000-0000-000000000002"] +rec[1]["date32"]: [19121] +rec[1]["date64"]: [1652054400000] rec[2]["bool"]: [(null)] rec[2]["i8"]: [(null)] rec[2]["i16"]: [(null)] @@ -464,6 +470,8 @@ rec[2]["binary"]: [(null)] rec[2]["large_binary"]: [(null)] rec[2]["fixed_size_binary"]: [(null)] 
rec[2]["uuid"]: [(null)] +rec[2]["date32"]: [(null)] +rec[2]["date64"]: [(null)] `, str1Value, str1Value, str2Value, str2Value) got, want := out.String(), want require.Equal(t, want, got) diff --git a/go/arrow/csv/testdata/header.csv b/go/arrow/csv/testdata/header.csv index 50be4f5e4daca..68ae18a499dee 100644 --- a/go/arrow/csv/testdata/header.csv +++ b/go/arrow/csv/testdata/header.csv @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. # -bool;i8;i16;i32;i64;u8;u16;u32;u64;f16;f32;f64;str;large_str;ts;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid -true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001 -false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002 -null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file +bool;i8;i16;i32;i64;u8;u16;u32;u64;f16;f32;f64;str;large_str;ts;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid;date32;date64 +true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001;2022-05-09;2022-05-09 +false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002;2022-05-09;2022-05-09 +null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file diff --git a/go/arrow/csv/testdata/types.csv b/go/arrow/csv/testdata/types.csv index d32941f4b214d..91c0cf3b252b3 100644 --- a/go/arrow/csv/testdata/types.csv +++ b/go/arrow/csv/testdata/types.csv @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. 
# -## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float16;float32;float64;string;large_string;timestamp;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid -true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001 -false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002 -null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file +## supported types: bool;int8;int16;int32;int64;uint8;uint16;uint32;uint64;float16;float32;float64;string;large_string;timestamp;list(i64);large_list(i64);fixed_size_list(i64);binary;large_binary;fixed_size_binary;uuid;date32;date64 +true;-1;-1;-1;-1;1;1;1;1;1.1;1.1;1.1;str-1;str-1;2022-05-09T00:01:01;{1,2,3};{1,2,3};{1,2,3};AAEC;AAEC;AAEC;00000000-0000-0000-0000-000000000001;2022-05-09;2022-05-09 +false;-2;-2;-2;-2;2;2;2;2;2.2;2.2;2.2;;;2022-05-09T23:59:59;{};{};{4,5,6};;;;00000000-0000-0000-0000-000000000002;2022-05-09;2022-05-09 +null;NULL;null;N/A;;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null;null \ No newline at end of file diff --git a/go/arrow/csv/transformer.go b/go/arrow/csv/transformer.go index 90c26ac981078..237437c0441e1 100644 --- a/go/arrow/csv/transformer.go +++ b/go/arrow/csv/transformer.go @@ -29,7 +29,7 @@ import ( "github.com/apache/arrow/go/v17/arrow/array" ) -func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string)string) []string { +func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, stringsReplacer func(string) string) []string { res := make([]string, col.Len()) switch typ.(type) { case *arrow.BooleanType: @@ -215,62 +215,25 @@ func (w *Writer) transformColToStringArr(typ arrow.DataType, col arrow.Array, st res[i] = w.nullValue } } - case *arrow.ListType: - arr := col.(*array.List) - listVals, offsets := arr.ListValues(), arr.Offsets() - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64(offsets[i]), int64(offsets[i+1])) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { - res[i] = w.nullValue - } - } - case *arrow.LargeListType: - arr := col.(*array.LargeList) - listVals, offsets := arr.ListValues(), arr.Offsets() - for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64(offsets[i]), int64(offsets[i+1])) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { - res[i] = w.nullValue - } - } - case *arrow.FixedSizeListType: - arr := col.(*array.FixedSizeList) + case arrow.ListLikeType: + arr := col.(array.ListLike) listVals := arr.ListValues() for i := 0; i < arr.Len(); i++ { - if arr.IsValid(i) { - list := array.NewSlice(listVals, int64((arr.Len()-1)*i), int64((arr.Len()-1)*(i+1))) - var b bytes.Buffer - b.Write([]byte{'{'}) - writer := csv.NewWriter(&b) - writer.Write(w.transformColToStringArr(list.DataType(), list, 
stringsReplacer)) - writer.Flush() - b.Truncate(b.Len() - 1) - b.Write([]byte{'}'}) - res[i] = b.String() - list.Release() - } else { + if arr.IsNull(i) { res[i] = w.nullValue + continue } + start, end := arr.ValueOffsets(i) + list := array.NewSlice(listVals, start, end) + var b bytes.Buffer + b.Write([]byte{'{'}) + writer := csv.NewWriter(&b) + writer.Write(w.transformColToStringArr(list.DataType(), list, stringsReplacer)) + writer.Flush() + b.Truncate(b.Len() - 1) + b.Write([]byte{'}'}) + res[i] = b.String() + list.Release() } case *arrow.BinaryType: arr := col.(*array.Binary) From 318d22adda3b66bd4a10fddc7789c8a13e4aa540 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Wed, 8 May 2024 22:05:23 +0200 Subject: [PATCH 074/261] MINOR: [Dev] Remove Dane from collaborators list (#41589) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Dane is a new committer. ### What changes are included in this PR? Remove Dane from the collaborators list. ### Are these changes tested? Not required. ### Are there any user-facing changes? No Authored-by: Raúl Cumplido Signed-off-by: Dane Pitkin --- .asf.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/.asf.yaml b/.asf.yaml index 1eb019fea9af1..a1c6434587703 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -21,7 +21,6 @@ github: collaborators: - anjakefala - benibus - - danepitkin - davisusanibar - jbonofre - js8544 From 46e78160933d039991cedfabb9216dc4c861fb4b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 9 May 2024 06:29:46 +0900 Subject: [PATCH 075/261] GH-41430: [Docs] Use sphinxcontrib-mermaid instead of generating images from .mmd (#41455) ### Rationale for this change This makes the documentation easier to maintain. ### What changes are included in this PR? * Install sphinxcontrib-mermaid * Install Chromium to generate SVG from .mmd * Use Debian instead of Ubuntu for building docs because Ubuntu provides Chromium only via snap * Use a normal user, not root, to build documents because Mermaid requires an additional `--no-sandbox` argument when run as root ### Are these changes tested? Yes. ### Are there any user-facing changes? No.
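For context, a rough sketch of the Sphinx side of this change; the exact `docs/source/conf.py` contents aren't shown here, so the option values below are assumptions based on sphinxcontrib-mermaid's documented settings:

```python
# docs/source/conf.py (hypothetical excerpt)
extensions = [
    # ... existing Sphinx extensions ...
    "sphinxcontrib.mermaid",
]

# Render Mermaid sources to SVG at build time via mermaid-cli (mmdc),
# which drives the Chromium installed in the docs image through Puppeteer.
mermaid_output_format = "svg"
mermaid_cmd = "mmdc"
```

With the extension in place, the checked-in `.mmd.svg` files can be removed and the `.rst` pages can reference the `.mmd` sources directly via the `mermaid` directive.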
* GitHub Issue: #41430 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/docs.yml | 13 ++-- .github/workflows/docs_light.yml | 2 +- ci/conda_env_sphinx.txt | 1 + ci/docker/linux-apt-docs.dockerfile | 60 ++++++++++++------- ci/scripts/cpp_build.sh | 13 ++-- ci/scripts/integration_arrow.sh | 2 + ci/scripts/java_build.sh | 13 +++- ci/scripts/java_cdata_integration.sh | 4 +- ci/scripts/js_build.sh | 19 ++++-- ci/scripts/js_test.sh | 3 +- ci/scripts/python_build.sh | 33 ++++++++-- ci/scripts/r_build.sh | 20 ++++++- dev/archery/archery/docker/core.py | 4 ++ .../archery/integration/tester_java.py | 16 +++-- dev/archery/archery/integration/tester_js.py | 8 ++- dev/tasks/tasks.yml | 12 ++-- docker-compose.yml | 39 +++++++----- docs/requirements.txt | 3 +- docs/source/conf.py | 8 ++- docs/source/format/Flight.rst | 20 +++---- docs/source/format/Flight/DoExchange.mmd | 3 - docs/source/format/Flight/DoExchange.mmd.svg | 1 - docs/source/format/Flight/DoGet.mmd | 3 - docs/source/format/Flight/DoGet.mmd.svg | 1 - docs/source/format/Flight/DoPut.mmd | 3 - docs/source/format/Flight/DoPut.mmd.svg | 1 - docs/source/format/Flight/PollFlightInfo.mmd | 3 - .../format/Flight/PollFlightInfo.mmd.svg | 1 - docs/source/format/FlightSql.rst | 20 +++---- .../format/FlightSql/CommandGetTables.mmd | 3 - .../format/FlightSql/CommandGetTables.mmd.svg | 1 - .../CommandPreparedStatementQuery.mmd | 3 - .../CommandPreparedStatementQuery.mmd.svg | 1 - .../FlightSql/CommandStatementIngest.mmd | 3 - .../FlightSql/CommandStatementIngest.mmd.svg | 1 - .../FlightSql/CommandStatementQuery.mmd | 3 - .../FlightSql/CommandStatementQuery.mmd.svg | 1 - 37 files changed, 210 insertions(+), 135 deletions(-) delete mode 100644 docs/source/format/Flight/DoExchange.mmd.svg delete mode 100644 docs/source/format/Flight/DoGet.mmd.svg delete mode 100644 docs/source/format/Flight/DoPut.mmd.svg delete mode 100644 docs/source/format/Flight/PollFlightInfo.mmd.svg delete mode 100644 docs/source/format/FlightSql/CommandGetTables.mmd.svg delete mode 100644 docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg delete mode 100644 docs/source/format/FlightSql/CommandStatementIngest.mmd.svg delete mode 100644 docs/source/format/FlightSql/CommandStatementQuery.mmd.svg diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index fe49e275d908d..36a0dc014db8d 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,12 +32,12 @@ env: jobs: complete: - name: AMD64 Ubuntu 22.04 Complete Documentation + name: AMD64 Debian 12 Complete Documentation runs-on: ubuntu-latest if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 150 env: - UBUNTU: "22.04" + JDK: 17 steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 @@ -50,8 +50,8 @@ jobs: uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 with: path: .docker - key: ubuntu-docs-${{ hashFiles('cpp/**') }} - restore-keys: ubuntu-docs- + key: debian-docs-${{ hashFiles('cpp/**') }} + restore-keys: debian-docs- - name: Setup Python uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5.1.0 with: @@ -62,7 +62,8 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - run: archery docker run ubuntu-docs + JDK: 17 + run: archery docker run debian-docs - name: Docker Push if: >- success() && @@ -73,4 +74,4 @@ jobs: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} 
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} continue-on-error: true - run: archery docker push ubuntu-docs + run: archery docker push debian-docs diff --git a/.github/workflows/docs_light.yml b/.github/workflows/docs_light.yml index 376c87651d2d0..947e2ac21b83c 100644 --- a/.github/workflows/docs_light.yml +++ b/.github/workflows/docs_light.yml @@ -31,7 +31,7 @@ on: permissions: contents: read - + env: ARCHERY_DEBUG: 1 ARCHERY_USE_DOCKER_CLI: 1 diff --git a/ci/conda_env_sphinx.txt b/ci/conda_env_sphinx.txt index 83afa69a653a9..4665a32e24bbe 100644 --- a/ci/conda_env_sphinx.txt +++ b/ci/conda_env_sphinx.txt @@ -28,6 +28,7 @@ sphinx-design sphinx-copybutton sphinx-lint sphinxcontrib-jquery +sphinxcontrib-mermaid sphinx==6.2 # Requirement for doctest-cython # Needs upper pin of 0.3.0, see: diff --git a/ci/docker/linux-apt-docs.dockerfile b/ci/docker/linux-apt-docs.dockerfile index ec424b4e6eaa0..1c916840e071b 100644 --- a/ci/docker/linux-apt-docs.dockerfile +++ b/ci/docker/linux-apt-docs.dockerfile @@ -21,18 +21,34 @@ FROM ${base} ARG r=4.4 ARG jdk=8 -# See R install instructions at https://cloud.r-project.org/bin/linux/ubuntu/ +ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium + +# See R install instructions at https://cloud.r-project.org/bin/linux/ RUN apt-get update -y && \ apt-get install -y \ - dirmngr \ apt-transport-https \ - software-properties-common && \ - wget -qO- https://cloud.r-project.org/bin/linux/ubuntu/marutter_pubkey.asc | \ - tee -a /etc/apt/trusted.gpg.d/cran_ubuntu_key.asc && \ - add-apt-repository 'deb https://cloud.r-project.org/bin/linux/ubuntu '$(lsb_release -cs)'-cran40/' && \ + dirmngr \ + gpg \ + lsb-release && \ + gpg --keyserver keyserver.ubuntu.com \ + --recv-key 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 && \ + gpg --export 95C0FAF38DB3CCAD0C080A7BDC78B2DDEABC47B7 | \ + gpg --no-default-keyring \ + --keyring /usr/share/keyrings/cran.gpg \ + --import - && \ + echo "deb [signed-by=/usr/share/keyrings/cran.gpg] https://cloud.r-project.org/bin/linux/$(lsb_release -is | tr 'A-Z' 'a-z') $(lsb_release -cs)-cran40/" | \ + tee /etc/apt/sources.list.d/cran.list && \ + if [ -f /etc/apt/sources.list.d/debian.sources ]; then \ + sed -i \ + -e 's/main$/main contrib non-free non-free-firmware/g' \ + /etc/apt/sources.list.d/debian.sources; \ + fi && \ + apt-get update -y && \ apt-get install -y --no-install-recommends \ autoconf-archive \ automake \ + chromium \ + chromium-sandbox \ curl \ doxygen \ gi-docgen \ @@ -48,6 +64,8 @@ RUN apt-get update -y && \ libxml2-dev \ meson \ ninja-build \ + nodejs \ + npm \ nvidia-cuda-toolkit \ openjdk-${jdk}-jdk-headless \ pandoc \ @@ -55,9 +73,12 @@ RUN apt-get update -y && \ r-base=${r}* \ rsync \ ruby-dev \ + sudo \ wget && \ apt-get clean && \ - rm -rf /var/lib/apt/lists/* + rm -rf /var/lib/apt/lists/* && \ + PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \ + npm install -g yarn @mermaid-js/mermaid-cli ENV JAVA_HOME=/usr/lib/jvm/java-${jdk}-openjdk-amd64 @@ -68,20 +89,6 @@ RUN /arrow/ci/scripts/util_download_apache.sh \ ENV PATH=/opt/apache-maven-${maven}/bin:$PATH RUN mvn -version -ARG node=16 -RUN apt-get purge -y npm && \ - apt-get autoremove -y --purge && \ - wget -q -O - https://deb.nodesource.com/setup_${node}.x | bash - && \ - apt-get install -y nodejs && \ - apt-get clean && \ - rm -rf /var/lib/apt/lists/* && \ - npm install -g yarn - -COPY docs/requirements.txt /arrow/docs/ -RUN python3 -m venv ${ARROW_PYTHON_VENV} && \ - . 
${ARROW_PYTHON_VENV}/bin/activate && \ - pip install -r arrow/docs/requirements.txt - COPY c_glib/Gemfile /arrow/c_glib/ RUN gem install --no-document bundler && \ bundle install --gemfile /arrow/c_glib/Gemfile @@ -98,6 +105,17 @@ COPY r/DESCRIPTION /arrow/r/ RUN /arrow/ci/scripts/r_deps.sh /arrow && \ R -e "install.packages('pkgdown')" +RUN useradd --user-group --create-home --groups audio,video arrow +RUN echo "arrow ALL=(ALL:ALL) NOPASSWD:ALL" | \ + EDITOR=tee visudo -f /etc/sudoers.d/arrow +USER arrow + +COPY docs/requirements.txt /arrow/docs/ +RUN sudo chown -R arrow: ${ARROW_PYTHON_VENV} && \ + python3 -m venv ${ARROW_PYTHON_VENV} && \ + . ${ARROW_PYTHON_VENV}/bin/activate && \ + pip install -r arrow/docs/requirements.txt + ENV ARROW_ACERO=ON \ ARROW_AZURE=OFF \ ARROW_BUILD_STATIC=OFF \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index e28ceae8801f0..ceeab2455bef6 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -229,12 +229,17 @@ find . -name "*.o" -delete popd if [ -x "$(command -v ldconfig)" ]; then - ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} + if [ -x "$(command -v sudo)" ]; then + SUDO=sudo + else + SUDO= + fi + ${SUDO} ldconfig ${ARROW_HOME}/${CMAKE_INSTALL_LIBDIR:-lib} fi if [ "${ARROW_USE_CCACHE}" == "ON" ]; then - echo -e "===\n=== ccache statistics after build\n===" - ccache -sv 2>/dev/null || ccache -s + echo -e "===\n=== ccache statistics after build\n===" + ccache -sv 2>/dev/null || ccache -s fi if command -v sccache &> /dev/null; then @@ -244,6 +249,6 @@ fi if [ "${BUILD_DOCS_CPP}" == "ON" ]; then pushd ${source_dir}/apidoc - doxygen + OUTPUT_DIRECTORY=${build_dir}/apidoc doxygen popd fi diff --git a/ci/scripts/integration_arrow.sh b/ci/scripts/integration_arrow.sh index a5a012ad2c5c4..2eb58e8dc75ec 100755 --- a/ci/scripts/integration_arrow.sh +++ b/ci/scripts/integration_arrow.sh @@ -40,6 +40,8 @@ if [ "${ARROW_INTEGRATION_JAVA}" == "ON" ]; then pip install jpype1 fi +export ARROW_BUILD_ROOT=${build_dir} + # Get more detailed context on crashes export PYTHONFAULTHANDLER=1 diff --git a/ci/scripts/java_build.sh b/ci/scripts/java_build.sh index 2103f0329baec..0fa1edab429c0 100755 --- a/ci/scripts/java_build.sh +++ b/ci/scripts/java_build.sh @@ -75,7 +75,16 @@ fi # Use `2 * ncores` threads mvn="${mvn} -T 2C" -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. 
+mkdir -p ${build_dir} +rm -rf ${build_dir}/format +cp -aL ${arrow_dir}/format ${build_dir}/ +rm -rf ${build_dir}/java +cp -aL ${source_dir} ${build_dir}/ +pushd ${build_dir}/java if [ "${ARROW_JAVA_SHADE_FLATBUFFERS}" == "ON" ]; then mvn="${mvn} -Pshade-flatbuffers" @@ -95,7 +104,7 @@ if [ "${BUILD_DOCS_JAVA}" == "ON" ]; then # HTTP pooling is turned of to avoid download issues https://issues.apache.org/jira/browse/ARROW-11633 mkdir -p ${build_dir}/docs/java/reference ${mvn} -Dcheckstyle.skip=true -Dhttp.keepAlive=false -Dmaven.wagon.http.pool=false clean install site - rsync -a ${arrow_dir}/java/target/site/apidocs/ ${build_dir}/docs/java/reference + rsync -a target/site/apidocs/ ${build_dir}/docs/java/reference fi popd diff --git a/ci/scripts/java_cdata_integration.sh b/ci/scripts/java_cdata_integration.sh index 86ea7cf155350..0ee5d3026aa09 100755 --- a/ci/scripts/java_cdata_integration.sh +++ b/ci/scripts/java_cdata_integration.sh @@ -20,9 +20,9 @@ set -ex arrow_dir=${1} -export ARROW_SOURCE_DIR=${arrow_dir} +build_dir=${2} -pushd ${arrow_dir}/java/c/src/test/python +pushd ${build_dir}/java/c/src/test/python python integration_tests.py diff --git a/ci/scripts/js_build.sh b/ci/scripts/js_build.sh index d61f74f0b7ca1..196539ee0f101 100755 --- a/ci/scripts/js_build.sh +++ b/ci/scripts/js_build.sh @@ -25,7 +25,16 @@ build_dir=${2} : ${BUILD_DOCS_JS:=OFF} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${build_dir}/js +mkdir -p ${build_dir} +cp -aL ${arrow_dir}/LICENSE.txt ${build_dir}/ +cp -aL ${arrow_dir}/NOTICE.txt ${build_dir}/ +cp -aL ${source_dir} ${build_dir}/js +pushd ${build_dir}/js yarn --immutable yarn lint:ci @@ -34,18 +43,18 @@ yarn build if [ "${BUILD_DOCS_JS}" == "ON" ]; then # If apache or upstream are defined use those as remote. # Otherwise use origin which could be a fork on PRs. - if [ "$(git config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then + if [ "$(git -C ${arrow_dir} config --get remote.apache.url)" == "git@github.com:apache/arrow.git" ]; then yarn doc --gitRemote apache - elif [[ "$(git config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then + elif [[ "$(git -C ${arrow_dir}config --get remote.upstream.url)" =~ "https://github.com/apache/arrow" ]]; then yarn doc --gitRemote upstream - elif [[ "$(basename -s .git $(git config --get remote.origin.url))" == "arrow" ]]; then + elif [[ "$(basename -s .git $(git -C ${arrow_dir} config --get remote.origin.url))" == "arrow" ]]; then yarn doc else echo "Failed to build docs because the remote is not set correctly. Please set the origin or upstream remote to https://github.com/apache/arrow.git or the apache remote to git@github.com:apache/arrow.git." 
exit 0 fi mkdir -p ${build_dir}/docs/js - rsync -a ${arrow_dir}/js/doc/ ${build_dir}/docs/js + rsync -a doc/ ${build_dir}/docs/js fi popd diff --git a/ci/scripts/js_test.sh b/ci/scripts/js_test.sh index 40de974ede161..863b1c3d34613 100755 --- a/ci/scripts/js_test.sh +++ b/ci/scripts/js_test.sh @@ -20,8 +20,9 @@ set -ex source_dir=${1}/js +build_dir=${2}/js -pushd ${source_dir} +pushd ${build_dir} yarn lint yarn test diff --git a/ci/scripts/python_build.sh b/ci/scripts/python_build.sh index 99153cdf75539..9455baf353633 100755 --- a/ci/scripts/python_build.sh +++ b/ci/scripts/python_build.sh @@ -78,17 +78,42 @@ export PYARROW_PARALLEL=${n_jobs} export CMAKE_PREFIX_PATH export LD_LIBRARY_PATH=${ARROW_HOME}/lib:${LD_LIBRARY_PATH} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${python_build_dir} +cp -aL ${source_dir} ${python_build_dir} +pushd ${python_build_dir} # - Cannot call setup.py as it may install in the wrong directory # on Debian/Ubuntu (ARROW-15243). # - Cannot use build isolation as we want to use specific dependency versions # (e.g. Numpy, Pandas) on some CI jobs. ${PYTHON:-python} -m pip install --no-deps --no-build-isolation -vv . -# Remove build artifacts from source directory -find build/ -user root -delete popd if [ "${BUILD_DOCS_PYTHON}" == "ON" ]; then + # https://github.com/apache/arrow/issues/41429 + # TODO: We want to out-of-source build. This is a workaround. + # + # Copy docs/source because the "autosummary_generate = True" + # configuration generates files to docs/source/python/generated/. + rm -rf ${python_build_dir}/docs/source + mkdir -p ${python_build_dir}/docs + cp -a ${arrow_dir}/docs/source ${python_build_dir}/docs/ + rm -rf ${python_build_dir}/format + cp -a ${arrow_dir}/format ${python_build_dir}/ + rm -rf ${python_build_dir}/cpp/examples + mkdir -p ${python_build_dir}/cpp + cp -a ${arrow_dir}/cpp/examples ${python_build_dir}/cpp/ + rm -rf ${python_build_dir}/ci + cp -a ${arrow_dir}/ci/ ${python_build_dir}/ ncpus=$(python -c "import os; print(os.cpu_count())") - sphinx-build -b html -j ${ncpus} ${arrow_dir}/docs/source ${build_dir}/docs + export ARROW_CPP_DOXYGEN_XML=${build_dir}/cpp/apidoc/xml + pushd ${build_dir} + sphinx-build \ + -b html \ + ${python_build_dir}/docs/source \ + ${build_dir}/docs + popd fi diff --git a/ci/scripts/r_build.sh b/ci/scripts/r_build.sh index 38b54e4434036..f4dc5a5781c6e 100755 --- a/ci/scripts/r_build.sh +++ b/ci/scripts/r_build.sh @@ -24,15 +24,29 @@ build_dir=${2} : ${BUILD_DOCS_R:=OFF} -pushd ${source_dir} +# https://github.com/apache/arrow/issues/41429 +# TODO: We want to out-of-source build. This is a workaround. We copy +# all needed files to the build directory from the source directory +# and build in the build directory. +rm -rf ${build_dir}/r +cp -aL ${source_dir} ${build_dir}/r +pushd ${build_dir}/r # build first so that any stray compiled files in r/src are ignored ${R_BIN} CMD build . 
-${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz +if [ -x "$(command -v sudo)" ]; then + SUDO=sudo +else + SUDO= +fi +${SUDO} \ + env \ + PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig:${PKG_CONFIG_PATH} \ + ${R_BIN} CMD INSTALL ${INSTALL_ARGS} arrow*.tar.gz if [ "${BUILD_DOCS_R}" == "ON" ]; then ${R_BIN} -e "pkgdown::build_site(install = FALSE)" - rsync -a ${source_dir}/docs/ ${build_dir}/docs/r + rsync -a docs/ ${build_dir}/docs/r fi popd diff --git a/dev/archery/archery/docker/core.py b/dev/archery/archery/docker/core.py index 7376bb0a3b72d..cb831060022a4 100644 --- a/dev/archery/archery/docker/core.py +++ b/dev/archery/archery/docker/core.py @@ -371,6 +371,10 @@ def run(self, service_name, command=None, *, env=None, volumes=None, v = "{}:{}".format(v['source'], v['target']) args.extend(['-v', v]) + # append capabilities from the compose conf + for c in service.get('cap_add', []): + args.extend([f'--cap-add={c}']) + # infer whether an interactive shell is desired or not if command in ['cmd.exe', 'bash', 'sh', 'powershell']: args.append('-it') diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index 8e7a0bb99f9de..ccc807410a848 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -18,17 +18,23 @@ import contextlib import functools import os +from pathlib import Path import subprocess from . import cdata from .tester import Tester, CDataExporter, CDataImporter from .util import run_cmd, log -from ..utils.source import ARROW_ROOT_DEFAULT + + +ARROW_BUILD_ROOT = os.environ.get( + 'ARROW_BUILD_ROOT', + Path(__file__).resolve().parents[5] +) def load_version_from_pom(): import xml.etree.ElementTree as ET - tree = ET.parse(os.path.join(ARROW_ROOT_DEFAULT, 'java', 'pom.xml')) + tree = ET.parse(os.path.join(ARROW_BUILD_ROOT, 'java', 'pom.xml')) tag_pattern = '{http://maven.apache.org/POM/4.0.0}version' version_tag = list(tree.getroot().findall(tag_pattern))[0] return version_tag.text @@ -48,7 +54,7 @@ def load_version_from_pom(): _ARROW_TOOLS_JAR = os.environ.get( "ARROW_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/tools/target", f"arrow-tools-{_arrow_version}-jar-with-dependencies.jar" ) @@ -56,7 +62,7 @@ def load_version_from_pom(): _ARROW_C_DATA_JAR = os.environ.get( "ARROW_C_DATA_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/c/target", f"arrow-c-data-{_arrow_version}.jar" ) @@ -64,7 +70,7 @@ def load_version_from_pom(): _ARROW_FLIGHT_JAR = os.environ.get( "ARROW_FLIGHT_JAVA_INTEGRATION_JAR", os.path.join( - ARROW_ROOT_DEFAULT, + ARROW_BUILD_ROOT, "java/flight/flight-integration-tests/target", f"flight-integration-tests-{_arrow_version}-jar-with-dependencies.jar" ) diff --git a/dev/archery/archery/integration/tester_js.py b/dev/archery/archery/integration/tester_js.py index c7f363ba54687..3d1a229931cde 100644 --- a/dev/archery/archery/integration/tester_js.py +++ b/dev/archery/archery/integration/tester_js.py @@ -16,13 +16,17 @@ # under the License. 
import os +from pathlib import Path from .tester import Tester from .util import run_cmd, log -from ..utils.source import ARROW_ROOT_DEFAULT -ARROW_JS_ROOT = os.path.join(ARROW_ROOT_DEFAULT, 'js') +ARROW_BUILD_ROOT = os.environ.get( + 'ARROW_BUILD_ROOT', + Path(__file__).resolve().parents[5] +) +ARROW_JS_ROOT = os.path.join(ARROW_BUILD_ROOT, 'js') _EXE_PATH = os.path.join(ARROW_JS_ROOT, 'bin') _VALIDATE = os.path.join(_EXE_PATH, 'integration.ts') _JSON_TO_ARROW = os.path.join(_EXE_PATH, 'json-to-arrow.ts') diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 126b0fcb6f76a..146fa52fa958b 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -65,7 +65,7 @@ groups: - r-binary-packages - ubuntu-* - wheel-* - - test-ubuntu-*-docs + - test-debian-*-docs {############################# Testing tasks #################################} @@ -1458,15 +1458,15 @@ tasks: {% endfor %} # be sure to update binary-task.rb when upgrading ubuntu - test-ubuntu-22.04-docs: + test-debian-12-docs: ci: github template: docs/github.linux.yml params: env: - UBUNTU: 22.04 + JDK: 17 pr_number: Unset flags: "-v $PWD/build/:/build/" - image: ubuntu-docs + image: debian-docs publish: false artifacts: - docs.tar.gz @@ -1594,8 +1594,8 @@ tasks: template: docs/github.linux.yml params: env: - UBUNTU: 22.04 + JDK: 17 pr_number: Unset flags: "-v $PWD/build/:/build/" - image: ubuntu-docs + image: debian-docs publish: true diff --git a/docker-compose.yml b/docker-compose.yml index d771fc2d22a35..9bedb59a77be8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -131,7 +131,8 @@ x-hierarchy: - debian-cpp: - debian-c-glib: - debian-ruby - - debian-python + - debian-python: + - debian-docs - debian-go: - debian-go-cgo - debian-go-cgo-python @@ -145,8 +146,7 @@ x-hierarchy: - ubuntu-c-glib: - ubuntu-ruby - ubuntu-lint - - ubuntu-python: - - ubuntu-docs + - ubuntu-python - ubuntu-python-sdist-test - ubuntu-r - ubuntu-r-only-r @@ -1228,6 +1228,8 @@ services: # We should extend the list of enabled rules after adding this build to # the CI pipeline. image: ${REPO}:${ARCH}-conda-python-${PYTHON}-pandas-${PANDAS} + cap_add: + - SYS_ADMIN environment: <<: [*common, *ccache] ARROW_SUBSTRAIT: "ON" @@ -1378,7 +1380,7 @@ services: /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/java_jni_build.sh /arrow $${ARROW_HOME} /build /tmp/dist/java/ && /arrow/ci/scripts/java_build.sh /arrow /build /tmp/dist/java && - /arrow/ci/scripts/java_cdata_integration.sh /arrow /tmp/dist/java" ] + /arrow/ci/scripts/java_cdata_integration.sh /arrow /build" ] conda-python-cython2: # Usage: @@ -1680,7 +1682,7 @@ services: command: &js-command > /bin/bash -c " /arrow/ci/scripts/js_build.sh /arrow /build && - /arrow/ci/scripts/js_test.sh /arrow" + /arrow/ci/scripts/js_test.sh /arrow /build" #################################### C# ##################################### @@ -1759,29 +1761,34 @@ services: ################################ Docs ####################################### - ubuntu-docs: + debian-docs: # Usage: - # docker-compose build ubuntu-cpp - # docker-compose build ubuntu-python - # docker-compose build ubuntu-docs - # docker-compose run --rm ubuntu-docs - image: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs + # docker-compose build debian-cpp + # docker-compose build debian-python + # docker-compose build debian-docs + # docker-compose run --rm debian-docs + image: ${REPO}:${ARCH}-debian-${DEBIAN}-docs build: context: . 
dockerfile: ci/docker/linux-apt-docs.dockerfile cache_from: - - ${REPO}:${ARCH}-ubuntu-${UBUNTU}-docs + - ${REPO}:${ARCH}-debian-${DEBIAN}-docs args: r: ${R} jdk: ${JDK} maven: ${MAVEN} node: ${NODE} - base: ${REPO}:${ARCH}-ubuntu-${UBUNTU}-python-3 + base: ${REPO}:${ARCH}-debian-${DEBIAN}-python-3 + # This is for Chromium used by Mermaid. Chromium uses namespace + # isolation for security by default. + cap_add: + - SYS_ADMIN environment: <<: [*common, *ccache] ARROW_CUDA: "ON" ARROW_CXX_FLAGS_DEBUG: "-g1" ARROW_C_FLAGS_DEBUG: "-g1" + ARROW_HOME: "/tmp/local" ARROW_JAVA_SKIP_GIT_PLUGIN: ARROW_SUBSTRAIT: "ON" BUILD_DOCS_C_GLIB: "ON" @@ -1790,9 +1797,11 @@ services: BUILD_DOCS_JS: "ON" BUILD_DOCS_PYTHON: "ON" BUILD_DOCS_R: "ON" - volumes: *ubuntu-volumes - command: &docs-command > + volumes: *debian-volumes + command: > /bin/bash -c " + sudo mkdir -p /build /ccache && + sudo chown -R `id --user --name`: /build /ccache && /arrow/ci/scripts/cpp_build.sh /arrow /build && /arrow/ci/scripts/python_build.sh /arrow /build && /arrow/ci/scripts/c_glib_build.sh /arrow /build && diff --git a/docs/requirements.txt b/docs/requirements.txt index 8891680814dff..afb252e17457b 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -8,8 +8,9 @@ myst-parser[linkify] numpydoc pydata-sphinx-theme~=0.14 sphinx-autobuild -sphinx-design sphinx-copybutton +sphinx-design sphinx-lint +sphinxcontrib-mermaid sphinx==6.2 pandas diff --git a/docs/source/conf.py b/docs/source/conf.py index 05340dc923c89..b487200555a09 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -125,6 +125,7 @@ 'sphinx.ext.intersphinx', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', + 'sphinxcontrib.mermaid', ] # Show members for classes in .. autosummary @@ -137,7 +138,9 @@ } # Breathe configuration -breathe_projects = {"arrow_cpp": "../../cpp/apidoc/xml"} +breathe_projects = { + "arrow_cpp": os.environ.get("ARROW_CPP_DOXYGEN_XML", "../../cpp/apidoc/xml"), +} breathe_default_project = "arrow_cpp" # Overridden conditionally below @@ -584,6 +587,9 @@ # # texinfo_no_detailmenu = False +# -- Options for mermaid output ------------------------------------------- + +mermaid_output_format = 'svg' def setup(app): # Use a config value to indicate whether CUDA API docs can be generated. diff --git a/docs/source/format/Flight.rst b/docs/source/format/Flight.rst index 7ee84952b4350..c65a1f70bde7f 100644 --- a/docs/source/format/Flight.rst +++ b/docs/source/format/Flight.rst @@ -68,9 +68,8 @@ Downloading Data A client that wishes to download the data would: -.. figure:: ./Flight/DoGet.mmd.svg - - Retrieving data via ``DoGet``. +.. mermaid:: ./Flight/DoGet.mmd + :caption: Retrieving data via ``DoGet``. #. Construct or acquire a ``FlightDescriptor`` for the data set they are interested in. @@ -168,9 +167,8 @@ data. However, ``GetFlightInfo`` doesn't return until the query completes, so the client is blocked. In this situation, the client can use ``PollFlightInfo`` instead of ``GetFlightInfo``: -.. figure:: ./Flight/PollFlightInfo.mmd.svg - - Polling a long-running query by ``PollFlightInfo``. +.. mermaid:: ./Flight/PollFlightInfo.mmd + :caption: Polling a long-running query by ``PollFlightInfo``. #. Construct or acquire a ``FlightDescriptor``, as before. #. Call ``PollFlightInfo(FlightDescriptor)`` to get a ``PollInfo`` @@ -229,9 +227,8 @@ Uploading Data To upload data, a client would: -.. figure:: ./Flight/DoPut.mmd.svg - - Uploading data via ``DoPut``. +.. mermaid:: ./Flight/DoPut.mmd + :caption: Uploading data via ``DoPut``. #. 
Construct or acquire a ``FlightDescriptor``, as before. #. Call ``DoPut(FlightData)`` and upload a stream of Arrow record @@ -257,9 +254,8 @@ require being stateful if implemented using ``DoGet`` and ``DoPut``. Instead, ``DoExchange`` allows this to be implemented as a single call. A client would: -.. figure:: ./Flight/DoExchange.mmd.svg - - Complex data flow with ``DoExchange``. +.. mermaid:: ./Flight/DoExchange.mmd + :caption: Complex data flow with ``DoExchange``. #. Construct or acquire a ``FlightDescriptor``, as before. #. Call ``DoExchange(FlightData)``. diff --git a/docs/source/format/Flight/DoExchange.mmd b/docs/source/format/Flight/DoExchange.mmd index 14f1789aeaaa7..f7586bf35eb4f 100644 --- a/docs/source/format/Flight/DoExchange.mmd +++ b/docs/source/format/Flight/DoExchange.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoExchange.mmd.svg b/docs/source/format/Flight/DoExchange.mmd.svg deleted file mode 100644 index 204d63d77218d..0000000000000 --- a/docs/source/format/Flight/DoExchange.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerThe first FlightData includes a FlightDescriptorDoExchange(FlightData)1stream of FlightData2stream of FlightData3par[[Client sends data]][[Server sends data]]ClientServer \ No newline at end of file diff --git a/docs/source/format/Flight/DoGet.mmd b/docs/source/format/Flight/DoGet.mmd index c2e3cd034448c..cac59afb8219f 100644 --- a/docs/source/format/Flight/DoGet.mmd +++ b/docs/source/format/Flight/DoGet.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoGet.mmd.svg b/docs/source/format/Flight/DoGet.mmd.svg deleted file mode 100644 index 48a50d77ed33f..0000000000000 --- a/docs/source/format/Flight/DoGet.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientMetadata ServerData ServerGetFlightInfo(FlightDescriptor)1FlightInfo{endpoints: [FlightEndpoint{ticket: Ticket}, …]}2This may be parallelizedDoGet(Ticket)3stream of FlightData4loop[for each endpoint in FlightInfo.endpoints]ClientMetadata ServerData Server \ No newline at end of file diff --git a/docs/source/format/Flight/DoPut.mmd b/docs/source/format/Flight/DoPut.mmd index 5845edef1f466..876505da2d300 100644 --- a/docs/source/format/Flight/DoPut.mmd +++ b/docs/source/format/Flight/DoPut.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. 
-%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/DoPut.mmd.svg b/docs/source/format/Flight/DoPut.mmd.svg deleted file mode 100644 index 9e490e152bdb3..0000000000000 --- a/docs/source/format/Flight/DoPut.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerThe first FlightData includes a FlightDescriptorDoPut(FlightData)1stream of FlightData2PutResult{app_metadata}3ClientServer \ No newline at end of file diff --git a/docs/source/format/Flight/PollFlightInfo.mmd b/docs/source/format/Flight/PollFlightInfo.mmd index d062a3a216958..f91c077b655c0 100644 --- a/docs/source/format/Flight/PollFlightInfo.mmd +++ b/docs/source/format/Flight/PollFlightInfo.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd):/data minlag/mermaid-cli -i /data/PollFlightInfo.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/Flight/PollFlightInfo.mmd.svg b/docs/source/format/Flight/PollFlightInfo.mmd.svg deleted file mode 100644 index 1890361f88ce4..0000000000000 --- a/docs/source/format/Flight/PollFlightInfo.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientMetadata ServerData ServerThis may be parallelizedSome endpoints may be processed while pollingloop[for each endpoint in FlightInfo.endpoints]PollFlightInfo(FlightDescriptor)1PollInfo{descriptor: FlightDescriptor', ...}2PollFlightInfo(FlightDescriptor')3PollInfo{descriptor: FlightDescriptor'', ...}4PollFlightInfo(FlightDescriptor'')5PollInfo{descriptor: null, info: FlightInfo{endpoints: [FlightEndpoint{ticket: Ticket}, …]}6DoGet(Ticket)7stream of FlightData8ClientMetadata ServerData Server \ No newline at end of file diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index 1a43e4bdff306..181efce286e70 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -242,21 +242,17 @@ Close and invalidate the current session context. Sequence Diagrams ================= -.. figure:: ./FlightSql/CommandGetTables.mmd.svg +.. mermaid:: ./FlightSql/CommandGetTables.mmd + :caption: Listing available tables. - Listing available tables. +.. mermaid:: ./FlightSql/CommandStatementQuery.mmd + :caption: Executing an ad-hoc query. -.. figure:: ./FlightSql/CommandStatementQuery.mmd.svg +.. mermaid:: ./FlightSql/CommandPreparedStatementQuery.mmd + :caption: Creating a prepared statement, then executing it. - Executing an ad-hoc query. - -.. figure:: ./FlightSql/CommandPreparedStatementQuery.mmd.svg - - Creating a prepared statement, then executing it. - -.. figure:: ./FlightSql/CommandStatementIngest.mmd.svg - - Executing a bulk ingestion. +.. mermaid:: ./FlightSql/CommandStatementIngest.mmd + :caption: Executing a bulk ingestion. External Resources ================== diff --git a/docs/source/format/FlightSql/CommandGetTables.mmd b/docs/source/format/FlightSql/CommandGetTables.mmd index f151411647f23..e6b18ed7dc08b 100644 --- a/docs/source/format/FlightSql/CommandGetTables.mmd +++ b/docs/source/format/FlightSql/CommandGetTables.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. 
-%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandGetTables.mmd.svg b/docs/source/format/FlightSql/CommandGetTables.mmd.svg deleted file mode 100644 index 4e71c01982289..0000000000000 --- a/docs/source/format/FlightSql/CommandGetTables.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerGetFlightInfo(CommandGetTables)1FlightInfo{…Ticket…}2DoGet(Ticket)3stream of FlightData4ClientServer \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd index cbd1eb6014bca..ce18b91eaa33e 100644 --- a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd +++ b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandPreparedStatementQuery.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg b/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg deleted file mode 100644 index cbf6a78e9a5ce..0000000000000 --- a/docs/source/format/FlightSql/CommandPreparedStatementQuery.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ServerClientServerClientoptional response with updated handleloop[for each endpoint in FlightInfo.endpoints]loop[for each invocation of the prepared statement]DoAction(ActionCreatePreparedStatementRequest)1ActionCreatePreparedStatementResult{handle}2DoPut(CommandPreparedStatementQuery)3stream of FlightData4DoPutPreparedStatementResult{handle}5GetFlightInfo(CommandPreparedStatementQuery)6FlightInfo{endpoints: [FlightEndpoint{…}, …]}7DoGet(endpoint.ticket)8stream of FlightData9DoAction(ActionClosePreparedStatementRequest)10ActionClosePreparedStatementRequest{}11 \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandStatementIngest.mmd b/docs/source/format/FlightSql/CommandStatementIngest.mmd index 781289d77b41a..0578f465d4dda 100644 --- a/docs/source/format/FlightSql/CommandStatementIngest.mmd +++ b/docs/source/format/FlightSql/CommandStatementIngest.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. -%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandGetTables.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg b/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg deleted file mode 100644 index e2aa72459afa5..0000000000000 --- a/docs/source/format/FlightSql/CommandStatementIngest.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ServerClientServerClientDoPut(CommandStatementIngest)1stream of FlightData2PutResult{DoPutUpdateResult{RecordCount: int64}}3 \ No newline at end of file diff --git a/docs/source/format/FlightSql/CommandStatementQuery.mmd b/docs/source/format/FlightSql/CommandStatementQuery.mmd index 7b67fecfb75c6..f26aa2f951fcf 100644 --- a/docs/source/format/FlightSql/CommandStatementQuery.mmd +++ b/docs/source/format/FlightSql/CommandStatementQuery.mmd @@ -15,9 +15,6 @@ %% specific language governing permissions and limitations %% under the License. 
-%% To generate the diagram, use mermaid-cli -%% Example: docker run --rm -v $(pwd)/FlightSql:/data minlag/mermaid-cli -i /data/CommandStatementQuery.mmd - sequenceDiagram autonumber diff --git a/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg b/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg deleted file mode 100644 index f5e8c79f137ff..0000000000000 --- a/docs/source/format/FlightSql/CommandStatementQuery.mmd.svg +++ /dev/null @@ -1 +0,0 @@ -ClientServerGetFlightInfo(CommandStatementQuery)1FlightInfo{endpoints: [FlightEndpoint{…}, …]}2DoGet(endpoint.ticket)3stream of FlightData4loop[for each endpoint in FlightInfo.endpoints]ClientServer \ No newline at end of file From 071ffaf2633eb58540a872514507ab362cc26fb4 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 9 May 2024 00:22:02 +0200 Subject: [PATCH 076/261] GH-41256: [Format][Docs] Add a canonical extension type specification for JSON (#41257) ### Rationale for this change As per #41256 this proposes a specification of a canonical extension type for JSON serialized data. ### What changes are included in this PR? This adds to documentation of canonical extension types. ### Are these changes tested? No as only docs are changed. ### Are there any user-facing changes? No. * GitHub Issue: #41256 Lead-authored-by: Rok Mihevc Co-authored-by: Will Jones Co-authored-by: Antoine Pitrou Signed-off-by: Rok Mihevc --- docs/source/format/CanonicalExtensions.rst | 25 +++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst index 1f055b7f8edb5..47c161c14cafc 100644 --- a/docs/source/format/CanonicalExtensions.rst +++ b/docs/source/format/CanonicalExtensions.rst @@ -51,7 +51,7 @@ types: 3) Its serialization *must* be described in the proposal and should not require unduly implementation work or unusual software dependencies - (for example, a trivial custom text format or JSON would be acceptable). + (for example, a trivial custom text format or a JSON-based format would be acceptable). 4) Its expected semantics *should* be described as well and any potential ambiguities or pain points addressed or at least mentioned. @@ -251,6 +251,27 @@ Variable shape tensor Values inside each **data** tensor element are stored in row-major/C-contiguous order according to the corresponding **shape**. +.. _json_extension: + +JSON +==== + +* Extension name: ``arrow.json``. + +* The storage type of this extension is ``String`` or + or ``LargeString`` or ``StringView``. + Only UTF-8 encoded JSON as specified in `rfc8259`_ is supported. + +* Extension type parameters: + + This type does not have any parameters. + +* Description of the serialization: + + Metadata is either an empty string or a JSON string with an empty object. + In the future, additional fields may be added, but they are not required + to interpret the array. + ========================= Community Extension Types ========================= @@ -268,3 +289,5 @@ GeoArrow Arrow extension types for representing vector geometries. It is well known within the Arrow geospatial subcommunity. The GeoArrow specification is not yet finalized. + +.. 
_rfc8259: https://datatracker.ietf.org/doc/html/rfc8259 From c5be02703312f01186ceea2d910a93e5421e3c83 Mon Sep 17 00:00:00 2001 From: Rok Mihevc Date: Thu, 9 May 2024 00:46:29 +0200 Subject: [PATCH 077/261] GH-41298: [Format][Docs] Add a canonical extension type specification for UUID (#41299) ### Rationale for this change Several users have expressed a need for a UUID type. This is to provide a canonical UUID extension type specification. ### What changes are included in this PR? This adds to documentation of canonical extension types. ### Are these changes tested? No as only docs are changed. ### Are there any user-facing changes? No. * GitHub Issue: #41298 Lead-authored-by: Rok Mihevc Co-authored-by: Antoine Pitrou Signed-off-by: Rok Mihevc --- docs/source/format/CanonicalExtensions.rst | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst index 47c161c14cafc..c60f095dd354d 100644 --- a/docs/source/format/CanonicalExtensions.rst +++ b/docs/source/format/CanonicalExtensions.rst @@ -272,6 +272,17 @@ JSON In the future, additional fields may be added, but they are not required to interpret the array. +UUID +==== + +* Extension name: ``arrow.uuid``. + +* The storage type of the extension is ``FixedSizeBinary`` with a length of 16 bytes. + +.. note:: + A specific UUID version is not required or guaranteed. This extension represents + UUIDs as FixedSizeBinary(16) with big-endian notation and does not interpret the bytes in any way. + ========================= Community Extension Types ========================= From 7bfe02db04e34fc1ab6df6f647a76899e0c654db Mon Sep 17 00:00:00 2001 From: David Schlosnagle Date: Wed, 8 May 2024 19:46:15 -0400 Subject: [PATCH 078/261] GH-41573: [Java] VectorSchemaRoot uses inefficient stream to copy fieldVectors (#41574) ### Rationale for this change While reviewing allocation profiling of an Arrow intensive application, I noticed significant allocations due to `ArrayList#grow()` originating from `org.apache.arrow.vector.VectorSchemaRoot#getFieldVectors()`. The `org.apache.arrow.vector.VectorSchemaRoot#getFieldVectors()` method uses an inefficient `fieldVectors.stream().collect(Collectors.toList())` to create a list copy, leading to reallocations as the target list is collected. This could be replaced with a more efficent `new ArrayList<>(fieldVectors)` to make a pre-sized list copy, or even better an unmodifiable view via `Collections.unmodifiableList(fieldVectors)`. ### What changes are included in this PR? * Use `Collections.unmodifiableList(List)` to return unmodifiable list view of `fieldVectors` from `getFieldVectors()` * Pre-size the `fieldVectors` `ArrayList` in static factory `VectorSchemaRoot#create(Schema, BufferAllocator)` * `VectorSchemaRoot#setRowCount(int)` iterates over instance `fieldVectors` instead of copied list (similar to existing `allocateNew()`, `clear()`, `contentToTSVString()`). ### Are these changes tested? These changes are covered by existing unit and integration tests. ### Are there any user-facing changes? 
No * GitHub Issue: #41573 Authored-by: David Schlosnagle Signed-off-by: David Li --- .../java/org/apache/arrow/vector/VectorSchemaRoot.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java index 8768a90c80b83..9a92ce5060b1b 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorSchemaRoot.java @@ -19,6 +19,7 @@ import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -121,7 +122,7 @@ public VectorSchemaRoot(Schema schema, List fieldVectors, int rowCo * Creates a new set of empty vectors corresponding to the given schema. */ public static VectorSchemaRoot create(Schema schema, BufferAllocator allocator) { - List fieldVectors = new ArrayList<>(); + List fieldVectors = new ArrayList<>(schema.getFields().size()); for (Field field : schema.getFields()) { FieldVector vector = field.createVector(allocator); fieldVectors.add(vector); @@ -160,7 +161,7 @@ public void clear() { } public List getFieldVectors() { - return fieldVectors.stream().collect(Collectors.toList()); + return Collections.unmodifiableList(fieldVectors); } /** @@ -236,7 +237,7 @@ public int getRowCount() { */ public void setRowCount(int rowCount) { this.rowCount = rowCount; - for (FieldVector v : getFieldVectors()) { + for (FieldVector v : fieldVectors) { v.setValueCount(rowCount); } } From f6127a6d18af12ce18a0b8b1eac02346721cc399 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Thu, 9 May 2024 04:58:59 +0200 Subject: [PATCH 079/261] GH-41356: [Release][Docs] Update post release documentation task to remove the warnings banner for stable version (#41377) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change With every release dev documentation is moved to `docs/` and becomes stable version of the documentation but the version warnings banner is still present. ### What changes are included in this PR? This PR removes the banner before the dev docs are copied to the `docs/` folder. ### Are these changes tested? Not yet. ### Are there any user-facing changes? No. 
* GitHub Issue: #41356 Lead-authored-by: AlenkaF Co-authored-by: Alenka Frim Co-authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/release/post-08-docs.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/dev/release/post-08-docs.sh b/dev/release/post-08-docs.sh index c59f9b96857a6..58a462551f199 100755 --- a/dev/release/post-08-docs.sh +++ b/dev/release/post-08-docs.sh @@ -72,13 +72,28 @@ fi # delete current stable docs and restore all previous versioned docs rm -rf docs/* git checkout "${versioned_paths[@]}" +# Download and untar released docs in a temp folder +rm -rf docs_new +mkdir docs_new +pushd docs_new curl \ --fail \ --location \ --remote-name \ https://apache.jfrog.io/artifactory/arrow/docs/${version}/docs.tar.gz tar xvf docs.tar.gz -rm -f docs.tar.gz +# Update DOCUMENTATION_OPTIONS.show_version_warning_banner +find docs \ + -type f \ + -exec \ + sed -i.bak \ + -e "s/DOCUMENTATION_OPTIONS.show_version_warning_banner = true/DOCUMENTATION_OPTIONS.show_version_warning_banner = false/g" \ + {} \; +find ./ -name '*.bak' -delete +popd +mv docs_new/docs/* docs/ +rm -rf docs_new + if [ "$is_major_release" = "yes" ] ; then previous_series=${previous_version%.*} mv docs_temp docs/${previous_series} From bd444106af494b3d4c6cce0af88f6ce2a6a327eb Mon Sep 17 00:00:00 2001 From: Tom McTiernan Date: Thu, 9 May 2024 20:15:43 +0100 Subject: [PATCH 080/261] GH-39645: [Python] Fix read_table for encrypted parquet (#39438) ### Rationale for this change Currently, if you try to read a decrypted parquet with read_table, passing decryption_properties - in the happy path (pyarrow.data available for import) the reading/decryption of the file fails, as the decryption properties are missing. ### What changes are included in this PR? Pass through the argument that was intended to have been passed. ### Are these changes tested? We have tested this locally on an encrypted parquet dataset - please advise on any further testing you would like beyond that and the standard CI. ### Are there any user-facing changes? Not in any cases where their code was previously working? The intended behaviour for encrypted dataset decryption should start working. 
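For illustration, a sketch of the now-working call (`path` and `file_decryption_properties` are placeholders; the latter is produced by `CryptoFactory.file_decryption_properties(...)` as in the updated tests):

```python
import pyarrow.parquet as pq

# file_decryption_properties created via
# crypto_factory.file_decryption_properties(kms_connection_config, decryption_config)
table = pq.read_table(path, decryption_properties=file_decryption_properties)
```
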
* Closes: #39645 Lead-authored-by: Tom McTiernan Co-authored-by: Don Co-authored-by: Rok Mihevc Signed-off-by: Rok Mihevc --- python/pyarrow/_dataset_parquet.pxd | 1 + python/pyarrow/_dataset_parquet.pyx | 30 ++- .../pyarrow/_dataset_parquet_encryption.pyx | 8 + python/pyarrow/parquet/core.py | 5 +- .../pyarrow/tests/parquet/test_encryption.py | 180 +++++++++--------- .../pyarrow/tests/test_dataset_encryption.py | 12 ++ 6 files changed, 142 insertions(+), 94 deletions(-) diff --git a/python/pyarrow/_dataset_parquet.pxd b/python/pyarrow/_dataset_parquet.pxd index d5bc172d324d5..0a3a2ff526ea4 100644 --- a/python/pyarrow/_dataset_parquet.pxd +++ b/python/pyarrow/_dataset_parquet.pxd @@ -29,6 +29,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): cdef: CParquetFragmentScanOptions* parquet_options object _parquet_decryption_config + object _decryption_properties cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp) cdef CReaderProperties* reader_properties(self) diff --git a/python/pyarrow/_dataset_parquet.pyx b/python/pyarrow/_dataset_parquet.pyx index a55e889ba8246..4942336a12666 100644 --- a/python/pyarrow/_dataset_parquet.pyx +++ b/python/pyarrow/_dataset_parquet.pyx @@ -56,7 +56,7 @@ from pyarrow._parquet cimport ( try: from pyarrow._dataset_parquet_encryption import ( - set_encryption_config, set_decryption_config + set_encryption_config, set_decryption_config, set_decryption_properties ) parquet_encryption_enabled = True except ImportError: @@ -127,8 +127,7 @@ cdef class ParquetFileFormat(FileFormat): 'instance of ParquetReadOptions') if default_fragment_scan_options is None: - default_fragment_scan_options = ParquetFragmentScanOptions( - **scan_args) + default_fragment_scan_options = ParquetFragmentScanOptions(**scan_args) elif isinstance(default_fragment_scan_options, dict): default_fragment_scan_options = ParquetFragmentScanOptions( **default_fragment_scan_options) @@ -715,6 +714,9 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): decryption_config : pyarrow.dataset.ParquetDecryptionConfig, default None If not None, use the provided ParquetDecryptionConfig to decrypt the Parquet file. + decryption_properties : pyarrow.parquet.FileDecryptionProperties, default None + If not None, use the provided FileDecryptionProperties to decrypt encrypted + Parquet file. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. 
""" @@ -729,6 +731,7 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): thrift_string_size_limit=None, thrift_container_size_limit=None, decryption_config=None, + decryption_properties=None, bint page_checksum_verification=False): self.init(shared_ptr[CFragmentScanOptions]( new CParquetFragmentScanOptions())) @@ -743,6 +746,8 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): self.thrift_container_size_limit = thrift_container_size_limit if decryption_config is not None: self.parquet_decryption_config = decryption_config + if decryption_properties is not None: + self.decryption_properties = decryption_properties self.page_checksum_verification = page_checksum_verification cdef void init(self, const shared_ptr[CFragmentScanOptions]& sp): @@ -812,6 +817,25 @@ cdef class ParquetFragmentScanOptions(FragmentScanOptions): raise ValueError("size must be larger than zero") self.reader_properties().set_thrift_container_size_limit(size) + @property + def decryption_properties(self): + if not parquet_encryption_enabled: + raise NotImplementedError( + "Unable to access encryption features. " + "Encryption is not enabled in your installation of pyarrow." + ) + return self._decryption_properties + + @decryption_properties.setter + def decryption_properties(self, config): + if not parquet_encryption_enabled: + raise NotImplementedError( + "Encryption is not enabled in your installation of pyarrow, but " + "decryption_properties were provided." + ) + set_decryption_properties(self, config) + self._decryption_properties = config + @property def parquet_decryption_config(self): if not parquet_encryption_enabled: diff --git a/python/pyarrow/_dataset_parquet_encryption.pyx b/python/pyarrow/_dataset_parquet_encryption.pyx index 11a7174eb3c9d..c8f5e5b01bf81 100644 --- a/python/pyarrow/_dataset_parquet_encryption.pyx +++ b/python/pyarrow/_dataset_parquet_encryption.pyx @@ -162,6 +162,14 @@ def set_encryption_config( opts.parquet_options.parquet_encryption_config = c_config +def set_decryption_properties( + ParquetFragmentScanOptions opts not None, + FileDecryptionProperties config not None +): + cdef CReaderProperties* reader_props = opts.reader_properties() + reader_props.file_decryption_properties(config.unwrap()) + + def set_decryption_config( ParquetFragmentScanOptions opts not None, ParquetDecryptionConfig config not None diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 69a1c9d19aae2..f54a203c8794c 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -1299,7 +1299,7 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, f"local file systems, not {type(filesystem)}" ) - # check for single fragment dataset + # check for single fragment dataset or dataset directory single_file = None self._base_dir = None if not isinstance(path_or_paths, list): @@ -1313,8 +1313,6 @@ def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, except ValueError: filesystem = LocalFileSystem(use_mmap=memory_map) finfo = filesystem.get_file_info(path_or_paths) - if finfo.is_file: - single_file = path_or_paths if finfo.type == FileType.Directory: self._base_dir = path_or_paths else: @@ -1771,6 +1769,7 @@ def read_table(source, *, columns=None, use_threads=True, ignore_prefixes=ignore_prefixes, pre_buffer=pre_buffer, coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + decryption_properties=decryption_properties, thrift_string_size_limit=thrift_string_size_limit, 
thrift_container_size_limit=thrift_container_size_limit, page_checksum_verification=page_checksum_verification, diff --git a/python/pyarrow/tests/parquet/test_encryption.py b/python/pyarrow/tests/parquet/test_encryption.py index edb6410d2fa0d..ff388ef506997 100644 --- a/python/pyarrow/tests/parquet/test_encryption.py +++ b/python/pyarrow/tests/parquet/test_encryption.py @@ -65,6 +65,44 @@ def basic_encryption_config(): return basic_encryption_config +def setup_encryption_environment(custom_kms_conf): + """ + Sets up and returns the KMS connection configuration and crypto factory + based on provided KMS configuration parameters. + """ + kms_connection_config = pe.KmsConnectionConfig(custom_kms_conf=custom_kms_conf) + + def kms_factory(kms_connection_configuration): + return InMemoryKmsClient(kms_connection_configuration) + + # Create our CryptoFactory + crypto_factory = pe.CryptoFactory(kms_factory) + + return kms_connection_config, crypto_factory + + +def write_encrypted_file(path, data_table, footer_key_name, col_key_name, + footer_key, col_key, encryption_config): + """ + Writes an encrypted parquet file based on the provided parameters. + """ + # Setup the custom KMS configuration with provided keys + custom_kms_conf = { + footer_key_name: footer_key.decode("UTF-8"), + col_key_name: col_key.decode("UTF-8"), + } + + # Setup encryption environment + kms_connection_config, crypto_factory = setup_encryption_environment( + custom_kms_conf) + + # Write the encrypted parquet file + write_encrypted_parquet(path, data_table, encryption_config, + kms_connection_config, crypto_factory) + + return kms_connection_config, crypto_factory + + def test_encrypted_parquet_write_read(tempdir, data_table): """Write an encrypted parquet, verify it's encrypted, and then read it.""" path = tempdir / PARQUET_NAME @@ -81,20 +119,10 @@ def test_encrypted_parquet_write_read(tempdir, data_table): cache_lifetime=timedelta(minutes=5.0), data_key_length_bits=256) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + encryption_config) - crypto_factory = pe.CryptoFactory(kms_factory) - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) # Read with decryption properties @@ -150,36 +178,22 @@ def test_encrypted_parquet_write_read_wrong_key(tempdir, data_table): cache_lifetime=timedelta(minutes=5.0), data_key_length_bits=256) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) + write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, + FOOTER_KEY, COL_KEY, encryption_config) - crypto_factory = pe.CryptoFactory(kms_factory) - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) - # Read with decryption properties - wrong_kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - # Wrong keys - mixup in 
names - FOOTER_KEY_NAME: COL_KEY.decode("UTF-8"), - COL_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - } - ) + wrong_kms_connection_config, wrong_crypto_factory = setup_encryption_environment({ + FOOTER_KEY_NAME: COL_KEY.decode("UTF-8"), # Intentionally wrong + COL_KEY_NAME: FOOTER_KEY.decode("UTF-8"), # Intentionally wrong + }) + decryption_config = pe.DecryptionConfiguration( cache_lifetime=timedelta(minutes=5.0)) with pytest.raises(ValueError, match=r"Incorrect master key used"): read_encrypted_parquet( path, decryption_config, wrong_kms_connection_config, - crypto_factory) + wrong_crypto_factory) def test_encrypted_parquet_read_no_decryption_config(tempdir, data_table): @@ -219,23 +233,12 @@ def test_encrypted_parquet_write_no_col_key(tempdir, data_table): encryption_config = pe.EncryptionConfiguration( footer_key=FOOTER_KEY_NAME) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - crypto_factory = pe.CryptoFactory(kms_factory) with pytest.raises(OSError, match="Either column_keys or uniform_encryption " "must be set"): # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) + write_encrypted_file(path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, + FOOTER_KEY, b"", encryption_config) def test_encrypted_parquet_write_kms_error(tempdir, data_table, @@ -497,24 +500,11 @@ def test_encrypted_parquet_loop(tempdir, data_table, basic_encryption_config): # Encrypt the footer with the footer key, # encrypt column `a` and column `b` with another key, - # keep `c` plaintext - encryption_config = basic_encryption_config + # keep `c` plaintext, defined in basic_encryption_config + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - crypto_factory = pe.CryptoFactory(kms_factory) - - # Write with encryption properties - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, crypto_factory) verify_file_encrypted(path) decryption_config = pe.DecryptionConfiguration( @@ -537,32 +527,46 @@ def test_read_with_deleted_crypto_factory(tempdir, data_table, basic_encryption_ Test that decryption properties can be used if the crypto factory is no longer alive """ path = tempdir / PARQUET_NAME - encryption_config = basic_encryption_config - kms_connection_config = pe.KmsConnectionConfig( - custom_kms_conf={ - FOOTER_KEY_NAME: FOOTER_KEY.decode("UTF-8"), - COL_KEY_NAME: COL_KEY.decode("UTF-8"), - } - ) - - def kms_factory(kms_connection_configuration): - return InMemoryKmsClient(kms_connection_configuration) - - encryption_crypto_factory = pe.CryptoFactory(kms_factory) - write_encrypted_parquet(path, data_table, encryption_config, - kms_connection_config, encryption_crypto_factory) + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) verify_file_encrypted(path) - # Use a local function to get 
decryption properties, so the crypto factory that - # creates the properties will be deleted after it returns. - def get_decryption_properties(): - decryption_crypto_factory = pe.CryptoFactory(kms_factory) - decryption_config = pe.DecryptionConfiguration( - cache_lifetime=timedelta(minutes=5.0)) - return decryption_crypto_factory.file_decryption_properties( - kms_connection_config, decryption_config) + # Create decryption properties and delete the crypto factory that created + # the properties afterwards. + decryption_config = pe.DecryptionConfiguration( + cache_lifetime=timedelta(minutes=5.0)) + file_decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + del crypto_factory result = pq.ParquetFile( - path, decryption_properties=get_decryption_properties()) + path, decryption_properties=file_decryption_properties) result_table = result.read(use_threads=True) assert data_table.equals(result_table) + + +def test_encrypted_parquet_read_table(tempdir, data_table, basic_encryption_config): + """Write an encrypted parquet then read it back using read_table.""" + path = tempdir / PARQUET_NAME + + # Write the encrypted parquet file using the utility function + kms_connection_config, crypto_factory = write_encrypted_file( + path, data_table, FOOTER_KEY_NAME, COL_KEY_NAME, FOOTER_KEY, COL_KEY, + basic_encryption_config) + + decryption_config = pe.DecryptionConfiguration( + cache_lifetime=timedelta(minutes=5.0)) + file_decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + + # Read the encrypted parquet file using read_table + result_table = pq.read_table(path, decryption_properties=file_decryption_properties) + + # Assert that the read table matches the original data + assert data_table.equals(result_table) + + # Read the encrypted parquet folder using read_table + result_table = pq.read_table( + tempdir, decryption_properties=file_decryption_properties) + assert data_table.equals(result_table) diff --git a/python/pyarrow/tests/test_dataset_encryption.py b/python/pyarrow/tests/test_dataset_encryption.py index 2a631db9fc0fa..0d8b4a152ab9f 100644 --- a/python/pyarrow/tests/test_dataset_encryption.py +++ b/python/pyarrow/tests/test_dataset_encryption.py @@ -142,6 +142,18 @@ def test_dataset_encryption_decryption(): assert table.equals(dataset.to_table()) + # set decryption properties for parquet fragment scan options + decryption_properties = crypto_factory.file_decryption_properties( + kms_connection_config, decryption_config) + pq_scan_opts = ds.ParquetFragmentScanOptions( + decryption_properties=decryption_properties + ) + + pformat = pa.dataset.ParquetFileFormat(default_fragment_scan_options=pq_scan_opts) + dataset = ds.dataset("sample_dataset", format=pformat, filesystem=mockfs) + + assert table.equals(dataset.to_table()) + @pytest.mark.skipif( not encryption_unavailable, reason="Parquet Encryption is currently enabled" From 1c62df5255ced89171d5b846dc82f5a10d519f4a Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Thu, 9 May 2024 15:17:20 -0400 Subject: [PATCH 081/261] GH-41179: [Docs] Documentation for Dissociated IPC Protocol (#41180) ### Rationale for this change Adding documentation for the Experimental Dissociated IPC Protocol for splitting arrow IPC metadata and body data into separate streams for use with newer transports such as UCX and Libfabric. 
The relevant mailing list discussion and vote: https://lists.apache.org/thread/k26n1h90b1wy1w5k53whh0t8o4nd0yx7 ### What changes are included in this PR? Only documentation changes and images for the Arrow docs site. I tagged people I thought might be relevant for reviewing, but feel free to tag and add anyone else that might seem relevant to reviewing this. Thanks! * GitHub Issue: #41179 Lead-authored-by: Matt Topol Co-authored-by: Benjamin Kietzman Co-authored-by: Sutou Kouhei Co-authored-by: Antoine Pitrou Signed-off-by: Matt Topol --- docs/source/format/Columnar.rst | 2 + docs/source/format/DissociatedIPC.rst | 403 ++++++++++++++++++ .../format/DissociatedIPC/ClientFlowchart.mmd | 37 ++ .../DissociatedIPC/SequenceDiagramSame.mmd | 43 ++ .../SequenceDiagramSeparate.mmd | 44 ++ docs/source/format/Flight.rst | 2 + docs/source/format/index.rst | 1 + 7 files changed, 532 insertions(+) create mode 100644 docs/source/format/DissociatedIPC.rst create mode 100644 docs/source/format/DissociatedIPC/ClientFlowchart.mmd create mode 100644 docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd create mode 100644 docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index 0cfece2586294..ec6a7fa5e334a 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -1108,6 +1108,8 @@ includes a serialized Flatbuffer type along with an optional message body. We define this message format before describing how to serialize each constituent IPC message type. +.. _ipc-message-format: + Encapsulated message format --------------------------- diff --git a/docs/source/format/DissociatedIPC.rst b/docs/source/format/DissociatedIPC.rst new file mode 100644 index 0000000000000..0b0861399cb2f --- /dev/null +++ b/docs/source/format/DissociatedIPC.rst @@ -0,0 +1,403 @@ +.. Licensed to the Apache Software Foundation (ASF) under one +.. or more contributor license agreements. See the NOTICE file +.. distributed with this work for additional information +.. regarding copyright ownership. The ASF licenses this file +.. to you under the Apache License, Version 2.0 (the +.. "License"); you may not use this file except in compliance +.. with the License. You may obtain a copy of the License at + +.. http://www.apache.org/licenses/LICENSE-2.0 + +.. Unless required by applicable law or agreed to in writing, +.. software distributed under the License is distributed on an +.. "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +.. KIND, either express or implied. See the License for the +.. specific language governing permissions and limitations +.. under the License. + +.. _dissociated-ipc: + +======================== +Dissociated IPC Protocol +======================== + +.. warning:: + + Experimental: The Dissociated IPC Protocol is experimental in its current + form. Based on feedback and usage the protocol definition may change until + it is fully standardized. + +Rationale +========= + +The :ref:`Arrow IPC format ` describes a protocol for transferring +Arrow data as a stream of record batches. This protocol expects a continuous +stream of bytes divided into discrete messages (using a length prefix and +continuation indicator). 
Each discrete message consists of two portions: + +* A `Flatbuffers`_ header message +* A series of bytes consisting of the flattened and packed body buffers (some + message types, like Schema messages, do not have this section) + - This is referred to as the *message body* in the IPC format spec. + +For most cases, the existing IPC format is sufficiently efficient: + +* Receiving data in the IPC format allows zero-copy utilization of the body + buffer bytes; no deserialization is required to form Arrow Arrays +* An IPC file format can be memory-mapped because it is location agnostic + and the bytes of the file are exactly what is expected in memory. + +However, there are use cases that aren't handled by this: + +* Constructing the IPC record batch message requires allocating a contiguous + chunk of bytes and copying all of the data buffers into it, packed together + back-to-back. This pessimizes the common case of wrapping existing, directly + consumable data into an IPC message. +* Even if Arrow data is located in memory accessible across process boundaries + or transports (such as UCX), there is no standard way to specify that shared + location to consumers which could take advantage of it. +* Arrow data located on a non-CPU device (such as a GPU) cannot be sent using + Arrow IPC without having to copy the data back to the host device or copying + the Flatbuffers metadata bytes into device memory. + + * By the same token, receiving IPC messages into device memory would require + performing a copy of the Flatbuffers metadata back to the host CPU device. This + is due to the fact that the IPC stream interleaves data and metadata across a + single stream. + +This protocol attempts to solve these use cases in an efficient manner. + +Goals +----- + +* Define a generic protocol for passing Arrow IPC data, not tied to any particular + transport, that also allows for utilizing non-CPU device memory, shared memory, and + newer "high performance" transports such as `UCX`_ or `libfabric`_. + + * This allows for the data in the body to be kept on non-CPU devices (like GPUs) + without expensive device-to-host copies. + +* Allow for using :ref:`Flight RPC ` purely for control flow by separating + the stream of IPC metadata from IPC body bytes. + +Definitions +----------- + +IPC Metadata + The Flatbuffers message bytes that encompass the header of an Arrow IPC message + +Tag + A little-endian ``uint64`` value used for flow control and used in determining + how to interpret the body of a message. Specific bits can be masked to allow + identifying messages by only a portion of the tag, leaving the rest of the bits + to be used for control flow or other message metadata. Some transports, such as + UCX, have built-in support for such tag values and will provide them in CPU + memory regardless of whether or not the body of the message may reside on a + non-CPU device. + +Sequence Number + A little-endian, 4-byte unsigned integer starting at 0 for a stream, indicating + the sequence order of messages. It is also used to identify specific messages to + tie the IPC metadata header to its corresponding body since the metadata and body + can be sent across separate pipes/streams/transports. + + If a sequence number reaches ``UINT32_MAX``, it should be allowed to roll over as + it is unlikely there would be enough unprocessed messages waiting to be processed + that would cause an overlap of sequence numbers. + + The sequence number serves two purposes: to identify corresponding metadata and + tagged body data messages and to ensure we do not rely on messages having to arrive + in order. A client should use the sequence number to correctly order messages as + they arrive for processing. + +The Protocol +============ + +A reference example implementation utilizing `libcudf`_ and `UCX`_ can be found in the +`arrow-experiments repo `_. + +Requirements +------------ + +A transport implementing this protocol **MUST** provide two pieces of functionality: + +* Message sending + + * Delimited messages (like gRPC) as opposed to non-delimited streams (like plain TCP + without further framing). + + * Alternatively, a framing mechanism like the :ref:`encapsulated message format ` + for the IPC protocol can be used while leaving out the body bytes. + +* Tagged message sending + + * Sending a message that has an attached little-endian, unsigned 64-bit integral tag + for control flow. A tag like this allows control flow to operate on a message whose body + is on a non-CPU device without requiring the message itself to get copied off of the device. + +URI Specification +----------------- + +When providing a URI to a consumer to contact for use with this protocol (such as via +the :ref:`Location URI for Flight `), the URI should specify an easily +identifiable scheme like *ucx:* or *fabric:*. In addition, the URI should +encode the following URI query parameters: + +.. note:: + As this protocol matures, this document will get updated with commonly recognized + transport schemes that get used with it. + +* ``want_data`` - **REQUIRED** - uint64 integer value + + * This value should be used to tag an initial message to the server to initiate a + data transfer. The body of the initiating message should be an opaque binary identifier + of the data stream being requested (like the ``Ticket`` in the Flight RPC protocol) + +* ``free_data`` - **OPTIONAL** - uint64 integer value + + * If the server might send messages using offsets / addresses for remote memory accessing + or shared memory locations, the URI should include this parameter. This value is used to + tag messages sent from the client to the data server, containing specific offsets / addresses + which were provided that are no longer required by the client (i.e. any operations that + directly reference those memory locations, such as copying the remote data into local memory, + have been completed). + +* ``remote_handle`` - **OPTIONAL** - base64-encoded string + + * When working with shared memory or remote memory, this value indicates any required + handle or identifier that is necessary for accessing the memory. + + * Using UCX, this would be an *rkey* value + + * With CUDA IPC, this would be the value of the base GPU pointer or memory handle, + and subsequent addresses would be offsets from this base pointer. + +Handling of Backpressure +------------------------ + +*Currently* this proposal does not specify any way to manage the backpressure of +messages to throttle for memory and bandwidth reasons. For now, this will be +**transport-defined** rather than locking into something sub-optimal. + +As usage among different transports and libraries grows, common patterns will emerge +that will allow for a generic, but efficient, way to handle backpressure across +different use cases. + +.. note:: + While the protocol itself is transport agnostic, the current usage and examples + have so far been tested only with the UCX and libfabric transports.
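As a concrete illustration of the URI contract above, the following is a minimal, non-normative Python sketch of client-side parsing. Only the query parameter names and types come from this specification; the ``ucx`` scheme, address, and tag values are illustrative assumptions.

```python
# Minimal sketch, not part of the specification: extract the query
# parameters defined above from a dissociated-IPC location URI.
# The scheme, host, and numeric tag values are made-up examples.
import base64
from urllib.parse import urlparse, parse_qs

def parse_location(uri):
    parsed = urlparse(uri)
    query = parse_qs(parsed.query)
    params = {
        # REQUIRED: uint64 tag value used to initiate a transfer
        "want_data": int(query["want_data"][0]),
        # OPTIONAL: uint64 tag value for freeing shared/remote memory
        "free_data": int(query["free_data"][0]) if "free_data" in query else None,
        # OPTIONAL: base64-encoded handle (e.g. a UCX rkey)
        "remote_handle": (base64.b64decode(query["remote_handle"][0])
                          if "remote_handle" in query else None),
    }
    return parsed.scheme, parsed.netloc, params

print(parse_location("ucx://10.0.0.1:7000?want_data=11&free_data=12"))
```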
+ + +Protocol Description +==================== + +There are two possibilities that can occur: + +1. The streams of metadata and body data are sent across separate connections + +.. mermaid:: ./DissociatedIPC/SequenceDiagramSeparate.mmd + + +2. The streams of metadata and body data are sent simultaneously across the + same connection + +.. mermaid:: ./DissociatedIPC/SequenceDiagramSame.mmd + +Server Sequence +--------------- + +There can be either a single server handling both the IPC Metadata stream and the +Body data streams, or separate servers for handling the IPC Metadata and the body +data. This allows for streaming of data across either a single transport pipe or +two pipes if desired. + +Metadata Stream Sequence +'''''''''''''''''''''''' + +The standing state of the server is waiting for a **tagged** message with a specific +```` tag value to initiate a transfer. This ```` value is defined +by the server and propagated to any clients via the URI they are provided. This protocol +does not prescribe any particular value so that it will not interfere with any other +existing protocols that rely on tag values. The body of that message will contain an +opaque, binary identifier to indicate a particular dataset / data stream to send. + +.. note:: + + For instance, the **ticket** that was passed with a *FlightInfo* message would be + the body of this message. Because it is opaque, it can be anything the server wants + to use. The URI and identifier do not need to be given to the client via Flight RPC, + but could come across from any transport or protocol desired. + +Upon receiving a ```` request, the server *should* respond by sending a stream +of messages consisting of the following: + +.. mermaid:: + + block-beta + columns 8 + + block:P["\n\n\n\nPrefix"]:5 + T["Message type\nByte 0"] + S["Sequence number\nBytes 1-4"] + end + H["Flatbuffer bytes\nRest of the message"]:3 + +* A 5-byte prefix + + - The first byte of the message indicates the type of message, currently there are only + two allowed message types (more types may get added in the future): + + 0) End of Stream + 1) Flatbuffers IPC Metadata Message + + - the next 4-bytes are a little-endian, unsigned 32-bit integer indicating the sequence number of + the message. The first message in the stream (**MUST** always be a schema message) **MUST** + have a sequence number of ``0``. Each subsequent message **MUST** increment the number by + ``1``. + +* The full Flatbuffers bytes of an Arrow IPC header + +As defined in the Arrow IPC format, each metadata message can represent a chunk of data or +dictionaries for use by the stream of data. + +After sending the last metadata message, the server **MUST** indicate the end of the stream +by sending a message consisting of **exactly** 5 bytes: + +* The first byte is ``0``, indicating an **End of Stream** message +* The last 4 bytes are the sequence number (4-byte, unsigned integer in little-endian byte order) + +Data Stream Sequence +'''''''''''''''''''' + +If a single server is handling both the data and metadata streams, then the data messages +**should** begin being sent to the client in parallel with the metadata messages. Otherwise, +as with the metadata sequence, the standing state of the server is to wait for a **tagged** +message with the ```` tag value, whose body indicates the dataset / data stream +to send to the client. + +For each IPC message in the stream of data, a **tagged** message **MUST** be sent on the data +stream if that message has a body (i.e. 
a Record Batch or Dictionary message). The +:term:`tag ` for each message should be structured as follows: + +.. mermaid:: + + block-beta + columns 8 + + S["Sequence number\nBytes 0-3"]:4 + U["Unused (Reserved)\nBytes 4-6"]:3 + T["Message type\nByte 7"]:1 + +* The *least significant* 4 bytes (bits 0 - 31) of the tag should be the unsigned 32-bit, little-endian sequence + number of the message. +* The *most significant* byte (bits 56 - 63) of the tag indicates the message body **type** as an 8-bit + unsigned integer. Currently only two message types are specified, but more can be added as + needed to expand the protocol: + + 0) The body contains the raw body buffer bytes as a packed buffer (i.e. the standard IPC + format body bytes) + 1) The body contains a series of unsigned, little-endian 64-bit integer pairs to represent + either shared or remote memory, schematically structured as + + * The first two integers (e.g. the first 16 bytes) represent the *total* size (in bytes) + of all buffers and the number of buffers in this message (and thus the number of following + pairs of ``uint64``) + + * Each subsequent pair of ``uint64`` values is an address / offset followed by the length of + that particular buffer. + +* All unspecified bits (bits 32 - 55) of the tag are *reserved* for future use by potential updates + to this protocol. For now they **MUST** be 0. + +.. note:: + + Any shared/remote memory addresses that are sent across **MUST** be kept alive by the server + until a corresponding tagged ```` message is received. If the client disconnects + before sending any ```` messages, it can be assumed to be safe to clean up the memory + if desired by the server. + +After sending the last tagged IPC body message, the server should maintain the connection and wait +for tagged ```` messages. The structure of these ```` messages is simple: +one or more unsigned, little-endian 64-bit integers which indicate the addresses/offsets that can +be freed. + +Once there are no more outstanding addresses to be freed, the work for this stream is complete. + +Client Sequence +--------------- + +A client for this protocol needs to concurrently handle both the data and metadata streams of +messages, which may come from the same server or from different servers. Below is a flowchart +showing how a client might handle the metadata and data streams: + +.. mermaid:: ./DissociatedIPC/ClientFlowchart.mmd + +#. First the client sends a tagged message using the ```` value it was provided in the + URI as the tag, and the opaque ID as the body. + + * If the metadata and data servers are separate, then a ```` message needs to be sent + separately to each. + * In either scenario, the metadata and data streams can be processed concurrently and/or asynchronously + depending on the nature of the transports. + +#. For each **untagged** message the client receives in the metadata stream: + + * The first byte of the message indicates whether it is an *End of Stream* message (value ``0``) + or a metadata message (value ``1``). + * The next 4 bytes are the sequence number of the message, an unsigned 32-bit integer in + little-endian byte order. + * If it is **not** an *End of Stream* message, the remaining bytes are the IPC Flatbuffer bytes which + can be interpreted as normal. + + * If the message has a body (i.e. Record Batch or Dictionary message) then the client should retrieve + a tagged message from the Data Stream using the same sequence number. + + * If it **is** an *End of Stream* message, then it is safe to close the metadata connection if there are + no gaps in the sequence numbers received. + +#. When a metadata message that requires a body is received, the tag mask of ``0x00000000FFFFFFFF`` **should** + be used alongside the sequence number to match the message regardless of the higher bytes (e.g. we only + care about matching the lower 4 bytes to the sequence number) + + * Once received, the most significant byte's value determines how the client processes the body data: + + * If the most significant byte is 0: Then the body of the message is the raw IPC packed body buffers + allowing it to easily be processed with the corresponding metadata header bytes. + + * If the most significant byte is 1: The body of the message will consist of a series of pairs of + unsigned, 64-bit integers in little-endian byte order. + + * The first two integers represent *1)* the total size of all the body buffers together to allow + for easy allocation if an intermediate buffer is needed and *2)* the number of buffers being sent (``nbuf``). + + * The rest of the message will be ``nbuf`` pairs of integers, one for each buffer. Each pair is + *1)* the address / offset of the buffer and *2)* the length of that buffer. Memory can then be retrieved + via shared or remote memory routines based on the underlying transport. These addresses / offsets **MUST** + be retained so they can be sent back in ```` messages later, indicating to the server that + the client no longer needs the shared memory. + +#. Once an *End of Stream* message is received, the client should process any remaining unprocessed + IPC metadata messages. + +#. After individual memory addresses / offsets are able to be freed by the remote server (in the case where + it has sent these rather than the full body bytes), the client should send corresponding ```` messages + to the server. + + * A single ```` message consists of an arbitrary number of unsigned 64-bit integer values, representing + the addresses / offsets which can be freed. The reason for it being an *arbitrary number* is to allow a client + to choose whether to send multiple messages to free multiple addresses or to coalesce multiple addresses into + fewer messages to be freed (thus making the protocol less "chatty" if desired). + +Continuing Development +====================== + +If you decide to try this protocol in your own environments and systems, we'd love feedback and to learn about +your use case. As this is an **experimental** protocol currently, we need real-world usage in order to facilitate +improving it and finding the right generalizations to standardize on across transports. + +Please chime in using the Arrow Developers Mailing list: https://arrow.apache.org/community/#mailing-lists + +.. _Flatbuffers: http://github.com/google/flatbuffers +.. _UCX: https://openucx.org/ +.. _libfabric: https://ofiwg.github.io/libfabric/ +.. _libcudf: https://docs.rapids.ai/api diff --git a/docs/source/format/DissociatedIPC/ClientFlowchart.mmd b/docs/source/format/DissociatedIPC/ClientFlowchart.mmd new file mode 100644 index 0000000000000..652cabc1c7425 --- /dev/null +++ b/docs/source/format/DissociatedIPC/ClientFlowchart.mmd @@ -0,0 +1,37 @@ +%% Licensed to the Apache Software Foundation (ASF) under one +%% or more contributor license agreements. See the NOTICE file +%% distributed with this work for additional information +%% regarding copyright ownership.
The ASF licenses this file +%% to you under the Apache License, Version 2.0 (the +%% "License"); you may not use this file except in compliance +%% with the License. You may obtain a copy of the License at + +%% http://www.apache.org/licenses/LICENSE-2.0 + +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +graph LR +client((Client))-->c1{{Send #60;want_data#gt; Msg}} +subgraph meta [Meta Message] + direction LR + m1[/Msg Type #40;byte 0#41;
Seq Num #40;bytes 1-5#41;/]-- type 1 -->m2[[Process IPC Header]] + m2-- IPC has body -->m3[Get Corresponding
Tagged Msg] + m2-- Schema Msg -->m4[/Store Schema/] + m1-- type 0 -->e[Indicate End of Stream] +end +subgraph data [Data Stream] + direction LR + d1[Request Msg
for Seq Num]-->d2{Most Significant
Byte} + d2-- 0 -->d3[Construct from
Metadata and Body] + d2-- 1 -->d4[Get shared/remote
buffers] + d4 -->d5[Construct from
Metadata and buffers] + d3 & d5 -->e2[Output Batch] +end + +client -- recv untagged msg --> meta +client -- get tagged msg --> data diff --git a/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd b/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd new file mode 100644 index 0000000000000..adf26bdc32767 --- /dev/null +++ b/docs/source/format/DissociatedIPC/SequenceDiagramSame.mmd @@ -0,0 +1,43 @@ +%% Licensed to the Apache Software Foundation (ASF) under one +%% or more contributor license agreements. See the NOTICE file +%% distributed with this work for additional information +%% regarding copyright ownership. The ASF licenses this file +%% to you under the Apache License, Version 2.0 (the +%% "License"); you may not use this file except in compliance +%% with the License. You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +sequenceDiagram + participant C as Client + participant S as Server + activate C + C-->>+S: TaggedMessage(server.want_data, bytes=ID_of_desired_data) + S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata) + par + loop each chunk + S-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata) + end + S-->>C: Message(bytes([0]) + le_bytes(sequence_number)) + and + loop each chunk + alt + S-->>C: TaggedMessage((bytes[0] << 55) | le_bytes(sequence_number),
bytes=batch_data) + else + S-->>C: TaggedMessage((bytes[1] << 55) | le_bytes(sequence_number),
bytes=uint64_pairs) + end + end + end + + loop + C-->>S: TaggedMessage(server.free_data, bytes=uint64_list) + end + deactivate S + deactivate C diff --git a/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd b/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd new file mode 100644 index 0000000000000..11d2d9d6387eb --- /dev/null +++ b/docs/source/format/DissociatedIPC/SequenceDiagramSeparate.mmd @@ -0,0 +1,44 @@ +%% Licensed to the Apache Software Foundation (ASF) under one +%% or more contributor license agreements. See the NOTICE file +%% distributed with this work for additional information +%% regarding copyright ownership. The ASF licenses this file +%% to you under the Apache License, Version 2.0 (the +%% "License"); you may not use this file except in compliance +%% with the License. You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. + +sequenceDiagram + participant D as Data Stream + participant C as Client + participant M as Metadata Stream + + activate C + C-->>+M: TaggedMessage(server.want_data, bytes=ID_of_desired_data) + C-->>+D: TaggedMessage(server.want_data, bytes=ID_of_desired_data) + M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + schema_metadata) + loop each batch + par + M-->>C: Message(bytes([1]) + le_bytes(sequence_number) + batch_metadata) + and + alt + D-->>C: TaggedMessage((bytes[0] << 55) | le_bytes(sequence_number),
bytes=batch_data) + else + D-->>C: TaggedMessage((bytes[1] << 55) | le_bytes(sequence_number),
bytes=uint64_pairs) + end + end + end + M-->>C: Message(bytes([0]) + le_bytes(sequence_number)) + deactivate M + loop + C-->>D: TaggedMessage(server.free_data, bytes=uint64_list) + end + deactivate D + deactivate C diff --git a/docs/source/format/Flight.rst b/docs/source/format/Flight.rst index c65a1f70bde7f..2c5487d857ea4 100644 --- a/docs/source/format/Flight.rst +++ b/docs/source/format/Flight.rst @@ -310,6 +310,8 @@ well, in which case any `authentication method supported by gRPC .. _Mutual TLS (mTLS): https://grpc.io/docs/guides/auth/#supported-auth-mechanisms +.. _flight-location-uris: + Location URIs ============= diff --git a/docs/source/format/index.rst b/docs/source/format/index.rst index 856830d863243..44ea3e8e7e608 100644 --- a/docs/source/format/index.rst +++ b/docs/source/format/index.rst @@ -30,6 +30,7 @@ Specifications CDataInterface CStreamInterface CDeviceDataInterface + DissociatedIPC Flight FlightSql ADBC From 5255adc5139d6094a7b3b04f273f3ef11d49ec38 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Thu, 9 May 2024 21:26:16 -0400 Subject: [PATCH 082/261] GH-41529: [C++][Compute] Remove redundant logic for ArrayData as ExecResults in ExecScalarCaseWhen (#41380) ### Rationale for this change Remove the redundant path in `ExecScalarCaseWhen` ### What changes are included in this PR? Refactor: remove the processing logic for ArrayData as ExecResults in ExecScalarCaseWhen. ### Are these changes tested? Yes, by existing tests. ### Are there any user-facing changes? No * GitHub Issue: #41529 Authored-by: ZhangHuiGui <2689496754@qq.com> Signed-off-by: Felipe Oliveira Carvalho --- .../arrow/compute/kernels/scalar_if_else.cc | 50 +++++++------------ 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index 13874d9d65e70..6368ef525ff9c 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -1483,39 +1483,27 @@ Status ExecScalarCaseWhen(KernelContext* ctx, const ExecSpan& batch, ExecResult* result = temp.get(); } - // TODO(wesm): clean this up to have less duplication - if (out->is_array_data()) { - ArrayData* output = out->array_data().get(); - if (is_dictionary_type::value) { - const ExecValue& dict_from = has_result ? result : batch[1]; - if (dict_from.is_scalar()) { - output->dictionary = checked_cast(*dict_from.scalar) - .value.dictionary->data(); - } else { - output->dictionary = dict_from.array.ToArrayData()->dictionary; - } - } - CopyValues(result, /*in_offset=*/0, batch.length, - output->GetMutableValues(0, 0), - output->GetMutableValues(1, 0), output->offset); - } else { - // ArraySpan - ArraySpan* output = out->array_span_mutable(); - if (is_dictionary_type::value) { - const ExecValue& dict_from = has_result ? result : batch[1]; - output->child_data.resize(1); - if (dict_from.is_scalar()) { - output->child_data[0].SetMembers( - *checked_cast(*dict_from.scalar) - .value.dictionary->data()); - } else { - output->child_data[0] = dict_from.array; - } + // Only input types of non-fixed length (which cannot be pre-allocated) + // will save the output data in ArrayData. And make sure the FixedLength + // types must be output in ArraySpan. + static_assert(is_fixed_width(Type::type_id)); + DCHECK(out->is_array_span()); + + ArraySpan* output = out->array_span_mutable(); + if (is_dictionary_type::value) { + const ExecValue& dict_from = has_result ? result : batch[1]; + output->child_data.resize(1); + if (dict_from.is_scalar()) { + output->child_data[0].SetMembers( + *checked_cast(*dict_from.scalar) + .value.dictionary->data()); + } else { + output->child_data[0] = dict_from.array; } - CopyValues(result, /*in_offset=*/0, batch.length, - output->GetValues(0, 0), output->GetValues(1, 0), - output->offset); } + CopyValues(result, /*in_offset=*/0, batch.length, + output->GetValues(0, 0), output->GetValues(1, 0), + output->offset); return Status::OK(); } From f8d3b10b4b89b47f6e7a594b95c82e2ff161f1a5 Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Fri, 10 May 2024 12:42:25 +0700 Subject: [PATCH 083/261] GH-41590: [Java] Improve BaseRepeatedValueVector function on isEmpty and isNull operations (#41601) ### Rationale for this change Resolves #41590 . ### What changes are included in this PR? Make `isNull` and `isEmpty` of `BaseRepeatedValueVector` abstract. ### Are these changes tested? Existing tests pass. ### Are there any user-facing changes? No. * GitHub Issue: #41590 Authored-by: Tai Le Manh Signed-off-by: David Li --- .../vector/complex/BaseRepeatedValueVector.java | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java index 7906d90c2fff0..7c4015299a6cd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueVector.java @@ -355,16 +355,8 @@ public int getInnerValueCountAt(int index) { offsetBuffer.getInt(index * OFFSET_WIDTH); } - /** Return if value at index is null (this implementation is always false). */ - @Override - public boolean isNull(int index) { - return false; - } - - /** Return if value at index is empty (this implementation is always false). */ - public boolean isEmpty(int index) { - return false; - } + /** Return if value at index is empty. */ + public abstract boolean isEmpty(int index); /** Starts a new repeated value. */ public int startNewValue(int index) { From 7aea8bf7a65d679bd71d973b358f997eb3b6c6af Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Fri, 10 May 2024 23:58:10 +0900 Subject: [PATCH 084/261] GH-41316: [CI][Python] Reduce CI time on macOS (#41378) ### Rationale for this change Reduce CI time for the Python build on macos-12 and macos-14 by using ccache ### What changes are included in this PR? Add ccache for macos-12 and macos-14 ### Are these changes tested? Check the cache hit rate ### Are there any user-facing changes?
No * GitHub Issue: #41316 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- .github/workflows/python.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index 15056961f8cf4..a568f8346e7fc 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -182,6 +182,19 @@ jobs: python -m pip install \ -r python/requirements-build.txt \ -r python/requirements-test.txt + - name: Setup ccache + shell: bash + run: ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: python-ccache-macos-${{ matrix.macos-version }}-${{ hashFiles('cpp/**', 'python/**') }} + restore-keys: python-ccache-macos-${{ matrix.macos-version }}- - name: Build shell: bash run: | From 899422e16e3f1f71819f52fc627359d79f7d3662 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 10 May 2024 16:42:21 -0300 Subject: [PATCH 085/261] GH-39301: [Archery][CI][Integration] Add nanoarrow to archery + integration setup (#39302) ### Rationale for this change The ability to add integration testing was added in nanoarrow; however, the infrastructure for running these tests currently lives in the arrow monorepo. ### What changes are included in this PR? - Added the relevant code to Archery such that these tests can be run - Added the relevant scripts/environment variables to CI such that these tests run in the integration CI job ### Are these changes tested? Yes, via the "Integration" CI job. ### Are there any user-facing changes? No. This PR still needs https://github.com/apache/arrow/pull/41264 for the integration tests to pass.
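As an illustrative sketch of what this enables (assuming the diffs below are applied and the `archery` package is importable), the new tester can be exercised by driving the runner directly; all keyword arguments are taken from the `run_all_tests` signature in the `runner.py` hunk below, while the exact combination of flags is a made-up example. Normally this is driven via the `archery integration --with-nanoarrow` CLI option added here.

```python
# Sketch only: run the C Data integration tests between the C++ and
# nanoarrow implementations, assuming the arrow-nanoarrow sources and
# binaries are in place (see ci/scripts/nanoarrow_build.sh below).
from archery.integration.runner import run_all_tests

run_all_tests(with_cpp=True, with_java=False, with_js=False,
              with_csharp=False, with_go=False, with_nanoarrow=True,
              run_c_data=True)
```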
* Closes: #39301 * GitHub Issue: #39301 Lead-authored-by: Dewey Dunnington Co-authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- .github/workflows/integration.yml | 6 + ci/scripts/integration_arrow_build.sh | 2 + ci/scripts/nanoarrow_build.sh | 52 ++++++ dev/archery/archery/cli.py | 5 +- dev/archery/archery/integration/datagen.py | 3 + dev/archery/archery/integration/runner.py | 8 +- .../archery/integration/tester_nanoarrow.py | 148 ++++++++++++++++++ docker-compose.yml | 2 + 8 files changed, 223 insertions(+), 3 deletions(-) create mode 100755 ci/scripts/nanoarrow_build.sh create mode 100644 dev/archery/archery/integration/tester_nanoarrow.py diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6e09ad61480a6..f53f4aeb505d2 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -75,6 +75,11 @@ jobs: with: repository: apache/arrow-rs path: rust + - name: Checkout Arrow nanoarrow + uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 + with: + repository: apache/arrow-nanoarrow + path: nanoarrow - name: Free up disk space run: | ci/scripts/util_free_space.sh @@ -97,6 +102,7 @@ jobs: run: > archery docker run \ -e ARCHERY_DEFAULT_BRANCH=${{ github.event.repository.default_branch }} \ + -e ARCHERY_INTEGRATION_WITH_NANOARROW=1 \ -e ARCHERY_INTEGRATION_WITH_RUST=1 \ conda-integration - name: Docker Push diff --git a/ci/scripts/integration_arrow_build.sh b/ci/scripts/integration_arrow_build.sh index e5c31527aedff..9b54049a2b803 100755 --- a/ci/scripts/integration_arrow_build.sh +++ b/ci/scripts/integration_arrow_build.sh @@ -30,6 +30,8 @@ build_dir=${2} ${arrow_dir}/ci/scripts/rust_build.sh ${arrow_dir} ${build_dir} +${arrow_dir}/ci/scripts/nanoarrow_build.sh ${arrow_dir} ${build_dir} + if [ "${ARROW_INTEGRATION_CPP}" == "ON" ]; then ${arrow_dir}/ci/scripts/cpp_build.sh ${arrow_dir} ${build_dir} fi diff --git a/ci/scripts/nanoarrow_build.sh b/ci/scripts/nanoarrow_build.sh new file mode 100755 index 0000000000000..1612b9a2d0102 --- /dev/null +++ b/ci/scripts/nanoarrow_build.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e + +arrow_dir=${1} +source_dir=${1}/nanoarrow +build_dir=${2}/nanoarrow + +# This file is used to build the nanoarrow binaries needed for the archery +# integration tests. Testing of the nanoarrow implementation in normal CI is handled +# by github workflows in the arrow-nanoarrow repository. + +if [ "${ARCHERY_INTEGRATION_WITH_NANOARROW}" -eq "0" ]; then + echo "=====================================================================" + echo "Not building nanoarrow" + echo "=====================================================================" + exit 0; +elif [ ! 
-d "${source_dir}" ]; then + echo "=====================================================================" + echo "The nanoarrow source is missing. Please clone the arrow-nanoarrow repository" + echo "to arrow/nanoarrow before running the integration tests:" + echo " git clone https://github.com/apache/arrow-nanoarrow.git path/to/arrow/nanoarrow" + echo "=====================================================================" + exit 1; +fi + +set -x + +mkdir -p ${build_dir} +pushd ${build_dir} + +cmake ${source_dir} -DNANOARROW_BUILD_INTEGRATION_TESTS=ON +cmake --build . + +popd diff --git a/dev/archery/archery/cli.py b/dev/archery/archery/cli.py index 8a26d9266f22d..cd746f9c4499a 100644 --- a/dev/archery/archery/cli.py +++ b/dev/archery/archery/cli.py @@ -738,6 +738,9 @@ def _set_default(opt, default): help='Include JavaScript in integration tests') @click.option('--with-go', type=bool, default=False, help='Include Go in integration tests') +@click.option('--with-nanoarrow', type=bool, default=False, + help='Include nanoarrow in integration tests', + envvar="ARCHERY_INTEGRATION_WITH_NANOARROW") @click.option('--with-rust', type=bool, default=False, help='Include Rust in integration tests', envvar="ARCHERY_INTEGRATION_WITH_RUST") @@ -776,7 +779,7 @@ def integration(with_all=False, random_seed=12345, **args): gen_path = args['write_generated_json'] - languages = ['cpp', 'csharp', 'java', 'js', 'go', 'rust'] + languages = ['cpp', 'csharp', 'java', 'js', 'go', 'nanoarrow', 'rust'] formats = ['ipc', 'flight', 'c_data'] enabled_languages = 0 diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 5cae907a4aa71..f6302165cd5a0 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1928,17 +1928,20 @@ def _temp_path(): .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_binary_view_case() .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_list_view_case() .skip_tester('C#') # Doesn't support large list views .skip_tester('Java') .skip_tester('JS') + .skip_tester('nanoarrow') .skip_tester('Rust'), generate_extension_case() diff --git a/dev/archery/archery/integration/runner.py b/dev/archery/archery/integration/runner.py index 5b66842b25926..0ea244720cc1d 100644 --- a/dev/archery/archery/integration/runner.py +++ b/dev/archery/archery/integration/runner.py @@ -36,6 +36,7 @@ from .tester_java import JavaTester from .tester_js import JSTester from .tester_csharp import CSharpTester +from .tester_nanoarrow import NanoarrowTester from .util import guid, printer from .util import SKIP_C_ARRAY, SKIP_C_SCHEMA, SKIP_FLIGHT, SKIP_IPC from ..utils.source import ARROW_ROOT_DEFAULT @@ -541,8 +542,8 @@ def get_static_json_files(): def run_all_tests(with_cpp=True, with_java=True, with_js=True, with_csharp=True, with_go=True, with_rust=False, - run_ipc=False, run_flight=False, run_c_data=False, - tempdir=None, **kwargs): + with_nanoarrow=False, run_ipc=False, run_flight=False, + run_c_data=False, tempdir=None, **kwargs): tempdir = tempdir or tempfile.mkdtemp(prefix='arrow-integration-') testers: List[Tester] = [] @@ -562,6 +563,9 @@ def run_all_tests(with_cpp=True, with_java=True, with_js=True, if with_go: testers.append(GoTester(**kwargs)) + if with_nanoarrow: + testers.append(NanoarrowTester(**kwargs)) + if with_rust: testers.append(RustTester(**kwargs)) diff --git 
a/dev/archery/archery/integration/tester_nanoarrow.py b/dev/archery/archery/integration/tester_nanoarrow.py new file mode 100644 index 0000000000000..30ff1bb6e50a7 --- /dev/null +++ b/dev/archery/archery/integration/tester_nanoarrow.py @@ -0,0 +1,148 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import functools +import os + +from . import cdata +from .tester import Tester, CDataExporter, CDataImporter +from ..utils.source import ARROW_ROOT_DEFAULT + + +_NANOARROW_PATH = os.environ.get( + "ARROW_NANOARROW_PATH", + os.path.join(ARROW_ROOT_DEFAULT, "nanoarrow/cdata"), +) + +_INTEGRATION_DLL = os.path.join( + _NANOARROW_PATH, "libnanoarrow_c_data_integration" + cdata.dll_suffix +) + + +class NanoarrowTester(Tester): + PRODUCER = False + CONSUMER = False + FLIGHT_SERVER = False + FLIGHT_CLIENT = False + C_DATA_SCHEMA_EXPORTER = True + C_DATA_ARRAY_EXPORTER = True + C_DATA_SCHEMA_IMPORTER = True + C_DATA_ARRAY_IMPORTER = True + + name = "nanoarrow" + + def validate(self, json_path, arrow_path, quirks=None): + raise NotImplementedError() + + def json_to_file(self, json_path, arrow_path): + raise NotImplementedError() + + def stream_to_file(self, stream_path, file_path): + raise NotImplementedError() + + def file_to_stream(self, file_path, stream_path): + raise NotImplementedError() + + def make_c_data_exporter(self): + return NanoarrowCDataExporter(self.debug, self.args) + + def make_c_data_importer(self): + return NanoarrowCDataImporter(self.debug, self.args) + + +_nanoarrow_c_data_entrypoints = """ + const char* nanoarrow_CDataIntegration_ExportSchemaFromJson( + const char* json_path, struct ArrowSchema* out); + + const char* nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson( + const char* json_path, struct ArrowSchema* schema); + + const char* nanoarrow_CDataIntegration_ExportBatchFromJson( + const char* json_path, int num_batch, struct ArrowArray* out); + + const char* nanoarrow_CDataIntegration_ImportBatchAndCompareToJson( + const char* json_path, int num_batch, struct ArrowArray* batch); + + int64_t nanoarrow_BytesAllocated(void); + """ + + +@functools.lru_cache +def _load_ffi(ffi, lib_path=_INTEGRATION_DLL): + ffi.cdef(_nanoarrow_c_data_entrypoints) + dll = ffi.dlopen(lib_path) + return dll + + +class _CDataBase: + def __init__(self, debug, args): + self.debug = debug + self.args = args + self.ffi = cdata.ffi() + self.dll = _load_ffi(self.ffi) + + def _check_nanoarrow_error(self, na_error): + """ + Check a `const char*` error return from an integration entrypoint. + + A null means success, a non-empty string is an error message. + The string is statically allocated on the nanoarrow side and does not + need to be released. 
+ """ + assert self.ffi.typeof(na_error) is self.ffi.typeof("const char*") + if na_error != self.ffi.NULL: + error = self.ffi.string(na_error).decode("utf8", errors="replace") + raise RuntimeError(f"nanoarrow C Data Integration call failed: {error}") + + +class NanoarrowCDataExporter(CDataExporter, _CDataBase): + def export_schema_from_json(self, json_path, c_schema_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ExportSchemaFromJson( + str(json_path).encode(), c_schema_ptr + ) + self._check_nanoarrow_error(na_error) + + def export_batch_from_json(self, json_path, num_batch, c_array_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ExportBatchFromJson( + str(json_path).encode(), num_batch, c_array_ptr + ) + self._check_nanoarrow_error(na_error) + + @property + def supports_releasing_memory(self): + return True + + def record_allocation_state(self): + return self.dll.nanoarrow_BytesAllocated() + + +class NanoarrowCDataImporter(CDataImporter, _CDataBase): + def import_schema_and_compare_to_json(self, json_path, c_schema_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ImportSchemaAndCompareToJson( + str(json_path).encode(), c_schema_ptr + ) + self._check_nanoarrow_error(na_error) + + def import_batch_and_compare_to_json(self, json_path, num_batch, c_array_ptr): + na_error = self.dll.nanoarrow_CDataIntegration_ImportBatchAndCompareToJson( + str(json_path).encode(), num_batch, c_array_ptr + ) + self._check_nanoarrow_error(na_error) + + @property + def supports_releasing_memory(self): + return True diff --git a/docker-compose.yml b/docker-compose.yml index 9bedb59a77be8..7a4d455dfe723 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1751,9 +1751,11 @@ services: volumes: *conda-volumes environment: <<: [*common, *ccache] + ARCHERY_INTEGRATION_WITH_NANOARROW: 0 ARCHERY_INTEGRATION_WITH_RUST: 0 # Tell Archery where Arrow binaries are located ARROW_CPP_EXE_PATH: /build/cpp/debug + ARROW_NANOARROW_PATH: /build/nanoarrow ARROW_RUST_EXE_PATH: /build/rust/debug command: ["/arrow/ci/scripts/integration_arrow_build.sh /arrow /build && From 1e3772cac5f45edb6ada3d20140b77cc86208346 Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Sat, 11 May 2024 12:41:57 +0800 Subject: [PATCH 086/261] GH-41343: [C++][CMake] Remove unused ARROW_NO_DEPRECATED_API (#41345) ### Rationale for this change ARROW_NO_DEPRECATED_API is not used in the source code. ### What changes are included in this PR? Remove the ARROW_NO_DEPRECATED_API CMake variable. ### Are these changes tested? Pass CIs. ### Are there any user-facing changes? Perhaps yes: users who used to set ARROW_NO_DEPRECATED_API to build Arrow will see a warning for an unknown CMake variable.
* GitHub Issue: #41343 Authored-by: Gang Wu Signed-off-by: Gang Wu --- ci/docker/debian-12-cpp.dockerfile | 1 - ci/docker/linux-apt-r.dockerfile | 1 - ci/docker/ubuntu-20.04-cpp-minimal.dockerfile | 1 - ci/docker/ubuntu-20.04-cpp.dockerfile | 1 - ci/docker/ubuntu-22.04-cpp-minimal.dockerfile | 1 - ci/docker/ubuntu-22.04-cpp.dockerfile | 1 - ci/docker/ubuntu-24.04-cpp.dockerfile | 1 - ci/scripts/c_glib_build.sh | 3 --- ci/scripts/cpp_build.sh | 1 - cpp/CMakeLists.txt | 4 ---- cpp/cmake_modules/DefineOptions.cmake | 2 -- dev/conbench_envs/benchmarks.env | 1 - docs/source/developers/cpp/building.rst | 7 ++++--- 13 files changed, 4 insertions(+), 21 deletions(-) diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index 7036ddf27d52a..d7a6f9df2c2ee 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -119,7 +119,6 @@ ENV ARROW_ACERO=ON \ ARROW_GANDIVA=ON \ ARROW_GCS=ON \ ARROW_HOME=/usr/local \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/linux-apt-r.dockerfile b/ci/docker/linux-apt-r.dockerfile index a68354e3abf8d..630b96e1007b9 100644 --- a/ci/docker/linux-apt-r.dockerfile +++ b/ci/docker/linux-apt-r.dockerfile @@ -113,7 +113,6 @@ ENV \ ARROW_GANDIVA=OFF \ ARROW_HDFS=OFF \ ARROW_JSON=ON \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=OFF \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile index ae2ba9421cd55..e17c0306f115d 100644 --- a/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp-minimal.dockerfile @@ -85,7 +85,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index 124256378b287..d78c7a99cf4d6 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -158,7 +158,6 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile index dd887a6d00ceb..341d8a87e8661 100644 --- a/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp-minimal.dockerfile @@ -85,7 +85,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index eb189841cd344..f12e7456add8e 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -196,7 +196,6 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index 4a37818f94396..ecfb5e2f5096d 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -178,7 +178,6 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ - ARROW_NO_DEPRECATED_API=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git 
a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh index c4d2c4fdb5617..6a6295e4ff0bd 100755 --- a/ci/scripts/c_glib_build.sh +++ b/ci/scripts/c_glib_build.sh @@ -30,9 +30,6 @@ with_doc=$([ "${BUILD_DOCS_C_GLIB}" == "ON" ] && echo "true" || echo "false") export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig -export CFLAGS="-DARROW_NO_DEPRECATED_API" -export CXXFLAGS="-DARROW_NO_DEPRECATED_API" - mkdir -p ${build_dir} # Build with Meson diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index ceeab2455bef6..a1f40fc360e2f 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -152,7 +152,6 @@ else -DARROW_JSON=${ARROW_JSON:-ON} \ -DARROW_LARGE_MEMORY_TESTS=${ARROW_LARGE_MEMORY_TESTS:-OFF} \ -DARROW_MIMALLOC=${ARROW_MIMALLOC:-OFF} \ - -DARROW_NO_DEPRECATED_API=${ARROW_NO_DEPRECATED_API:-OFF} \ -DARROW_ORC=${ARROW_ORC:-OFF} \ -DARROW_PARQUET=${ARROW_PARQUET:-OFF} \ -DARROW_RUNTIME_SIMD_LEVEL=${ARROW_RUNTIME_SIMD_LEVEL:-MAX} \ diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index df83f56dd2f70..679842c31e0b1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -431,10 +431,6 @@ endif() # Compiler flags # -if(ARROW_NO_DEPRECATED_API) - add_definitions(-DARROW_NO_DEPRECATED_API) -endif() - if(ARROW_EXTRA_ERROR_CONTEXT) add_definitions(-DARROW_EXTRA_ERROR_CONTEXT) endif() diff --git a/cpp/cmake_modules/DefineOptions.cmake b/cpp/cmake_modules/DefineOptions.cmake index dc0e5da63adb7..41466a1c22404 100644 --- a/cpp/cmake_modules/DefineOptions.cmake +++ b/cpp/cmake_modules/DefineOptions.cmake @@ -158,8 +158,6 @@ if(ARROW_DEFINE_OPTIONS) define_option_string(ARROW_GIT_DESCRIPTION "The Arrow git commit description (if any)" "") - define_option(ARROW_NO_DEPRECATED_API "Exclude deprecated APIs from build" OFF) - define_option(ARROW_POSITION_INDEPENDENT_CODE "Whether to create position-independent target" ON) diff --git a/dev/conbench_envs/benchmarks.env b/dev/conbench_envs/benchmarks.env index 2a5a9c32a86ec..3af29491a8345 100644 --- a/dev/conbench_envs/benchmarks.env +++ b/dev/conbench_envs/benchmarks.env @@ -31,7 +31,6 @@ ARROW_HOME=$CONDA_PREFIX ARROW_INSTALL_NAME_RPATH=ON ARROW_JEMALLOC=OFF ARROW_MIMALLOC=ON -ARROW_NO_DEPRECATED_API=ON ARROW_ORC=ON ARROW_PARQUET=ON ARROW_PYTHON=ON diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 040a046c5153d..7b80d2138c33e 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -627,9 +627,10 @@ outputs like: Deprecations and API Changes ---------------------------- -We use the compiler definition ``ARROW_NO_DEPRECATED_API`` to disable APIs that -have been deprecated. It is a good practice to compile third party applications -with this flag to proactively catch and account for API changes. +We use the macro ``ARROW_DEPRECATED`` which wraps the C++ deprecated attribute for +APIs that have been deprecated. It is a good practice to compile third party +applications with ``-Werror=deprecated-declarations`` (for GCC/Clang or similar +flags of other compilers) to proactively catch and account for API changes. Modular Build Targets --------------------- From a0f9d2eb2fe6743a869f3509f456389cf3de4926 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 12 May 2024 20:09:55 +0900 Subject: [PATCH 087/261] GH-41617: [C++][CMake] Fix ARROW_USE_BOOST detect condition (#41622) ### Rationale for this change We also need Boost when ARROW_FLIGHT and ARROW_TESTING are true. ### What changes are included in this PR? Add the missing condition. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #41617 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index ddea1c399cbba..2102a7fdcdd37 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1263,7 +1263,7 @@ endif() # - S3FS and Flight benchmarks need Boost at runtime. if(ARROW_BUILD_INTEGRATION OR ARROW_BUILD_TESTS - OR (ARROW_FLIGHT AND ARROW_BUILD_BENCHMARKS) + OR (ARROW_FLIGHT AND (ARROW_TESTING OR ARROW_BUILD_BENCHMARKS)) OR (ARROW_S3 AND ARROW_BUILD_BENCHMARKS)) set(ARROW_USE_BOOST TRUE) set(ARROW_BOOST_REQUIRE_LIBRARY TRUE) From 37bd413c8322e0d9527c14420468b99f8abd1715 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Sun, 12 May 2024 13:19:26 +0200 Subject: [PATCH 088/261] GH-40734: [Packaging][Debian] Drop support for Debian bullseye (#41394) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Debian GNU/Linux bullseye will reach EOL on 2024-07: https://wiki.debian.org/DebianReleases We can drop support for it after we release 16.0.0 because 17.0.0 will be released after 2024-07. ### What changes are included in this PR? Remove Debian bullseye support ### Are these changes tested? Yes, on CI. ### Are there any user-facing changes? Yes: Debian bullseye is no longer supported, but there are no breaking code changes. * GitHub Issue: #40734 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- c_glib/meson.build | 2 - dev/release/binary-task.rb | 3 - dev/release/verify-apt.sh | 6 -- dev/release/verify-release-candidate.sh | 4 +- .../apt/debian-bullseye/Dockerfile | 41 --------- .../apt/debian-bullseye-arm64/from | 18 ---- .../apt/debian-bullseye/Dockerfile | 87 ------------------- dev/tasks/linux-packages/package-task.rb | 2 - dev/tasks/tasks.yml | 3 +- r/tools/nixlibs.R | 4 +- 10 files changed, 3 insertions(+), 167 deletions(-) diff --git a/c_glib/meson.build b/c_glib/meson.build index 16a5ea7ccb432..08a9cd182e02e 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -26,8 +26,6 @@ project('arrow-glib', 'c', 'cpp', # Debian: # https://packages.debian.org/search?keywords=meson # - # * bullseye: 0.56.2 - # * bullseye-backports:1.0.0 # * bookworm: 1.0.0 # # Ubuntu: diff --git a/dev/release/binary-task.rb b/dev/release/binary-task.rb index 8fcdcf1f5f442..c2386a1f52f21 100644 --- a/dev/release/binary-task.rb +++ b/dev/release/binary-task.rb @@ -1083,7 +1083,6 @@ def apt_release_repositories_dir def available_apt_targets [ - ["debian", "bullseye", "main"], ["debian", "bookworm", "main"], ["debian", "trixie", "main"], ["ubuntu", "focal", "main"], @@ -2111,8 +2110,6 @@ def apt_test_targets_default # Disable arm64 targets by default for now # because they require some setups on host.
[ - "debian-bullseye", - # "debian-bullseye-arm64", "debian-bookworm", # "debian-bookworm-arm64", "debian-trixie", diff --git a/dev/release/verify-apt.sh b/dev/release/verify-apt.sh index cbb6d93823b21..8c54fe5c11cf1 100755 --- a/dev/release/verify-apt.sh +++ b/dev/release/verify-apt.sh @@ -80,12 +80,6 @@ esac workaround_missing_packages=() case "${distribution}-${code_name}" in - debian-bullseye) - sed \ - -i"" \ - -e "s/ main$/ main contrib non-free/g" \ - /etc/apt/sources.list - ;; debian-*) sed \ -i"" \ diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 95be4800f7ffd..3ed871bd5305b 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -185,9 +185,7 @@ test_binary() { test_apt() { show_header "Testing APT packages" - for target in "debian:bullseye" \ - "arm64v8/debian:bullseye" \ - "debian:bookworm" \ + for target in "debian:bookworm" \ "arm64v8/debian:bookworm" \ "debian:trixie" \ "arm64v8/debian:trixie" \ diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile deleted file mode 100644 index b0842a0c0d6ff..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/apt/debian-bullseye/Dockerfile +++ /dev/null @@ -1,41 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -FROM debian:bullseye - -RUN \ - echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -RUN \ - echo 'APT::Install-Recommends "false";' > \ - /etc/apt/apt.conf.d/disable-install-recommends - -ARG DEBUG - -RUN \ - quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ - apt update ${quiet} && \ - apt install -y -V ${quiet} \ - build-essential \ - debhelper \ - devscripts \ - fakeroot \ - gnupg \ - lsb-release && \ - apt clean && \ - rm -rf /var/lib/apt/lists/* diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from deleted file mode 100644 index 34187b2af5a74..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye-arm64/from +++ /dev/null @@ -1,18 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -arm64v8/debian:bullseye diff --git a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile deleted file mode 100644 index 2edcd4d5ed216..0000000000000 --- a/dev/tasks/linux-packages/apache-arrow/apt/debian-bullseye/Dockerfile +++ /dev/null @@ -1,87 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -ARG FROM=debian:bullseye -FROM ${FROM} - -RUN \ - echo "debconf debconf/frontend select Noninteractive" | \ - debconf-set-selections - -RUN \ - echo "deb http://deb.debian.org/debian bullseye-backports main" > \ - /etc/apt/sources.list.d/backports.list - -RUN \ - echo 'APT::Install-Recommends "false";' > \ - /etc/apt/apt.conf.d/disable-install-recommends - -RUN sed -i'' -e 's/main$/main contrib non-free/g' /etc/apt/sources.list - -ARG DEBUG -RUN \ - quiet=$([ "${DEBUG}" = "yes" ] || echo "-qq") && \ - apt update ${quiet} && \ - apt install -y -V ${quiet} \ - build-essential \ - clang \ - cmake \ - debhelper \ - devscripts \ - git \ - libboost-filesystem-dev \ - libboost-system-dev \ - libbrotli-dev \ - libbz2-dev \ - libc-ares-dev \ - libcurl4-openssl-dev \ - libgirepository1.0-dev \ - libglib2.0-doc \ - libgmock-dev \ - libgoogle-glog-dev \ - libgrpc++-dev \ - libgtest-dev \ - liblz4-dev \ - libprotobuf-dev \ - libprotoc-dev \ - libre2-dev \ - libsnappy-dev \ - libssl-dev \ - libthrift-dev \ - libutf8proc-dev \ - libzstd-dev \ - llvm-dev \ - lsb-release \ - ninja-build \ - nlohmann-json3-dev \ - pkg-config \ - protobuf-compiler-grpc \ - python3-dev \ - python3-pip \ - rapidjson-dev \ - tzdata \ - valac \ - zlib1g-dev && \ - if apt list | grep '^nvidia-cuda-toolkit/'; then \ - apt install -y -V ${quiet} nvidia-cuda-toolkit; \ - fi && \ - apt install -y -V -t bullseye-backports ${quiet} \ - meson && \ - pip3 install gi-docgen && \ - ln -fs /usr/local/bin/gi-docgen /usr/bin && \ - apt clean && \ - rm -rf /var/lib/apt/lists/* diff --git a/dev/tasks/linux-packages/package-task.rb b/dev/tasks/linux-packages/package-task.rb index 3a9e5e48b4585..6bcc397277e3a 100644 --- a/dev/tasks/linux-packages/package-task.rb +++ b/dev/tasks/linux-packages/package-task.rb @@ -267,8 +267,6 @@ def apt_targets_default # Disable arm64 targets by default for now # because they require some setups on host. 
[ - "debian-bullseye", - # "debian-bullseye-arm64", "debian-bookworm", # "debian-bookworm-arm64", "debian-trixie", diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 146fa52fa958b..9d68e57c75dc8 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -451,8 +451,7 @@ tasks: {############################## Linux PKGS ####################################} -{% for target in ["debian-bullseye", - "debian-bookworm", +{% for target in ["debian-bookworm", "debian-trixie", "ubuntu-focal", "ubuntu-jammy", diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R index 0af41888b95b7..def4d35f825be 100644 --- a/r/tools/nixlibs.R +++ b/r/tools/nixlibs.R @@ -386,9 +386,7 @@ distro <- function() { out$id <- tolower(out$id) # debian unstable & testing lsb_release `version` don't include numbers but we can map from pretty name if (is.null(out$version) || out$version %in% c("testing", "unstable")) { - if (grepl("bullseye", out$codename)) { - out$short_version <- "11" - } else if (grepl("bookworm", out$codename)) { + if (grepl("bookworm", out$codename)) { out$short_version <- "12" } } else if (out$id == "ubuntu") { From 6d0321554374523ae0633d6bfe42cdeeb3b5d145 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Sun, 12 May 2024 11:00:26 -0400 Subject: [PATCH 089/261] GH-41450: [R][CI] rhub/container follow ons (#41451) More CI changes: * GitHub Issue: #41450 (specifically use the rhub containers approach for clang sanitizer, remove some of our work arounds) * Remove CentOS 7 CI support for R Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- .env | 3 --- .github/workflows/r.yml | 3 +-- ci/docker/linux-r.dockerfile | 3 --- ci/scripts/java_jni_manylinux_build.sh | 3 --- ci/scripts/r_docker_configure.sh | 20 ---------------- ci/scripts/r_sanitize.sh | 2 ++ ci/scripts/r_test.sh | 3 --- dev/tasks/r/azure.linux.yml | 1 - dev/tasks/r/github.packages.yml | 7 +++--- dev/tasks/tasks.yml | 13 ++-------- docker-compose.yml | 16 +++++-------- r/tools/test-nixlibs.R | 4 ---- r/tools/ubsan.supp | 1 + r/vignettes/install.Rmd | 33 -------------------------- 14 files changed, 15 insertions(+), 97 deletions(-) diff --git a/.env b/.env index ab2e4b4fbe7fb..27474b2c73199 100644 --- a/.env +++ b/.env @@ -86,9 +86,6 @@ ARROW_R_DEV=TRUE R_PRUNE_DEPS=FALSE TZ=UTC -# Any non-empty string will install devtoolset-${DEVTOOLSET_VERSION} -DEVTOOLSET_VERSION= - # Used through docker-compose.yml and serves as the default version for the # ci/scripts/install_vcpkg.sh script. Prefer to use short SHAs to keep the # docker tags more readable. 
diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index 8228aaad7ce37..aba77347659cd 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -192,12 +192,11 @@ jobs: fail-fast: false matrix: config: - - { org: "rhub", image: "ubuntu-gcc12", tag: "latest", devtoolset: "" } + - { org: "rhub", image: "ubuntu-gcc12", tag: "latest" } env: R_ORG: ${{ matrix.config.org }} R_IMAGE: ${{ matrix.config.image }} R_TAG: ${{ matrix.config.tag }} - DEVTOOLSET_VERSION: ${{ matrix.config.devtoolset }} steps: - name: Checkout Arrow uses: actions/checkout@3df4ab11eba7bda6032a0b82a6bb43b11571feac # v4.0.0 diff --git a/ci/docker/linux-r.dockerfile b/ci/docker/linux-r.dockerfile index d368a6629c587..7b7e989adc0d1 100644 --- a/ci/docker/linux-r.dockerfile +++ b/ci/docker/linux-r.dockerfile @@ -27,9 +27,6 @@ ENV R_BIN=${r_bin} ARG r_dev=FALSE ENV ARROW_R_DEV=${r_dev} -ARG devtoolset_version= -ENV DEVTOOLSET_VERSION=${devtoolset_version} - ARG r_prune_deps=FALSE ENV R_PRUNE_DEPS=${r_prune_deps} diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index da4987d307ce4..4921ce170b7a9 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -35,9 +35,6 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" -devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ - grep -o "^[0-9]*") -devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" : ${ARROW_ACERO:=ON} export ARROW_ACERO : ${ARROW_BUILD_TESTS:=ON} diff --git a/ci/scripts/r_docker_configure.sh b/ci/scripts/r_docker_configure.sh index 52db2e6df6611..8a962fe576cbb 100755 --- a/ci/scripts/r_docker_configure.sh +++ b/ci/scripts/r_docker_configure.sh @@ -67,26 +67,6 @@ sloppiness = include_file_ctime hash_dir = false" >> ~/.ccache/ccache.conf fi -# Special hacking to try to reproduce quirks on centos using non-default build -# tooling. -if [[ -n "$DEVTOOLSET_VERSION" ]]; then - $PACKAGE_MANAGER install -y centos-release-scl - $PACKAGE_MANAGER install -y "devtoolset-$DEVTOOLSET_VERSION" - - # Enable devtoolset here so that `which gcc` finds the right compiler below - source /opt/rh/devtoolset-${DEVTOOLSET_VERSION}/enable - - # Build images which require the devtoolset don't have CXX17 variables - # set as the system compiler doesn't support C++17 - if [ ! 
"`{R_BIN} CMD config CXX17`" ]; then - mkdir -p ~/.R - echo "CC = $(which gcc) -fPIC" >> ~/.R/Makevars - echo "CXX17 = $(which g++) -fPIC" >> ~/.R/Makevars - echo "CXX17STD = -std=c++17" >> ~/.R/Makevars - echo "CXX17FLAGS = ${CXX11FLAGS}" >> ~/.R/Makevars - fi -fi - if [ -f "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" ]; then "${ARROW_SOURCE_HOME}/ci/scripts/r_install_system_dependencies.sh" fi diff --git a/ci/scripts/r_sanitize.sh b/ci/scripts/r_sanitize.sh index f7ed07f0c864b..fb3e9a5836387 100755 --- a/ci/scripts/r_sanitize.sh +++ b/ci/scripts/r_sanitize.sh @@ -46,6 +46,8 @@ unset ARROW_R_DEV export ARROW_R_VERBOSE_TEST=TRUE export UBSAN_OPTIONS="print_stacktrace=1,suppressions=/arrow/r/tools/ubsan.supp" +# From the old rhub image https://github.com/r-hub/rhub-linux-builders/blob/master/fedora-clang-devel-san/Dockerfile +export ASAN_OPTIONS="alloc_dealloc_mismatch=0:detect_leaks=0:detect_odr_violation=0" # run tests pushd tests diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index 95a49ee83a79b..e13da45e2d296 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -48,9 +48,6 @@ if [ "$ARROW_USE_PKG_CONFIG" != "false" ]; then fi export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} ${ARROW_R_CXXFLAGS}" -# These should generally be picked up, but are slightly wrong in rhub's containers it appears -# https://github.com/r-hub/containers/pull/63 -export _R_CHECK_COMPILATION_FLAGS_KNOWN_="${_R_CHECK_COMPILATION_FLAGS_KNOWN_} -Wno-parentheses -Werror=format-security -Wp,-D_FORTIFY_SOURCE=3" if [ "$ARROW_R_DEV" = "TRUE" ]; then # These are sometimes used in the Arrow C++ build and are not a problem diff --git a/dev/tasks/r/azure.linux.yml b/dev/tasks/r/azure.linux.yml index e26a59629fa1a..28893a81728c3 100644 --- a/dev/tasks/r/azure.linux.yml +++ b/dev/tasks/r/azure.linux.yml @@ -38,7 +38,6 @@ jobs: export R_ORG={{ r_org }} export R_IMAGE={{ r_image }} export R_TAG={{ r_tag }} - export DEVTOOLSET_VERSION={{ devtoolset_version|default("") }} export R_CUSTOM_CCACHE={{ r_custom_ccache|default("false") }} docker-compose pull --ignore-pull-failures r docker-compose build r diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 9ca7e59a957de..41d8b230f8bf4 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -299,14 +299,14 @@ jobs: # choosing a binary on this OS. If libarrow_binary is TRUE, we're on # an OS that is not in the allowlist, so we have to opt-in to use the # binary. Other env vars used in r_docker_configure.sh can be added - # here (like devtoolset) and wired up in the later steps. + # here and wired up in the later steps. 
- {image: "rhub/ubuntu-clang", libarrow_binary: "TRUE"} # fedora-clang-devel cannot use binaries bc of libc++ (uncomment to see the error) # - {image: "rhub/fedora-clang-devel", libarrow_binary: "TRUE"} - {image: "rhub/ubuntu-release"} # currently ubuntu-22.04 - {image: "rocker/r-ver:4.0.0"} # ubuntu-20.04 - - {image: "rstudio/r-base:4.1-focal"} # ubuntu-20.04 - - {image: "rstudio/r-base:4.2-centos7", devtoolset: "8"} + - {image: "rstudio/r-base:4.1-focal"} + - {image: "rstudio/r-base:4.2-jammy"} - {image: "rstudio/r-base:4.3-noble"} steps: # Get the arrow checkout just for the docker config scripts @@ -317,7 +317,6 @@ jobs: - name: Install system requirements env: ARROW_R_DEV: "TRUE" # To install curl/openssl in r_docker_configure.sh - DEVTOOLSET_VERSION: {{ '${{ matrix.config.devtoolset }}' }} shell: bash run: | # Make sure R is on the path for the R-hub devel versions (where RPREFIX is set in its dockerfile) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 9d68e57c75dc8..5bf5037652dd9 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1410,15 +1410,6 @@ tasks: GCC_VERSION: 12 image: ubuntu-r-only-r - test-r-rstudio-r-base-4.2-centos7-devtoolset-8: - ci: azure - template: r/azure.linux.yml - params: - r_org: rstudio - r_image: r-base - r_tag: 4.2-centos7 - devtoolset_version: 8 - test-r-minimal-build: ci: azure template: r/azure.linux.yml @@ -1436,13 +1427,13 @@ tasks: R_PRUNE_DEPS: TRUE image: ubuntu-r-sanitizer - test-fedora-r-clang-sanitizer: + test-r-clang-sanitizer: ci: github template: docker-tests/github.linux.yml params: env: R_PRUNE_DEPS: TRUE - image: fedora-r-clang-sanitizer + image: r-clang-sanitizer {% for go_version, staticcheck in [("1.21", "v0.4.7"), ("1.22", "latest")] %} test-debian-12-go-{{ go_version }}: diff --git a/docker-compose.yml b/docker-compose.yml index 7a4d455dfe723..a1d8f60a268d8 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -162,7 +162,7 @@ x-hierarchy: - ubuntu-r-valgrind - ubuntu-swift - ubuntu-verify-rc - - fedora-r-clang-sanitizer + - r-clang-sanitizer - r - r-revdepcheck # helper services @@ -1472,7 +1472,6 @@ services: args: base: ${R_ORG}/${R_IMAGE}:${R_TAG} r_dev: ${ARROW_R_DEV} - devtoolset_version: ${DEVTOOLSET_VERSION} tz: ${TZ} r_prune_deps: ${R_PRUNE_DEPS} r_custom_ccache: ${R_CUSTOM_CCACHE} @@ -1484,7 +1483,6 @@ services: ARROW_R_DEV: ${ARROW_R_DEV} # To test for CRAN release, delete ^^ these two env vars so we download the Apache release ARROW_USE_PKG_CONFIG: "false" - devtoolset_version: ${DEVTOOLSET_VERSION} volumes: - .:/arrow:delegated command: > @@ -1517,19 +1515,17 @@ services: /bin/bash -c " /arrow/ci/scripts/r_sanitize.sh /arrow" - fedora-r-clang-sanitizer: - image: ${REPO}:r-rhub-fedora-clang-devel-latest + r-clang-sanitizer: + image: ${REPO}:r-rhub-clang-devel-latest build: context: . 
dockerfile: ci/docker/linux-r.dockerfile cache_from: - - ${REPO}:r-rhub-fedora-clang-devel-latest + - ${REPO}:r-rhub-clang-devel-latest args: - # TODO: change this to rhub/clang-asan - base: rhub/fedora-clang-devel-san + base: rhub/clang-asan r_dev: ${ARROW_R_DEV} - devtoolset_version: ${DEVTOOLSET_VERSION} - r_bin: RDsan + r_bin: R tz: ${TZ} r_prune_deps: ${R_PRUNE_DEPS} shm_size: *shm-size diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R index 6996f234ced45..02e822c3420c8 100644 --- a/r/tools/test-nixlibs.R +++ b/r/tools/test-nixlibs.R @@ -158,10 +158,6 @@ test_that("check_allowlist", { }) test_that("find_latest_nightly()", { - skip_if( - getRversion() > "4.4.0", - "long last version components (>8) fail to max on r-devel" - ) tf <- tempfile() tf_uri <- paste0("file://", tf) on.exit(unlink(tf)) diff --git a/r/tools/ubsan.supp b/r/tools/ubsan.supp index ff88cf984136b..34854e79bcbf9 100644 --- a/r/tools/ubsan.supp +++ b/r/tools/ubsan.supp @@ -16,3 +16,4 @@ # under the License. vptr:include/c++/8/bits/shared_ptr_base.h +function:cleancall.c \ No newline at end of file diff --git a/r/vignettes/install.Rmd b/r/vignettes/install.Rmd index cc90c5ff08c60..c7b8251ccc99b 100644 --- a/r/vignettes/install.Rmd +++ b/r/vignettes/install.Rmd @@ -28,35 +28,6 @@ For `gcc`, this generally means version 7 or newer. Most contemporary Linux distributions have a new enough compiler; however, CentOS 7 is a notable exception, as it ships with gcc 4.8. -If you are on CentOS 7, to build arrow you will need to install a newer `devtoolset`, and you'll need to update R's Makevars to define the `CXX17` variables. This script installs `devtoolset-8` and configures R to be able to use C++17: - -``` -#!/usr/bin/env bash - -yum install -y centos-release-scl -yum install -y devtoolset-8 -# Optional: also install cloud storage dependencies, as described below -yum install -y libcurl-devel openssl-devel - -source /opt/rh/devtoolset-8/enable - -if [ ! `R CMD config CXX17` ]; then - mkdir -p ~/.R - echo "CC = $(which gcc) -fPIC" >> ~/.R/Makevars - echo "CXX17 = $(which g++) -fPIC" >> ~/.R/Makevars - echo "CXX17STD = -std=c++17" >> ~/.R/Makevars - echo "CXX17FLAGS = ${CXX11FLAGS}" >> ~/.R/Makevars -fi -``` - -Note that the C++17 compiler is only required at *build* time. You don't need -to enable the devtoolset every time you load the package. What's more, if you -install a binary package from RStudio Package Manager (see method 1a below), you -do not need to set up any of this. Likewise, if you `R CMD INSTALL --build` -arrow on a CentOS machine with the newer compilers, you can take the binary -package it produces and install it on any other CentOS machine without those -compilers. - ### Libraries Optional support for reading from cloud storage--AWS S3 and @@ -517,10 +488,6 @@ The install script should work everywhere, so if libarrow fails to compile, please [report an issue](https://issues.apache.org/jira/projects/ARROW/issues) so that we can improve the script. -### Known installation issues - -* On CentOS, building the package requires a more modern `devtoolset` than the default system compilers. See "System dependencies" above. 
- ## Contributing We are constantly working to make the installation process as painless as From 9a3973c9eec639de4750dcba334711a2a3c707a6 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 13 May 2024 06:45:45 +0900 Subject: [PATCH 090/261] GH-41626: [R][CI] Update OpenSUSE to 15.5 from 15.3 (#41627) ### Rationale for this change OpenSUSE 15.3 reached EOL and rstudio/r-builds dropped support for it: https://github.com/rstudio/r-builds/pull/177 ### What changes are included in this PR? Use `4.1-opensuse155` instead of `4.1-opensuse153`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #41626 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/tasks/tasks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index 5bf5037652dd9..d8e09ec2070bb 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -1358,7 +1358,7 @@ tasks: {% for r_org, r_image, r_tag in [("rhub", "ubuntu-release", "latest"), ("rocker", "r-ver", "latest"), ("rstudio", "r-base", "4.2-focal"), - ("rstudio", "r-base", "4.1-opensuse153")] %} + ("rstudio", "r-base", "4.1-opensuse155")] %} test-r-{{ r_org }}-{{ r_image }}-{{ r_tag }}: ci: azure template: r/azure.linux.yml From 0e9896dc54ca82876171521d206ad0906535572f Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Mon, 13 May 2024 06:54:20 +0700 Subject: [PATCH 091/261] GH-40944: [Java] Implement TypeEqualsVisitor for StringView (#41606) ### Rationale for this change Resolves #40944 . ### What changes are included in this PR? ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #40944 Authored-by: Tai Le Manh Signed-off-by: David Li --- .../vector/compare/TypeEqualsVisitor.java | 2 +- .../vector/compare/TestTypeEqualsVisitor.java | 41 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java index 9bbe5c1b8997c..aaef161a563be 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/TypeEqualsVisitor.java @@ -88,7 +88,7 @@ public Boolean visit(BaseLargeVariableWidthVector left, Void value) { @Override public Boolean visit(BaseVariableWidthViewVector left, Void value) { - throw new UnsupportedOperationException("View vectors are not supported."); + return compareField(left.getField(), right.getField()); } @Override diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java index 62fa0336ea925..736b0f1b1aeac 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; +import java.nio.charset.StandardCharsets; import java.util.HashMap; import java.util.Map; @@ -30,6 +31,8 @@ import org.apache.arrow.vector.Float8Vector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarBinaryVector; +import org.apache.arrow.vector.ViewVarCharVector; import 
org.apache.arrow.vector.complex.DenseUnionVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.StructVector; @@ -176,4 +179,42 @@ public void testDenseUnionTypeEquals() { assertFalse(typeVisitor.equals(vector1)); } } + + @Test + public void testStringViewTypeEquals() { + try (final ViewVarCharVector varchar1 = new ViewVarCharVector("varchar1", allocator); + final ViewVarCharVector varchar2 = new ViewVarCharVector("varchar2", allocator); + final ViewVarBinaryVector binary = new ViewVarBinaryVector("binary", allocator)) { + final int valueCount = 2; + final byte[] str0 = "apache".getBytes(StandardCharsets.UTF_8); + final byte[] str1 = "arrow".getBytes(StandardCharsets.UTF_8); + + // add elements for varchar1 + varchar1.allocateNew(48, valueCount); + varchar1.set(0, str0); + varchar1.set(1, str1); + varchar1.setValueCount(valueCount); + + // add elements for varchar2 in a difference order + varchar2.allocateNew(48, valueCount); + varchar2.set(0, str1); + varchar2.set(1, str0); + varchar2.setValueCount(valueCount); + + // add elements for binary + binary.allocateNew(48, valueCount); + binary.set(0, str0); + binary.set(1, str1); + binary.setValueCount(valueCount); + + // compare ignore check name + TypeEqualsVisitor visitor = new TypeEqualsVisitor(varchar1, /* check name */ false, /* check meta data */ true); + assertTrue(visitor.equals(varchar2)); + assertFalse(visitor.equals(binary)); + + // if we check names, the types should be different + visitor = new TypeEqualsVisitor(varchar1, /* check name */ true, /* check meta data */ true); + assertFalse(visitor.equals(varchar2)); + } + } } From 2552c26c9625f8f0c538a520d26c11d13ce3b48d Mon Sep 17 00:00:00 2001 From: Gang Wu Date: Mon, 13 May 2024 10:37:36 +0800 Subject: [PATCH 092/261] MINOR: [C++][ORC][CMake] Fix ORC CMake args to use LZ4_STATIC_LIB (#41632) ### Rationale for this change In the ORC_CMAKE_ARGS, we use `LZ4_STATIC_LIBRARY` but the Apache ORC library expects [`LZ4_STATIC_LIB`](https://github.com/apache/orc/blob/08aaebc371927e6bb9a0f19c7cc90478200e3b6f/cmake_modules/ThirdpartyToolchain.cmake#L313) ### What changes are included in this PR? Switch `LZ4_STATIC_LIBRARY` to `LZ4_STATIC_LIB` in the ORC_CMAKE_ARGS. ### Are these changes tested? Pass CIs. ### Are there any user-facing changes? No. Authored-by: Gang Wu Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/ThirdpartyToolchain.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 2102a7fdcdd37..c24442dcb8749 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -4522,7 +4522,7 @@ macro(build_orc) "-DSNAPPY_HOME=${ORC_SNAPPY_ROOT}" "-DSNAPPY_LIBRARY=$" "-DLZ4_LIBRARY=$" - "-DLZ4_STATIC_LIBRARY=$" + "-DLZ4_STATIC_LIB=$" "-DLZ4_INCLUDE_DIR=${ORC_LZ4_ROOT}/include" "-DSNAPPY_INCLUDE_DIR=${ORC_SNAPPY_INCLUDE_DIR}" "-DZSTD_HOME=${ORC_ZSTD_ROOT}" From a0c0ffdeac94fc3e1bfcaf0c4b0260f0b4a7717b Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 13 May 2024 18:00:15 +0900 Subject: [PATCH 093/261] GH-41587: [Docs][Python] Remove duplicate contents (#41588) ### Rationale for this change Remove duplicate contents ### What changes are included in this PR? - Remove duplicate contents - Remove `,` for consistency ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #41587 Authored-by: Hyunseok Seo Signed-off-by: AlenkaF --- docs/source/python/api/arrays.rst | 4 ++-- docs/source/python/api/compute.rst | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index e6f6c3dbbd3d1..aefed00b3d2e0 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -63,8 +63,8 @@ may expose data type-specific methods or properties. FixedSizeBinaryArray LargeBinaryArray LargeStringArray - BinaryViewArray, - StringViewArray, + BinaryViewArray + StringViewArray Time32Array Time64Array Date32Array diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index ae48578a1bd61..f2ac6bd1e1226 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -540,7 +540,6 @@ Compute Options AssumeTimezoneOptions CastOptions CountOptions - CountOptions CumulativeSumOptions DayOfWeekOptions DictionaryEncodeOptions @@ -566,7 +565,6 @@ Compute Options RoundToMultipleOptions RunEndEncodeOptions ScalarAggregateOptions - ScalarAggregateOptions SelectKOptions SetLookupOptions SliceOptions @@ -578,7 +576,6 @@ Compute Options StructFieldOptions TakeOptions TDigestOptions - TDigestOptions TrimOptions VarianceOptions WeekOptions From 875e4df48e29ee76f06a4b90e8a6a0cc8f93743b Mon Sep 17 00:00:00 2001 From: mwish Date: Mon, 13 May 2024 21:52:30 +0800 Subject: [PATCH 094/261] GH-41361: [C++][Parquet] Optimize DelimitRecords by batch execution when max_rep_level > 1 (#41362) ### Rationale for this change We uses Parquet to store nested types. When doing benchmarks, the nested types spend half time on `DelimitRecords`. The flamegraph can be seen in the issue. It can be reproduced when running `parquet-column-reader-benchmark` This patch optimize DelimitRecords by batch execution. The previous code is slow because of branching. This patch changes branch to batch execution. This could be a bit slower on some scenerio, but generally it makes DelimitRecords faster. ### What changes are included in this PR? Change logic of DelimitRecords to batch execution. ### Are these changes tested? Already has tests ### Are there any user-facing changes? 
Yes * GitHub Issue: #41361 Lead-authored-by: mwish Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/parquet/column_reader.cc | 71 ++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 30 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index eae7ac4252735..a4794c564733a 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -1675,44 +1675,55 @@ class TypedRecordReader : public TypedColumnReaderImpl, // // \return Number of records delimited int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) { - int64_t values_to_read = 0; + if (ARROW_PREDICT_FALSE(num_records == 0 || levels_position_ == levels_written_)) { + *values_seen = 0; + return 0; + } int64_t records_read = 0; - - const int16_t* def_levels = this->def_levels() + levels_position_; - const int16_t* rep_levels = this->rep_levels() + levels_position_; - + const int16_t* const rep_levels = this->rep_levels(); + const int16_t* const def_levels = this->def_levels(); ARROW_DCHECK_GT(this->max_rep_level_, 0); - - // Count logical records and number of values to read - while (levels_position_ < levels_written_) { - const int16_t rep_level = *rep_levels++; - if (rep_level == 0) { - // If at_record_start_ is true, we are seeing the start of a record - // for the second time, such as after repeated calls to - // DelimitRecords. In this case we must continue until we find - // another record start or exhausting the ColumnChunk - if (!at_record_start_) { - // We've reached the end of a record; increment the record count. - ++records_read; - if (records_read == num_records) { - // We've found the number of records we were looking for. Set - // at_record_start_ to true and break - at_record_start_ = true; - break; - } - } - } + // If at_record_start_ is true, we are seeing the start of a record + // for the second time, such as after repeated calls to + // DelimitRecords. In this case we must continue until we find + // another record start or exhausting the ColumnChunk + int64_t level = levels_position_; + if (at_record_start_) { + ARROW_DCHECK_EQ(0, rep_levels[levels_position_]); + ++levels_position_; // We have decided to consume the level at this position; therefore we // must advance until we find another record boundary at_record_start_ = false; + } - const int16_t def_level = *def_levels++; - if (def_level == this->max_def_level_) { - ++values_to_read; + // Count logical records and number of non-null values to read + ARROW_DCHECK(!at_record_start_); + // Scan repetition levels to find record end + while (levels_position_ < levels_written_) { + // We use an estimated batch size to simplify branching and + // improve performance in the common case. This might slow + // things down a bit if a single long record remains, though. + int64_t stride = + std::min(levels_written_ - levels_position_, num_records - records_read); + const int64_t position_end = levels_position_ + stride; + for (int64_t i = levels_position_; i < position_end; ++i) { + records_read += rep_levels[i] == 0; + } + levels_position_ = position_end; + if (records_read == num_records) { + // Check last rep_level reaches the boundary and + // pop the last level. + ARROW_CHECK_EQ(rep_levels[levels_position_ - 1], 0); + --levels_position_; + // We've found the number of records we were looking for. 
Set + // at_record_start_ to true and break + at_record_start_ = true; + break; } - ++levels_position_; } - *values_seen = values_to_read; + // Scan definition levels to find number of physical values + *values_seen = std::count(def_levels + level, def_levels + levels_position_, + this->max_def_level_); return records_read; } From a715ea06b71ec206a987d7921264778e9954404b Mon Sep 17 00:00:00 2001 From: Gavin Murrison <2135106+voidstar69@users.noreply.github.com> Date: Mon, 13 May 2024 16:38:14 +0100 Subject: [PATCH 095/261] GH-38692: [C#] Implement ICollection on scalar arrays (#41539) ### What changes are included in this PR? This PR makes the following array types support ICollection : - PrimitiveArray - BooleanArray - Date32Array - Date64Array - Time32Array - Time64Array - BinaryArray - TimestampArray - StringArray ### Are these changes tested? Yes ### Are there any user-facing changes? No Closes #38692 * GitHub Issue: #38692 Authored-by: voidstar69 Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow/Arrays/BinaryArray.cs | 27 +++- .../src/Apache.Arrow/Arrays/BooleanArray.cs | 29 +++- csharp/src/Apache.Arrow/Arrays/Date32Array.cs | 63 +++++++- csharp/src/Apache.Arrow/Arrays/Date64Array.cs | 63 +++++++- .../src/Apache.Arrow/Arrays/IntervalArray.cs | 2 +- .../src/Apache.Arrow/Arrays/PrimitiveArray.cs | 37 ++++- .../Arrays/PrimitiveArrayBuilder.cs | 2 +- csharp/src/Apache.Arrow/Arrays/StringArray.cs | 27 +++- csharp/src/Apache.Arrow/Arrays/Time32Array.cs | 27 +++- csharp/src/Apache.Arrow/Arrays/Time64Array.cs | 27 +++- .../src/Apache.Arrow/Arrays/TimestampArray.cs | 27 +++- .../src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 2 +- .../Apache.Arrow.IntegrationTest/JsonFile.cs | 6 +- .../Apache.Arrow.Tests/ArrowArrayTests.cs | 145 +++++++++++++++++- .../Apache.Arrow.Tests/Date32ArrayTests.cs | 2 +- .../Extensions/DateTimeOffsetExtensions.cs | 2 - .../Apache.Arrow.Tests/UnionArrayTests.cs | 2 +- 17 files changed, 450 insertions(+), 40 deletions(-) diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs index 1bd4035d5b9da..0c84fa2be23d9 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -22,7 +22,7 @@ namespace Apache.Arrow { - public class BinaryArray : Array, IReadOnlyList + public class BinaryArray : Array, IReadOnlyList, ICollection { public class Builder : BuilderBase { @@ -380,5 +380,30 @@ IEnumerator IEnumerable.GetEnumerator() } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(byte[]? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(byte[]? 
item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(byte[] item) + { + for (int index = 0; index < Length; index++) + { + if (GetBytes(index).SequenceEqual(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(byte[][] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetBytes(srcIndex).ToArray(); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs index e9c5f8979e48f..19d4d0b7ed564 100644 --- a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs @@ -21,7 +21,7 @@ namespace Apache.Arrow { - public class BooleanArray: Array, IReadOnlyList + public class BooleanArray: Array, IReadOnlyList, ICollection { public class Builder : IArrowArrayBuilder { @@ -188,7 +188,7 @@ public bool GetBoolean(int index) public bool? GetValue(int index) { return IsNull(index) - ? (bool?)null + ? null : BitUtility.GetBit(ValueBuffer.Span, index + Offset); } @@ -205,5 +205,30 @@ public bool GetBoolean(int index) } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(bool? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(bool? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(bool? item) + { + for (int index = 0; index < Length; index++) + { + if (GetValue(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(bool?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetValue(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs index 6ab4986f573e2..55864e89e2eb3 100644 --- a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs @@ -23,9 +23,9 @@ namespace Apache.Arrow /// The class holds an array of dates in the Date32 format, where each date is /// stored as the number of days since the dawn of (UNIX) time. 
/// - public class Date32Array : PrimitiveArray, IReadOnlyList + public class Date32Array : PrimitiveArray, IReadOnlyList, ICollection #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { private static readonly DateTime _epochDate = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Unspecified); @@ -40,10 +40,9 @@ public class Builder : DateArrayBuilder { private class DateBuilder : PrimitiveArrayBuilder { - protected override Date32Array Build( - ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, - int length, int nullCount, int offset) => - new Date32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset); + protected override Date32Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length, + int nullCount, int offset) => + new(valueBuffer, nullBitmapBuffer, length, nullCount, offset); } /// @@ -149,6 +148,31 @@ public Date32Array(ArrayData data) yield return GetDateOnly(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateOnly(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateOnly(srcIndex); + } + } #endif int IReadOnlyCollection.Count => Length; @@ -160,7 +184,32 @@ public Date32Array(ArrayData data) for (int index = 0; index < Length; index++) { yield return GetDateTime(index); - }; + } + } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTime? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTime?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateTime(srcIndex); + } } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs index 43e698e10b25c..77538ce59ffae 100644 --- a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs @@ -24,9 +24,9 @@ namespace Apache.Arrow /// stored as the number of milliseconds since the dawn of (UNIX) time, excluding leap seconds, in multiples of /// 86400000. 
/// - public class Date64Array : PrimitiveArray, IReadOnlyList + public class Date64Array : PrimitiveArray, IReadOnlyList, ICollection #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { private const long MillisecondsPerDay = 86400000; @@ -45,10 +45,9 @@ public class Builder : DateArrayBuilder { private class DateBuilder : PrimitiveArrayBuilder { - protected override Date64Array Build( - ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, - int length, int nullCount, int offset) => - new Date64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset); + protected override Date64Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length, + int nullCount, int offset) => + new(valueBuffer, nullBitmapBuffer, length, nullCount, offset); } /// @@ -151,6 +150,31 @@ public Date64Array(ArrayData data) yield return GetDateOnly(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateOnly(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateOnly(srcIndex); + } + } #endif int IReadOnlyCollection.Count => Length; @@ -162,7 +186,32 @@ public Date64Array(ArrayData data) for (int index = 0; index < Length; index++) { yield return GetDateTime(index); - }; + } + } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTime? 
item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTime?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateTime(srcIndex); + } } } } diff --git a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs index de4fc42b4cf92..3949af877b0c5 100644 --- a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs @@ -31,7 +31,7 @@ internal static class IntervalArray } public abstract class IntervalArray : PrimitiveArray - where T : struct + where T : struct, IEquatable { protected IntervalArray(ArrayData data) : base(data) diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs index 0456c5cc65ba4..05d659b5270ad 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs @@ -20,8 +20,8 @@ namespace Apache.Arrow { - public abstract class PrimitiveArray : Array, IReadOnlyList - where T : struct + public abstract class PrimitiveArray : Array, IReadOnlyList, ICollection + where T : struct, IEquatable { protected PrimitiveArray(ArrayData data) : base(data) @@ -40,7 +40,7 @@ protected PrimitiveArray(ArrayData data) { throw new ArgumentOutOfRangeException(nameof(index)); } - return IsValid(index) ? Values[index] : (T?)null; + return IsValid(index) ? Values[index] : null; } public IList ToList(bool includeNulls = false) @@ -86,5 +86,36 @@ IEnumerator IEnumerable.GetEnumerator() yield return IsValid(index) ? Values[index] : null; } } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(T? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(T? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(T? 
item) + { + if (item == null) + { + return NullCount > 0; + } + + ReadOnlySpan values = Values; + while (values.Length > 0) + { + int index = Values.IndexOf(item.Value); + if (index < 0 || IsValid(index)) { return index >= 0; } + values = values.Slice(index + 1); + } + return false; + } + + void ICollection.CopyTo(T?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetValue(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs index 67fe46633c18f..ae02173fb0df4 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs @@ -20,7 +20,7 @@ namespace Apache.Arrow { - public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder + public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder where TTo : struct where TArray : IArrowArray where TBuilder : class, IArrowArrayBuilder diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/csharp/src/Apache.Arrow/Arrays/StringArray.cs index a3ec596adc7ba..ab44805d8d1e9 100644 --- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs @@ -22,7 +22,7 @@ namespace Apache.Arrow { - public class StringArray: BinaryArray, IReadOnlyList + public class StringArray: BinaryArray, IReadOnlyList, ICollection { public static readonly Encoding DefaultEncoding = Encoding.UTF8; @@ -164,5 +164,30 @@ IEnumerator IEnumerable.GetEnumerator() } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(string item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(string item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(string item) + { + for (int index = 0; index < Length; index++) + { + if (GetString(index) == item) + return true; + } + + return false; + } + + void ICollection.CopyTo(string[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetString(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs index e9c2d7a4d9b28..63c0898935ba5 100644 --- a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs @@ -26,7 +26,7 @@ namespace Apache.Arrow /// public class Time32Array : PrimitiveArray #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { /// @@ -171,6 +171,31 @@ public Time32Array(ArrayData data) yield return GetTime(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(TimeOnly? 
item) + { + for (int index = 0; index < Length; index++) + { + if (GetTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(TimeOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTime(srcIndex); + } + } #endif } } diff --git a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs index fc18dfb8bf726..5518462952050 100644 --- a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs @@ -26,7 +26,7 @@ namespace Apache.Arrow /// public class Time64Array : PrimitiveArray #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { /// @@ -162,6 +162,31 @@ public Time64Array(ArrayData data) yield return GetTime(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(TimeOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(TimeOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTime(srcIndex); + } + } #endif } } diff --git a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs index ccb656854a5df..b83860584707e 100644 --- a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs @@ -21,7 +21,7 @@ namespace Apache.Arrow { - public class TimestampArray : PrimitiveArray, IReadOnlyList + public class TimestampArray : PrimitiveArray, IReadOnlyList, ICollection { private static readonly DateTimeOffset s_epoch = new DateTimeOffset(1970, 1, 1, 0, 0, 0, 0, TimeSpan.Zero); @@ -157,5 +157,30 @@ public DateTimeOffset GetTimestampUnchecked(int index) yield return GetTimestamp(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTimeOffset? 
item) + { + for (int index = 0; index < Length; index++) + { + if (GetTimestamp(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTimeOffset?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTimestamp(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index b11479c0d4460..c66569afeba85 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -148,7 +148,7 @@ public void VisitArray(IArrowArray array) public void Visit(MonthDayNanosecondIntervalArray array) => VisitPrimitiveArray(array); private void VisitPrimitiveArray(PrimitiveArray array) - where T : struct + where T : struct, IEquatable { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length)); diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 31a5676f01315..7232f74b8bec6 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -908,8 +908,8 @@ private static byte[] ConvertHexStringToByteArray(string hexString) }; private void GenerateArray(Func createArray) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -929,8 +929,8 @@ private void GenerateArray(Func(Func createArray, Func parse) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -950,8 +950,8 @@ private void GenerateLongArray(Func(Func createArray, Func construct) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs index 682ebec323dc0..d3032b8d4ac40 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs @@ -101,9 +101,9 @@ public void EnumerateArray() { var array = new Int64Array.Builder().Append(1).Append(2).Build(); - foreach(long? foo in (IEnumerable)array) + foreach(long? 
foo in array) { - Assert.InRange(foo.Value, 1, 2); + Assert.InRange(foo!.Value, 1, 2); } foreach (object foo in (IEnumerable)array) @@ -115,12 +115,145 @@ public void EnumerateArray() [Fact] public void ArrayAsReadOnlyList() { - Int64Array array = new Int64Array.Builder().Append(1).Append(2).Build(); - var readOnlyList = (IReadOnlyList)array; + TestArrayAsReadOnlyList([1, 2]); + TestArrayAsReadOnlyList([1, 2]); + TestArrayAsReadOnlyList([true, false]); + TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]); + TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]); + TestArrayAsReadOnlyList([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100)]); + +#if NET5_0_OR_GREATER + TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]); + TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]); + TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23)]); + TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MaxValue]); + TestArrayAsReadOnlyList([(Half)1.1, (Half)2.2f]); +#endif + } + + // Parameter 'values' must contain two distinct values + private static void TestArrayAsReadOnlyList(IReadOnlyList values) + where T : struct + where TArray : IArrowArray + where TArrayBuilder : IArrowArrayBuilder, new() + { + Assert.Equal(2, values.Count); + TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Build(default); + Assert.NotNull(array); + var readOnlyList = (IReadOnlyList)array; Assert.Equal(array.Length, readOnlyList.Count); - Assert.Equal(readOnlyList[0], 1); - Assert.Equal(readOnlyList[1], 2); + Assert.Equal(3, readOnlyList.Count); + Assert.Equal(values[0], readOnlyList[0]); + Assert.Null(readOnlyList[1]); + Assert.Equal(values[1], readOnlyList[2]); + } + + [Fact] + public void ArrayAsCollection() + { + TestPrimitiveArrayAsCollection([1, 2, 3, 4]); + TestPrimitiveArrayAsCollection([1, 2, 3, 4]); + TestPrimitiveArrayAsCollection([true, true, true, false]); + TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]); + TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]); + TestPrimitiveArrayAsCollection([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100), DateTimeOffset.Now, DateTimeOffset.UtcNow]); + +#if NET5_0_OR_GREATER + TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]); + TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]); + TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23), TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]); + TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MaxValue, TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]); + TestPrimitiveArrayAsCollection([(Half)1.1, (Half)2.2f, (Half)3.3f, (Half)4.4f]); +#endif + + byte[][] byteArrs = [new byte[1], [], [255], new byte[2]]; + TestObjectArrayAsCollection(new BinaryArray.Builder().Append(byteArrs[0].AsEnumerable()).AppendNull().Append(byteArrs[1].AsEnumerable()).Append(byteArrs[0].AsEnumerable()).Build(), System.Array.Empty(), byteArrs); + + string[] strings = ["abc", "abd", "acd", "adc"]; + TestObjectArrayAsCollection(new StringArray.Builder().Append(strings[0]).AppendNull().Append(strings[1]).Append(strings[0]).Build(), null, strings); + } + + // Parameter 'values' must contain 
four values. The last value must be distinct from the rest. + private static void TestPrimitiveArrayAsCollection(IReadOnlyList values) + where T : struct + where TArray : IArrowArray, ICollection + where TArrayBuilder : IArrowArrayBuilder, new() + { + Assert.Equal(4, values.Count); + TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Append(values[0]).Build(default); + Assert.NotNull(array); + var collection = (ICollection)array; + + Assert.Equal(array.Length, collection.Count); + Assert.Equal(4, collection.Count); + Assert.True(collection.IsReadOnly); + + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Add(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Remove(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(collection.Clear).Message); + + Assert.True(collection.Contains(values[0])); + Assert.True(collection.Contains(values[1])); + Assert.True(collection.Contains(default)); + Assert.False(collection.Contains(values[3])); + + T sentinel = values[2]; + T?[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; + collection.CopyTo(destArr, 1); + Assert.Equal(sentinel, destArr[0]); + Assert.Equal(values[0], destArr[1]); + Assert.Null(destArr[2]); + Assert.Equal(values[1], destArr[3]); + Assert.Equal(values[0], destArr[4]); + Assert.Equal(sentinel, destArr[0]); + } + + // Parameter 'values' must contain four values. The last value must be distinct from the rest. + private static void TestObjectArrayAsCollection(TArray array, T nullValue, IReadOnlyList values) + where T : class + where TArray : IArrowArray, ICollection + { + Assert.NotNull(array); + Assert.Equal(4, values.Count); + var collection = (ICollection)array; + + Assert.Equal(array.Length, collection.Count); + Assert.Equal(4, collection.Count); + Assert.True(collection.IsReadOnly); + + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Add(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Remove(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(collection.Clear).Message); + + Assert.True(collection.Contains(values[0])); + Assert.True(collection.Contains(values[1])); + Assert.True(collection.Contains(default)); + Assert.False(collection.Contains(values[3])); + + T sentinel = values[2]; + T?[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; + collection.CopyTo(destArr, 1); + Assert.Equal(sentinel, destArr[0]); + Assert.Equal(values[0], destArr[1]); + Assert.Equal(nullValue, destArr[2]); + Assert.Equal(values[1], destArr[3]); + Assert.Equal(values[0], destArr[4]); + Assert.Equal(sentinel, destArr[0]); + } + + [Fact] + public void ContainsDoesNotMatchDefaultValueInArrayWithNullValue() + { + Int64Array array = new Int64Array.Builder().Append(1).Append(2).AppendNull().Build(); + Assert.NotNull(array); + var collection = (ICollection)array; + + Assert.True(collection.Contains(1)); + Assert.True(collection.Contains(2)); + Assert.True(collection.Contains(default)); + // A null value is stored as a null bit in the null bitmap, and a default value in the value buffer. Check that we do not match the default value. 
+ Assert.False(collection.Contains(0)); } [Fact] diff --git a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs index 2a674b942c17b..6e4742cad06f2 100644 --- a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs @@ -131,7 +131,7 @@ public void AppendGivesUtcDate(DateTimeOffset dateTimeOffset) public class AppendDateOnly { [Theory] - [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date64ArrayTests))] + [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date32ArrayTests))] public void AppendDateGivesSameDate(DateOnly date) { // Arrange diff --git a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs index 4375c39cdfaf6..01809735d14c9 100644 --- a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs +++ b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs @@ -14,8 +14,6 @@ // limitations under the License. using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Tests { diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs index 712a87a252b6c..c603ef63a4d3e 100644 --- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs @@ -110,7 +110,7 @@ private static void CompareValue(UnionArray originalArray, int originalIndex, Un } private static void CompareFieldValue(byte typeId, UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex) - where T: struct + where T : struct, IEquatable where TArray : PrimitiveArray { if (originalArray is DenseUnionArray denseOriginalArray) From c555488c56c8de6d8020c8460b3b87081f7fb49d Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Tue, 14 May 2024 01:15:20 +0100 Subject: [PATCH 096/261] GH-41630: [Benchmarking] Fix out-of-source build in benchmarks (#41631) ### Rationale for this change Broken benchmarks after #41455 ### What changes are included in this PR? Use /tmp/arrow as build dir. ### Are these changes tested? ### Are there any user-facing changes? * GitHub Issue: #41630 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- dev/conbench_envs/hooks.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/dev/conbench_envs/hooks.sh b/dev/conbench_envs/hooks.sh index a77189764aed3..0745357d2c0d3 100755 --- a/dev/conbench_envs/hooks.sh +++ b/dev/conbench_envs/hooks.sh @@ -59,7 +59,8 @@ build_arrow_cpp() { } build_arrow_python() { - ci/scripts/python_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/python_build.sh $(pwd) /tmp/arrow } build_arrow_r() { @@ -69,7 +70,8 @@ build_arrow_r() { } build_arrow_java() { - ci/scripts/java_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/java_build.sh $(pwd) /tmp/arrow } install_archery() { From fc7c723babce0bb6aae3e2b9653296cdb508578d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 14 May 2024 09:25:09 +0900 Subject: [PATCH 097/261] MINOR: [Go] Bump golang.org/x/tools from 0.20.0 to 0.21.0 in /go (#41639) Bumps [golang.org/x/tools](https://github.com/golang/tools) from 0.20.0 to 0.21.0.
Commits:
- cc29c91 go.mod: update golang.org/x dependencies
- 397fef9 gopls/internal/protocol: add links to LSP spec
- e2a352c internal/refactor/inline: extensible API
- c16c816 go/analysis/passes/stdversion: test *.go < go.mod version
- 629a7be go/analysis/analysistest: stricter errors and GOWORK setting
- 4db1697 go/packages/packagestest: fold modules_111.go into modules.go
- ccdef3c gopls/internal/golang: fix nil panic in InlayHint
- 74c9cfe go/analysis: add Pass.ReadFile
- 5ef4fc9 gopls/internal/golang/completion: fix the isEmptyInterface predicate
- 77f691b internal/gcimporter: use Alias.Rhs, not unsafe hack
- Additional commits viewable in the compare view
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- go/go.mod | 6 +++--- go/go.sum | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/go/go.mod b/go/go.mod index 7c14ddcf9e216..8fdfea3dbe5eb 100644 --- a/go/go.mod +++ b/go/go.mod @@ -37,7 +37,7 @@ require ( golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 golang.org/x/sys v0.20.0 - golang.org/x/tools v0.20.0 + golang.org/x/tools v0.21.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 @@ -75,8 +75,8 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/text v0.15.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/yaml.v3 v3.0.1 // indirect modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect diff --git a/go/go.sum b/go/go.sum index 70e3a533d03f3..c2db1a72ccf2d 100644 --- a/go/go.sum +++ b/go/go.sum @@ -111,14 +111,14 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -126,10 +126,10 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= -golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/text v0.15.0 
h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= From fd84ec0b1a6bc5345de089e01cc9e8d235c458b6 Mon Sep 17 00:00:00 2001 From: Konstantin Malanchev Date: Tue, 14 May 2024 05:20:45 -0300 Subject: [PATCH 098/261] GH-39129 [Python] pa.array: add check for byte-swapped numpy arrays inside python objects (#41549) ### What changes are included in this PR? This PR introduces a check to verify if the dtype of the input numpy array is byte-swapped. If it is, a not-implemented exception is raised. This precaution prevents the data from being cast incorrectly as if it were in the correct byte order, which would lead to wrong data values. ### Are these changes tested? I added a new test to check if not-implemented exception is raised - for both old (primitive types) and new (composed types) code. ### Are there any user-facing changes? No changes in API, but old code which gave incorrect results now would fail with a not-implemented exception * GitHub Issue: #39129 Authored-by: Konstantin Malanchev Signed-off-by: Joris Van den Bossche --- .../src/arrow/python/python_to_arrow.cc | 4 ++++ python/pyarrow/tests/test_array.py | 24 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 79da47567bf24..a2a325fde8dbd 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -874,6 +874,10 @@ class PyListConverter : public ListConverter { if (PyArray_NDIM(ndarray) != 1) { return Status::Invalid("Can only convert 1-dimensional array values"); } + if (PyArray_ISBYTESWAPPED(ndarray)) { + // TODO + return Status::NotImplemented("Byte-swapped arrays not supported"); + } const int64_t size = PyArray_SIZE(ndarray); RETURN_NOT_OK(AppendTo(this->list_type_, size)); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index dbe29c5730758..f1f946ecc7dfb 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3920,3 +3920,27 @@ def test_list_view_slice(list_view_type): j = sliced_array.offsets[1].as_py() assert sliced_array[0].as_py() == sliced_array.values[i:j].to_pylist() == [4] + + +@pytest.mark.parametrize('numpy_native_dtype', ['u2', 'i4', 'f8']) +def test_swapped_byte_order_fails(numpy_native_dtype): + # ARROW-39129 + + numpy_swapped_dtype = np.dtype(numpy_native_dtype).newbyteorder() + np_arr = np.arange(10, dtype=numpy_swapped_dtype) + + # Primitive type array, type is inferred from the numpy array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr) + + # Primitive type array, type is explicitly provided + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr, type=pa.float64()) + + # List type array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array([np_arr]) + + # Struct type array + with pytest.raises(pa.ArrowNotImplementedError): + 
pa.StructArray.from_arrays([np_arr], names=['a']) From d7c22601e7046bdcdc3b59eeb82be6ead2c96460 Mon Sep 17 00:00:00 2001 From: a-reich <73507369+a-reich@users.noreply.github.com> Date: Tue, 14 May 2024 07:47:55 -0400 Subject: [PATCH 099/261] GH-41464: [Python] Fix StructArray.sort() for by=None (#41495) ### Rationale for this change Closes issue https://github.com/apache/arrow/issues/41464. Fix `StructArray.sort` method's `by` param to work in the case of `by=None` which was documented to mean sort by all fields (the default), but would raise an exception. ### What changes are included in this PR? * Add a unit test with by=None in `test_struct_array_sort` that fails on main * Fix the sort method ### Are these changes tested? yes ### Are there any user-facing changes? yes * GitHub Issue: #41464 Authored-by: a-reich Signed-off-by: Joris Van den Bossche --- python/pyarrow/array.pxi | 7 +++---- python/pyarrow/tests/test_array.py | 8 ++++++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 946c82b258241..406830ad4dd69 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3920,12 +3920,11 @@ cdef class StructArray(Array): result : StructArray """ if by is not None: - tosort = self._flattened_field(by) + tosort, sort_keys = self._flattened_field(by), [("", order)] else: - tosort = self + tosort, sort_keys = self, [(field.name, order) for field in self.type] indices = _pc().sort_indices( - tosort, - options=_pc().SortOptions(sort_keys=[("", order)], **kwargs) + tosort, options=_pc().SortOptions(sort_keys=sort_keys, **kwargs) ) return self.take(indices) diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index f1f946ecc7dfb..b89e0ace157af 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3536,6 +3536,14 @@ def test_struct_array_sort(): {"a": 5, "b": "foo"}, ] + sorted_arr = arr.sort() + assert sorted_arr.to_pylist() == [ + {"a": 5, "b": "foo"}, + {"a": 7, "b": "bar"}, + {"a": 7, "b": "car"}, + {"a": 35, "b": "foobar"}, + ] + arr_with_nulls = pa.StructArray.from_arrays([ pa.array([5, 7, 7, 35], type=pa.int64()), pa.array(["foo", "car", "bar", "foobar"]) From e6ab174e20137d62b33e4373f5fbd3c435948036 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Tue, 14 May 2024 20:56:49 +0800 Subject: [PATCH 100/261] GH-41329: [C++][Gandiva] Fix gandiva cache size env var (#41330) ### Rationale for this change Gandiva cache size validity checks are not robust enough (the negativity test is broken), and they are not currently tested. ### What changes are included in this PR? 1. Fix checking gandiva cache size env var. 2. Make cache size static so it only gets evaluated once. 3. Add test cases. 4. Enrich the description in the document about this env var. ### Are these changes tested? UT included. ### Are there any user-facing changes? None. 
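To make the accepted range concrete: the variable must parse as a base-10 integer in (0, INT32_MAX]; anything else falls back to the 5000-entry default. Here is a self-contained sketch of that rule (a hypothetical helper for illustration only; the patch itself uses `arrow::internal::ParseValue`, as the diff below shows):

```cpp
// Hypothetical stand-in mirroring the validation rule described above; not
// the patch's code. Malformed, suffixed, non-positive, or over-int32 values
// all fall back to the default capacity.
#include <cstdint>
#include <limits>
#include <string>

constexpr int kDefaultCacheSize = 5000;

int EffectiveCacheCapacity(const std::string& env_value) {
  if (env_value.empty()) return kDefaultCacheSize;
  std::size_t pos = 0;
  long long parsed = 0;
  try {
    parsed = std::stoll(env_value, &pos);
  } catch (...) {
    return kDefaultCacheSize;  // non-numeric, e.g. "invalid"
  }
  if (pos != env_value.size()) return kDefaultCacheSize;  // trailing suffix, e.g. "42MB"
  if (parsed <= 0 || parsed > std::numeric_limits<int32_t>::max()) {
    return kDefaultCacheSize;  // zero, negative, or beyond the int32 range
  }
  return static_cast<int>(parsed);
}
```

The test cases below (`""`, `"invalid"`, `"42MB"`, `"0"`, `"-1"`, and int32 max plus one) exercise exactly these branches.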
* GitHub Issue: #41329 Lead-authored-by: Ruoxi Sun Co-authored-by: Rossi Sun Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/gandiva/cache.cc | 47 +++++++++++++++--------- cpp/src/gandiva/cache.h | 15 +++++++- cpp/src/gandiva/cache_test.cc | 68 ++++++++++++++++++++++++++++++++++- docs/source/cpp/env_vars.rst | 4 +++ 4 files changed, 116 insertions(+), 18 deletions(-) diff --git a/cpp/src/gandiva/cache.cc b/cpp/src/gandiva/cache.cc index a1333ccdc5d43..2358b08c82424 100644 --- a/cpp/src/gandiva/cache.cc +++ b/cpp/src/gandiva/cache.cc @@ -20,26 +20,41 @@ #include "arrow/result.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" +#include "arrow/util/value_parsing.h" namespace gandiva { -static const size_t DEFAULT_CACHE_SIZE = 5000; - -int GetCapacity() { - size_t capacity = DEFAULT_CACHE_SIZE; - auto maybe_env_cache_size = ::arrow::internal::GetEnvVar("GANDIVA_CACHE_SIZE"); - if (maybe_env_cache_size.ok()) { - const auto env_cache_size = *std::move(maybe_env_cache_size); - if (!env_cache_size.empty()) { - capacity = std::atol(env_cache_size.c_str()); - if (capacity <= 0) { - ARROW_LOG(WARNING) << "Invalid cache size provided in GANDIVA_CACHE_SIZE. " - << "Using default cache size: " << DEFAULT_CACHE_SIZE; - capacity = DEFAULT_CACHE_SIZE; - } - } +constexpr auto kCacheCapacityEnvVar = "GANDIVA_CACHE_SIZE"; +constexpr auto kDefaultCacheSize = 5000; + +namespace internal { +int GetCacheCapacityFromEnvVar() { + auto maybe_env_value = ::arrow::internal::GetEnvVar(kCacheCapacityEnvVar); + if (!maybe_env_value.ok()) { + return kDefaultCacheSize; + } + const auto env_value = *std::move(maybe_env_value); + if (env_value.empty()) { + return kDefaultCacheSize; + } + int capacity = 0; + bool ok = ::arrow::internal::ParseValue<::arrow::Int32Type>( + env_value.c_str(), env_value.size(), &capacity); + if (!ok || capacity <= 0) { + ARROW_LOG(WARNING) << "Invalid cache size provided in " << kCacheCapacityEnvVar + << ". Using default cache size: " << kDefaultCacheSize; + return kDefaultCacheSize; } - return static_cast(capacity); + return capacity; +} +} // namespace internal + +// Deprecated in 17.0.0. Use GetCacheCapacity instead. +int GetCapacity() { return GetCacheCapacity(); } + +int GetCacheCapacity() { + static const int capacity = internal::GetCacheCapacityFromEnvVar(); + return capacity; } void LogCacheSize(size_t capacity) { diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h index 7cff9b02692ae..c19dbb7a0e30e 100644 --- a/cpp/src/gandiva/cache.h +++ b/cpp/src/gandiva/cache.h @@ -20,14 +20,27 @@ #include #include +#include "arrow/util/macros.h" #include "gandiva/lru_cache.h" #include "gandiva/visibility.h" namespace gandiva { +namespace internal { +// Only called once by GetCacheCapacity(). +// Do the actual work of getting the cache capacity from env var. +// Also makes the testing easier. +GANDIVA_EXPORT +int GetCacheCapacityFromEnvVar(); +} // namespace internal + +ARROW_DEPRECATED("Deprecated in 17.0.0. 
Use GetCacheCapacity instead.") GANDIVA_EXPORT int GetCapacity(); +GANDIVA_EXPORT +int GetCacheCapacity(); + GANDIVA_EXPORT void LogCacheSize(size_t capacity); @@ -36,7 +49,7 @@ class Cache { public: explicit Cache(size_t capacity) : cache_(capacity) { LogCacheSize(capacity); } - Cache() : Cache(GetCapacity()) {} + Cache() : Cache(GetCacheCapacity()) {} ValueType GetObjectCode(const KeyType& cache_key) { std::optional result; diff --git a/cpp/src/gandiva/cache_test.cc b/cpp/src/gandiva/cache_test.cc index a146707079fa6..96cf4a12e587a 100644 --- a/cpp/src/gandiva/cache_test.cc +++ b/cpp/src/gandiva/cache_test.cc @@ -16,10 +16,14 @@ // under the License. #include "gandiva/cache.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/io_util.h" +#include "arrow/util/logging.h" #include namespace gandiva { + class TestCacheKey { public: explicit TestCacheKey(int value) : value_(value) {} @@ -38,5 +42,67 @@ TEST(TestCache, TestGetPut) { ASSERT_EQ(cache.GetObjectCode(TestCacheKey(2)), "world"); } -TEST(TestCache, TestGetCacheCapacity) { ASSERT_EQ(GetCapacity(), 5000); } +namespace { +constexpr auto cache_capacity_env_var = "GANDIVA_CACHE_SIZE"; +constexpr auto default_cache_capacity = 5000; +} // namespace + +TEST(TestCache, TestGetCacheCapacityDefault) { + ASSERT_EQ(GetCacheCapacity(), default_cache_capacity); +} + +TEST(TestCache, TestGetCacheCapacityEnvVar) { + using ::arrow::EnvVarGuard; + + // Empty. + { + EnvVarGuard guard(cache_capacity_env_var, ""); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Non-number. + { + EnvVarGuard guard(cache_capacity_env_var, "invalid"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Number with invalid suffix. + { + EnvVarGuard guard(cache_capacity_env_var, "42MB"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Valid positive number. + { + EnvVarGuard guard(cache_capacity_env_var, "42"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), 42); + } + + // Int max. + { + auto str = std::to_string(std::numeric_limits::max()); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), std::numeric_limits::max()); + } + + // Zero. + { + EnvVarGuard guard(cache_capacity_env_var, "0"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Negative number. + { + EnvVarGuard guard(cache_capacity_env_var, "-1"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Over int max. + { + auto str = std::to_string(static_cast(std::numeric_limits::max()) + 1); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } +} + } // namespace gandiva diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 116c151824c75..0a082b0a5d859 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -181,6 +181,10 @@ that changing their value later will have an effect. The number of entries to keep in the Gandiva JIT compilation cache. The cache is in-memory and does not persist across processes. + The default cache size is 5000. The value of this environment variable + should be a positive integer and should not exceed the maximum value + of int32. Otherwise the default value is used. + .. envvar:: HADOOP_HOME The path to the Hadoop installation. 
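For Gandiva C++ API consumers, the deprecation above amounts to a one-line migration (a minimal sketch, assuming the usual Gandiva headers and link setup):

```cpp
#include "gandiva/cache.h"

// GetCapacity() is deprecated as of 17.0.0; the replacement returns the same
// value, derived once from GANDIVA_CACHE_SIZE and cached thereafter.
int CurrentCacheCapacity() { return gandiva::GetCacheCapacity(); }
```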
From ada965ff8b93320105937f76815cb6ce6e5c855e Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 15 May 2024 00:03:24 +0800 Subject: [PATCH 101/261] GH-33484: [C++][Compute] Implement `Grouper::Reset` (#41352) ### Rationale for this change Recently I've been working on some improvement for `Grouper` and I found adding `Reset` function could be beneficial. Then I trace down to #33484 from a TODO in code. Here comes this PR. ### What changes are included in this PR? Add `Reset` function for all the concrete `Grouper` implementations, and eliminate the recreation of `Grouper` in `AnyKeysSegmenter`. Also add more `RowSegmenter` cases covering `AnyKeysSegmenter`. ### Are these changes tested? Yes. Legacy UTs should cover it well. Also added some new UTs. ### Are there any user-facing changes? None. * GitHub Issue: #33484 Lead-authored-by: Ruoxi Sun Co-authored-by: Rossi Sun Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/acero/hash_aggregate_test.cc | 172 ++++++++++++++++----- cpp/src/arrow/compute/row/grouper.cc | 43 ++++-- cpp/src/arrow/compute/row/grouper.h | 4 + 3 files changed, 168 insertions(+), 51 deletions(-) diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 2626fd50379dd..d529f443319b9 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -592,6 +592,12 @@ void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batc ASSERT_EQ(expected_segment, segment); offset = segment.offset + segment.length; } + // Assert next is the last (empty) segment. + ASSERT_OK_AND_ASSIGN(auto segment, segmenter->GetNextSegment(batch, offset)); + ASSERT_GE(segment.offset, batch.length); + ASSERT_EQ(segment.length, 0); + ASSERT_TRUE(segment.is_open); + ASSERT_TRUE(segment.extends); } Result> MakeGrouper(const std::vector& key_types) { @@ -682,48 +688,142 @@ TEST(RowSegmenter, Basics) { } TEST(RowSegmenter, NonOrdered) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}, - {5, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON(types, "[[1, 1], [1, 1], [2, 2], [1, 2], [2, 2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } } TEST(RowSegmenter, EmptyBatches) { - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), 
{}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); + { + std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2, 2], [2, 2]]"), + ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } } TEST(RowSegmenter, MultipleSegments) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}, - {9, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = + ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON( + types, + "[[1, 1], [1, 1], [2, 2], [5, 5], [3, 3], [3, 3], [5, 5], [5, 5], [4, 4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } +} + +TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { + { + 
std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), + ExecBatchFromJSON(types, "[[5], [3]]"), + ExecBatchFromJSON(types, "[[3], [5], [5]]"), ExecBatchFromJSON(types, "[[4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[[1, 1], [2, 2]]"), + ExecBatchFromJSON(types, "[[5, 5], [3, 3]]"), + ExecBatchFromJSON(types, "[[3, 3], [5, 5], [5, 5]]"), + ExecBatchFromJSON(types, "[[4, 4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } } namespace { diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 756c70967ac6f..50ca20bd14f31 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -217,18 +217,18 @@ struct SimpleKeySegmenter : public BaseRowSegmenter { struct AnyKeysSegmenter : public BaseRowSegmenter { static Result> Make( const std::vector& key_types, ExecContext* ctx) { - ARROW_RETURN_NOT_OK(Grouper::Make(key_types, ctx)); // check types - return std::make_unique(key_types, ctx); + ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_types, ctx)); // check types + return std::make_unique(key_types, ctx, std::move(grouper)); } - AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx) + AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx, + std::unique_ptr grouper) : BaseRowSegmenter(key_types), - ctx_(ctx), - grouper_(nullptr), + grouper_(std::move(grouper)), save_group_id_(kNoGroupId) {} Status Reset() override { - grouper_ = nullptr; + ARROW_RETURN_NOT_OK(grouper_->Reset()); save_group_id_ = kNoGroupId; return Status::OK(); } @@ -245,7 +245,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { // first row of a new segment to see if it extends the previous segment. 
  template <typename Batch>
  Result<group_id_t> MapGroupIdAt(const Batch& batch, int64_t offset) {
-    if (!grouper_) return kNoGroupId;
     ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset, /*length=*/1));
     if (!datum.is_array()) {
@@ -264,9 +263,6 @@
     if (offset == batch.length) {
       return MakeSegment(batch.length, offset, 0, kEmptyExtends);
     }
-    // ARROW-18311: make Grouper support Reset()
-    // so it can be reset instead of recreated below
-    //
     // the group id must be computed prior to resetting the grouper, since it is compared
     // to save_group_id_, and after resetting the grouper produces incomparable group ids
     ARROW_ASSIGN_OR_RAISE(auto group_id, MapGroupIdAt(batch, offset));
@@ -276,7 +272,7 @@
       return extends;
     };
     // resetting drops grouper's group-ids, freeing-up memory for the next segment
-    ARROW_ASSIGN_OR_RAISE(grouper_, Grouper::Make(key_types_, ctx_));  // TODO: reset it
+    ARROW_RETURN_NOT_OK(grouper_->Reset());
     // GH-34475: cache the grouper-consume result across invocations of GetNextSegment
     ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset));
     if (datum.is_array()) {
@@ -299,7 +295,6 @@
   }

  private:
-  ExecContext* const ctx_;
   std::unique_ptr<Grouper> grouper_;
   group_id_t save_group_id_;
 };
@@ -354,6 +349,7 @@ struct GrouperNoKeysImpl : Grouper {
     RETURN_NOT_OK(builder->Finish(&array));
     return std::move(array);
   }
+  Status Reset() override { return Status::OK(); }
   Result<Datum> Consume(const ExecSpan& batch, int64_t offset, int64_t length) override {
     ARROW_ASSIGN_OR_RAISE(auto array, MakeConstantGroupIdArray(length, 0));
     return Datum(array);
@@ -419,6 +415,14 @@ struct GrouperImpl : public Grouper {
     return std::move(impl);
   }

+  Status Reset() override {
+    map_.clear();
+    offsets_.clear();
+    key_bytes_.clear();
+    num_groups_ = 0;
+    return Status::OK();
+  }
+
   Result<Datum> Consume(const ExecSpan& batch, int64_t offset, int64_t length) override {
     ARROW_RETURN_NOT_OK(CheckAndCapLengthForConsume(batch.length, offset, &length));
     if (offset != 0 || length != batch.length) {
@@ -595,7 +599,17 @@ struct GrouperFastImpl : public Grouper {
     return std::move(impl);
   }

-  ~GrouperFastImpl() { map_.cleanup(); }
+  Status Reset() override {
+    rows_.Clean();
+    rows_minibatch_.Clean();
+    map_.cleanup();
+    RETURN_NOT_OK(map_.init(encode_ctx_.hardware_flags, ctx_->memory_pool()));
+    // TODO: It is now assumed that the dictionaries_ are identical to the first batch
+    // throughout the grouper's lifespan so no resetting is needed. But if we want to
+    // support different dictionaries for different batches, we need to reset the
+    // dictionaries_ here.
+ return Status::OK(); + } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { ARROW_RETURN_NOT_OK(CheckAndCapLengthForConsume(batch.length, offset, &length)); @@ -838,8 +852,7 @@ struct GrouperFastImpl : public Grouper { return out; } - static constexpr int log_minibatch_max_ = 10; - static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_; + static constexpr int minibatch_size_max_ = arrow::util::MiniBatch::kMiniBatchLength; static constexpr int minibatch_size_min_ = 128; int minibatch_size_; diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 628a9c14f3e44..a883fb938ddaf 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -109,6 +109,10 @@ class ARROW_EXPORT Grouper { static Result> Make(const std::vector& key_types, ExecContext* ctx = default_exec_context()); + /// Reset all intermediate state, make the grouper logically as just `Make`ed. + /// The underlying buffers, if any, may or may not be released though. + virtual Status Reset() = 0; + /// Consume a batch of keys, producing the corresponding group ids as an integer array, /// over a slice defined by an offset and length, which defaults to the batch length. /// Currently only uint32 indices will be produced, eventually the bit width will only From a4a5cf1fbe804f5b47184afe91b3c243e0487ab2 Mon Sep 17 00:00:00 2001 From: David Sisson Date: Tue, 14 May 2024 09:28:50 -0700 Subject: [PATCH 102/261] GH-34484: [Substrait] add an option to disable augmented fields (#41583) ### Rationale for this change Augmented fields interfere with the schema passing between nodes. When enabled they cause names/schema mismatching at the end of the plan. ### What changes are included in this PR? Adds an option to disable augmented fields (defaulting to adding them), connects it everywhere it is called, and disables it in ReadRel conversion. ### Are these changes tested? Yes. ### Are there any user-facing changes? There are no API related changes however this will allow Substrait plans that consume local files to work without requiring a project/emit relation after the read relation to remove the unexpected fields. 
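For illustration, a scan that should emit exactly the dataset schema could opt out via the new flag (a minimal sketch, not part of the patch; `MakeScannerWithoutAugmentedFields` is a hypothetical helper built on the `ScanOptions::add_augmented_fields` member added below):

```cpp
#include <memory>

#include <arrow/dataset/scanner.h>
#include <arrow/result.h>

// Sketch: disable the scanner's augmented fields so the output schema matches
// the dataset schema exactly, as Substrait consumers expect.
arrow::Result<std::shared_ptr<arrow::dataset::Scanner>>
MakeScannerWithoutAugmentedFields(std::shared_ptr<arrow::dataset::Dataset> dataset) {
  auto options = std::make_shared<arrow::dataset::ScanOptions>();
  options->add_augmented_fields = false;  // new in this patch; defaults to true
  arrow::dataset::ScannerBuilder builder(std::move(dataset), options);
  return builder.Finish();
}
```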
* GitHub Issue: #34484 Authored-by: David Sisson Signed-off-by: Matt Topol --- cpp/src/arrow/acero/sink_node.cc | 1 + cpp/src/arrow/dataset/discovery_test.cc | 3 +- cpp/src/arrow/dataset/file_parquet_test.cc | 5 +- cpp/src/arrow/dataset/scanner.cc | 35 +++++--- cpp/src/arrow/dataset/scanner.h | 9 ++- cpp/src/arrow/dataset/scanner_test.cc | 12 ++- cpp/src/arrow/dataset/test_util_internal.h | 18 +++-- .../engine/substrait/relation_internal.cc | 1 + cpp/src/arrow/engine/substrait/serde_test.cc | 81 +++++++++++++++++++ 9 files changed, 138 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc index 4ab6b4537de02..66f447aa87f11 100644 --- a/cpp/src/arrow/acero/sink_node.cc +++ b/cpp/src/arrow/acero/sink_node.cc @@ -423,6 +423,7 @@ class ConsumingSinkNode : public ExecNode, std::atomic backpressure_counter_ = 0; std::unique_ptr sequencer_; }; + static Result MakeTableConsumingSinkNode(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc index 92cec7f324963..981146b7999ef 100644 --- a/cpp/src/arrow/dataset/discovery_test.cc +++ b/cpp/src/arrow/dataset/discovery_test.cc @@ -144,7 +144,8 @@ class FileSystemDatasetFactoryTest : public DatasetFactoryTest { } options_ = std::make_shared(); options_->dataset_schema = schema; - ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(*schema)); + ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default( + *schema, options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); ASSERT_OK_AND_ASSIGN(dataset_, factory_->Finish(schema)); ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset_->GetFragments()); diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 76cd0af3b835f..bf626826d4d1b 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -330,8 +330,9 @@ TEST_F(TestParquetFileFormat, CachedMetadata) { // Read the file the first time, will read metadata auto options = std::make_shared(); options->filter = literal(true); - ASSERT_OK_AND_ASSIGN(auto projection_descr, - ProjectionDescr::FromNames({"x"}, *test_schema)); + ASSERT_OK_AND_ASSIGN( + auto projection_descr, + ProjectionDescr::FromNames({"x"}, *test_schema, options->add_augmented_fields)); options->projected_schema = projection_descr.schema; options->projection = projection_descr.expression; ASSERT_OK_AND_ASSIGN(auto generator, fragment->ScanBatchesAsync(options)); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 18981d1451980..a856a792a264f 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -211,7 +211,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // create the projected schema only if the provided expressions // produces valid set of fields. ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*projected_schema)); + ProjectionDescr::Default( + *projected_schema, scan_options->add_augmented_fields)); scan_options->projected_schema = std::move(projection_descr.schema); scan_options->projection = projection_descr.expression; ARROW_ASSIGN_OR_RAISE(scan_options->projection, @@ -220,7 +221,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // if projected_fields are not found, we default to creating the projected_schema // and projection from the dataset_schema. 
ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*dataset_schema)); + ProjectionDescr::Default( + *dataset_schema, scan_options->add_augmented_fields)); scan_options->projected_schema = std::move(projection_descr.schema); scan_options->projection = projection_descr.expression; } @@ -231,7 +233,7 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, ARROW_ASSIGN_OR_RAISE( auto projection_descr, ProjectionDescr::FromNames(scan_options->projected_schema->field_names(), - *dataset_schema)); + *dataset_schema, scan_options->add_augmented_fields)); scan_options->projection = projection_descr.expression; } @@ -730,7 +732,8 @@ Future AsyncScanner::CountRowsAsync(Executor* executor) { const auto options = std::make_shared(*scan_options_); ARROW_ASSIGN_OR_RAISE(auto empty_projection, ProjectionDescr::FromNames(std::vector(), - *scan_options_->dataset_schema)); + *scan_options_->dataset_schema, + scan_options_->add_augmented_fields)); SetProjection(options.get(), empty_projection); auto total = std::make_shared>(0); @@ -828,7 +831,8 @@ Result ProjectionDescr::FromExpressions( } Result ProjectionDescr::FromNames(std::vector names, - const Schema& dataset_schema) { + const Schema& dataset_schema, + bool add_augmented_fields) { std::vector exprs(names.size()); for (size_t i = 0; i < exprs.size(); ++i) { // If name isn't in schema, try finding it by dotted path. @@ -846,15 +850,19 @@ Result ProjectionDescr::FromNames(std::vector name } } auto fields = dataset_schema.fields(); - for (const auto& aug_field : kAugmentedFields) { - fields.push_back(aug_field); + if (add_augmented_fields) { + for (const auto& aug_field : kAugmentedFields) { + fields.push_back(aug_field); + } } return ProjectionDescr::FromExpressions(std::move(exprs), std::move(names), Schema(fields, dataset_schema.metadata())); } -Result ProjectionDescr::Default(const Schema& dataset_schema) { - return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema); +Result ProjectionDescr::Default(const Schema& dataset_schema, + bool add_augmented_fields) { + return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema, + add_augmented_fields); } void SetProjection(ScanOptions* options, ProjectionDescr projection) { @@ -899,7 +907,8 @@ const std::shared_ptr& ScannerBuilder::projected_schema() const { Status ScannerBuilder::Project(std::vector columns) { ARROW_ASSIGN_OR_RAISE( auto projection, - ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema)); + ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema, + scan_options_->add_augmented_fields)); SetProjection(scan_options_.get(), std::move(projection)); return Status::OK(); } @@ -1052,8 +1061,10 @@ Result MakeScanNode(acero::ExecPlan* plan, }); auto fields = scan_options->dataset_schema->fields(); - for (const auto& aug_field : kAugmentedFields) { - fields.push_back(aug_field); + if (scan_options->add_augmented_fields) { + for (const auto& aug_field : kAugmentedFields) { + fields.push_back(aug_field); + } } return acero::MakeExecNode( diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h index 4479158ff20cc..d2de267897180 100644 --- a/cpp/src/arrow/dataset/scanner.h +++ b/cpp/src/arrow/dataset/scanner.h @@ -114,6 +114,9 @@ struct ARROW_DS_EXPORT ScanOptions { /// Note: This must be true in order for any readahead to happen bool use_threads = false; + /// If true the scanner will add augmented fields to the output schema. 
+ bool add_augmented_fields = true; + /// Fragment-specific scan options. std::shared_ptr fragment_scan_options; @@ -287,10 +290,12 @@ struct ARROW_DS_EXPORT ProjectionDescr { /// \brief Create a default projection referencing fields in the dataset schema static Result FromNames(std::vector names, - const Schema& dataset_schema); + const Schema& dataset_schema, + bool add_augmented_fields = true); /// \brief Make a projection that projects every field in the dataset schema - static Result Default(const Schema& dataset_schema); + static Result Default(const Schema& dataset_schema, + bool add_augmented_fields = true); }; /// \brief Utility method to set the projection expression and schema diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index fccfc80032d31..58bc9c8c0ea6b 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1103,7 +1103,8 @@ TEST_P(TestScanner, ProjectionDefaults) { } // If we only specify a projection expression then infer the projected schema // from the projection expression - auto projection_desc = ProjectionDescr::FromNames({"i32"}, *schema_); + auto projection_desc = + ProjectionDescr::FromNames({"i32"}, *schema_, /*add_augmented_fields=*/true); { ARROW_SCOPED_TRACE("User only specifies projection"); options_->projection = projection_desc->expression; @@ -1148,7 +1149,8 @@ TEST_P(TestScanner, ProjectedScanNestedFromNames) { }); ASSERT_OK_AND_ASSIGN(auto descr, ProjectionDescr::FromNames({".struct.i32", "nested.right.f64"}, - *options_->dataset_schema)) + *options_->dataset_schema, + options_->add_augmented_fields)) SetProjection(options_.get(), std::move(descr)); auto batch_in = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_); auto batch_out = ConstantArrayGenerator::Zeroes( @@ -2106,7 +2108,8 @@ TEST(ScanOptions, TestMaterializedFields) { auto set_projection_from_names = [&opts](std::vector names) { ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::FromNames( - std::move(names), *opts->dataset_schema)); + std::move(names), *opts->dataset_schema, + opts->add_augmented_fields)); SetProjection(opts.get(), std::move(projection)); }; @@ -2160,7 +2163,8 @@ TEST(ScanOptions, TestMaterializedFields) { // project top-level field, filter nothing opts->filter = literal(true); ASSERT_OK_AND_ASSIGN(projection, - ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema)); + ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema, + opts->add_augmented_fields)); SetProjection(opts.get(), std::move(projection)); EXPECT_THAT(opts->MaterializedFields(), ElementsAre(FieldRef("nested"))); diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index de0519afac9e1..8195218b0cfe8 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -386,7 +386,8 @@ class DatasetFixtureMixin : public ::testing::Test { options_ = std::make_shared(); options_->dataset_schema = schema_; ASSERT_OK_AND_ASSIGN(auto projection, - ProjectionDescr::FromNames(schema_->field_names(), *schema_)); + ProjectionDescr::FromNames(schema_->field_names(), *schema_, + options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); SetFilter(literal(true)); } @@ -398,7 +399,8 @@ class DatasetFixtureMixin : public ::testing::Test { void SetProjectedColumns(std::vector column_names) { ASSERT_OK_AND_ASSIGN( auto projection, - ProjectionDescr::FromNames(std::move(column_names), 
*options_->dataset_schema)); + ProjectionDescr::FromNames(std::move(column_names), *options_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(options_.get(), std::move(projection)); } @@ -502,7 +504,8 @@ class FileFormatFixtureMixin : public ::testing::Test { void SetSchema(std::vector> fields) { opts_->dataset_schema = schema(std::move(fields)); ASSERT_OK_AND_ASSIGN(auto projection, - ProjectionDescr::Default(*opts_->dataset_schema)); + ProjectionDescr::Default(*opts_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(opts_.get(), std::move(projection)); } @@ -512,7 +515,8 @@ class FileFormatFixtureMixin : public ::testing::Test { void Project(std::vector names) { ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::FromNames( - std::move(names), *opts_->dataset_schema)); + std::move(names), *opts_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(opts_.get(), std::move(projection)); } @@ -993,7 +997,8 @@ class FileFormatScanMixin : public FileFormatFixtureMixin, auto i64 = field("i64", int64()); this->opts_->dataset_schema = schema({i32, i32, i64}); ASSERT_RAISES(Invalid, - ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema)); + ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema, + /*add_augmented_fields=*/true)); } void TestScanWithPushdownNulls() { // Regression test for ARROW-15312 @@ -1933,7 +1938,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { scan_options_->dataset_schema = dataset_->schema(); ASSERT_OK_AND_ASSIGN( auto projection, - ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema())); + ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema(), + scan_options_->add_augmented_fields)); SetProjection(scan_options_.get(), std::move(projection)); } diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index f15f1a5527b7b..7c462c418f81b 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -393,6 +393,7 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& auto scan_options = std::make_shared(); scan_options->use_threads = true; + scan_options->add_augmented_fields = false; if (read.has_filter()) { ARROW_ASSIGN_OR_RAISE(scan_options->filter, diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 3e80192377937..6762d1e045450 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -1064,6 +1064,86 @@ NamedTableProvider AlwaysProvideSameTable(std::shared_ptr
table) { }; } +TEST(Substrait, ExecReadRelWithLocalFiles) { + ASSERT_OK_AND_ASSIGN(std::string dir_string, + arrow::internal::GetEnvVar("PARQUET_TEST_DATA")); + + std::string substrait_json = R"({ + "relations": [ + { + "root": { + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "f32", + "f64" + ], + "struct": { + "types": [ + { + "fp32": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_REQUIRED" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "localFiles": { + "items": [ + { + "uriFile": "file://[DIRECTORY_PLACEHOLDER]/byte_stream_split.zstd.parquet", + "parquet": {} + } + ] + } + } + }, + "names": [ + "f32", + "f64" + ] + } + } + ], + "version": { + "minorNumber": 42, + "producer": "my-producer" + } + })"; + const char* placeholder = "[DIRECTORY_PLACEHOLDER]"; + substrait_json.replace(substrait_json.find(placeholder), strlen(placeholder), + dir_string); + + ASSERT_OK_AND_ASSIGN(auto buf, + internal::SubstraitFromJSON("Plan", substrait_json, + /*ignore_unknown_fields=*/false)); + + ASSERT_OK_AND_ASSIGN(auto declarations, + DeserializePlans(*buf, acero::NullSinkNodeConsumer::Make)); + ASSERT_EQ(declarations.size(), 1); + acero::Declaration* decl = &declarations[0]; + ASSERT_EQ(decl->factory_name, "consuming_sink"); + ASSERT_OK_AND_ASSIGN(auto plan, acero::ExecPlan::Make()); + ASSERT_OK_AND_ASSIGN(auto sink_node, declarations[0].AddToPlan(plan.get())); + ASSERT_STREQ(sink_node->kind_name(), "ConsumingSinkNode"); + ASSERT_EQ(sink_node->num_inputs(), 1); + auto& prev_node = sink_node->inputs()[0]; + ASSERT_STREQ(prev_node->kind_name(), "SourceNode"); + + plan->StartProducing(); + ASSERT_FINISHES_OK(plan->finished()); +} + TEST(Substrait, RelWithHint) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Rel", R"({ @@ -2443,6 +2523,7 @@ TEST(SubstraitRoundTrip, BasicPlanEndToEnd) { auto scan_options = std::make_shared(); scan_options->projection = compute::project({}, {}); + scan_options->add_augmented_fields = false; const std::string filter_col_left = "shared"; const std::string filter_col_right = "distinct"; auto comp_left_value = compute::field_ref(filter_col_left); From 8f27e269cb4c9fc9b593177f30bf9a1ec6ef5cff Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 15 May 2024 00:40:53 +0800 Subject: [PATCH 103/261] GH-41149: [C++][Acero] Fix asof join race (#41614) ### Rationale for this change Sporadic asof join test failures have been frequently and annoyingly observed in pyarrow CI, as recorded in #40675 and #41149. Turns out the root causes are the same - a logical race (as opposed to physical race which can be detected by sanitizers). By injecting special delay in various places in asof join, as shown in https://github.com/zanmato1984/arrow/commit/ea3b24c5f7308fe42f60dad41f51dbcbc1a54929, the issue can be reproduced almost 100%. And I have put some descriptions in that commit to explain how the race happens. ### What changes are included in this PR? Eliminate the logical race of emptiness by combining multiple call-sites of `Empty()`. ### Are these changes tested? Include the UT to reproduce the issue. ### Are there any user-facing changes? None. **This PR contains a "Critical Fix".** In #40675 and #41149 , incorrect results are produced. 
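The shape of the race is easy to show in isolation (a simplified, hypothetical model, not Arrow code): two separate emptiness checks can straddle a concurrent `Push()`, which is why the fix samples the state once and reuses that single observation.

```cpp
// Minimal model of the race class fixed here: reading "empty" twice can give
// two different answers if a producer pushes in between the reads.
#include <atomic>
#include <cstdio>
#include <thread>

std::atomic<int> queue_size{0};

bool Empty() { return queue_size.load() == 0; }

int main() {
  std::thread producer([] { queue_size.fetch_add(1); });  // concurrent Push()
  bool before = Empty();
  bool after = Empty();  // may disagree with `before`
  producer.join();
  std::printf("two reads can disagree: %d vs %d\n", before, after);
  // The patch therefore evaluates Empty() once and passes that single result
  // to both AdvanceAndMemoize() and CurrentEmpty().
  return 0;
}
```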
* GitHub Issue: #41149 * Also closes #40675 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/src/arrow/acero/asof_join_node.cc | 73 +++++++++++++--------- cpp/src/arrow/acero/asof_join_node_test.cc | 54 ++++++++++++++++ 2 files changed, 98 insertions(+), 29 deletions(-) diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 48cc83dd3d6a9..1d94467df9ee2 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -548,8 +548,10 @@ class InputState { // true when the queue is empty and, when memo may have future entries (the case of a // positive tolerance), when the memo is empty. // used when checking whether RHS is up to date with LHS. - bool CurrentEmpty() const { - return memo_.no_future_ ? Empty() : memo_.times_.empty() && Empty(); + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + bool CurrentEmpty(bool empty) const { + return memo_.no_future_ ? empty : (memo_.times_.empty() && empty); } // in case memo may not have future entries (the case of a non-positive tolerance), @@ -650,13 +652,15 @@ class InputState { // timestamp, update latest_time and latest_ref_row to the value that immediately pass // the horizon. Update the memo-store with any entries or future entries so observed. // Returns true if updates were made, false if not. - Result AdvanceAndMemoize(OnType ts) { + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + Result AdvanceAndMemoize(OnType ts, bool empty) { // Advance the right side row index until we reach the latest right row (for each key) // for the given left timestamp. DEBUG_SYNC(node_, "Advancing input ", index_, DEBUG_MANIP(std::endl)); // Check if already updated for TS (or if there is no latest) - if (Empty()) { // can't advance if empty and no future entries + if (empty) { // can't advance if empty and no future entries return memo_.no_future_ ? false : memo_.RemoveEntriesWithLesserTime(ts); } @@ -918,34 +922,46 @@ class CompositeTableBuilder { // guaranteeing this probability is below 1 in a billion. The fix is 128-bit hashing. // See ARROW-17653 class AsofJoinNode : public ExecNode { - // Advances the RHS as far as possible to be up to date for the current LHS timestamp - Result UpdateRhs() { + // A simple wrapper for the result of a single call to UpdateRhs(), identifying: + // 1) If any RHS has advanced. + // 2) If all RHS are up to date with LHS. + struct RhsUpdateState { + bool any_advanced; + bool all_up_to_date_with_lhs; + }; + // Advances the RHS as far as possible to be up to date for the current LHS timestamp, + // and checks if all RHS are up to date with LHS. The reason they have to be performed + // together is that they both depend on the emptiness of the RHS, which can be changed + // by Push() executing in another thread. 
+ Result UpdateRhs() { auto& lhs = *state_.at(0); auto lhs_latest_time = lhs.GetLatestTime(); - bool any_updated = false; - for (size_t i = 1; i < state_.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(bool advanced, state_[i]->AdvanceAndMemoize(lhs_latest_time)); - any_updated |= advanced; - } - return any_updated; - } - - // Returns false if RHS not up to date for LHS - bool IsUpToDateWithLhsRow() const { - auto& lhs = *state_[0]; - if (lhs.Empty()) return false; // can't proceed if nothing on the LHS - OnType lhs_ts = lhs.GetLatestTime(); + RhsUpdateState update_state{/*any_advanced=*/false, /*all_up_to_date_with_lhs=*/true}; for (size_t i = 1; i < state_.size(); ++i) { auto& rhs = *state_[i]; - if (!rhs.Finished()) { + + // Obtain RHS emptiness once for subsequent AdvanceAndMemoize() and CurrentEmpty(). + bool rhs_empty = rhs.Empty(); + // Obtain RHS current time here because AdvanceAndMemoize() can change the + // emptiness. + OnType rhs_current_time = rhs_empty ? OnType{} : rhs.GetLatestTime(); + + ARROW_ASSIGN_OR_RAISE(bool advanced, + rhs.AdvanceAndMemoize(lhs_latest_time, rhs_empty)); + update_state.any_advanced |= advanced; + + if (update_state.all_up_to_date_with_lhs && !rhs.Finished()) { // If RHS is finished, then we know it's up to date - if (rhs.CurrentEmpty()) - return false; // RHS isn't finished, but is empty --> not up to date - if (lhs_ts > rhs.GetCurrentTime()) - return false; // RHS isn't up to date (and not finished) + if (rhs.CurrentEmpty(rhs_empty)) { + // RHS isn't finished, but is empty --> not up to date + update_state.all_up_to_date_with_lhs = false; + } else if (lhs_latest_time > rhs_current_time) { + // RHS isn't up to date (and not finished) + update_state.all_up_to_date_with_lhs = false; + } } } - return true; + return update_state; } Result> ProcessInner() { @@ -963,20 +979,19 @@ class AsofJoinNode : public ExecNode { // If LHS is finished or empty then there's nothing we can do here if (lhs.Finished() || lhs.Empty()) break; - // Advance each of the RHS as far as possible to be up to date for the LHS timestamp - ARROW_ASSIGN_OR_RAISE(bool any_rhs_advanced, UpdateRhs()); + ARROW_ASSIGN_OR_RAISE(auto rhs_update_state, UpdateRhs()); // If we have received enough inputs to produce the next output batch // (decided by IsUpToDateWithLhsRow), we will perform the join and // materialize the output batch. The join is done by advancing through // the LHS and adding joined row to rows_ (done by Emplace). Finally, // input batches that are no longer needed are removed to free up memory. - if (IsUpToDateWithLhsRow()) { + if (rhs_update_state.all_up_to_date_with_lhs) { dst.Emplace(state_, tolerance_); ARROW_ASSIGN_OR_RAISE(bool advanced, lhs.Advance()); if (!advanced) break; // if we can't advance LHS, we're done for this batch } else { - if (!any_rhs_advanced) break; // need to wait for new data + if (!rhs_update_state.any_advanced) break; // need to wait for new data } } diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index d95d2aaad3643..051e280a4c53c 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -1678,5 +1678,59 @@ TEST(AsofJoinTest, BackpressureWithBatchesGen) { /*slow_r0=*/false); } +// Reproduction of GH-40675: A logical race between Process() and Push() that can be more +// easily observed with single small batch. 
+TEST(AsofJoinTest, RhsEmptinessRace) { + auto left_batch = ExecBatchFromJSON( + {int64(), utf8()}, R"([[1, "a"], [1, "b"], [5, "a"], [6, "b"], [7, "f"]])"); + auto right_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, R"([[2, "a", 1.0], [9, "b", 3.0], [15, "g", 5.0]])"); + + Declaration left{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colA", int64()), field("col2", utf8())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colB", int64()), field("col3", utf8()), + field("colC", float64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"colA"}, {{"col2"}}}, {{"colB"}, {{"col3"}}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, + R"([[1, "a", 1.0], [1, "b", null], [5, "a", null], [6, "b", null], [7, "f", null]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + +// Reproduction of GH-41149: Another case of the same root cause as GH-40675, but with +// empty "by" columns. +TEST(AsofJoinTest, RhsEmptinessRaceEmptyBy) { + auto left_batch = ExecBatchFromJSON({int64()}, R"([[1], [2], [3]])"); + auto right_batch = + ExecBatchFromJSON({utf8(), int64()}, R"([["Z", 2], ["B", 3], ["A", 4]])"); + + Declaration left{"exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("on", int64())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colVals", utf8()), field("on", int64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"on"}, {}}, {{"on"}, {}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = + ExecBatchFromJSON({int64(), utf8()}, R"([[1, "Z"], [2, "Z"], [3, "B"]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + } // namespace acero } // namespace arrow From 6c386dab6760961160ddbfe7dcb6952943920828 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Wed, 15 May 2024 01:10:22 +0800 Subject: [PATCH 104/261] GH-41334: [C++][Acero] Use per-node basis temp vector stack to mitigate overflow (#41335) ### Rationale for this change The risk of temp vector stack overflow still exists as described in #41334 . Many people have agreed on a per-node basis approach: > 1) it doesn't introduce more performance penalty than shared stack; 2) it can mitigate the overflow in a natural way, i.e., expanding the stack size linear to the number of nodes; 3) it requires no more complexity to the existing stack implementation. The full (but long) story is also revealed in the subsequent discussion of this PR. Feel free to scroll down. ### What changes are included in this PR? 1. Change the current shared (per-thread) temp vector stack usage to per-node basis. 2. Make the stack size required by each stack user more explicit. ### Are these changes tested? UT included. ### Are there any user-facing changes? None. 
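To make the per-node approach concrete, here is a minimal sketch of the pattern this patch applies in `SwissJoin` and `BloomFilterPushdownContext` (the node class, the `kPerThreadUsage` constant, and the pool plumbing below are illustrative assumptions, not the patch's exact API):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

#include "arrow/compute/util_internal.h"  // arrow::util::TempVectorStack
#include "arrow/memory_pool.h"
#include "arrow/status.h"

// Sketch: each node owns one temp vector stack per thread, sized from a
// compile-time bound, so total stack capacity grows linearly with the number
// of nodes instead of all nodes sharing (and possibly overflowing) one
// per-thread stack.
class ExampleNode {
 public:
  arrow::Status Init(arrow::MemoryPool* pool, std::size_t num_threads) {
    tld_.resize(num_threads);
    for (auto& local : tld_) {
      ARROW_RETURN_NOT_OK(local.stack.Init(pool, kPerThreadUsage));
    }
    return arrow::Status::OK();
  }

  arrow::Status DoWork(std::size_t thread_index) {
    // Use this node's own stack for this thread; no other node can exhaust it.
    arrow::util::TempVectorStack* stack = &tld_[thread_index].stack;
    (void)stack;  // ... allocate TempVectorHolder<T> buffers from `stack` ...
    return arrow::Status::OK();
  }

 private:
  // Illustrative bound; the patch derives real per-user bounds such as
  // Hashing32::kHashBatchTempStackUsage instead of guessing.
  static constexpr int64_t kPerThreadUsage = 64 * 1024;

  struct ThreadLocalData {
    arrow::util::TempVectorStack stack;
  };
  std::vector<ThreadLocalData> tld_;
};
```

The `kTempStackUsage` constants introduced in `swiss_join.cc` and `hash_join_node.cc` below play the role of `kPerThreadUsage` in this sketch.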
* GitHub Issue: #41334 Authored-by: Ruoxi Sun Signed-off-by: Antoine Pitrou --- cpp/src/arrow/CMakeLists.txt | 3 +- cpp/src/arrow/acero/exec_plan.cc | 2 +- cpp/src/arrow/acero/hash_join_node.cc | 38 +++++++--- cpp/src/arrow/acero/hash_join_node_test.cc | 52 +++++++++++++ cpp/src/arrow/acero/query_context.cc | 12 +-- cpp/src/arrow/acero/query_context.h | 8 +- cpp/src/arrow/acero/swiss_join.cc | 16 ++-- cpp/src/arrow/compute/key_hash_internal.h | 19 +++++ cpp/src/arrow/compute/key_hash_test.cc | 59 ++++++++++++++- cpp/src/arrow/compute/key_map_internal.h | 1 + cpp/src/arrow/compute/light_array_internal.h | 1 + cpp/src/arrow/compute/light_array_test.cc | 1 + cpp/src/arrow/compute/row/compare_internal.h | 10 +++ cpp/src/arrow/compute/row/compare_test.cc | 62 ++++++++++++++- cpp/src/arrow/compute/row/grouper.cc | 1 + cpp/src/arrow/compute/util.cc | 31 -------- cpp/src/arrow/compute/util.h | 73 ------------------ cpp/src/arrow/compute/util_internal.cc | 79 ++++++++++++++++++++ cpp/src/arrow/compute/util_internal.h | 53 +++++++++++++ 19 files changed, 371 insertions(+), 150 deletions(-) create mode 100644 cpp/src/arrow/compute/util_internal.cc diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 5d61112518f5e..0f4824ec99daa 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -716,7 +716,8 @@ set(ARROW_COMPUTE_SRCS compute/row/compare_internal.cc compute/row/grouper.cc compute/row/row_internal.cc - compute/util.cc) + compute/util.cc + compute/util_internal.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) diff --git a/cpp/src/arrow/acero/exec_plan.cc b/cpp/src/arrow/acero/exec_plan.cc index 97119726d4b17..d9fb1942fccd8 100644 --- a/cpp/src/arrow/acero/exec_plan.cc +++ b/cpp/src/arrow/acero/exec_plan.cc @@ -128,7 +128,7 @@ struct ExecPlanImpl : public ExecPlan { Future<> scheduler_finished = arrow::util::AsyncTaskScheduler::Make( [this](arrow::util::AsyncTaskScheduler* async_scheduler) { QueryContext* ctx = query_context(); - RETURN_NOT_OK(ctx->Init(ctx->max_concurrency(), async_scheduler)); + RETURN_NOT_OK(ctx->Init(async_scheduler)); #ifdef ARROW_WITH_OPENTELEMETRY if (HasMetadata()) { diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index b49364300dac8..06405f16c8d4c 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -497,11 +497,11 @@ struct BloomFilterPushdownContext { using BuildFinishedCallback = std::function; using FiltersReceivedCallback = std::function; using FilterFinishedCallback = std::function; - void Init(HashJoinNode* owner, size_t num_threads, - RegisterTaskGroupCallback register_task_group_callback, - StartTaskGroupCallback start_task_group_callback, - FiltersReceivedCallback on_bloom_filters_received, bool disable_bloom_filter, - bool use_sync_execution); + Status Init(HashJoinNode* owner, size_t num_threads, + RegisterTaskGroupCallback register_task_group_callback, + StartTaskGroupCallback start_task_group_callback, + FiltersReceivedCallback on_bloom_filters_received, + bool disable_bloom_filter, bool use_sync_execution); Status StartProducing(size_t thread_index); @@ -559,8 +559,7 @@ struct BloomFilterPushdownContext { std::vector hashes(batch.length); std::vector bv(bit_vector_bytes); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = 
&tld_[thread_index].stack; // Start with full selection for the current batch memset(selected.data(), 0xff, bit_vector_bytes); @@ -654,7 +653,17 @@ struct BloomFilterPushdownContext { FiltersReceivedCallback all_received_callback_; FilterFinishedCallback on_finished_; } eval_; + + static constexpr auto kTempStackUsage = + Hashing32::kHashBatchTempStackUsage + + (sizeof(uint32_t) + /*extra=*/1) * arrow::util::MiniBatch::kMiniBatchLength; + + struct ThreadLocalData { + arrow::util::TempVectorStack stack; + }; + std::vector tld_; }; + bool HashJoinSchema::HasDictionaries() const { for (int side = 0; side <= 1; ++side) { for (int icol = 0; icol < proj_maps[side].num_cols(HashJoinProjection::INPUT); @@ -930,7 +939,7 @@ class HashJoinNode : public ExecNode, public TracedNode { // we will change it back to just the CPU's thread pool capacity. size_t num_threads = (GetCpuThreadPoolCapacity() + io::GetIOThreadPoolCapacity() + 1); - pushdown_context_.Init( + RETURN_NOT_OK(pushdown_context_.Init( this, num_threads, [ctx](std::function fn, std::function on_finished) { @@ -940,7 +949,7 @@ class HashJoinNode : public ExecNode, public TracedNode { return ctx->StartTaskGroup(task_group_id, num_tasks); }, [this](size_t thread_index) { return OnFiltersReceived(thread_index); }, - disable_bloom_filter_, use_sync_execution); + disable_bloom_filter_, use_sync_execution)); RETURN_NOT_OK(impl_->Init( ctx, join_type_, num_threads, &(schema_mgr_->proj_maps[0]), @@ -1037,7 +1046,7 @@ class HashJoinNode : public ExecNode, public TracedNode { BloomFilterPushdownContext pushdown_context_; }; -void BloomFilterPushdownContext::Init( +Status BloomFilterPushdownContext::Init( HashJoinNode* owner, size_t num_threads, RegisterTaskGroupCallback register_task_group_callback, StartTaskGroupCallback start_task_group_callback, @@ -1074,6 +1083,12 @@ void BloomFilterPushdownContext::Init( return eval_.on_finished_(thread_index, std::move(eval_.batches_)); }); start_task_group_callback_ = std::move(start_task_group_callback); + tld_.resize(num_threads); + for (auto& local_data : tld_) { + RETURN_NOT_OK(local_data.stack.Init(ctx_->memory_pool(), kTempStackUsage)); + } + + return Status::OK(); } Status BloomFilterPushdownContext::StartProducing(size_t thread_index) { @@ -1124,8 +1139,7 @@ Status BloomFilterPushdownContext::BuildBloomFilter_exec_task(size_t thread_inde } ARROW_ASSIGN_OR_RAISE(ExecBatch key_batch, ExecBatch::Make(std::move(key_columns))); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = &tld_[thread_index].stack; arrow::util::TempVectorHolder hash_holder( stack, arrow::util::MiniBatch::kMiniBatchLength); uint32_t* hashes = hash_holder.mutable_data(); diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 9c3dbc176ff4f..215b1e4d21125 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -28,6 +28,7 @@ #include "arrow/api.h" #include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" @@ -41,6 +42,7 @@ namespace arrow { using compute::call; using compute::default_exec_context; +using compute::ExecBatchBuilder; using compute::ExecSpan; using compute::field_ref; using compute::SortIndices; @@ -3201,5 +3203,55 @@ TEST(HashJoin, 
ChainedIntegerHashJoins) { } } +// Test that a large number of joins don't overflow the temp vector stack, like GH-39582 +// and GH-39951. +TEST(HashJoin, ManyJoins) { + // The idea of this case is to create many nested join nodes that may possibly cause + // recursive usage of temp vector stack. To make sure that the recursion happens: + // 1. A left-deep join tree is created so that the left-most (the final probe side) + // table will go through all the hash tables from the right side. + // 2. Left-outer join is used so that every join will increase the cardinality. + // 3. The left-most table contains rows of unique integers from 0 to N. + // 4. Each right table at level i contains two rows of integer i, so that the probing of + // each level will increase the result by one row. + // 5. The left-most table is a single batch of enough rows, so that at each level, the + // probing will accumulate enough result rows to have to output to the subsequent level + // before finishing the current batch (releasing the buffer allocated on the temp vector + // stack), which is essentially the recursive usage of the temp vector stack. + + // A fair number of joins to guarantee temp vector stack overflow before GH-41335. + const int num_joins = 64; + + // `ExecBatchBuilder::num_rows_max()` is the number of rows for swiss join to accumulate + // before outputting. + const int num_left_rows = ExecBatchBuilder::num_rows_max(); + ASSERT_OK_AND_ASSIGN( + auto left_batches, + MakeIntegerBatches({[](int row_id) -> int64_t { return row_id; }}, + schema({field("l_key", int32())}), + /*num_batches=*/1, /*batch_size=*/num_left_rows)); + Declaration root{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(left_batches.schema), + std::move(left_batches.batches))}; + + HashJoinNodeOptions join_opts(JoinType::LEFT_OUTER, /*left_keys=*/{"l_key"}, + /*right_keys=*/{"r_key"}); + + for (int i = 0; i < num_joins; ++i) { + ASSERT_OK_AND_ASSIGN(auto right_batches, + MakeIntegerBatches({[i](int) -> int64_t { return i; }}, + schema({field("r_key", int32())}), + /*num_batches=*/1, /*batch_size=*/2)); + Declaration table{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(right_batches.schema), + std::move(right_batches.batches))}; + + Declaration new_root{"hashjoin", {std::move(root), std::move(table)}, join_opts}; + root = std::move(new_root); + } + + ASSERT_OK_AND_ASSIGN(std::ignore, DeclarationToTable(std::move(root))); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/query_context.cc b/cpp/src/arrow/acero/query_context.cc index a27397d12079d..18beb19ab7f8b 100644 --- a/cpp/src/arrow/acero/query_context.cc +++ b/cpp/src/arrow/acero/query_context.cc @@ -40,8 +40,7 @@ QueryContext::QueryContext(QueryOptions opts, ExecContext exec_context) const CpuInfo* QueryContext::cpu_info() const { return CpuInfo::GetInstance(); } int64_t QueryContext::hardware_flags() const { return cpu_info()->hardware_flags(); } -Status QueryContext::Init(size_t max_num_threads, util::AsyncTaskScheduler* scheduler) { - tld_.resize(max_num_threads); +Status QueryContext::Init(util::AsyncTaskScheduler* scheduler) { async_scheduler_ = scheduler; return Status::OK(); } @@ -50,15 +49,6 @@ size_t QueryContext::GetThreadIndex() { return thread_indexer_(); } size_t QueryContext::max_concurrency() const { return thread_indexer_.Capacity(); } -Result QueryContext::GetTempStack(size_t thread_index) { - if (!tld_[thread_index].is_init) { - RETURN_NOT_OK(tld_[thread_index].stack.Init( - memory_pool(), 32 * 
util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); - tld_[thread_index].is_init = true; - } - return &tld_[thread_index].stack; -} - Result> QueryContext::BeginExternalTask(std::string_view name) { Future<> completion_future = Future<>::Make(); if (async_scheduler_->AddSimpleTask([completion_future] { return completion_future; }, diff --git a/cpp/src/arrow/acero/query_context.h b/cpp/src/arrow/acero/query_context.h index 9ea11679cba05..3eff299439828 100644 --- a/cpp/src/arrow/acero/query_context.h +++ b/cpp/src/arrow/acero/query_context.h @@ -38,7 +38,7 @@ class ARROW_ACERO_EXPORT QueryContext { QueryContext(QueryOptions opts = {}, ExecContext exec_context = *default_exec_context()); - Status Init(size_t max_num_threads, arrow::util::AsyncTaskScheduler* scheduler); + Status Init(arrow::util::AsyncTaskScheduler* scheduler); const ::arrow::internal::CpuInfo* cpu_info() const; int64_t hardware_flags() const; @@ -52,7 +52,6 @@ class ARROW_ACERO_EXPORT QueryContext { size_t GetThreadIndex(); size_t max_concurrency() const; - Result GetTempStack(size_t thread_index); /// \brief Start an external task /// @@ -145,11 +144,6 @@ class ARROW_ACERO_EXPORT QueryContext { std::unique_ptr task_scheduler_ = TaskScheduler::Make(); ThreadIndexer thread_indexer_; - struct ThreadLocalData { - bool is_init = false; - arrow::util::TempVectorStack stack; - }; - std::vector tld_; std::atomic in_flight_bytes_to_disk_{0}; }; diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 542e943c4a82b..17c5212697339 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -2470,6 +2470,8 @@ Status JoinProbeProcessor::OnFinished() { class SwissJoin : public HashJoinImpl { public: + static constexpr auto kTempStackUsage = 64 * arrow::util::MiniBatch::kMiniBatchLength; + Status Init(QueryContext* ctx, JoinType join_type, size_t num_threads, const HashJoinProjectionMaps* proj_map_left, const HashJoinProjectionMaps* proj_map_right, @@ -2513,6 +2515,7 @@ class SwissJoin : public HashJoinImpl { local_states_.resize(num_threads_); for (int i = 0; i < num_threads_; ++i) { + RETURN_NOT_OK(local_states_[i].stack.Init(pool_, kTempStackUsage)); local_states_[i].hash_table_ready = false; local_states_[i].num_output_batches = 0; local_states_[i].materialize.Init(pool_, proj_map_left, proj_map_right); @@ -2566,8 +2569,7 @@ class SwissJoin : public HashJoinImpl { ExecBatch keypayload_batch; ARROW_ASSIGN_OR_RAISE(keypayload_batch, KeyPayloadFromInput(/*side=*/0, &batch)); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_index].stack; return CancelIfNotOK( probe_processor_.OnNextBatch(thread_index, keypayload_batch, temp_stack, @@ -2679,8 +2681,7 @@ class SwissJoin : public HashJoinImpl { input_batch.values[schema->num_cols(HashJoinProjection::KEY) + icol]; } } - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.PushNextBatch( static_cast(thread_id), key_batch, no_payload ? 
nullptr : &payload_batch, temp_stack))); @@ -2715,8 +2716,7 @@ class SwissJoin : public HashJoinImpl { Status MergeFinished(size_t thread_id) { RETURN_NOT_OK(status()); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; hash_table_build_.FinishPrtnMerge(temp_stack); return CancelIfNotOK(OnBuildHashTableFinished(static_cast(thread_id))); } @@ -2771,8 +2771,7 @@ class SwissJoin : public HashJoinImpl { std::min((task_id + 1) * kNumRowsPerScanTask, hash_table_.num_rows()); // Get thread index and related temp vector stack // - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; // Split into mini-batches // @@ -2949,6 +2948,7 @@ class SwissJoin : public HashJoinImpl { FinishedCallback finished_callback_; struct ThreadLocalState { + arrow::util::TempVectorStack stack; JoinResultMaterialize materialize; std::vector temp_column_arrays; int64_t num_output_batches; diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 7d226f52086b1..1f25beb0e1622 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -48,6 +48,16 @@ class ARROW_EXPORT Hashing32 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint32_t* out_hash); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint32 temp vector as a buffer for hash, one uint16 + // temp vector as a buffer for null indices and one uint32 temp vector as a buffer for + // null hash, all are of size kMiniBatchLength. Plus extra kMiniBatchLength to cope with + // stack padding and aligning. + static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint32_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, @@ -161,6 +171,15 @@ class ARROW_EXPORT Hashing64 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint64_t* hashes); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint16 temp vector as a buffer for null indices and + // one uint64 temp vector as a buffer for null hash, all are of size kMiniBatchLength. + // Plus extra kMiniBatchLength to cope with stack padding and aligning. 
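  // Worked example (assuming util::MiniBatch::kMiniBatchLength == 1024, its
  // current value):
  //   (sizeof(uint16_t) + sizeof(uint64_t) + /*extra=*/1) * 1024
  //   = (2 + 8 + 1) * 1024 = 11264 bytes reserved per HashBatch call.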
+ static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint16_t) + sizeof(uint64_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint64_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 4e5d869cb7db6..fdf6d2125850a 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -25,12 +25,16 @@ #include "arrow/array/builder_binary.h" #include "arrow/compute/key_hash_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" #include "arrow/testing/util.h" #include "arrow/util/cpu_info.h" #include "arrow/util/pcg_random.h" namespace arrow { +using arrow::random::RandomArrayGenerator; +using arrow::util::MiniBatch; +using arrow::util::TempVectorStack; using internal::checked_pointer_cast; using internal::CpuInfo; @@ -156,7 +160,7 @@ class TestVectorHash { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; if (use_32bit_hash) { if (!use_varlen_input) { @@ -192,7 +196,7 @@ class TestVectorHash { // Verify that all implementations (scalar, SIMD) give the same hashes // const auto& hashes_scalar64 = hashes64[0]; - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows; ++j) { ASSERT_EQ(hashes64[i][j], hashes_scalar64[j]) << "scalar and simd approaches yielded different hashes"; @@ -280,7 +284,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; Hashing32::HashFixed(hardware_flags, /*combine_hashes=*/false, num_rows_to_hash, key_length, @@ -292,7 +296,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { } // Verify that all implementations (scalar, SIMD) give the same hashes. - for (int i = 1; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 1; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows_to_hash; ++j) { ASSERT_EQ(hashes32[i][j], hashes32[0][j]) << "scalar and simd approaches yielded different 32-bit hashes"; @@ -311,5 +315,52 @@ TEST(VectorHash, FixedLengthTailByteSafety) { HashFixedLengthFrom(/*key_length=*/19, /*num_rows=*/64, /*start_row=*/63); } +// Make sure that Hashing32/64::HashBatch uses no more stack space than declared in +// Hashing32/64::kHashBatchTempStackUsage. 
+TEST(VectorHash, HashBatchTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + RandomArrayGenerator gen(42); + + auto column = gen.Int8(num_rows, 0, 127); + ExecBatch batch({column}, num_rows); + + std::vector column_arrays; + ASSERT_OK(ColumnArraysFromExecBatch(batch, &column_arrays)); + + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + ASSERT_GT(hardware_flags_for_testing.size(), 0); + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing32::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing32 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing32::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing64::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing64 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing64::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index 8e06dc83483aa..a5e784a9e4463 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -21,6 +21,7 @@ #include #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 67de71bf56c92..995c4211998e0 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -22,6 +22,7 @@ #include "arrow/array.h" #include "arrow/compute/exec.h" #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 08f36ee606025..cc02d489d138f 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -20,6 +20,7 @@ #include #include +#include "arrow/memory_pool.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index 16002ee5184e9..a5a109b0b516a 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -32,6 +32,16 @@ namespace compute { class ARROW_EXPORT KeyCompare { public: + // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary + // for the caller to be aware of (possibly at compile time) to reserve enough stack size + // in advance. The CompareColumnsToRows implementation uses three uint8 temp vectors as + // buffers for match vectors, all are of size num_rows. 
Plus extra kMiniBatchLength to + // cope with stack padding and aligning. + constexpr static int64_t CompareColumnsToRowsTempStackUsage(int64_t num_rows) { + return (sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint8_t)) * num_rows + + /*extra=*/util::MiniBatch::kMiniBatchLength; + } + // Returns a single 16-bit selection vector of rows that failed comparison. // If there is input selection on the left, the resulting selection is a filtered image // of input selection. diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc index 1d8562cd56d3c..4044049b10863 100644 --- a/cpp/src/arrow/compute/row/compare_test.cc +++ b/cpp/src/arrow/compute/row/compare_test.cc @@ -19,23 +19,26 @@ #include "arrow/compute/row/compare_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" namespace arrow { namespace compute { using arrow::bit_util::BytesForBits; using arrow::internal::CpuInfo; +using arrow::random::RandomArrayGenerator; using arrow::util::MiniBatch; using arrow::util::TempVectorStack; // Specialized case for GH-39577. TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { int fsb_length = 9; + int num_rows = 7; + MemoryPool* pool = default_memory_pool(); TempVectorStack stack; - ASSERT_OK(stack.Init(pool, 8 * MiniBatch::kMiniBatchLength * sizeof(uint64_t))); + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); - int num_rows = 7; auto column_right = ArrayFromJSON(fixed_size_binary(fsb_length), R"([ "000000000", "111111111", @@ -106,5 +109,60 @@ TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { } } +// Make sure that KeyCompare::CompareColumnsToRows uses no more stack space than declared +// in KeyCompare::CompareColumnsToRowsTempStackUsage(). +TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); + + RandomArrayGenerator gen(42); + + auto column_right = gen.Int8(num_rows, 0, 127); + ExecBatch batch_right({column_right}, num_rows); + + std::vector column_metadatas_right; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch_right, &column_metadatas_right)); + + RowTableMetadata table_metadata_right; + table_metadata_right.FromColumnMetadataVector(column_metadatas_right, + sizeof(uint64_t), sizeof(uint64_t)); + + std::vector column_arrays_right; + ASSERT_OK(ColumnArraysFromExecBatch(batch_right, &column_arrays_right)); + + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata_right)); + + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas_right, sizeof(uint64_t), sizeof(uint64_t)); + row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays_right); + + std::vector row_ids_right(num_rows); + std::iota(row_ids_right.begin(), row_ids_right.end(), 0); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, num_rows, row_ids_right.data())); + + auto column_left = gen.Int8(num_rows, 0, 127); + ExecBatch batch_left({column_left}, num_rows); + std::vector column_arrays_left; + ASSERT_OK(ColumnArraysFromExecBatch(batch_left, &column_arrays_left)); + + std::vector row_ids_left(num_rows); + std::iota(row_ids_left.begin(), row_ids_left.end(), 0); + + LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack}; + + uint32_t num_rows_no_match; + std::vector 
row_ids_out(num_rows); + KeyCompare::CompareColumnsToRows(num_rows, NULLPTR, row_ids_left.data(), &ctx, + &num_rows_no_match, row_ids_out.data(), + column_arrays_left, row_table, true, NULLPTR); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 50ca20bd14f31..3ed5411d0ba02 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -600,6 +600,7 @@ struct GrouperFastImpl : public Grouper { } Status Reset() override { + ARROW_DCHECK_EQ(temp_stack_.AllocatedSize(), 0); rows_.Clean(); rows_minibatch_.Clean(); map_.cleanup(); diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index b0c863b26a062..b90b3a64056bd 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -17,11 +17,7 @@ #include "arrow/compute/util.h" -#include "arrow/table.h" -#include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" -#include "arrow/util/tracing_internal.h" #include "arrow/util/ubsan.h" namespace arrow { @@ -31,33 +27,6 @@ using internal::CpuInfo; namespace util { -void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - int64_t new_top = top_ + EstimatedAllocationSize(num_bytes); - // Stack overflow check (see GH-39582). - // XXX cannot return a regular Status because most consumers do not either. - ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow"; - *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); - // We set 8 bytes before the beginning of the allocated range and - // 8 bytes after the end to check for stack overflow (which would - // result in those known bytes being corrupted). - reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; - reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; - *id = num_vectors_++; - top_ = new_top; -} - -void TempVectorStack::release(int id, uint32_t num_bytes) { - ARROW_DCHECK(num_vectors_ == id + 1); - int64_t size = EstimatedAllocationSize(num_bytes); - ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[-1] == - kGuard2); - ARROW_DCHECK(top_ >= size); - top_ -= size; - ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[0] == - kGuard1); - --num_vectors_; -} - namespace bit_util { inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 88dce160ce936..d56e398667f66 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -24,17 +24,10 @@ #include #include -#include "arrow/buffer.h" #include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" -#include "arrow/memory_pool.h" #include "arrow/result.h" -#include "arrow/status.h" -#include "arrow/util/bit_util.h" #include "arrow/util/cpu_info.h" -#include "arrow/util/mutex.h" -#include "arrow/util/thread_pool.h" -#include "arrow/util/type_fwd.h" #if defined(__clang__) || defined(__GNUC__) #define BYTESWAP(x) __builtin_bswap64(x) @@ -77,72 +70,6 @@ class MiniBatch { static constexpr int kMiniBatchLength = 1 << kLogMiniBatchLength; }; -/// Storage used to allocate temporary vectors of a batch size. -/// Temporary vectors should resemble allocating temporary variables on the stack -/// but in the context of vectorized processing where we need to store a vector of -/// temporaries instead of a single value. 
-class ARROW_EXPORT TempVectorStack { - template - friend class TempVectorHolder; - - public: - Status Init(MemoryPool* pool, int64_t size) { - num_vectors_ = 0; - top_ = 0; - buffer_size_ = EstimatedAllocationSize(size); - ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); - // Ensure later operations don't accidentally read uninitialized memory. - std::memset(buffer->mutable_data(), 0xFF, size); - buffer_ = std::move(buffer); - return Status::OK(); - } - - private: - static int64_t EstimatedAllocationSize(int64_t size) { - return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); - } - - static int64_t PaddedAllocationSize(int64_t num_bytes) { - // Round up allocation size to multiple of 8 bytes - // to avoid returning temp vectors with unaligned address. - // - // Also add padding at the end to facilitate loads and stores - // using SIMD when number of vector elements is not divisible - // by the number of SIMD lanes. - // - return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding; - } - void alloc(uint32_t num_bytes, uint8_t** data, int* id); - void release(int id, uint32_t num_bytes); - static constexpr uint64_t kGuard1 = 0x3141592653589793ULL; - static constexpr uint64_t kGuard2 = 0x0577215664901532ULL; - static constexpr int64_t kPadding = 64; - int num_vectors_; - int64_t top_; - std::unique_ptr buffer_; - int64_t buffer_size_; -}; - -template -class TempVectorHolder { - friend class TempVectorStack; - - public: - ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); } - T* mutable_data() { return reinterpret_cast(data_); } - TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) { - stack_ = stack; - num_elements_ = num_elements; - stack_->alloc(num_elements * sizeof(T), &data_, &id_); - } - - private: - TempVectorStack* stack_; - uint8_t* data_; - int id_; - uint32_t num_elements_; -}; - namespace bit_util { ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, diff --git a/cpp/src/arrow/compute/util_internal.cc b/cpp/src/arrow/compute/util_internal.cc new file mode 100644 index 0000000000000..cc26982fef110 --- /dev/null +++ b/cpp/src/arrow/compute/util_internal.cc @@ -0,0 +1,79 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/util_internal.h" + +#include "arrow/compute/util.h" +#include "arrow/memory_pool.h" + +namespace arrow { +namespace util { + +Status TempVectorStack::Init(MemoryPool* pool, int64_t size) { + num_vectors_ = 0; + top_ = 0; + buffer_size_ = EstimatedAllocationSize(size); + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); + // Ensure later operations don't accidentally read uninitialized memory. 
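  // (0xFF is a deliberately conspicuous poison value: reads of bytes that
  // were never written stand out more clearly than against zero fill.)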
+ std::memset(buffer->mutable_data(), 0xFF, size); + buffer_ = std::move(buffer); + return Status::OK(); +} + +int64_t TempVectorStack::PaddedAllocationSize(int64_t num_bytes) { + // Round up allocation size to multiple of 8 bytes + // to avoid returning temp vectors with unaligned address. + // + // Also add padding at the end to facilitate loads and stores + // using SIMD when number of vector elements is not divisible + // by the number of SIMD lanes. + // + return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding; +} + +void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { + int64_t estimated_alloc_size = EstimatedAllocationSize(num_bytes); + int64_t new_top = top_ + estimated_alloc_size; + // Stack overflow check (see GH-39582). + // XXX cannot return a regular Status because most consumers do not either. + ARROW_CHECK_LE(new_top, buffer_size_) + << "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size + << " on top of " << top_ << " in stack of size " << buffer_size_; + *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); + // We set 8 bytes before the beginning of the allocated range and + // 8 bytes after the end to check for stack overflow (which would + // result in those known bytes being corrupted). + reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; + reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; + *id = num_vectors_++; + top_ = new_top; +} + +void TempVectorStack::release(int id, uint32_t num_bytes) { + ARROW_DCHECK(num_vectors_ == id + 1); + int64_t size = EstimatedAllocationSize(num_bytes); + ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[-1] == + kGuard2); + ARROW_DCHECK(top_ >= size); + top_ -= size; + ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[0] == + kGuard1); + --num_vectors_; +} + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 87e89a3350721..043ff118062e4 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -17,6 +17,8 @@ #pragma once +#include "arrow/status.h" +#include "arrow/type_fwd.h" #include "arrow/util/logging.h" namespace arrow { @@ -27,5 +29,56 @@ void CheckAlignment(const void* ptr) { ARROW_DCHECK(reinterpret_cast(ptr) % sizeof(T) == 0); } +/// Storage used to allocate temporary vectors of a batch size. +/// Temporary vectors should resemble allocating temporary variables on the stack +/// but in the context of vectorized processing where we need to store a vector of +/// temporaries instead of a single value. 
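/// Illustrative usage (a sketch; `pool`, `required_bytes`, and
/// `num_elements` are placeholders):
///
///   arrow::util::TempVectorStack stack;
///   RETURN_NOT_OK(stack.Init(pool, required_bytes));
///   {
///     arrow::util::TempVectorHolder<uint32_t> buf(&stack, num_elements);
///     uint32_t* data = buf.mutable_data();
///     // ... use data[0, num_elements) ...
///   }  // ~TempVectorHolder releases its slice; releases must be LIFO.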
+class ARROW_EXPORT TempVectorStack { + template + friend class TempVectorHolder; + + public: + Status Init(MemoryPool* pool, int64_t size); + + int64_t AllocatedSize() const { return top_; } + + private: + static int64_t EstimatedAllocationSize(int64_t size) { + return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); + } + + static int64_t PaddedAllocationSize(int64_t num_bytes); + + void alloc(uint32_t num_bytes, uint8_t** data, int* id); + void release(int id, uint32_t num_bytes); + static constexpr uint64_t kGuard1 = 0x3141592653589793ULL; + static constexpr uint64_t kGuard2 = 0x0577215664901532ULL; + static constexpr int64_t kPadding = 64; + int num_vectors_; + int64_t top_; + std::unique_ptr buffer_; + int64_t buffer_size_; +}; + +template +class TempVectorHolder { + friend class TempVectorStack; + + public: + ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); } + T* mutable_data() { return reinterpret_cast(data_); } + TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) { + stack_ = stack; + num_elements_ = num_elements; + stack_->alloc(num_elements * sizeof(T), &data_, &id_); + } + + private: + TempVectorStack* stack_; + uint8_t* data_; + int id_; + uint32_t num_elements_; +}; + } // namespace util } // namespace arrow From cc1e1d87c11666830385332eef9e2a5a102ba1b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 14 May 2024 20:17:06 +0200 Subject: [PATCH 105/261] MINOR: [Release] Update versions for 17.0.0-SNAPSHOT --- ci/scripts/PKGBUILD | 2 +- docs/source/_static/versions.json | 7 ++++++- r/DESCRIPTION | 2 +- r/NEWS.md | 4 +++- r/pkgdown/assets/versions.json | 4 ++-- 5 files changed, 13 insertions(+), 6 deletions(-) diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index b0905886dd50f..f6bbc78be710e 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=16.0.0.9000 +pkgver=16.1.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index f8ff19095b3fd..e879fc69138d0 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -5,11 +5,16 @@ "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "16.0 (stable)", + "name": "16.1 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "16.0", + "version": "16.0/", + "url": "https://arrow.apache.org/docs/16.0/" + }, { "name": "15.0", "version": "15.0/", diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 38cbaa94a3c25..bb4470e29037d 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 16.0.0.9000 +Version: 16.1.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 05f934dac68f3..47c4ac1571dad 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 16.0.0.9000 +# arrow 16.1.0.9000 + +# arrow 16.1.0 * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. 
For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 75d179f240515..43f0b3fac62a1 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,10 +1,10 @@ [ { - "name": "16.0.0.9000 (dev)", + "name": "16.1.0.9000 (dev)", "version": "dev/" }, { - "name": "16.0.0 (release)", + "name": "16.1.0 (release)", "version": "" }, { From bd89c4298612d37bb752f7823356c489b1e79162 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Tue, 14 May 2024 20:17:07 +0200 Subject: [PATCH 106/261] MINOR: [Release] Update .deb/.rpm changelogs for 16.1.0 --- .../linux-packages/apache-arrow-apt-source/debian/changelog | 6 ++++++ .../apache-arrow-release/yum/apache-arrow-release.spec.in | 3 +++ dev/tasks/linux-packages/apache-arrow/debian/changelog | 6 ++++++ dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in | 3 +++ 4 files changed, 18 insertions(+) diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 60e745301d9db..04aa586dc3c96 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (16.1.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 09 May 2024 07:21:29 -0000 + apache-arrow-apt-source (16.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 676c9e0d16dea..f0eb785dd6bc7 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Thu May 09 2024 Raúl Cumplido - 16.1.0-1 +- New upstream release. + * Tue Apr 16 2024 Raúl Cumplido - 16.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index e255e84096e4e..35cc598fe6f87 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (16.1.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 09 May 2024 07:21:29 -0000 + apache-arrow (16.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 3ede1814b865d..c6148e9260586 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -881,6 +881,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Thu May 09 2024 Raúl Cumplido - 16.1.0-1 +- New upstream release. + * Tue Apr 16 2024 Raúl Cumplido - 16.0.0-1 - New upstream release. 
From e411e0e211206dd7040668dac08ae935e8037aa9 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Tue, 14 May 2024 12:52:59 -0700 Subject: [PATCH 107/261] GH-41602: [C#] Resolve build warnings (#41645) ### What changes are included in this PR? Adds annotations or suppressions to disable build warnings. Configures projects to produce an error on warnings. ### Are these changes tested? Changes are covered by existing tests. Closes #41602 * GitHub Issue: #41602 Authored-by: Curt Hagenlocher Signed-off-by: Curt Hagenlocher --- csharp/Directory.Build.props | 4 ++- csharp/feather.png | Bin 0 -> 40042 bytes csharp/src/Apache.Arrow/Arrays/BinaryArray.cs | 4 +-- .../Apache.Arrow/Arrays/Decimal256Array.cs | 14 ++++---- .../Memory/NativeMemoryManager.cs | 2 ++ .../FlightSqlServerTests.cs | 25 ++++++++------- .../Apache.Arrow.Flight.Tests/FlightTests.cs | 30 +++++++++--------- .../Apache.Arrow.Tests/ArrowArrayTests.cs | 6 ++-- .../Apache.Arrow.Tests/DurationArrayTests.cs | 2 +- 9 files changed, 47 insertions(+), 40 deletions(-) create mode 100644 csharp/feather.png diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index f6d42241f95cf..3c06d3cd31d90 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -37,12 +37,13 @@ latest true $(CSharpDir)ApacheArrow.snk + true The Apache Software Foundation - https://www.apache.org/images/feather.png + feather.png LICENSE.txt https://arrow.apache.org/ @@ -55,6 +56,7 @@ + diff --git a/csharp/feather.png b/csharp/feather.png new file mode 100644 index 0000000000000000000000000000000000000000..7b596e6683ddbb083f672c3b5d9270a9a3035ec0 GIT binary patch literal 40042 zcmaHRQ+Q_Wl6Gv{Hs9E`ZR3q?cG$7)bdrucw(WFm+qOFS``fek%pCmJtfP8r)m?Y- zWUVJsSy2iR4i63l1O!n=T3i(b1k~)W><9z(S5kCJZ2fz{b(7F`Q**R%^E7b*f{2(q zngU5=>`g3zsz4KSZ|5-}KL`kTv$eXmo3?^HubHDglgU3eOkVa*f6*Wy{6bz%CT6xk zHxg5zrL}_q*>y(`8Hu&I0GSqt0zkn@3}|I7?c)Me^HEec^RYGKF((reB;ohs{p-LU z=w?FVWpC%;%IhUS_AkG@f8~GF%w#10GI6sNAp5VNv=x*|#2j6KBpgftMl%2>3kfGX z6AL?lhnR50C+jLcmXUV|9;5+vbmUB@T!VS{+sLXNr24C&CQ9I znc36RlgX2f$*T1Y?-Bf}96UP77 z+Ev}#3COGpbaixhG5gyO3-bSf|JLsRCi;(|x!Hf)Ik~&o{VPv%GiIP2&>raE=K41% zmVZU`H*#KaM>F?-v@+rXWPb^m%&pCN+1XfGI5=6^c({02Sy&`kL_|5+MYzSqSy=#_ zTz}vF!}AYT#=+Ii#K8>sAFTENU|Ii9EU%ag(8SHrMcvWS?mwNDv9oh`b#wVgO(Leo z!Nl@U1OAIg9Oz>00W_C%akMA-R|R>k{};2n`#-dtCMEzjP98>XASZwkV8YJD$Zcw2 z!N~nL7c&4iy9I|CknG=J^Z%9dzrbu}JOB%Gb0DLcDZrGG6Tr^KXaWFmGXgl+xJ=p2 zElezcJY>v&!!bAGb+dM}1O9usMC{!DE!kQB(_mgZ69>z`Wn?r5T9~-oxseH)xLO$5 zIa)fBh$-mEk?4szI(PtG+<@i={LKH%lAZNm{J*6Ca>e|=2>zx1U&Hqw%fA!-kNDrK z^!M=J%LsJ%yMZo$SFIN2s1F1rLsmvyMBQs`LBn63Km+Bg>wA~u)y@ldUy&3Kgg8nH za-=Mz?I50*JtJup7R)1ZQJHmkSKKF@+pQUJ`;XTr~8$B3!Xv9<2C-n&@}s7Mm)74FgsQqJ-AUAN(IOJ7Urxtv~s_KUyXB zJdcm5Gw(Qc4YyUr&*nW~@cO_*(m@$-lsgC`?wogMgL6d&yf{2Ko86383!4I0e25LtZ?ZH)3dhc@p!=M&vkCC)eq z$yd``MY>#%6OV0b<9hDxd+ydM87V7Y&*_ zgwDQ$aJ>>{6k&{d=$Rm^?s`}d%s1lM+i}%Ha@?UHQXYfbL4;!p`I4&txaWe4sWo+1yJn^g11Gu@?5^pChuK! 
z^5;*^z@8Vtjh-sw+Bu#T6TDrT;>BXn_t+_@_C-0(y$+*l`-gh~TDS6KUz>e{FfQiK z8;&{>(nHB19;-*s;%BLFweGweIXbxX!kdL5tm|*bZ&xEIH>T*iV*RL10OU$40!a%f z|2GYsoeOD<>_xd?XHDE_t7&c<%j_DTY8`*-W>?)tE4ACkDtMb7QCI%LzbyXP%q6k3aQJ6Ts6VN5L`W{12bC>|X6)7lwZ1}$f4SX0@Reb*x(9wV>1K_S?$&}fc)}Bu^j4qtl z3Jyf> zvDx*w>Q4FHN*Si?c|w5anCDpVc;nc6>bmA-TZJe{>DuT4`wRO|s?v&1=Z4Is5%w#Y zz|N}vVbjX>W6zxcxEd=G2jx@(ckE-E&%r|r?I&Y+svHZyG>cl)e5sl>K)2b(th~d8 z+y}aS7M%}1GIn3%J3P4x>^|hW+V4pULuuWD1Jzo6n;u(y<&6Y_xS>JmeJd(845phM zRd^=~OB7V99dic2jQd6dAnvRc3T$8}8PlY*aT8H*Gq2C=7tf4O-058aLg0WDM ziu=43hTpRTzr&p*s=j-OB<%Mj;tzB`GDTSKm9<&IyuQBm?j{S^I%Q<~D@nEGAp3C#qO2U{1a}?6?#;~NqjudY1#r}fhamxY( z^DaT+vqVZn1h*N*KFy-(lY-v*g%DOf`rjzJ(K^slu;zZP&>5g~RFk>Oh+B$~=F&;= z+X)B%uvE1U89LNUP*9xs<6YE4E~iEDHPUY21e5w00- znM%3RXo4q?8vY!gC-sM|vbN}6UvDI{cmHd2SKhj5d59}gD|%=B3-i{VbKy-?8(rLK z5JW=U3A6!R1`@sy#0{N|Ph;rC^xF<^-jrXSnig1N=4?)t7vlpVlN>)z72z|B3BZ;^ z`Pw~r47lTeixEfQhmo_79Q>}gnlpyrpk}UAHFA3B886Z@0n_b`G;B|Bw6-U11s6<+ znFt^sxr)(+j(mfVjQl}|A2cX|aJoD?b{>gbpr#fK*FVQp42o@`hl*uc?a#JdF|bT* z9};}m|9t3u@y&-rQ)qV1Ne+z)-D_jwmCN&WF!!z7WiampHUsl^LLEVI&hHw_K=Myn zZW2=@&S~FJi?ro}!UC0YlXnJk;kK0+?&*+7E@ZP6ipeCooF2$2NOY7b{g$8;)+5LH zjA*KdL>_F50kfl|U7}O#ueZ^*lAv@%uY-iZVHdQ9ez3boyoFEWu%leC$sP`*#XMju zH(xk9=*Ky*-b6Q%=W)r?mfqI}o_?GY~T}IFgXKf-{3SEI)+(|T(^J!5!Y^2jG^4Zns9svZ^@}UM-T#W; zI)oD5=1|J1icr+SzP6@-OI|o9{_VAp!EcLXt`f=+sSPj-R?f+@{dGY%Qh9f&Pps#L zh{Pd`rar(^6pfLDm{ed={R+d0K!juxfuI%>%Ci0WK&PN1dav~-o$VDF)7q%l-_Y@! zJMi$Ld)a7w0^jIct+m!gt$JrZm!3AeN82tt*hWtis<%h^CWI{vwCD(nro1??7Hj@H zx3)Sau!K{d#gzL*?hy~ehD+q9f0v#XiohZ$sca|;ZGYPf`K}P+-67ZE!2!kha4Bg4 zGyg@I>1bz2$(QpFOs8<5b1la(Z95W~K@54AlwK2wGo%@%aEBW+l-NBO6^1;y3bZag*e{6P>fzF3-u zSehptmY80xL=-ELpGa<2xd4V)FXmeG$`p+82NW<(YI8b?d|U-DL0=qTSLh7&F)avh zXpjYo;o#jpbsY^GBneAx zmpDtmx*mk&eoHd)naMwe2}D3STW&~?g@fxm3HO*~aDwd&MY!zue2J3pke;z%GT zi{ZUwfAwV_$375CrVEht$&b}(y3q;E%^ z1fUjS_NIjCA#y{DIEbRo`h@$&C(yvxW&7%8-miaxN>fVg&b6b8K%YBvOKZ0WZ}OuQ zR_`~3j`3%(H0tt?V;z(tOI&?YO|oq`(TJ?j0yN>SAlhs=gT8JN!C&Ee!&!8OF-8*W zr{&VO1<;B+z76@ON%sn(NtS|c$AQnlczut{)r56Ef*}e8m=c2~Ll6T6lGScSkcq$g zc!JNxmik|2)q)8|%rTm34sqWsnU9KI78){eIIwpyT8p~vy~K(7CQ#seseD}=Pi1ZC zht$dOUi+O}$%!S|A+P(E3}8ylSztqDenprO;eu{eAAz8`q8q!&Ho{Hb$PETa z@$EQ`YB!bO!NY&H^7w9&7%4mraToJshBVHI_tbMU2ru)|>q!Yw#=r5kCS$9)f9q04`eqi41-+v zN~o|Cd%YL?w4sO!sqKEZbh9-C`4re*(_*d`rI$`gO>| z*cFc;4AxsI#jK7Cbb6T6)W8G_3;VKfqJz9n)JC!zEgWUh_b3A3J}~8>J9p7FZMu)^ zSnAR{g<#%ew|l)Bg1N@btHYNP_&%2kMo)pq1sKws{tdyo?;*wd@+9-&+rSy&W&3D&Ai#rVXaqB51^-0E8@t(|r_y~)ai*y6WPrsl z2-nR7>3z>;qTsL4YLc5RNExOgLd}Nzbzztd3%{kv(xa>r0Hp&v`|>tm)@l?j9-p!h zji#b@-S>OKkD$KE)9ZHfMqE+>h33|EU+(AlcUN9sHgRrq)FurJu+13Bfq2*!)uYJX zjWu0{+tLBN-S80EHNls}jSExKq4TsNFV799C4Au#&hOAa(CLz%Vha!Z8(?eodu#rF zj3CNLLLi1T9+Gr5bd{4EL$r@K7_l*S&-c*GIED6{wQOdUU*?5gXI8uva8k-Pf6%R! zk&#(dEWHnS4! z5y-&0QjS%kHBWwp)}W=aYiR`1mmGcZivRgt%F9=AN+}_MBmoJj)$!m;wZlG>j7hJ_ zlmLym<@YAcKdj&n3!m?o5sm0Ai$nJm1nwx($$`ln?OQOm6->k#SIlf3C=8j)zWhu1 zK2#_%uvaZ=+0`XM+t62w1F7R^S8453maE(L@kU`#cId`YK*QOv)1lZ)#VH6@ zg`}8uOFe%`Fx5^KBLk081CafignT4!!5sSLww-P=k^-kgwia;fW8ycL8o13dDwKM| z<9c_1eb8aTG+}IetgI`Mn<}B-3l$Yp`$|?Y4UmFffR@*_m(y=ME5`fj>et>ZiAL?* zJgXpPJ`YsTyo^b=DbPVeX=D7XmyTV>9y(rMI`^RitSz_jG?)wu8v*AFfhph%GiAyl z;^Jz7_NW7MM`D;BMpIH)LD*sUCxc2gMLvBP8&1*1v@jB2nPtben2shw*@XmV<+Jk?L3Mj3pp-kCHI4aN1GwKE_;F|J2oG*HOn!9< z8QEU@jocR*7qY1m+C}LC-kV{FzQeFkG*&w#VCg~_emf$+;Ubk2`f^0;v8)ZN}vDK<3sDYJ$?}7O1;E$EcqrQP3^^Kt)+x>Q`c|uw0bMwh+ zu=hqzcJOPr<%u+g_{}n zDZ}>W!Cgw408~tfmUAJ+)GZVn%RiUEyQgBaT+RdAG@}pKJ2C$9 zgMPp-mSsuqS_y_eQx&W<54s+U0@1EJ;nhyK;B&d9K3S{W! 
[GIT binary patch data omitted: base85-encoded binary literal, not human-readable]
literal 0
HcmV?d00001

diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
index 0c84fa2be23d9..bd5d9315e9fc4 100644
--- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs
@@ -383,8 +383,8 @@ IEnumerator IEnumerable.GetEnumerator()
     int ICollection.Count => Length;
     bool ICollection.IsReadOnly => true;
-    void ICollection.Add(byte[]? item) => throw new NotSupportedException("Collection is read-only.");
-    bool ICollection.Remove(byte[]? item) => throw new NotSupportedException("Collection is read-only.");
+    void ICollection.Add(byte[] item) => throw new NotSupportedException("Collection is read-only.");
+    bool ICollection.Remove(byte[] item) => throw new NotSupportedException("Collection is read-only.");
     void ICollection.Clear() => throw new NotSupportedException("Collection is read-only.");
     bool ICollection.Contains(byte[] item)
diff --git a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
index fa6f765475240..52bfb9eb20768 100644
--- a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs
@@ -13,6 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#nullable enable + using System; using System.Collections; using System.Collections.Generic; @@ -23,7 +25,7 @@ namespace Apache.Arrow { - public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList, IReadOnlyList + public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList, IReadOnlyList { public class Builder : BuilderBase { @@ -178,7 +180,7 @@ public Decimal256Array(ArrayData data) return list; } - public string GetString(int index) + public string? GetString(int index) { if (IsNull(index)) { @@ -230,10 +232,10 @@ public bool TryGetSqlDecimal(int index, out SqlDecimal? value) } } - int IReadOnlyCollection.Count => Length; - string? IReadOnlyList.this[int index] => GetString(index); + int IReadOnlyCollection.Count => Length; + string? IReadOnlyList.this[int index] => GetString(index); - IEnumerator IEnumerable.GetEnumerator() + IEnumerator IEnumerable.GetEnumerator() { for (int index = 0; index < Length; index++) { @@ -241,6 +243,6 @@ IEnumerator IEnumerable.GetEnumerator() } } - IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); } } diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index 8f0210b28240f..d42ee5279e795 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -40,10 +40,12 @@ internal NativeMemoryManager(INativeAllocationOwner owner, IntPtr ptr, int offse _owner = owner; } +#pragma warning disable CA2015 // TODO: is this correct? ~NativeMemoryManager() { Dispose(false); } +#pragma warning restore CA2015 public override unsafe Span GetSpan() { diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs index 4ad5bde0874a8..e5e64b073f799 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs @@ -14,6 +14,7 @@ // limitations under the License. 
#nullable enable + using System; using System.Collections.Generic; using System.Collections.ObjectModel; @@ -65,7 +66,7 @@ public async Task EnsureTheCorrectActionsAreGiven() var streamWriter = new MockServerStreamWriter(); //When - await producer.ListActions(streamWriter, new MockServerCallContext()).ConfigureAwait(false); + await producer.ListActions(streamWriter, new MockServerCallContext()); var actions = streamWriter.Messages.ToArray(); Assert.Equal(FlightSqlUtils.FlightSqlActions, actions); @@ -115,7 +116,7 @@ public void EnsureTableSchemaIsCorrectWithoutTableSchema(bool includeTableSchema [InlineData(typeof(CommandGetImportedKeys), "GetImportedKeysFlightInfo")] [InlineData(typeof(CommandGetCrossReference), "GetCrossReferenceFlightInfo")] [InlineData(typeof(CommandGetXdbcTypeInfo), "GetXdbcTypeFlightInfo")] - public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult) + public async Task EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult) { //Given var command = (IMessage) Activator.CreateInstance(commandType)!; @@ -131,7 +132,7 @@ public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandTyp [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor() { //Given var producer = new TestFlightSqlSever(); @@ -145,7 +146,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp } [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported() { //Given var producer = new TestFlightSqlSever(); @@ -175,7 +176,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp [InlineData(typeof(CommandGetImportedKeys), "DoGetImportedKeys")] [InlineData(typeof(CommandGetCrossReference), "DoGetCrossReference")] [InlineData(typeof(CommandGetXdbcTypeInfo), "DoGetXbdcTypeInfo")] - public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult) + public async Task EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult) { //Given var producer = new TestFlightSqlSever(); @@ -192,7 +193,7 @@ public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, } [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported() { //Given var producer = new TestFlightSqlSever(); @@ -213,7 +214,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNo [InlineData(SqlAction.CloseRequest, typeof(ActionClosePreparedStatementRequest), "ClosePreparedStatement")] [InlineData(SqlAction.CreateRequest, typeof(ActionCreatePreparedStatementRequest), "CreatePreparedStatement")] [InlineData("BadCommand", typeof(ActionCreatePreparedStatementRequest), "Action type BadCommand not supported", true)] - public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false) + public async Task EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false) { //Given var producer = new 
TestFlightSqlSever(); @@ -237,19 +238,19 @@ public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actio [InlineData(typeof(CommandPreparedStatementQuery), "PutPreparedStatementQuery")] [InlineData(typeof(CommandPreparedStatementUpdate), "PutPreparedStatementUpdate")] [InlineData(typeof(CommandGetXdbcTypeInfo), "Command CommandGetXdbcTypeInfo not supported", true)] - public async void EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false) + public async Task EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false) { //Given var command = (IMessage) Activator.CreateInstance(commandType)!; var producer = new TestFlightSqlSever(); var descriptor = FlightDescriptor.CreateCommandDescriptor(command.PackAndSerialize().ToArray()); var recordBatch = new RecordBatch(new Schema(new List(), null), System.Array.Empty(), 0); - var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor).ConfigureAwait(false)); + var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor)); var batchReader = new FlightServerRecordBatchStreamReader(reader); var mockStreamWriter = new MockServerStreamWriter(); //When - async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext()).ConfigureAwait(false); + async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext()); var exception = await Record.ExceptionAsync(Act); string? actualMessage = isException ? exception?.Message : mockStreamWriter.Messages[0].ApplicationMetadata.ToStringUtf8(); @@ -271,7 +272,7 @@ private class MockServerCallContext : ServerCallContext protected override CancellationToken CancellationTokenCore => default; protected override Metadata ResponseTrailersCore => new(); protected override Status StatusCore { get; set; } - protected override WriteOptions WriteOptionsCore { get; set; } = WriteOptions.Default; + protected override WriteOptions? WriteOptionsCore { get; set; } = WriteOptions.Default; protected override AuthContext AuthContextCore => new("", new Dictionary>()); } } @@ -325,7 +326,7 @@ public static async Task GetSchema(this IEnumerable flightDa public static async Task> ToFlightData(this RecordBatch recordBatch, FlightDescriptor? 
descriptor = null) { var responseStream = new MockFlightServerRecordBatchStreamWriter(); - await responseStream.WriteRecordBatchAsync(recordBatch).ConfigureAwait(false); + await responseStream.WriteRecordBatchAsync(recordBatch); if (descriptor == null) { return responseStream.FlightData; diff --git a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs index ebc38354b5c28..aac4e4209240a 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs +++ b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs @@ -288,9 +288,9 @@ public async Task TestHandshake() { var duplexStreamingCall = _flightClient.Handshake(); - await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty)).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty)); + await duplexStreamingCall.RequestStream.CompleteAsync(); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); Assert.Equal("Done", results.First().Payload.ToStringUtf8()); @@ -303,10 +303,10 @@ public async Task TestSingleExchange() var duplexStreamingCall = _flightClient.DoExchange(flightDescriptor); var expectedBatch = CreateTestBatch(0, 100); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch); + await duplexStreamingCall.RequestStream.CompleteAsync(); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); ArrowReaderVerifier.CompareBatches(expectedBatch, results.FirstOrDefault()); @@ -320,11 +320,11 @@ public async Task TestMultipleExchange() var expectedBatch1 = CreateTestBatch(0, 100); var expectedBatch2 = CreateTestBatch(100, 100); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2); + await duplexStreamingCall.RequestStream.CompleteAsync(); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); ArrowReaderVerifier.CompareBatches(expectedBatch1, results[0]); ArrowReaderVerifier.CompareBatches(expectedBatch2, results[1]); @@ -338,8 +338,8 @@ public async Task TestExchangeWithMetadata() var expectedBatch = CreateTestBatch(0, 100); var expectedMetadata = ByteString.CopyFromUtf8("test metadata"); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata); + await duplexStreamingCall.RequestStream.CompleteAsync(); List actualMetadata = new List(); List actualBatch = new List(); @@ -358,9 +358,9 @@ public 
async Task TestHandshakeWithSpecificMessage() { var duplexStreamingCall = _flightClient.Handshake(); - await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello"))).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello"))); + await duplexStreamingCall.RequestStream.CompleteAsync(); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); Assert.Equal("Hello handshake", results.First().Payload.ToStringUtf8()); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs index d3032b8d4ac40..c3c21c412d20d 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs @@ -212,11 +212,11 @@ private static void TestPrimitiveArrayAsCollection(IRe // Parameter 'values' must contain four values. The last value must be distinct from the rest. private static void TestObjectArrayAsCollection(TArray array, T nullValue, IReadOnlyList values) where T : class - where TArray : IArrowArray, ICollection + where TArray : IArrowArray, ICollection { Assert.NotNull(array); Assert.Equal(4, values.Count); - var collection = (ICollection)array; + var collection = (ICollection)array; Assert.Equal(array.Length, collection.Count); Assert.Equal(4, collection.Count); @@ -232,7 +232,7 @@ private static void TestObjectArrayAsCollection(TArray array, T nullV Assert.False(collection.Contains(values[3])); T sentinel = values[2]; - T?[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; + T[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; collection.CopyTo(destArr, 1); Assert.Equal(sentinel, destArr[0]); Assert.Equal(values[0], destArr[1]); diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs index 59080d739b10b..412f67de5f0fb 100644 --- a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs @@ -115,7 +115,7 @@ public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType typ Assert.Equal(timeSpan, array.GetTimeSpan(0)); IReadOnlyList asList = array; - Assert.Equal(1, asList.Count); + Assert.Single(asList); Assert.Equal(timeSpan, asList[0]); } } From 657c4faf21700c0899703a4759bde76235c38199 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 14 May 2024 17:54:40 -0300 Subject: [PATCH 108/261] GH-41596: [C++] fixed_width_internal.h: Simplify docstring and support bit-sized types (BOOL) (#41597) ### Rationale for this change Post-merge feedback from #41297. ### What changes are included in this PR? - Supporting `BOOL` as both a top-level and nested in FSL types - Removing the long example from the docstring of `IsFixedWidthLike` These changes don't affect users because this header was added recently and not released. ### Are these changes tested? Yes, by existing and new test cases. 
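For illustration, a minimal usage sketch of the helpers this change touches (`IsFixedWidthLike`, `FixedWidthInBits`, and `OffsetPointerOfFixedBitWidthValues` are from this header; the caller below is hypothetical and assumes an in-tree build):

```cpp
// Hypothetical in-tree caller, assuming `values` is an ArraySpan of type
// fixed_size_list<bool, 3>. Before this change, BOOL nested in a
// fixed-size list was rejected by IsFixedWidthLike.
#include <cstdint>
#include <utility>
#include "arrow/array/data.h"
#include "arrow/util/fixed_width_internal.h"

int64_t TotalValueBits(const arrow::ArraySpan& values) {
  using namespace arrow::util;
  if (!IsFixedWidthLike(values)) {
    return -1;  // not fixed-width like (e.g. variable-length types)
  }
  // For fixed_size_list<bool, 3> this is 3 slots * 1 bit = 3 bits per value.
  const int64_t bit_width = FixedWidthInBits(*values.type);
  // Bit-sized values may not byte-align, so the pointer comes with a
  // residual 0-7 bit offset into the first byte.
  auto [bit_offset, data] = OffsetPointerOfFixedBitWidthValues(values);
  (void)bit_offset;
  (void)data;
  return values.length * bit_width;
}
```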
* GitHub Issue: #41596 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- .../vector_selection_filter_internal.cc | 4 +- .../kernels/vector_selection_internal.cc | 4 +- .../kernels/vector_selection_take_internal.cc | 5 +- cpp/src/arrow/util/fixed_width_internal.cc | 100 +++--- cpp/src/arrow/util/fixed_width_internal.h | 286 +++++++++--------- cpp/src/arrow/util/fixed_width_test.cc | 21 +- 6 files changed, 212 insertions(+), 208 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 8d43c65668d4b..5e24331fe96f2 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -164,7 +164,7 @@ class PrimitiveFilterImpl { values_is_valid_(values.buffers[0].data), // No offset applied for boolean because it's a bitmap values_data_(kIsBoolean ? values.buffers[1].data - : util::OffsetPointerOfFixedWidthValues(values)), + : util::OffsetPointerOfFixedByteWidthValues(values)), values_null_count_(values.null_count), values_offset_(values.offset), values_length_(values.length), @@ -470,7 +470,7 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult // validity bitmap. const bool allocate_validity = values.null_count != 0 || !filter_null_count_is_zero; - DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false)); + DCHECK(util::IsFixedWidthLike(values)); const int64_t bit_width = util::FixedWidthInBits(*values.type); RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( ctx, output_length, /*source=*/values, allocate_validity, out_arr)); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index 93cd5060348db..2ba660e49ac38 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -898,7 +898,7 @@ Status FSLFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) // PrimitiveFilterExec for a fixed-size list array. if (util::IsFixedWidthLike(values, /*force_null_count=*/true, - /*exclude_dictionary=*/true)) { + /*exclude_bool_and_dictionary=*/true)) { const auto byte_width = util::FixedWidthInBytes(*values.type); // 0 is a valid byte width for FixedSizeList, but PrimitiveFilterExec // might not handle it correctly. @@ -971,7 +971,7 @@ Status FSLTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { // PrimitiveTakeExec for a fixed-size list array. if (util::IsFixedWidthLike(values, /*force_null_count=*/true, - /*exclude_dictionary=*/true)) { + /*exclude_bool_and_dictionary=*/true)) { const auto byte_width = util::FixedWidthInBytes(*values.type); // Additionally, PrimitiveTakeExec is only implemented for specific byte widths. // TODO(GH-41301): Extend PrimitiveTakeExec for any fixed-width type. 
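A condensed sketch of the fast-path gate the FSL selection kernels above now share (the guard and the zero-byte-width caveat come from the hunks; the standalone wrapper function and its name are hypothetical):

```cpp
// Hypothetical wrapper condensing the check used by FSLFilterExec/FSLTakeExec
// above. BOOL and DICTIONARY are excluded because the primitive selection
// kernels operate on byte-aligned, non-dictionary values.
#include "arrow/array/data.h"
#include "arrow/util/fixed_width_internal.h"

bool CanUsePrimitiveSelectionPath(const arrow::ArraySpan& values) {
  if (!arrow::util::IsFixedWidthLike(values, /*force_null_count=*/true,
                                     /*exclude_bool_and_dictionary=*/true)) {
    return false;
  }
  // 0 is a valid byte width for FixedSizeList, but the primitive exec paths
  // might not handle it correctly, so require a positive width.
  return arrow::util::FixedWidthInBytes(*values.type) > 0;
}
```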
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 48a2de9936cd4..1a9af0efcd700 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -347,7 +347,7 @@ struct PrimitiveTakeImpl { static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { DCHECK_EQ(util::FixedWidthInBytes(*values.type), kValueWidth); - const auto* values_data = util::OffsetPointerOfFixedWidthValues(values); + const auto* values_data = util::OffsetPointerOfFixedByteWidthValues(values); const uint8_t* values_is_valid = values.buffers[0].data; auto values_offset = values.offset; @@ -588,8 +588,7 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ArrayData* out_arr = out->array_data().get(); - DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false, - /*exclude_dictionary=*/true)); + DCHECK(util::IsFixedWidthLike(values)); const int64_t bit_width = util::FixedWidthInBits(*values.type); // TODO: When neither values nor indices contain nulls, we can skip diff --git a/cpp/src/arrow/util/fixed_width_internal.cc b/cpp/src/arrow/util/fixed_width_internal.cc index 164af3cff66b3..3f12fafb54f0f 100644 --- a/cpp/src/arrow/util/fixed_width_internal.cc +++ b/cpp/src/arrow/util/fixed_width_internal.cc @@ -33,11 +33,12 @@ namespace arrow::util { using ::arrow::internal::checked_cast; bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count, - bool exclude_dictionary) { - return IsFixedWidthLike(source, force_null_count, - [exclude_dictionary](const DataType& type) { - return !exclude_dictionary || type.id() != Type::DICTIONARY; - }); + bool exclude_bool_and_dictionary) { + return IsFixedWidthLike( + source, force_null_count, [exclude_bool_and_dictionary](const DataType& type) { + return !exclude_bool_and_dictionary || + (type.id() != Type::DICTIONARY && type.id() != Type::BOOL); + }); } static int64_t FixedWidthInBytesFallback(const FixedSizeListType& fixed_size_list_type) { @@ -73,16 +74,37 @@ int64_t FixedWidthInBytes(const DataType& type) { return -1; } +static int64_t FixedWidthInBitsFallback(const FixedSizeListType& fixed_size_list_type) { + auto* fsl = &fixed_size_list_type; + int64_t list_size = fsl->list_size(); + for (auto type = fsl->value_type().get();;) { + auto type_id = type->id(); + if (type_id == Type::FIXED_SIZE_LIST) { + fsl = checked_cast(type); + list_size *= fsl->list_size(); + type = fsl->value_type().get(); + continue; + } + if (is_fixed_width(type_id)) { + const int64_t flat_bit_width = list_size * type->bit_width(); + DCHECK_GE(flat_bit_width, 0); + return flat_bit_width; + } + break; + } + return -1; +} + int64_t FixedWidthInBits(const DataType& type) { auto type_id = type.id(); if (is_fixed_width(type_id)) { return type.bit_width(); } - const int64_t byte_width = FixedWidthInBytes(type); - if (ARROW_PREDICT_FALSE(byte_width < 0)) { - return -1; + if (type_id == Type::FIXED_SIZE_LIST) { + auto& fsl = ::arrow::internal::checked_cast(type); + return FixedWidthInBitsFallback(fsl); } - return byte_width * 8; + return -1; } namespace internal { @@ -121,9 +143,6 @@ Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, if (type->id() == Type::FIXED_SIZE_LIST) { auto& fsl_type = checked_cast(*type); auto& value_type = fsl_type.value_type(); - if (ARROW_PREDICT_FALSE(value_type->id() == Type::BOOL)) { - return 
Status::Invalid("PreallocateFixedWidthArrayData: Invalid type: ", fsl_type); - } if (ARROW_PREDICT_FALSE(value_type->id() == Type::DICTIONARY)) { return Status::NotImplemented( "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type); @@ -146,16 +165,13 @@ Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, } // namespace internal -/// \pre same as OffsetPointerOfFixedWidthValues -/// \pre source.type->id() != Type::BOOL -static const uint8_t* OffsetPointerOfFixedWidthValuesFallback(const ArraySpan& source) { +std::pair OffsetPointerOfFixedBitWidthValues( + const ArraySpan& source) { using OffsetAndListSize = std::pair; auto get_offset = [](auto pair) { return pair.first; }; auto get_list_size = [](auto pair) { return pair.second; }; ::arrow::internal::SmallVector stack; - DCHECK_NE(source.type->id(), Type::BOOL); - int64_t list_size = 1; auto* array = &source; while (array->type->id() == Type::FIXED_SIZE_LIST) { @@ -166,31 +182,25 @@ static const uint8_t* OffsetPointerOfFixedWidthValuesFallback(const ArraySpan& s // Now that innermost values were reached, pop the stack and calculate the offset // in bytes of the innermost values buffer by considering the offset at each // level of nesting. - DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); + DCHECK(is_fixed_width(*array->type)); DCHECK(array == &source || !array->MayHaveNulls()) << "OffsetPointerOfFixedWidthValues: array is expected to be flat or have no " "nulls in the arrays nested by FIXED_SIZE_LIST."; - int64_t value_width = array->type->byte_width(); - int64_t offset_in_bytes = array->offset * value_width; + int64_t value_width_in_bits = array->type->bit_width(); + int64_t offset_in_bits = array->offset * value_width_in_bits; for (auto it = stack.rbegin(); it != stack.rend(); ++it) { - value_width *= get_list_size(*it); - offset_in_bytes += get_offset(*it) * value_width; + value_width_in_bits *= get_list_size(*it); + offset_in_bits += get_offset(*it) * value_width_in_bits; } - return value_width < 0 ? nullptr : array->GetValues(1, offset_in_bytes); + DCHECK_GE(value_width_in_bits, 0); + const auto* values_ptr = array->GetValues(1, 0); + return {static_cast(offset_in_bits % 8), values_ptr + (offset_in_bits / 8)}; } -const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source) { - auto type_id = source.type->id(); - if (is_fixed_width(type_id)) { - if (ARROW_PREDICT_FALSE(type_id == Type::BOOL)) { - // BOOL arrays are bit-packed, thus a byte-aligned pointer cannot be produced in the - // general case. Returning something for BOOL arrays that happen to byte-align - // because offset=0 would create too much confusion. - return nullptr; - } - return source.GetValues(1, 0) + source.offset * source.type->byte_width(); - } - return OffsetPointerOfFixedWidthValuesFallback(source); +const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source) { + DCHECK(IsFixedWidthLike(source, /*force_null_count=*/false, + [](const DataType& type) { return type.id() != Type::BOOL; })); + return OffsetPointerOfFixedBitWidthValues(source).second; } /// \brief Get the mutable pointer to the fixed-width values of an array @@ -203,24 +213,20 @@ const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source) { /// \return The mutable pointer to the fixed-width byte blocks of the array. If /// pre-conditions are not satisfied, the return values is undefined. 
uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array) { - auto type_id = mutable_array->type->id(); - if (type_id == Type::FIXED_SIZE_LIST) { - auto* array = mutable_array; - do { - DCHECK_EQ(array->offset, 0); - DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of " - << mutable_array->type->ToString(true); - array = array->child_data[0].get(); - } while (array->type->id() == Type::FIXED_SIZE_LIST); + auto* array = mutable_array; + auto type_id = array->type->id(); + while (type_id == Type::FIXED_SIZE_LIST) { DCHECK_EQ(array->offset, 0); - DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); - return array->GetMutableValues(1, 0); + DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of " + << mutable_array->type->ToString(true); + array = array->child_data[0].get(); + type_id = array->type->id(); } DCHECK_EQ(mutable_array->offset, 0); // BOOL is allowed here only because the offset is expected to be 0, // so the byte-aligned pointer also points to the first *bit* of the buffer. DCHECK(is_fixed_width(type_id)); - return mutable_array->GetMutableValues(1, 0); + return array->GetMutableValues(1, 0); } } // namespace arrow::util diff --git a/cpp/src/arrow/util/fixed_width_internal.h b/cpp/src/arrow/util/fixed_width_internal.h index f6959485fbd01..232411f4c4a56 100644 --- a/cpp/src/arrow/util/fixed_width_internal.h +++ b/cpp/src/arrow/util/fixed_width_internal.h @@ -56,146 +56,140 @@ namespace arrow::util { /// Additionally, we say that a type is "fixed-width like" if it's a fixed-width as /// defined above, or if it's a fixed-size list (or nested fixed-size lists) and /// the innermost type is fixed-width and the following restrictions also apply: -/// - The value type of the innermost fixed-size list is not BOOL (it has to be excluded -/// because a 1-bit type doesn't byte-align) /// - Only the top-level array may have nulls, all the inner array have to be completely /// free of nulls so we don't need to manage internal validity bitmaps. /// -/// Take the following `fixed_size_list, 3>` array as an -/// example: -/// -/// [ -/// [[1, 2], [3, 4], [ 5, 6]], -/// null, -/// [[7, 8], [9, 10], [11, 12]] -/// ] -/// -/// in memory, it would look like: -/// -/// { -/// type: fixed_size_list, 3>, -/// length: 3, -/// null_count: 1, -/// offset: 0, -/// buffers: [ -/// 0: [0b00000101] -/// ], -/// child_data: [ -/// 0: { -/// type: fixed_size_list, -/// length: 9, -/// null_count: 0, -/// offset: 0, -/// buffers: [0: NULL], -/// child_data: [ -/// 0: { -/// type: int32, -/// length: 18, -/// null_count: 0, -/// offset: 0, -/// buffers: [ -/// 0: NULL, -/// 1: [ 1, 2, 3, 4, 5, 6, -/// 0, 0, 0, 0, 0, 0 -/// 7, 8, 9, 10, 11, 12 ] -/// ], -/// child_data: [] -/// } -/// ] -/// } -/// ] -/// } -/// -/// This layout fits the fixed-width like definition because the innermost type -/// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't -/// have nulls. The validity bitmap is only needed at the top-level array. -/// -/// Writing to this array can be done in the same way writing to a flat fixed-width -/// array is done, by: -/// 1. Updating the validity bitmap at the top-level array if nulls are present. -/// 2. Updating a continuous fixed-width block of memory through a single pointer. 
-/// -/// The length of this block of memory is the product of the list sizes in the -/// `FixedSizeList` types and the byte width of the innermost fixed-width type: -/// -/// 3 * 2 * 4 = 24 bytes -/// -/// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by -/// simply setting the validity bit to 1 and writing the 24-byte sequence of -/// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`. -/// -/// The length of the top-level array fully defines the lengths that all the nested -/// arrays must have, which makes defining all the lengths as easy as defining the -/// length of the top-level array. -/// -/// length = 3 -/// child_data[0].length == 3 * 3 == 9 -/// child_data[0].child_data[0].length == 3 * 3 * 2 == 18 -/// -/// child_data[0].child_data[0].buffers[1].size() >= -/// (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72) -/// -/// Dealing with offsets is a bit involved. Let's say the array described above has -/// the offsets 2, 5, and 7: -/// -/// { -/// type: fixed_size_list, 3>, -/// offset: 2, -/// ... -/// child_data: [ -/// 0: { -/// type: fixed_size_list, -/// offset: 5, -/// ... -/// child_data: [ -/// 0: { -/// type: int32, -/// offset: 7, -/// buffers: [ -/// 0: NULL, -/// 1: [ 1, 1, 1, 1, 1, 1, 1, // 7 values skipped -/// 0,1, 0,1, 0,1, 0,1, 0,1, // 5 [x,x] values skipped -/// -/// 0,0,0,0,0,1, // -/// 0,0,0,0,0,1, // 2 [[x,x], [x,x], [x,x]] values skipped -/// -/// 1, 2, 3, 4, 5, 6, // -/// 0, 0, 0, 0, 0, 0 // the actual values -/// 7, 8, 9, 10, 11, 12 // -/// ] -/// ], -/// } -/// ] -/// } -/// ] -/// } -/// -/// The offset of the innermost values buffer, in bytes, is calculated as: -/// -/// ((2 * 3) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes -/// -/// In general, the formula to calculate the offset of the innermost values buffer is: -/// -/// ((off_0 * fsl_size_0) + (off_1 * fsl_size_1) + ... + innermost_off) -/// * sizeof(innermost_type) -/// -/// `OffsetPointerOfFixedWidthValues()` can calculate this byte offset and return the -/// pointer to the first relevant byte of the innermost values buffer. -/// /// \param source The array to check /// \param force_null_count If true, GetNullCount() is used instead of null_count -/// \param exclude_dictionary If true, DICTIONARY is excluded from the -/// is_fixed_width() types. Default: false. +/// \param exclude_bool_and_dictionary If true, BOOL and DICTIONARY are excluded from +/// the is_fixed_width() types. Default: false. 
ARROW_EXPORT bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count = false, - bool exclude_dictionary = false); + bool exclude_bool_and_dictionary = false); + +// Take the following `fixed_size_list, 3>` array as an +// example: +// +// [ +// [[1, 2], [3, 4], [ 5, 6]], +// null, +// [[7, 8], [9, 10], [11, 12]] +// ] +// +// in memory, it would look like: +// +// { +// type: fixed_size_list, 3>, +// length: 3, +// null_count: 1, +// offset: 0, +// buffers: [ +// 0: [0b00000101] +// ], +// child_data: [ +// 0: { +// type: fixed_size_list, +// length: 9, +// null_count: 0, +// offset: 0, +// buffers: [0: NULL], +// child_data: [ +// 0: { +// type: int32, +// length: 18, +// null_count: 0, +// offset: 0, +// buffers: [ +// 0: NULL, +// 1: [ 1, 2, 3, 4, 5, 6, +// 0, 0, 0, 0, 0, 0 +// 7, 8, 9, 10, 11, 12 ] +// ], +// child_data: [] +// } +// ] +// } +// ] +// } +// +// This layout fits the fixed-width like definition because the innermost type +// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't +// have nulls. The validity bitmap is only needed at the top-level array. +// +// Writing to this array can be done in the same way writing to a flat fixed-width +// array is done, by: +// 1. Updating the validity bitmap at the top-level array if nulls are present. +// 2. Updating a continuous fixed-width block of memory through a single pointer. +// +// The length of this block of memory is the product of the list sizes in the +// `FixedSizeList` types and the byte width of the innermost fixed-width type: +// +// 3 * 2 * 4 = 24 bytes +// +// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by +// simply setting the validity bit to 1 and writing the 24-byte sequence of +// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`. +// +// The length of the top-level array fully defines the lengths that all the nested +// arrays must have, which makes defining all the lengths as easy as defining the +// length of the top-level array. +// +// length = 3 +// child_data[0].length == 3 * 3 == 9 +// child_data[0].child_data[0].length == 3 * 3 * 2 == 18 +// +// child_data[0].child_data[0].buffers[1].size() >= +// (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72) +// +// Dealing with offsets is a bit involved. Let's say the array described above has +// the offsets 2, 5, and 7: +// +// { +// type: fixed_size_list, 3>, +// offset: 2, +// ... +// child_data: [ +// 0: { +// type: fixed_size_list, +// offset: 5, +// ... +// child_data: [ +// 0: { +// type: int32, +// offset: 7, +// buffers: [ +// 0: NULL, +// 1: [ 1, 1, 1, 1, 1, 1, 1, // 7 values skipped +// 0,1, 0,1, 0,1, 0,1, 0,1, // 5 [x,x] values skipped +// +// 0,0,0,0,0,1, // +// 0,0,0,0,0,1, // 2 [[x,x], [x,x], [x,x]] values skipped +// +// 1, 2, 3, 4, 5, 6, // +// 0, 0, 0, 0, 0, 0 // the actual values +// 7, 8, 9, 10, 11, 12 // +// ] +// ], +// } +// ] +// } +// ] +// } +// +// The offset of the innermost values buffer, in bytes, is calculated as: +// +// ((2 * 3) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes +// +// In general, the formula to calculate the offset of the innermost values buffer is: +// +// ((off_0 * fsl_size_0) + (off_1 * fsl_size_1) + ... + innermost_off) +// * sizeof(innermost_type) +// +// `OffsetPointerOfFixedByteWidthValues()` can calculate this byte offset and return +// the pointer to the first relevant byte of the innermost values buffer. 
/// \brief Checks if the given array has a fixed-width type or if it's an array of /// fixed-size list that can be flattened to an array of fixed-width values. /// -/// This function is a more general version of -/// `IsFixedWidthLike(const ArraySpan&, bool)` that allows the caller to further -/// restrict the inner value types that should be considered fixed-width. -/// /// \param source The array to check /// \param force_null_count If true, GetNullCount() is used instead of null_count /// \param extra_predicate A DataType predicate that can be used to further @@ -217,9 +211,7 @@ inline bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count, values = &values->child_data[0]; continue; } - // BOOL has to be excluded because it's not byte-aligned. - return type->id() != Type::BOOL && is_fixed_width(type->id()) && - extra_predicate(*type); + return is_fixed_width(type->id()) && extra_predicate(*type); } } return false; @@ -251,6 +243,10 @@ ARROW_EXPORT int64_t FixedWidthInBytes(const DataType& type); /// \brief Get the fixed-width in bits of a type if it is a fixed-width like /// type. /// +/// If the array is a FixedSizeList (of any level of nesting), the bit width of +/// the values is the product of all fixed-list sizes and the bit width of the +/// innermost fixed-width value type. +/// /// \return The bit-width of the values or -1 /// \see FixedWidthInBytes ARROW_EXPORT int64_t FixedWidthInBits(const DataType& type); @@ -260,7 +256,7 @@ namespace internal { /// \brief Allocate an ArrayData for a type that is fixed-width like. /// /// This function performs the same checks performed by -/// `IsFixedWidthLike(source, false)`. If `source.type` is not a simple +/// `IsFixedWidthLike(source, false, false)`. If `source.type` is not a simple /// fixed-width type, caller should make sure it passes the /// `IsFixedWidthLike(source)` checks. That guarantees that it's possible to /// allocate an array that can serve as a destination for a kernel that writes values @@ -280,18 +276,24 @@ ARROW_EXPORT Status PreallocateFixedWidthArrayData(::arrow::compute::KernelConte } // namespace internal -/// \brief Get the pointer to the fixed-width values of a fixed-width like array. +/// \brief Get the 0-7 residual offset in bits and the pointer to the fixed-width +/// values of a fixed-width like array. /// -/// This function might return NULLPTR if the type of the array is BOOL or -/// if the pre-conditions listed are not satisfied. The converse is not true -/// (i.e. not getting NULLPTR doesn't guarantee that source is a fixed-width -/// like array). +/// For byte-aligned types, the offset is always 0. /// /// \pre `IsFixedWidthLike(source)` or the more restrictive /// is_fixed_width(*mutable_array->type) SHOULD be true -/// \return The pointer to the fixed-width values of an array or NULLPTR -/// if pre-conditions are not satisfied. -ARROW_EXPORT const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source); +/// \return A pair with the residual offset in bits (0-7) and the pointer +/// to the fixed-width values. +ARROW_EXPORT std::pair OffsetPointerOfFixedBitWidthValues( + const ArraySpan& source); + +/// \brief Get the pointer to the fixed-width values of a fixed-width like array. +/// +/// \pre `IsFixedWidthLike(source)` should be true and BOOL should be excluded +/// as each bool is 1-bit width making it impossible to produce a +/// byte-aligned pointer to the values in the general case. 
+ARROW_EXPORT const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source); /// \brief Get the mutable pointer to the fixed-width values of an array /// allocated by PreallocateFixedWidthArrayData. diff --git a/cpp/src/arrow/util/fixed_width_test.cc b/cpp/src/arrow/util/fixed_width_test.cc index 2f05221ed6535..3b35de1b6bbeb 100644 --- a/cpp/src/arrow/util/fixed_width_test.cc +++ b/cpp/src/arrow/util/fixed_width_test.cc @@ -80,10 +80,7 @@ TEST_F(TestFixedWidth, IsFixedWidth) { TEST_F(TestFixedWidth, IsFixedWidthLike) { auto arr = ArraySpan{*fsl_bool_array_->data()}; - // bools wrapped by fixed-size-list are not fixed-width because the - // innermost data buffer is a bitmap and won't byte-align. - ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false)); - ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true)); + ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); arr = ArraySpan{*fsl_int_array_->data()}; ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); @@ -114,12 +111,12 @@ TEST_F(TestFixedWidth, IsFixedWidthLike) { arr = ArraySpan{*dict_string_array_->data()}; // Dictionaries are considered fixed-width by is_fixed_width(), but excluded - // by IsFixedWidthLike if exclude_dictionary=true. + // by IsFixedWidthLike if exclude_bool_and_dictionary=true. ASSERT_TRUE(IsFixedWidthLike(arr)); - ASSERT_TRUE( - IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/false)); - ASSERT_FALSE( - IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/true)); + ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/false)); + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/true)); } TEST_F(TestFixedWidth, MeasureWidthInBytes) { @@ -184,9 +181,9 @@ TEST_F(TestFixedWidth, MeasureWidthInBits) { ASSERT_EQ(FixedWidthInBits(*varlen), -1); ASSERT_EQ(FixedWidthInBits(*varlen), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(3, b)), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), -1); + ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(3, b)), 3); + ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), 5); ASSERT_EQ(FixedWidthInBits(*fsl(0, i8)), 0); ASSERT_EQ(FixedWidthInBits(*fsl(3, i8)), 3 * 8); From 2ca9ad2861387a08244427eb1a2457c32a8ed31a Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Wed, 15 May 2024 10:00:48 -0400 Subject: [PATCH 109/261] GH-41653: [MATLAB] Add new `arrow.c.Array` MATLAB class which wraps a C Data Interface format `ArrowArray` C struct (#41655) ### Rationale for this change Now that the MATLAB interface has support for `arrow.tabular.RecordBatch` and `arrow.array.Array`, we should add support for the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html) format. The C Data Interface is based around two C struct definitions: (1) `ArrowArray` and (2) `ArrowSchema`. We should start by adding a new MATLAB class (e.g. `arrow.c.Array`) which wraps the underlying `ArrowArray` C struct. Later, we can add another new MATLAB class (e.g. `arrow.c.Schema`) which wraps the `ArrowSchema` C struct. Once we have added these two MATLAB classes, we can then add import and export functionality to share the Arrow memory between multiple language runtimes running in the same process. 
This would help enable workflows like sharing Arrow data between the MATLAB Interface to Arrow and `pyarrow` running within the MATLAB process via the [MATLAB interface to Python](https://www.mathworks.com/help/matlab/call-python-libraries.html).

### What changes are included in this PR?

1. Added a new C++ proxy class called `arrow::matlab::c::proxy::Array` which wraps an `ArrowArray` `struct` pointer. This class is registered as the proxy `arrow.c.proxy.Array` in order to make it accessible to MATLAB.
2. Added a new MATLAB class called `arrow.c.Array` that has an `arrow.c.proxy.Array` instance. It has one public property named `Address`, which is a scalar `uint64`. This property is the memory address of the `ArrowArray` `struct` pointer owned by `arrow.c.proxy.Array`.

### Are these changes tested?

Yes.

1. Added a new test class called `test/arrow/c/tArray.m`.
2. @ kevingurney and I created a prototype for importing and exporting arrow `Array`s via the C Data Interface format [here](https://github.com/mathworks/arrow/tree/arrow-array-address). We were able to share arrow `Array`s and `RecordBatch`es between mlarrow and pyarrow. Our plan now is to submit the necessary MATLAB code incrementally.

### Are there any user-facing changes?

Yes. The `arrow.c.Array` class is user-facing. However, it's only intended for "advanced" use-cases. In the future, we may add higher-level functionality on top of the C Data Interface so that users don't need to interact with it directly.

**NOTE:** On destruction, `arrow.c.proxy.Array` will check to see if the `ArrowArray` has already been consumed by an importer. If not, `arrow.c.proxy.Array`'s destructor will call the `release` callback on the `ArrowArray` to avoid memory leaks. To the best of our knowledge, this is similar to how the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) works.

### Future Directions

1. #41654
2. #41656

* GitHub Issue: #41653

Lead-authored-by: Sarah Gilmore
Co-authored-by: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com>
Co-authored-by: Kevin Gurney
Signed-off-by: Sarah Gilmore
---
 matlab/src/cpp/arrow/matlab/c/proxy/array.cc  | 49 +++++++++++++++++++
 matlab/src/cpp/arrow/matlab/c/proxy/array.h   | 41 ++++++++++++++++
 matlab/src/cpp/arrow/matlab/proxy/factory.cc  |  2 +
 matlab/src/matlab/+arrow/+c/Array.m           | 37 ++++++++++++++
 matlab/test/arrow/c/tArray.m                  | 48 ++++++++++++++++++
 .../cmake/BuildMatlabArrowInterface.cmake     |  3 +-
 6 files changed, 179 insertions(+), 1 deletion(-)
 create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/array.cc
 create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/array.h
 create mode 100644 matlab/src/matlab/+arrow/+c/Array.m
 create mode 100644 matlab/test/arrow/c/tArray.m

diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.cc b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc
new file mode 100644
index 0000000000000..a5f3418f1bcfa
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include "arrow/c/abi.h"
+
+#include "arrow/matlab/c/proxy/array.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+Array::Array() : arrowArray{} { REGISTER_METHOD(Array, getAddress); }
+
+Array::~Array() {
+  if (arrowArray.release != NULL) {
+    arrowArray.release(&arrowArray);
+    arrowArray.release = NULL;
+  }
+}
+
+libmexclass::proxy::MakeResult Array::make(
+    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+  return std::make_shared<Array>();
+}
+
+void Array::getAddress(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+
+  mda::ArrayFactory factory;
+  auto address = reinterpret_cast<uint64_t>(&arrowArray);
+  context.outputs[0] = factory.createScalar(address);
+}
+
+}  // namespace arrow::matlab::c::proxy
\ No newline at end of file
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.h b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
new file mode 100644
index 0000000000000..b42b2dcd9cfa8
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
@@ -0,0 +1,41 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+ +#include "arrow/c/abi.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class Array : public libmexclass::proxy::Proxy { + public: + Array(); + + ~Array(); + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getAddress(libmexclass::proxy::method::Context& context); + + struct ArrowArray arrowArray; + + // struct ArrowArray* arrowArray; +}; + +} // namespace arrow::matlab::c::proxy \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 23492f75deacc..cf13ed6aa57fa 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,6 +25,7 @@ #include "arrow/matlab/array/proxy/time64_array.h" #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/buffer/proxy/buffer.h" +#include "arrow/matlab/c/proxy/array.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" #include "arrow/matlab/io/csv/proxy/table_writer.h" @@ -99,6 +100,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy( REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); + REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); // clang-format on return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, diff --git a/matlab/src/matlab/+arrow/+c/Array.m b/matlab/src/matlab/+arrow/+c/Array.m new file mode 100644 index 0000000000000..574fca9afebd8 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Array.m @@ -0,0 +1,37 @@ +%ARRAY Wrapper for an Arrow C Data Interface format ArrowArray C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef Array < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Array() + proxyName = "arrow.c.proxy.Array"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tArray.m b/matlab/test/arrow/c/tArray.m new file mode 100644 index 0000000000000..f8caf48065114 --- /dev/null +++ b/matlab/test/arrow/c/tArray.m @@ -0,0 +1,48 @@ +%TARRAY Defines unit tests for arrow.c.Array. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. 
See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tArray < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + array = arrow.c.Array(); + + % Verify array is an instance of arrow.c.Array. + testCase.verifyInstanceOf(array, "arrow.c.Array"); + + % Verify array has one public property named Address. + props = properties(array); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + array = arrow.c.Array(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = array.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. + array = arrow.c.Array(); + fcn = @() setfield(array, "Address", uint64(10)); + testCase.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + end +end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index e1641842ca8b9..7a8cf8f40358b 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -75,7 +75,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_writer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_reader.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") From 82045527b775d3847d3f34ebb51af852c76a2e44 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Wed, 15 May 2024 10:49:29 -0400 Subject: [PATCH 110/261] GH-41654: [MATLAB] Add new `arrow.c.Schema` MATLAB class which wraps a C Data Interface format `ArrowSchema` C struct (#41674) ### Rationale for this change Now that the MATLAB interface has support for `arrow.tabular.RecordBatch` and `arrow.array.Array`, we should add support for the [C Data Interface](https://arrow.apache.org/docs/format/CDataInterface.html) format. The C Data Interface is based around two C struct definitions: (1) `ArrowArray` and (2) `ArrowSchema`. Now that #41653 (add support for `arrow.c.Array`) has been addressed, we should add another new MATLAB class (e.g. `arrow.c.Schema`) which wraps the underlying `ArrowSchema` C struct. Once we have added these two MATLAB classes, we can then add import and export functionality to share the Arrow memory between multiple language runtimes running in the same process. 
This would help enable workflows like sharing Arrow data between the MATLAB Interface to Arrow and `pyarrow` running within the MATLAB process via the [MATLAB interface to Python](https://www.mathworks.com/help/matlab/call-python-libraries.html).

### What changes are included in this PR?

1. Added a new C++ proxy class called `arrow::matlab::c::proxy::Schema` which wraps an `ArrowSchema` struct pointer. This class is registered as the proxy `arrow.c.proxy.Schema` in order to make it accessible to MATLAB.
2. Added a new MATLAB class called `arrow.c.Schema` that has an `arrow.c.proxy.Schema` instance. It has one public property named `Address`, which is a scalar `uint64`. This property is the memory address of the `ArrowSchema` struct pointer owned by `arrow.c.proxy.Schema`.

### Are these changes tested?

Yes.

1. Added a new test class called `test/arrow/c/tSchema.m`.
2. @ sgilmore10 and I created a prototype for importing and exporting arrow `Array`s via the C Data Interface format [here](https://github.com/mathworks/arrow/tree/arrow-array-address). We were able to share arrow `Array`s and `RecordBatch`es between `mlarrow` and `pyarrow`. Our plan now is to submit the necessary MATLAB code incrementally.

### Are there any user-facing changes?

Yes.

1. The `arrow.c.Schema` class is user-facing. However, it's only intended for "advanced" use-cases. In the future, we may add higher-level functionality on top of the C Data Interface so that users don't need to interact with it directly.
2. **NOTE**: On destruction, `arrow.c.proxy.Schema` will check to see if the `ArrowSchema` has already been consumed by an importer. If not, `arrow.c.proxy.Schema`'s destructor will call the release callback on the `ArrowSchema` to avoid memory leaks. To the best of our knowledge, this is similar to how the [Arrow PyCapsule Interface](https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html) works.

### Future Directions

1. #41656
2. We should probably follow up with a PR to create shared infrastructure for `arrow.c.Array` and `arrow.c.Schema`, since they are almost identical in design and implementation.

### Notes

1. Thank you @ sgilmore10 for your help with this pull request!
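For illustration only (not part of this patch), a hedged C++ sketch of what an in-process consumer could do with the `Address` value exposed by `arrow.c.Schema`; the `address` argument and the `ImportFromAddress` helper are hypothetical, while `arrow::ImportSchema` is the C Data Interface import function from `arrow/c/bridge.h`:

```cpp
#include <cstdint>
#include <memory>

#include "arrow/api.h"
#include "arrow/c/abi.h"
#include "arrow/c/bridge.h"  // arrow::ImportSchema

// Reinterprets the address as an ArrowSchema* and imports it; importing moves
// ownership out of the C struct and marks it released for the exporter.
arrow::Result<std::shared_ptr<arrow::Schema>> ImportFromAddress(uint64_t address) {
  auto* c_schema = reinterpret_cast<struct ArrowSchema*>(address);
  return arrow::ImportSchema(c_schema);
}
```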
* GitHub Issue: #41654

Authored-by: Kevin Gurney
Signed-off-by: Kevin Gurney
---
 matlab/src/cpp/arrow/matlab/c/proxy/array.h   |  4 +-
 matlab/src/cpp/arrow/matlab/c/proxy/schema.cc | 49 +++++++++++++++++++
 matlab/src/cpp/arrow/matlab/c/proxy/schema.h  | 39 +++++++++++++++
 matlab/src/cpp/arrow/matlab/proxy/factory.cc  |  2 +
 matlab/src/matlab/+arrow/+c/Schema.m          | 37 ++++++++++++++
 matlab/test/arrow/c/tSchema.m                 | 48 ++++++++++++++++++
 .../cmake/BuildMatlabArrowInterface.cmake     |  3 +-
 7 files changed, 178 insertions(+), 4 deletions(-)
 create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/schema.cc
 create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/schema.h
 create mode 100644 matlab/src/matlab/+arrow/+c/Schema.m
 create mode 100644 matlab/test/arrow/c/tSchema.m

diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.h b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
index b42b2dcd9cfa8..bb35807fcd015 100644
--- a/matlab/src/cpp/arrow/matlab/c/proxy/array.h
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.h
@@ -34,8 +34,6 @@ class Array : public libmexclass::proxy::Proxy {
   void getAddress(libmexclass::proxy::method::Context& context);

   struct ArrowArray arrowArray;
-
-  // struct ArrowArray* arrowArray;
 };

-}  // namespace arrow::matlab::c::proxy
\ No newline at end of file
+}  // namespace arrow::matlab::c::proxy
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc
new file mode 100644
index 0000000000000..7f239f5628720
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc
@@ -0,0 +1,49 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdint>
+#include "arrow/c/abi.h"
+
+#include "arrow/matlab/c/proxy/schema.h"
+
+#include "libmexclass/proxy/Proxy.h"
+
+namespace arrow::matlab::c::proxy {
+
+Schema::Schema() : arrowSchema{} { REGISTER_METHOD(Schema, getAddress); }
+
+Schema::~Schema() {
+  if (arrowSchema.release != NULL) {
+    arrowSchema.release(&arrowSchema);
+    arrowSchema.release = NULL;
+  }
+}
+
+libmexclass::proxy::MakeResult Schema::make(
+    const libmexclass::proxy::FunctionArguments& constructor_arguments) {
+  return std::make_shared<Schema>();
+}
+
+void Schema::getAddress(libmexclass::proxy::method::Context& context) {
+  namespace mda = ::matlab::data;
+
+  mda::ArrayFactory factory;
+  auto address = reinterpret_cast<uint64_t>(&arrowSchema);
+  context.outputs[0] = factory.createScalar(address);
+}
+
+}  // namespace arrow::matlab::c::proxy
diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/schema.h b/matlab/src/cpp/arrow/matlab/c/proxy/schema.h
index 0000000000000..8f781ea9c7341
new file mode 100644
--- /dev/null
+++ b/matlab/src/cpp/arrow/matlab/c/proxy/schema.h
@@ -0,0 +1,39 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/c/abi.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class Schema : public libmexclass::proxy::Proxy { + public: + Schema(); + + ~Schema(); + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getAddress(libmexclass::proxy::method::Context& context); + + struct ArrowSchema arrowSchema; +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index cf13ed6aa57fa..d7a8fa9ac2e74 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -26,6 +26,7 @@ #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/buffer/proxy/buffer.h" #include "arrow/matlab/c/proxy/array.h" +#include "arrow/matlab/c/proxy/schema.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" #include "arrow/matlab/io/csv/proxy/table_writer.h" @@ -101,6 +102,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy( REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); + REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema); // clang-format on return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, diff --git a/matlab/src/matlab/+arrow/+c/Schema.m b/matlab/src/matlab/+arrow/+c/Schema.m new file mode 100644 index 0000000000000..29eba59016044 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Schema.m @@ -0,0 +1,37 @@ +%SCHEMA Wrapper for an Arrow C Data Interface format ArrowSchema C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+classdef Schema < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Schema() + proxyName = "arrow.c.proxy.Schema"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tSchema.m b/matlab/test/arrow/c/tSchema.m new file mode 100644 index 0000000000000..16dcf1965b463 --- /dev/null +++ b/matlab/test/arrow/c/tSchema.m @@ -0,0 +1,48 @@ +%TSCHEMA Defines unit tests for arrow.c.Schema. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tSchema < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + schema = arrow.c.Schema(); + + % Verify schema is an instance of arrow.c.Schema. + testCase.verifyInstanceOf(schema, "arrow.c.Schema"); + + % Verify schema has one public property named Address. + props = properties(schema); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + schema = arrow.c.Schema(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = schema.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. 
+            schema = arrow.c.Schema();
+            fcn = @() setfield(schema, "Address", uint64(10));
+            testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
+        end
+    end
+end
\ No newline at end of file
diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
index 7a8cf8f40358b..8f37bef77b859 100644
--- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
+++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake
@@ -76,7 +76,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_reader.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc"
                                                   "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc"
-                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc")
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc"
+                                                  "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc")

set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy")

From edd62f75326c86edc22e705e13b0674acd7cc1c1 Mon Sep 17 00:00:00 2001
From: h-vetinari
Date: Thu, 16 May 2024 02:07:45 +1100
Subject: [PATCH 111/261] GH-41581: [C++][CMake] correctly use
 Protobuf_PROTOC_EXECUTABLE (#41582)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes #41581

* GitHub Issue: #41581

Lead-authored-by: H. Vetinari
Co-authored-by: Raúl Cumplido
Signed-off-by: Raúl Cumplido
---
 cpp/cmake_modules/FindProtobufAlt.cmake | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake
index f343b42f2b762..703e05c4731b6 100644
--- a/cpp/cmake_modules/FindProtobufAlt.cmake
+++ b/cpp/cmake_modules/FindProtobufAlt.cmake
@@ -31,6 +31,11 @@ endif()
 find_package(protobuf CONFIG ${find_package_args})
 set(ProtobufAlt_FOUND ${protobuf_FOUND})
 if(ProtobufAlt_FOUND)
+  if(Protobuf_PROTOC_EXECUTABLE)
+    # work around https://github.com/protocolbuffers/protobuf/issues/14576
+    set_target_properties(protobuf::protoc PROPERTIES IMPORTED_LOCATION_RELEASE
+                                                      "${Protobuf_PROTOC_EXECUTABLE}")
+  endif()
   set(ProtobufAlt_VERSION ${protobuf_VERSION})
   set(ProtobufAlt_VERSION_MAJOR ${protobuf_VERSION_MAJOR})
   set(ProtobufAlt_VERSION_MINOR ${protobuf_VERSION_MINOR})

From 63fddd7b2f12fb65ed5feff820a1913931773968 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Thu, 16 May 2024 00:54:12 +0900
Subject: [PATCH 112/261] GH-41660: [CI][Java] Restore devtoolset related
 GANDIVA_CXX_FLAGS (#41661)

### Rationale for this change

Because #41451 removed devtoolset related flags unexpectedly.

### What changes are included in this PR?

Restore devtoolset related flags.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #41660 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- ci/scripts/java_jni_manylinux_build.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 4921ce170b7a9..6f3769751af42 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -35,6 +35,9 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" +devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ + grep -o "^[0-9]*") +devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" : ${ARROW_ACERO:=ON} export ARROW_ACERO : ${ARROW_BUILD_TESTS:=ON} @@ -55,7 +58,7 @@ export ARROW_ORC : ${VCPKG_ROOT:=/opt/vcpkg} : ${VCPKG_FEATURE_FLAGS:=-manifests} : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} -: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread} +: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread} if [ "${ARROW_USE_CCACHE}" == "ON" ]; then echo "=== ccache statistics before build ===" From e1de9c52d5a60b2e2a314b8589170467fe36415d Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Wed, 15 May 2024 12:35:04 -0400 Subject: [PATCH 113/261] GH-41541: [Go][Parquet] Fix writer performance regression (#41638) ### Rationale for this change A performance regression was reported for the parquet writer since v14. Profiling revealed excessive allocations. This was due to us always adding the current offset to the current capacity when reserving, resulting in Reserve always performing a reallocate even when it didn't need to. ### What changes are included in this PR? `PooledBufferWriter` should only pass `nbytes` to the `Reserve` call, not `byteoffset + nbytes`. `BitWriter` should not be adding `b.offset` to the capacity when determining the new capacity. ### Are these changes tested? Yes. ### Are there any user-facing changes? 
No, only performance changes:

Before:
```shell
goos: linux
goarch: amd64
pkg: github.com/apache/arrow/go/v17/parquet/pqarrow
cpu: 12th Gen Intel(R) Core(TM) i7-12700H
BenchmarkWriteColumn/int32_not_nullable-20     514   2127175 ns/op   1971.77 MB/s   5425676 B/op    239 allocs/op
BenchmarkWriteColumn/int32_nullable-20          31 467352621 ns/op      8.97 MB/s  2210271923 B/op  2350 allocs/op
BenchmarkWriteColumn/int64_not_nullable-20     326   4132204 ns/op   2030.06 MB/s   5442976 B/op    265 allocs/op
BenchmarkWriteColumn/int64_nullable-20          33 432764687 ns/op     19.38 MB/s  2100068812 B/op  2384 allocs/op
BenchmarkWriteColumn/float32_not_nullable-20   334   3540566 ns/op   1184.64 MB/s   5453079 B/op   1263 allocs/op
BenchmarkWriteColumn/float32_nullable-20         6 492103646 ns/op      8.52 MB/s  2283305841 B/op  3371 allocs/op
BenchmarkWriteColumn/float64_not_nullable-20   241   4783268 ns/op   1753.74 MB/s   5498759 B/op   1292 allocs/op
BenchmarkWriteColumn/float64_nullable-20         4 369619096 ns/op     22.70 MB/s  1725354454 B/op  3401 allocs/op
PASS
ok      github.com/apache/arrow/go/v17/parquet/pqarrow  40.862s
```

After:
```shell
goos: linux
goarch: amd64
pkg: github.com/apache/arrow/go/v17/parquet/pqarrow
cpu: 12th Gen Intel(R) Core(TM) i7-12700H
BenchmarkWriteColumn/int32_not_nullable-20     500   2136823 ns/op   1962.87 MB/s   5410591 B/op    240 allocs/op
BenchmarkWriteColumn/int32_nullable-20          48  26604880 ns/op    157.65 MB/s  12053510 B/op    250 allocs/op
BenchmarkWriteColumn/int64_not_nullable-20     340   3530509 ns/op   2376.03 MB/s   5439578 B/op    265 allocs/op
BenchmarkWriteColumn/int64_nullable-20          44  27387334 ns/op    306.30 MB/s  11870305 B/op    260 allocs/op
BenchmarkWriteColumn/float32_not_nullable-20   316   3479312 ns/op   1205.50 MB/s   5456685 B/op   1263 allocs/op
BenchmarkWriteColumn/float32_nullable-20        50  25910872 ns/op    161.87 MB/s  12054582 B/op   1271 allocs/op
BenchmarkWriteColumn/float64_not_nullable-20   249   4769664 ns/op   1758.74 MB/s   5486020 B/op   1292 allocs/op
BenchmarkWriteColumn/float64_nullable-20        51  25496256 ns/op    329.01 MB/s  12140753 B/op   1284 allocs/op
PASS
ok      github.com/apache/arrow/go/v17/parquet/pqarrow  11.492s
```

All of the nullable column cases average around a 16x-17x performance improvement.

* GitHub Issue: #41541

Authored-by: Matt Topol
Signed-off-by: Matt Topol
---
 go/parquet/internal/encoding/types.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go
index 51f48c797488f..147c1746c515a 100644
--- a/go/parquet/internal/encoding/types.go
+++ b/go/parquet/internal/encoding/types.go
@@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) {
 		b.buf = bufferPool.Get().(*memory.Buffer)
 	}

-	newCap := utils.Max(b.buf.Cap()+b.offset, 256)
+	newCap := utils.Max(b.buf.Cap(), 256)
 	for newCap < b.pos+nbytes {
 		newCap = bitutil.NextPowerOf2(newCap)
 	}

From e04f5b4b905cfc37b5eaeea2c34e51349ae562b9 Mon Sep 17 00:00:00 2001
From: Felipe Oliveira Carvalho
Date: Wed, 15 May 2024 20:59:49 -0300
Subject: [PATCH 114/261] GH-41560: [C++] ChunkResolver: Implement ResolveMany
 and add unit tests (#41561)

### Rationale for this change

I want `ResolveMany` to support me in the implementation of `Take` that doesn't `Concatenate` all the chunks from a `ChunkedArray` `values` parameter.

### What changes are included in this PR?

 - Implementation of `ChunkResolver::ResolveMany()`
 - Addition of missing unit tests for `ChunkResolver`

### Are these changes tested?

Yes. By new unit tests.

### Are there any user-facing changes?

No. `ChunkResolver` is an internal API at the moment (see #34535 for future plans).
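For illustration only (not part of this patch), a minimal C++ usage sketch of the new API, mirroring the unit tests added here; the variable names are hypothetical:

```cpp
#include <cstdint>
#include <vector>

#include "arrow/chunk_resolver.h"

// Resolves a batch of logical indices against a chunked array with the
// logical layout [[0, 1], [2], [3..9]] (offsets {0, 2, 3, 10}).
void ResolveManyExample() {
  arrow::internal::ChunkResolver resolver(std::vector<int64_t>{0, 2, 3, 10});
  std::vector<uint32_t> logical = {0, 2, 9};
  std::vector<uint32_t> chunk_index(logical.size());
  std::vector<uint32_t> index_in_chunk(logical.size());
  // Returns false only when a chunk index cannot fit in the index type.
  bool ok = resolver.ResolveMany<uint32_t>(
      static_cast<int64_t>(logical.size()), logical.data(), chunk_index.data(),
      /*chunk_hint=*/0, index_in_chunk.data());
  // ok == true; chunk_index == {0, 1, 2}; index_in_chunk == {0, 0, 6}
  (void)ok;
}
```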
* GitHub Issue: #41560

Authored-by: Felipe Oliveira Carvalho
Signed-off-by: Felipe Oliveira Carvalho
---
 cpp/src/arrow/chunk_resolver.cc              |  80 +++++++-
 cpp/src/arrow/chunk_resolver.h               | 128 +++++++++++-
 cpp/src/arrow/chunked_array_test.cc          | 200 +++++++++++++++++++
 cpp/src/arrow/compute/kernels/vector_sort.cc |  30 ++-
 4 files changed, 407 insertions(+), 31 deletions(-)

diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc
index 29bccb52658f8..55eec53ced1c7 100644
--- a/cpp/src/arrow/chunk_resolver.cc
+++ b/cpp/src/arrow/chunk_resolver.cc
@@ -19,14 +19,14 @@

 #include <algorithm>
 #include <cstdint>
+#include <limits>
 #include <vector>

 #include "arrow/array.h"
 #include "arrow/record_batch.h"

-namespace arrow {
-namespace internal {
+namespace arrow::internal {

 namespace {

 template <typename T>
@@ -54,6 +54,51 @@ inline std::vector<int64_t> MakeChunksOffsets(const std::vector<T>& chunks) {
   offsets[chunks.size()] = offset;
   return offsets;
 }
+
+/// \pre all the pre-conditions of ChunkResolver::ResolveMany()
+/// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
+template <typename IndexType>
+void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets,
+                       int64_t n_indices, const IndexType* logical_index_vec,
+                       IndexType* out_chunk_index_vec, IndexType chunk_hint,
+                       IndexType* out_index_in_chunk_vec) {
+  auto* offsets = reinterpret_cast<const uint64_t*>(signed_offsets);
+  const auto num_chunks = static_cast<IndexType>(num_offsets - 1);
+  // chunk_hint in [0, num_offsets) per the precondition.
+  for (int64_t i = 0; i < n_indices; i++) {
+    const auto index = static_cast<uint64_t>(logical_index_vec[i]);
+    if (index >= offsets[chunk_hint] &&
+        (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) {
+      out_chunk_index_vec[i] = chunk_hint;  // hint is correct!
+      continue;
+    }
+    // lo < hi is guaranteed by `num_offsets = chunks.size() + 1`
+    auto chunk_index =
+        ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets);
+    chunk_hint = static_cast<IndexType>(chunk_index);
+    out_chunk_index_vec[i] = chunk_hint;
+  }
+  if (out_index_in_chunk_vec != NULLPTR) {
+    for (int64_t i = 0; i < n_indices; i++) {
+      auto logical_index = logical_index_vec[i];
+      auto chunk_index = out_chunk_index_vec[i];
+      // chunk_index is in [0, chunks.size()] no matter what the
+      // value of logical_index is, so it's always safe to dereference
+      // offset_ as it contains chunks.size()+1 values.
+      out_index_in_chunk_vec[i] =
+          logical_index - static_cast<IndexType>(offsets[chunk_index]);
+#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER)
+      // Make it more likely that Valgrind/ASAN can catch an invalid memory
+      // access by poisoning out_index_in_chunk_vec[i] when the logical
+      // index is out-of-bounds.
+      if (chunk_index == num_chunks) {
+        out_index_in_chunk_vec[i] = std::numeric_limits<IndexType>::max();
+      }
+#endif
+    }
+  }
+}
+
 }  // namespace

 ChunkResolver::ChunkResolver(const ArrayVector& chunks) noexcept
@@ -84,5 +129,32 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept {
   return *this;
 }

-}  // namespace internal
-}  // namespace arrow
+void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec,
+                                    uint8_t* out_chunk_index_vec, uint8_t chunk_hint,
+                                    uint8_t* out_index_in_chunk_vec) const {
+  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
+                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+}
+
+void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec,
+                                    uint32_t* out_chunk_index_vec, uint32_t chunk_hint,
+                                    uint32_t* out_index_in_chunk_vec) const {
+  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
+                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+}
+
+void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec,
+                                    uint16_t* out_chunk_index_vec, uint16_t chunk_hint,
+                                    uint16_t* out_index_in_chunk_vec) const {
+  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
+                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+}
+
+void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec,
+                                    uint64_t* out_chunk_index_vec, uint64_t chunk_hint,
+                                    uint64_t* out_index_in_chunk_vec) const {
+  ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec,
+                    out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec);
+}
+
+}  // namespace arrow::internal
diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h
index c5dad1a17b18e..a2a3d5a864243 100644
--- a/cpp/src/arrow/chunk_resolver.h
+++ b/cpp/src/arrow/chunk_resolver.h
@@ -20,6 +20,8 @@
 #include <atomic>
 #include <cassert>
 #include <cstdint>
+#include <limits>
+#include <type_traits>
 #include <vector>

 #include "arrow/type_fwd.h"
@@ -27,6 +29,8 @@

 namespace arrow::internal {

+struct ChunkResolver;
+
 struct ChunkLocation {
   /// \brief Index of the chunk in the array of chunks
   ///
@@ -36,8 +40,17 @@ struct ChunkLocation {
   /// \brief Index of the value in the chunk
   ///
-  /// The value is undefined if chunk_index >= chunks.size()
+  /// The value is UNDEFINED if chunk_index >= chunks.size()
   int64_t index_in_chunk = 0;
+
+  ChunkLocation() = default;
+
+  ChunkLocation(int64_t chunk_index, int64_t index_in_chunk)
+      : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {}
+
+  bool operator==(ChunkLocation other) const {
+    return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk;
+  }
 };

 /// \brief A utility that incrementally resolves logical indices into
@@ -60,12 +73,35 @@ struct ARROW_EXPORT ChunkResolver {
   explicit ChunkResolver(const std::vector<const Array*>& chunks) noexcept;
   explicit ChunkResolver(const RecordBatchVector& batches) noexcept;

+  /// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets.
+  ///
+  /// The first offset must be 0 and the last offset must be the logical length of the
+  /// chunked array. Each offset before the last represents the starting logical index of
+  /// the corresponding chunk.
+  explicit ChunkResolver(std::vector<int64_t> offsets) noexcept
+      : offsets_(std::move(offsets)), cached_chunk_(0) {
+#ifndef NDEBUG
+    assert(offsets_.size() >= 1);
+    assert(offsets_[0] == 0);
+    for (size_t i = 1; i < offsets_.size(); i++) {
+      assert(offsets_[i] >= offsets_[i - 1]);
+    }
+#endif
+  }
+
   ChunkResolver(ChunkResolver&& other) noexcept;
   ChunkResolver& operator=(ChunkResolver&& other) noexcept;
   ChunkResolver(const ChunkResolver& other) noexcept;
   ChunkResolver& operator=(const ChunkResolver& other) noexcept;

+  int64_t logical_array_length() const { return offsets_.back(); }
+  int64_t num_chunks() const { return static_cast<int64_t>(offsets_.size()) - 1; }
+
+  int64_t chunk_length(int64_t chunk_index) const {
+    return offsets_[chunk_index + 1] - offsets_[chunk_index];
+  }
+
   /// \brief Resolve a logical index to a ChunkLocation.
   ///
   /// The returned ChunkLocation contains the chunk index and the within-chunk index
@@ -81,7 +117,7 @@ struct ARROW_EXPORT ChunkResolver {
     const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed);
     const auto chunk_index = ResolveChunkIndex(index, cached_chunk);
-    return {chunk_index, index - offsets_[chunk_index]};
+    return ChunkLocation{chunk_index, index - offsets_[chunk_index]};
   }

   /// \brief Resolve a logical index to a ChunkLocation.
@@ -97,12 +133,70 @@ struct ARROW_EXPORT ChunkResolver {
   /// \return ChunkLocation with a valid chunk_index if index is within
   ///         bounds, or with chunk_index == chunks.size() if logical index is
   ///         `>= chunked_array.length()`.
-  inline ChunkLocation ResolveWithChunkIndexHint(int64_t index,
-                                                 ChunkLocation hint) const {
+  inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const {
     assert(hint.chunk_index < static_cast<int64_t>(offsets_.size()));
     const auto chunk_index = ResolveChunkIndex(index, hint.chunk_index);
-    return {chunk_index, index - offsets_[chunk_index]};
+    return ChunkLocation{chunk_index, index - offsets_[chunk_index]};
+  }
+
+  /// \brief Resolve `n_indices` logical indices to chunk indices.
+  ///
+  /// \pre 0 <= logical_index_vec[i] < logical_array_length()
+  ///      (for well-defined and valid chunk index results)
+  /// \pre out_chunk_index_vec has space for `n_indices`
+  /// \pre chunk_hint in [0, chunks.size()]
+  /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n)
+  /// \post if logical_index_vec[i] >= chunked_array.length(), then
+  ///       out_chunk_index_vec[i] == chunks.size()
+  ///       and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds)
+  /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and
+  ///       out_index_in_chunk_vec[i] are UNDEFINED
+  ///
+  /// \param n_indices The number of logical indices to resolve
+  /// \param logical_index_vec The logical indices to resolve
+  /// \param out_chunk_index_vec The output array where the chunk indices will be written
+  /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany
+  /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the
+  ///        within-chunk indices will be written
+  /// \return false iff chunks.size() > std::numeric_limits<IndexType>::max()
+  template <typename IndexType>
+  [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec,
+                                 IndexType* out_chunk_index_vec, IndexType chunk_hint = 0,
+                                 IndexType* out_index_in_chunk_vec = NULLPTR) const {
+    if constexpr (sizeof(IndexType) < sizeof(uint64_t)) {
+      // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()).
+      constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits<IndexType>::max();
+      // A ChunkedArray with enough empty chunks can make the index of a chunk
+      // exceed the logical index and thus the maximum value of IndexType.
+      const bool chunk_index_fits_on_type =
+          static_cast<uint64_t>(offsets_.size() - 1) <= kMaxIndexTypeValue;
+      if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) {
+        return false;
+      }
+      // Since an index-in-chunk cannot possibly exceed the logical index being
+      // queried, we don't have to worry about these values not fitting on IndexType.
+    }
+    if constexpr (std::is_signed_v<IndexType>) {
+      // We interpret signed integers as unsigned and avoid having to generate double
+      // the amount of binary code to handle each integer width.
+      //
+      // Negative logical indices can become large values when cast to unsigned, and
+      // they are gracefully handled by ResolveManyImpl, but both the chunk index
+      // and the index in chunk values will be undefined in these cases. This
+      // happens because int8_t(-1) == uint8_t(255) and 255 could be a valid
+      // logical index in the chunked array.
+      using U = std::make_unsigned_t<IndexType>;
+      ResolveManyImpl(n_indices, reinterpret_cast<const U*>(logical_index_vec),
+                      reinterpret_cast<U*>(out_chunk_index_vec),
+                      static_cast<U>(chunk_hint),
+                      reinterpret_cast<U*>(out_index_in_chunk_vec));
+    } else {
+      static_assert(std::is_unsigned_v<IndexType>);
+      ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint,
+                      out_index_in_chunk_vec);
+    }
+    return true;
   }

  private:
@@ -130,17 +224,33 @@ struct ARROW_EXPORT ChunkResolver {
     return chunk_index;
   }

+  /// \pre all the pre-conditions of ChunkResolver::ResolveMany()
+  /// \pre num_offsets - 1 <= std::numeric_limits<IndexType>::max()
+  void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const;
+  void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const;
+  void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const;
+  void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const;
+
+ public:
   /// \brief Find the index of the chunk that contains the logical index.
   ///
   /// Any non-negative index is accepted. When `hi=num_offsets`, the largest
   /// possible return value is `num_offsets-1` which is equal to
-  /// `chunks.size()`. The is returned when the logical index is out-of-bounds.
+  /// `chunks.size()`, which is returned when the logical index is greater than
+  /// or equal to the logical length of the chunked array.
   ///
-  /// \pre index >= 0
+  /// \pre index >= 0 (otherwise, when index is negative, hi-1 is returned)
   /// \pre lo < hi
   /// \pre lo >= 0 && hi <= offsets_.size()
   static inline int64_t Bisect(int64_t index, const int64_t* offsets, int64_t lo,
                                int64_t hi) {
+    return Bisect(static_cast<uint64_t>(index),
+                  reinterpret_cast<const uint64_t*>(offsets), static_cast<uint64_t>(lo),
+                  static_cast<uint64_t>(hi));
+  }
+
+  static inline int64_t Bisect(uint64_t index, const uint64_t* offsets, uint64_t lo,
+                               uint64_t hi) {
     // Similar to std::upper_bound(), but slightly different as our offsets
     // array always starts with 0.
     auto n = hi - lo;
     // (lo < hi is guaranteed by the precondition).
    assert(n > 1 && "lo < hi is a precondition of Bisect");
    do {
-      const int64_t m = n >> 1;
-      const int64_t mid = lo + m;
+      const uint64_t m = n >> 1;
+      const uint64_t mid = lo + m;
      if (index >= offsets[mid]) {
        lo = mid;
        n -= m;
diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc
index 6ca52ab46ca68..e9cc283b53cd5 100644
--- a/cpp/src/arrow/chunked_array_test.cc
+++ b/cpp/src/arrow/chunked_array_test.cc
@@ -23,6 +23,7 @@
 #include <vector>

 #include <gtest/gtest.h>

+#include "arrow/chunk_resolver.h"
 #include "arrow/scalar.h"
 #include "arrow/status.h"
 #include "arrow/testing/builder.h"
@@ -34,6 +35,9 @@

 namespace arrow {

+using internal::ChunkLocation;
+using internal::ChunkResolver;
+
 class TestChunkedArray : public ::testing::Test {
  protected:
  virtual void Construct() {
@@ -310,4 +314,200 @@ TEST_F(TestChunkedArray, GetScalar) {
   ASSERT_RAISES(IndexError, carr.GetScalar(7));
 }

+// ChunkResolver tests
+
+using IndexTypes = ::testing::Types<uint8_t, uint16_t, uint32_t, uint64_t, int8_t,
+                                    int16_t, int32_t, int64_t>;
+
+TEST(TestChunkResolver, Resolve) {
+  ChunkResolver empty(std::vector<int64_t>({0}));  // []
+  // ChunkLocation::index_in_chunk is undefined when chunk_index==chunks.size(),
+  // so only chunk_index is compared in these cases.
+  ASSERT_EQ(empty.Resolve(0).chunk_index, 0);
+  ASSERT_EQ(empty.Resolve(0).chunk_index, 0);
+
+  ChunkResolver one(std::vector<int64_t>({0, 1}));  // [[0]]
+  ASSERT_EQ(one.Resolve(1).chunk_index, 1);
+  ASSERT_EQ(one.Resolve(0), (ChunkLocation(0, 0)));
+  ASSERT_EQ(one.Resolve(1).chunk_index, 1);
+
+  ChunkResolver one_and_empty(std::vector<int64_t>({0, 1, 1, 1}));  // [[0], [], []]
+  ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3);
+  ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3);
+  ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3);
+  ASSERT_EQ(one_and_empty.Resolve(0), (ChunkLocation(0, 0)));
+  ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3);
+  ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3);
+  ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3);
+
+  ChunkResolver one_one_one(std::vector<int64_t>({0, 1, 2, 3}));  // [[0], [1], [2]]
+  ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3);
+  ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0)));
+  ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0)));
+  ASSERT_EQ(one_one_one.Resolve(0), (ChunkLocation(0, 0)));
+  ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0)));
+  ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0)));
+  ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3);
+
+  ChunkResolver resolver(std::vector<int64_t>({0, 2, 3, 10}));  // [[0, 1], [2], [3..9]]
+  ASSERT_EQ(resolver.Resolve(10).chunk_index, 3);
+  ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6)));
+  ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5)));
+  ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1)));
+  ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0)));
+  ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0)));
+  ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1)));
+  ASSERT_EQ(resolver.Resolve(0), (ChunkLocation(0, 0)));
+  ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1)));
+  ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0)));
+  ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0)));
+  ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1)));
+  ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5)));
+  ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6)));
+  ASSERT_EQ(resolver.Resolve(10).chunk_index, 3);
+}
+
+template <typename T>
+class TestChunkResolverMany : public ::testing::Test {
+ public:
+  using IndexType = T;
+
+  Result<std::vector<ChunkLocation>> ResolveMany(
+      const ChunkResolver& resolver, const std::vector<IndexType>& logical_index_vec) {
+    const size_t n = logical_index_vec.size();
+    std::vector<IndexType> chunk_index_vec;
+    chunk_index_vec.resize(n);
+    std::vector<IndexType> index_in_chunk_vec;
+    index_in_chunk_vec.resize(n);
+    bool valid = resolver.ResolveMany(
+        static_cast<int64_t>(n), logical_index_vec.data(), chunk_index_vec.data(), 0,
+        index_in_chunk_vec.data());
+    if (ARROW_PREDICT_FALSE(!valid)) {
+      return Status::Invalid("index type doesn't fit possible chunk indexes");
+    }
+    std::vector<ChunkLocation> locations;
+    locations.reserve(n);
+    for (size_t i = 0; i < n; i++) {
+      auto chunk_index = static_cast<int64_t>(chunk_index_vec[i]);
+      auto index_in_chunk = static_cast<int64_t>(index_in_chunk_vec[i]);
+      locations.emplace_back(chunk_index, index_in_chunk);
+    }
+    return locations;
+  }
+
+  void CheckResolveMany(const ChunkResolver& resolver,
+                        const std::vector<IndexType>& logical_index_vec) {
+    ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec));
+    EXPECT_EQ(logical_index_vec.size(), locations.size());
+    for (size_t i = 0; i < logical_index_vec.size(); i++) {
+      IndexType logical_index = logical_index_vec[i];
+      const auto expected = resolver.Resolve(logical_index);
+      ASSERT_LE(expected.chunk_index, resolver.num_chunks());
+      if (expected.chunk_index == resolver.num_chunks()) {
+        // index_in_chunk is undefined in this case
+        ASSERT_EQ(locations[i].chunk_index, expected.chunk_index);
+      } else {
+        ASSERT_EQ(locations[i], expected);
+      }
+    }
+  }
+
+  void TestBasics() {
+    std::vector<IndexType> logical_index_vec;
+
+    ChunkResolver empty(std::vector<int64_t>({0}));  // []
+    logical_index_vec = {0, 0};
+    CheckResolveMany(empty, logical_index_vec);
+
+    ChunkResolver one(std::vector<int64_t>({0, 1}));  // [[0]]
+    logical_index_vec = {1, 0, 1};
+    CheckResolveMany(one, logical_index_vec);
+
+    ChunkResolver one_and_empty(std::vector<int64_t>({0, 1, 1, 1}));  // [[0], [], []]
+    logical_index_vec = {3, 2, 1, 0, 1, 2, 3};
+    CheckResolveMany(one_and_empty, logical_index_vec);
+
+    ChunkResolver one_one_one(std::vector<int64_t>({0, 1, 2, 3}));  // [[0], [1], [2]]
+    logical_index_vec = {3, 2, 1, 0, 1, 2, 3};
+    CheckResolveMany(one_one_one, logical_index_vec);
+
+    ChunkResolver resolver(std::vector<int64_t>({0, 2, 3, 10}));  // [[0, 1], [2], [3..9]]
+    logical_index_vec = {10, 9, 8, 4, 3, 2, 1, 0, 1, 2, 3, 4, 8, 9, 10};
+    CheckResolveMany(resolver, logical_index_vec);
+  }
+
+  void TestOutOfBounds() {
+    ChunkResolver resolver(std::vector<int64_t>({0, 2, 3, 10}));  // [[0, 1], [2], [3..9]]
+
+    std::vector<IndexType> logical_index_vec = {10, 11, 12, 13, 14, 13, 11, 10};
+    ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec));
+    EXPECT_EQ(logical_index_vec.size(), locations.size());
+    for (size_t i = 0; i < logical_index_vec.size(); i++) {
+      ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks());
+    }
+
+    if constexpr (std::is_signed_v<IndexType>) {
+      std::vector<IndexType> logical_index_vec = {-1, -2, -3, -4, INT8_MIN};
+
+      ChunkResolver resolver(std::vector<int64_t>({0, 2, 128}));  // [[0, 1], [2..127]]
+      ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec));
+      EXPECT_EQ(logical_index_vec.size(), locations.size());
+      for (size_t i = 0; i < logical_index_vec.size(); i++) {
+        // All the negative indices are greater than resolver.logical_array_length()-1
+        // when cast to uint8_t.
+        ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks());
+      }
+
+      if constexpr (sizeof(IndexType) == 1) {
+        ChunkResolver resolver(std::vector<int64_t>(
+            {0, 2, 128, 129, 256}));  // [[0, 1], [2..127], [128], [129, 255]]
+        ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec));
+        EXPECT_EQ(logical_index_vec.size(), locations.size());
+        for (size_t i = 0; i < logical_index_vec.size(); i++) {
+          if constexpr (sizeof(IndexType) == 1) {
+            // All the negative 8-bit indices are SMALLER than
+            // resolver.logical_array_length()=256 when cast to 8-bit unsigned integers.
+            // So the resolved locations might look valid, but they should not be trusted.
+            ASSERT_LT(locations[i].chunk_index, resolver.num_chunks());
+          } else {
+            // All the negative indices are greater than resolver.logical_array_length()
+            // when cast to 16/32/64-bit unsigned integers.
+            ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks());
+          }
+        }
+      }
+    }
+  }
+
+  void TestOverflow() {
+    const int64_t kMaxIndex = std::is_signed_v<IndexType> ? 127 : 255;
+    std::vector<IndexType> logical_index_vec = {0, 1, 2,
+                                                static_cast<IndexType>(kMaxIndex)};
+
+    // Overflows are rare because to make them possible, we need more chunks
+    // than logical elements in the ChunkedArray. That requires at least one
+    // empty chunk.
+    std::vector<int64_t> offsets;
+    for (int64_t i = 0; i <= kMaxIndex; i++) {
+      offsets.push_back(i);
+    }
+    ChunkResolver resolver{offsets};
+    ASSERT_OK(ResolveMany(resolver, logical_index_vec));
+
+    offsets.push_back(kMaxIndex);  // adding an empty chunk
+    ChunkResolver resolver_with_empty{offsets};
+    if (sizeof(IndexType) == 1) {
+      ASSERT_NOT_OK(ResolveMany(resolver_with_empty, logical_index_vec));
+    } else {
+      ASSERT_OK(ResolveMany(resolver_with_empty, logical_index_vec));
+    }
+  }
+};
+
+TYPED_TEST_SUITE(TestChunkResolverMany, IndexTypes);
+
+TYPED_TEST(TestChunkResolverMany, Basics) { this->TestBasics(); }
+TYPED_TEST(TestChunkResolverMany, OutOfBounds) { this->TestOutOfBounds(); }
+TYPED_TEST(TestChunkResolverMany, Overflow) { this->TestOverflow(); }
+
 }  // namespace arrow
diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc
index db2023ef04cad..ad22fa8d365c4 100644
--- a/cpp/src/arrow/compute/kernels/vector_sort.cc
+++ b/cpp/src/arrow/compute/kernels/vector_sort.cc
@@ -747,15 +747,13 @@ class TableSorter {
     auto& comparator = comparator_;
     const auto& first_sort_key = sort_keys_[0];

-    ChunkLocation left_loc{0, 0};
-    ChunkLocation right_loc{0, 0};
+    ChunkLocation left_loc;
+    ChunkLocation right_loc;
     std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices,
                [&](uint64_t left, uint64_t right) {
                  // First column is either null or nan
-                 left_loc =
-                     left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc);
-                 right_loc =
-                     right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc);
+                 left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc);
+                 right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc);
                  auto chunk_left = first_sort_key.GetChunk(left_loc);
                  auto chunk_right = first_sort_key.GetChunk(right_loc);
                  const auto left_is_null = chunk_left.IsNull();
@@ -786,15 +784,13 @@ class TableSorter {
     // Untyped implementation
     auto& comparator = comparator_;

-    ChunkLocation left_loc{0, 0};
-    ChunkLocation right_loc{0, 0};
+    ChunkLocation left_loc;
+    ChunkLocation right_loc;
     std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices,
                [&](uint64_t left, uint64_t right) {
                  // First column is always null
-                 left_loc =
left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); return comparator.Compare(left_loc, right_loc, 1); }); // Copy back temp area into main buffer @@ -812,15 +808,13 @@ class TableSorter { auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, [&](uint64_t left, uint64_t right) { // Both values are never null nor NaN. - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); auto chunk_left = first_sort_key.GetChunk(left_loc); auto chunk_right = first_sort_key.GetChunk(right_loc); DCHECK(!chunk_left.IsNull()); From 084387c56e45bf7e8335c28e14a2e61b16515ad5 Mon Sep 17 00:00:00 2001 From: James Duong Date: Wed, 15 May 2024 17:22:34 -0700 Subject: [PATCH 115/261] GH-39204: [Format][FlightRPC][Docs] Stabilize Flight SQL (#41657) Update documentation, protobufs, and class documentation to remove experimental tags from Flight and Flight SQL documentation. ### Rationale for this change Flight SQL has been used by multiple databases now and has been voted as stable per the mailing list discussion: [https://lists.apache.org/thread/qoshg8mln3t2ovr90o1yklz4yrpv503h](url) ### What changes are included in this PR? Update protobuf, class comments, and user documentation to remove references to Flight and Flight SQL being experimental. This change excludes the UCX transport and the session option messages ### Are these changes tested? No, documentation only. ### Are there any user-facing changes? User documentation. * GitHub Issue: #39204 Authored-by: James Duong Signed-off-by: Sutou Kouhei --- cpp/src/arrow/flight/client.h | 8 +---- cpp/src/arrow/flight/cookie_internal.cc | 3 +- cpp/src/arrow/flight/middleware.h | 2 +- cpp/src/arrow/flight/server.h | 3 +- cpp/src/arrow/flight/server_middleware.h | 3 +- cpp/src/arrow/flight/sql/server.cc | 3 +- cpp/src/arrow/flight/sql/server.h | 3 +- .../flight/sql/server_session_middleware.h | 1 - cpp/src/arrow/flight/transport.h | 4 --- cpp/src/arrow/flight/types.h | 4 +-- cpp/src/arrow/flight/types_async.h | 4 --- docs/source/cpp/api/flightsql.rst | 2 -- docs/source/format/FlightSql.rst | 3 -- docs/source/java/overview.rst | 4 +-- format/FlightSql.proto | 32 ------------------- java/flight/flight-core/pom.xml | 2 +- .../arrow/flight/ServerSessionMiddleware.java | 2 -- java/flight/flight-sql/pom.xml | 2 +- 18 files changed, 12 insertions(+), 73 deletions(-) diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h index 330fa8bad730d..613903108949e 100644 --- a/cpp/src/arrow/flight/client.h +++ b/cpp/src/arrow/flight/client.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -/// \brief Implementation of Flight RPC client. API should be -/// considered experimental for now +/// \brief Implementation of Flight RPC client. 
#pragma once @@ -177,7 +176,6 @@ class ARROW_FLIGHT_EXPORT FlightMetadataReader { }; /// \brief Client class for Arrow Flight RPC services. -/// API experimental for now class ARROW_FLIGHT_EXPORT FlightClient { public: ~FlightClient(); @@ -275,8 +273,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request /// \param[in] listener Callbacks for response and RPC completion - /// - /// This API is EXPERIMENTAL. void GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor, std::shared_ptr> listener); @@ -288,8 +284,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \brief Asynchronous GetFlightInfo returning a Future. /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request - /// - /// This API is EXPERIMENTAL. arrow::Future GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor); arrow::Future GetFlightInfoAsync(const FlightDescriptor& descriptor) { diff --git a/cpp/src/arrow/flight/cookie_internal.cc b/cpp/src/arrow/flight/cookie_internal.cc index 8f41106ebce5c..75a10d148bf47 100644 --- a/cpp/src/arrow/flight/cookie_internal.cc +++ b/cpp/src/arrow/flight/cookie_internal.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight clients. Currently -// experimental. +// Interfaces for defining middleware for Flight clients. #include "arrow/flight/cookie_internal.h" #include "arrow/flight/client.h" diff --git a/cpp/src/arrow/flight/middleware.h b/cpp/src/arrow/flight/middleware.h index 84448097ff019..d717e396a8b68 100644 --- a/cpp/src/arrow/flight/middleware.h +++ b/cpp/src/arrow/flight/middleware.h @@ -16,7 +16,7 @@ // under the License. // Interfaces for defining middleware for Flight clients and -// servers. Currently experimental. +// servers. #pragma once diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index ffcffe12e3c78..8d73353ab16c1 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. #pragma once diff --git a/cpp/src/arrow/flight/server_middleware.h b/cpp/src/arrow/flight/server_middleware.h index 030f1a17c2100..3a3e6f8616ed6 100644 --- a/cpp/src/arrow/flight/server_middleware.h +++ b/cpp/src/arrow/flight/server_middleware.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight servers. Currently -// experimental. +// Interfaces for defining middleware for Flight servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index cae3542b4faf8..63d1f5c5225fa 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. 
// Platform-specific defines #include "arrow/flight/platform.h" diff --git a/cpp/src/arrow/flight/sql/server.h b/cpp/src/arrow/flight/sql/server.h index 7b5d71678f3de..7130e96987b89 100644 --- a/cpp/src/arrow/flight/sql/server.h +++ b/cpp/src/arrow/flight/sql/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/server_session_middleware.h b/cpp/src/arrow/flight/sql/server_session_middleware.h index 021793de3de32..6eb11041a08bd 100644 --- a/cpp/src/arrow/flight/sql/server_session_middleware.h +++ b/cpp/src/arrow/flight/sql/server_session_middleware.h @@ -16,7 +16,6 @@ // under the License. // Middleware for handling Flight SQL Sessions including session cookie handling. -// Currently experimental. #pragma once diff --git a/cpp/src/arrow/flight/transport.h b/cpp/src/arrow/flight/transport.h index 4029aa5223deb..4ce50534023fc 100644 --- a/cpp/src/arrow/flight/transport.h +++ b/cpp/src/arrow/flight/transport.h @@ -19,8 +19,6 @@ /// Internal (but not private) interface for implementing /// alternate network transports in Flight. /// -/// \warning EXPERIMENTAL. Subject to change. -/// /// To implement a transport, implement ServerTransport and /// ClientTransport, and register the desired URI schemes with /// TransportRegistry. Flight takes care of most of the per-RPC @@ -248,8 +246,6 @@ TransportRegistry* GetDefaultTransportRegistry(); /// Transport implementations may subclass this to store their own /// state, and stash an instance in a user-supplied AsyncListener via /// ClientTransport::GetAsyncRpc and ClientTransport::SetAsyncRpc. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT AsyncRpc { public: virtual ~AsyncRpc() = default; diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index b3df8377b8ffd..cdf03f21041ee 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Data structure for Flight RPC. API should be considered experimental for now +// Data structure for Flight RPC. #pragma once @@ -1115,8 +1115,6 @@ std::string ToString(TransportStatusCode code); /// instead of trying to translate to Arrow Status. /// /// Currently, only attached to the Status passed to AsyncListener::OnFinish. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT TransportStatusDetail : public StatusDetail { public: constexpr static const char* kTypeId = "flight::TransportStatusDetail"; diff --git a/cpp/src/arrow/flight/types_async.h b/cpp/src/arrow/flight/types_async.h index a241e64fb4e49..d5ed48d8a6438 100644 --- a/cpp/src/arrow/flight/types_async.h +++ b/cpp/src/arrow/flight/types_async.h @@ -31,8 +31,6 @@ namespace arrow::flight { /// @{ /// \brief Non-templated state for an async RPC. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT AsyncListenerBase { public: AsyncListenerBase(); @@ -57,8 +55,6 @@ class ARROW_FLIGHT_EXPORT AsyncListenerBase { /// A single listener may not be used for multiple concurrent RPC /// calls. The application MUST hold the listener alive until /// OnFinish() is called and has finished. -/// -/// This API is EXPERIMENTAL. 
template class ARROW_FLIGHT_EXPORT AsyncListener : public AsyncListenerBase { public: diff --git a/docs/source/cpp/api/flightsql.rst b/docs/source/cpp/api/flightsql.rst index 565b605108d9f..0f49a76f20687 100644 --- a/docs/source/cpp/api/flightsql.rst +++ b/docs/source/cpp/api/flightsql.rst @@ -22,8 +22,6 @@ Arrow Flight SQL ================ -.. note:: Flight SQL is currently experimental and APIs are subject to change. - Common Types ============ diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index 181efce286e70..9c3523755f3ae 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -32,9 +32,6 @@ with any database that supports the necessary endpoints. Flight SQL clients wrap the underlying Flight client to provide methods for the new RPC methods described here. -.. warning:: Flight SQL is **experimental** and changes to the - protocol may still be made. - RPC Methods =========== diff --git a/docs/source/java/overview.rst b/docs/source/java/overview.rst index 9d9cbad8a26c1..7780ee32ec9bc 100644 --- a/docs/source/java/overview.rst +++ b/docs/source/java/overview.rst @@ -54,10 +54,10 @@ but some modules are JNI bindings to the C++ library. - (Experimental) A library for converting JDBC data to Arrow data. - Native * - flight-core - - (Experimental) An RPC mechanism for transferring ValueVectors. + - An RPC mechanism for transferring ValueVectors. - Native * - flight-sql - - (Experimental) Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. + - Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. - Native * - flight-integration-tests - Integration tests for Flight RPC. diff --git a/format/FlightSql.proto b/format/FlightSql.proto index bf3fcb6c3d229..6fca141d692a7 100644 --- a/format/FlightSql.proto +++ b/format/FlightSql.proto @@ -43,7 +43,6 @@ package arrow.flight.protocol.sql; * where there is one row per requested piece of metadata information. */ message CommandGetSqlInfo { - option (experimental) = true; /* * Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide @@ -1131,7 +1130,6 @@ enum Searchable { * The returned data should be ordered by data_type and then by type_name. */ message CommandGetXdbcTypeInfo { - option (experimental) = true; /* * Specifies the data type to search for the info. @@ -1153,7 +1151,6 @@ message CommandGetXdbcTypeInfo { * The returned data should be ordered by catalog_name. */ message CommandGetCatalogs { - option (experimental) = true; } /* @@ -1171,7 +1168,6 @@ message CommandGetCatalogs { * The returned data should be ordered by catalog_name, then db_schema_name. */ message CommandGetDbSchemas { - option (experimental) = true; /* * Specifies the Catalog to search for the tables. @@ -1219,7 +1215,6 @@ message CommandGetDbSchemas { * The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. */ message CommandGetTables { - option (experimental) = true; /* * Specifies the Catalog to search for the tables. @@ -1272,7 +1267,6 @@ message CommandGetTables { * The returned data should be ordered by table_type. */ message CommandGetTableTypes { - option (experimental) = true; } /* @@ -1293,7 +1287,6 @@ message CommandGetTableTypes { * The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. 
*/ message CommandGetPrimaryKeys { - option (experimental) = true; /* * Specifies the catalog to search for the table. @@ -1348,7 +1341,6 @@ enum UpdateDeleteRules { * update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. */ message CommandGetExportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the foreign key table. @@ -1399,7 +1391,6 @@ message CommandGetExportedKeys { * - 4 = SET DEFAULT */ message CommandGetImportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the primary key table. @@ -1452,7 +1443,6 @@ message CommandGetImportedKeys { * - 4 = SET DEFAULT */ message CommandGetCrossReference { - option (experimental) = true; /** * The catalog name where the parent table is. @@ -1499,7 +1489,6 @@ message CommandGetCrossReference { * Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. */ message ActionCreatePreparedStatementRequest { - option (experimental) = true; // The valid SQL string to create a prepared statement for. string query = 1; @@ -1512,7 +1501,6 @@ message ActionCreatePreparedStatementRequest { * An embedded message describing a Substrait plan to execute. */ message SubstraitPlan { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. // XXX(ARROW-16902): this is bytes instead of an embedded message @@ -1529,7 +1517,6 @@ message SubstraitPlan { * Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. */ message ActionCreatePreparedSubstraitPlanRequest { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. SubstraitPlan plan = 1; @@ -1548,7 +1535,6 @@ message ActionCreatePreparedSubstraitPlanRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionCreatePreparedStatementResult { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1570,7 +1556,6 @@ message ActionCreatePreparedStatementResult { * Closes server resources associated with the prepared statement handle. */ message ActionClosePreparedStatementRequest { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1581,7 +1566,6 @@ message ActionClosePreparedStatementRequest { * Begins a transaction. */ message ActionBeginTransactionRequest { - option (experimental) = true; } /* @@ -1592,7 +1576,6 @@ message ActionBeginTransactionRequest { * FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT. */ message ActionBeginSavepointRequest { - option (experimental) = true; // The transaction to which a savepoint belongs. bytes transaction_id = 1; @@ -1610,7 +1593,6 @@ message ActionBeginSavepointRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginTransactionResult { - option (experimental) = true; // Opaque handle for the transaction on the server. bytes transaction_id = 1; @@ -1626,7 +1608,6 @@ message ActionBeginTransactionResult { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginSavepointResult { - option (experimental) = true; // Opaque handle for the savepoint on the server. bytes savepoint_id = 1; @@ -1641,7 +1622,6 @@ message ActionBeginSavepointResult { * invalidated, as are all associated savepoints. 
*/ message ActionEndTransactionRequest { - option (experimental) = true; enum EndTransaction { END_TRANSACTION_UNSPECIFIED = 0; @@ -1667,7 +1647,6 @@ message ActionEndTransactionRequest { * savepoints created after the current savepoint. */ message ActionEndSavepointRequest { - option (experimental) = true; enum EndSavepoint { END_SAVEPOINT_UNSPECIFIED = 0; @@ -1702,7 +1681,6 @@ message ActionEndSavepointRequest { * - GetFlightInfo: execute the query. */ message CommandStatementQuery { - option (experimental) = true; // The SQL syntax. string query = 1; @@ -1729,7 +1707,6 @@ message CommandStatementQuery { * - DoPut: execute the query. */ message CommandStatementSubstraitPlan { - option (experimental) = true; // A serialized substrait.Plan SubstraitPlan plan = 1; @@ -1742,7 +1719,6 @@ message CommandStatementSubstraitPlan { * This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. */ message TicketStatementQuery { - option (experimental) = true; // Unique identifier for the instance of the statement to execute. bytes statement_handle = 1; @@ -1770,7 +1746,6 @@ message TicketStatementQuery { * - GetFlightInfo: execute the prepared statement instance. */ message CommandPreparedStatementQuery { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1781,7 +1756,6 @@ message CommandPreparedStatementQuery { * for the RPC call DoPut to cause the server to execute the included SQL update. */ message CommandStatementUpdate { - option (experimental) = true; // The SQL syntax. string query = 1; @@ -1795,7 +1769,6 @@ message CommandStatementUpdate { * prepared statement handle as an update. */ message CommandPreparedStatementUpdate { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1807,7 +1780,6 @@ message CommandPreparedStatementUpdate { * FlightData into the target destination. */ message CommandStatementIngest { - option (experimental) = true; // Options for table definition behavior message TableDefinitionOptions { @@ -1866,7 +1838,6 @@ message CommandStatementIngest { * in the request, containing results from the update. */ message DoPutUpdateResult { - option (experimental) = true; // The number of records updated. A return value of -1 represents // an unknown updated record count. @@ -1880,7 +1851,6 @@ message DoPutUpdateResult { * can continue as though the fields in this message were not provided or set to sensible default values. */ message DoPutPreparedStatementResult { - option (experimental) = true; // Represents a (potentially updated) opaque handle for the prepared statement on the server. // Because the handle could potentially be updated, any previous handles for this prepared @@ -1912,7 +1882,6 @@ message DoPutPreparedStatementResult { */ message ActionCancelQueryRequest { option deprecated = true; - option (experimental) = true; // The result of the GetFlightInfo RPC that initiated the query. // XXX(ARROW-16902): this must be a serialized FlightInfo, but is @@ -1931,7 +1900,6 @@ message ActionCancelQueryRequest { */ message ActionCancelQueryResult { option deprecated = true; - option (experimental) = true; enum CancelResult { // The cancellation status is unknown. 
Servers should avoid using diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 163b4c24031b1..4c1002ae75f04 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -20,7 +20,7 @@ flight-core jar Arrow Flight Core - (Experimental)An RPC mechanism for transferring ValueVectors. + An RPC mechanism for transferring ValueVectors. 1 diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java index 7091caa5e98bc..af22cd8aade22 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java @@ -26,8 +26,6 @@ /** * Middleware for handling Flight SQL Sessions including session cookie handling. - * - * Currently experimental. */ public class ServerSessionMiddleware implements FlightServerMiddleware { Factory factory; diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index cf466ab1720cf..f5926d6e68485 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -20,7 +20,7 @@ flight-sql jar Arrow Flight SQL - (Experimental)Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight + Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight 1 From 1c15c88b4b62b19e7a226cc9e11946af1d9ac343 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 16 May 2024 06:38:57 +0530 Subject: [PATCH 116/261] GH-40943: [Java] Implement RangeEqualsVisitor for StringView (#41636) ### Rationale for this change Adding `RangeEqualsVisitor` for StringView as discussed in https://github.com/apache/arrow/issues/40943. ### What changes are included in this PR? Including `RangeEqualsVisitor` visitor method and test cases to validate it. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #40943 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../vector/BaseVariableWidthViewVector.java | 21 +++-- .../vector/compare/RangeEqualsVisitor.java | 85 ++++++++++++++++++- .../compare/TestRangeEqualsVisitor.java | 71 ++++++++++++++-- 3 files changed, 161 insertions(+), 16 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index 2f80775a48f58..ec700a0dc2592 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -46,7 +46,7 @@ */ public abstract class BaseVariableWidthViewVector extends BaseValueVector implements VariableWidthFieldVector { // A single element of a view comprises 16 bytes - protected static final int ELEMENT_SIZE = 16; + public static final int ELEMENT_SIZE = 16; public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096; private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE; private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); @@ -70,14 +70,14 @@ public abstract class BaseVariableWidthViewVector extends BaseValueVector implem * * */ // 12 byte unsigned int to track inline views - protected static final int INLINE_SIZE = 12; + public static final int INLINE_SIZE = 12; // The first 4 bytes of view are allocated for length - protected static final int LENGTH_WIDTH = 4; + public static final int LENGTH_WIDTH = 4; // The second 4 bytes of view are allocated for prefix width - protected static final int PREFIX_WIDTH = 4; + public static final int PREFIX_WIDTH = 4; // The third 4 bytes of view are allocated for buffer index - protected static final int BUF_INDEX_WIDTH = 4; - protected static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; + public static final int BUF_INDEX_WIDTH = 4; + public static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; protected ArrowBuf validityBuffer; // The view buffer is used to store the variable width view elements protected ArrowBuf viewBuffer; @@ -158,6 +158,15 @@ public ArrowBuf getDataBuffer() { return viewBuffer; } + /** + * Get the buffers that store the data for views in the vector. + * + * @return buffer + */ + public List getDataBuffers() { + return dataBuffers; + } + /** * BaseVariableWidthViewVector doesn't support offset buffer. 
* diff --git a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java index 56220d270fa9b..28da2a86a53c8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/compare/RangeEqualsVisitor.java @@ -22,6 +22,7 @@ import java.util.List; import java.util.function.BiFunction; +import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -165,7 +166,10 @@ public Boolean visit(BaseLargeVariableWidthVector left, Range range) { @Override public Boolean visit(BaseVariableWidthViewVector left, Range range) { - throw new UnsupportedOperationException("View vectors are not supported."); + if (!validate(left)) { + return false; + } + return compareBaseVariableWidthViewVectors(range); } @Override @@ -450,6 +454,85 @@ protected boolean compareBaseLargeVariableWidthVectors(Range range) { return true; } + protected boolean compareBaseVariableWidthViewVectors(Range range) { + BaseVariableWidthViewVector leftVector = (BaseVariableWidthViewVector) left; + BaseVariableWidthViewVector rightVector = (BaseVariableWidthViewVector) right; + + final ArrowBuf leftViewBuffer = leftVector.getDataBuffer(); + final ArrowBuf rightViewBuffer = rightVector.getDataBuffer(); + + final int elementSize = BaseVariableWidthViewVector.ELEMENT_SIZE; + final int lengthWidth = BaseVariableWidthViewVector.LENGTH_WIDTH; + final int prefixWidth = BaseVariableWidthViewVector.PREFIX_WIDTH; + final int bufIndexWidth = BaseVariableWidthViewVector.BUF_INDEX_WIDTH; + + List leftDataBuffers = leftVector.getDataBuffers(); + List rightDataBuffers = rightVector.getDataBuffers(); + + for (int i = 0; i < range.getLength(); i++) { + int leftIndex = range.getLeftStart() + i; + int rightIndex = range.getRightStart() + i; + + boolean isNull = leftVector.isNull(leftIndex); + if (isNull != rightVector.isNull(rightIndex)) { + return false; + } + + if (isNull) { + continue; + } + + int startLeftByteOffset = leftIndex * elementSize; + + int startRightByteOffset = rightIndex * elementSize; + + int leftDataBufferValueLength = leftVector.getValueLength(leftIndex); + int rightDataBufferValueLength = rightVector.getValueLength(rightIndex); + + if (leftDataBufferValueLength != rightDataBufferValueLength) { + return false; + } + + if (leftDataBufferValueLength > BaseVariableWidthViewVector.INLINE_SIZE) { + // if the value is stored in the dataBuffers + int leftDataBufferIndex = leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth); + int rightDataBufferIndex = rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth); + + final int leftDataOffset = + leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + final int rightDataOffset = + rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + + ArrowBuf leftDataBuffer = leftDataBuffers.get(leftDataBufferIndex); + ArrowBuf rightDataBuffer = rightDataBuffers.get(rightDataBufferIndex); + + // check equality in the considered string stored in the dataBuffers + int retDataBuf = ByteFunctionHelpers.equal( + leftDataBuffer, leftDataOffset, leftDataOffset + leftDataBufferValueLength, + rightDataBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) 
{ + return false; + } + } else { + // if the value is stored in the view + final int leftDataOffset = startLeftByteOffset + lengthWidth; + final int rightDataOffset = startRightByteOffset + lengthWidth; + + // check equality in the considered string stored in the view + int retDataBuf = ByteFunctionHelpers.equal( + leftViewBuffer, leftDataOffset, leftDataOffset + leftDataBufferValueLength, + rightViewBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) { + return false; + } + } + + } + return true; + } + protected boolean compareListVectors(Range range) { ListVector leftVector = (ListVector) left; ListVector rightVector = (ListVector) right; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index ab8c6c634891e..c3e7ef8bf8b08 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -18,8 +18,8 @@ package org.apache.arrow.vector.compare; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.Charset; import java.util.Arrays; @@ -33,6 +33,7 @@ import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; import org.apache.arrow.vector.complex.DenseUnionVector; @@ -53,16 +54,16 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestRangeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -71,8 +72,11 @@ public void init() { private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); + private static final byte[] STR4 = "12345678901234A".getBytes(utf8Charset); + private static final byte[] STR5 = "A2345678901234ABC".getBytes(utf8Charset); + private static final byte[] STR6 = "AB45678901234ABCD".getBytes(utf8Charset); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -132,6 +136,55 @@ public void testBaseVariableVectorRangeEquals() { } } + @Test + public void testBaseVariableViewVectorRangeEquals() { + try (final ViewVarCharVector vector1 = new ViewVarCharVector("varchar", allocator); + final ViewVarCharVector vector2 = new ViewVarCharVector("varchar", allocator)) { + + setVector(vector1, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + setVector(vector2, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + + 
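+      // STR4, STR5, and STR6 are longer than the 12-byte inline limit
+      // (BaseVariableWidthViewVector.INLINE_SIZE), so they live in external data
+      // buffers and exercise the buffer-comparison path; the shorter strings are
+      // stored fully inline within their 16-byte views.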
RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + + // checking the same ranges when nulls are set + + vector1.setNull(1); + vector2.setNull(1); + + vector1.setNull(3); + vector2.setNull(3); + + vector1.setNull(5); + vector2.setNull(5); + + vector1.setNull(9); + vector2.setNull(9); + + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + } + } + @Test public void testListVectorWithDifferentChild() { try (final ListVector vector1 = ListVector.empty("list", allocator); @@ -476,7 +529,7 @@ public void testDenseUnionVectorEquals() { } } - @Ignore + @Disabled @Test public void testEqualsWithOutTypeCheck() { try (final IntVector intVector = new IntVector("int", allocator); From 0574988e328d483446b2b758bbc8c26bf1c82196 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 16 May 2024 14:18:31 +0530 Subject: [PATCH 117/261] GH-41287: [Java] ListViewVector Implementation (#41285) ### Rationale for this change Apache Arrow format defines ListView and this has been introduced into other language bindings and the objective of this PR is to provide the initial ListView support to Java by adding `ListViewVector`. ### Non-Goals The following list of issues propose the extended work depending on this PR. They were separated to streamline the implementation process. - [ ] https://github.com/apache/arrow/issues/41272 - [ ] https://github.com/apache/arrow/issues/41286 - [ ] https://github.com/apache/arrow/issues/41290 - [ ] https://github.com/apache/arrow/issues/41288 - [ ] https://github.com/apache/arrow/issues/41289 - [ ] https://github.com/apache/arrow/issues/41269 - [ ] https://github.com/apache/arrow/issues/41291 - [ ] https://github.com/apache/arrow/issues/41292 - [ ] https://github.com/apache/arrow/issues/41270 - [ ] https://github.com/apache/arrow/issues/41293 - [ ] https://github.com/apache/arrow/issues/41294 - [ ] https://github.com/apache/arrow/issues/41569 - [ ] https://github.com/apache/arrow/issues/41570 - [ ] https://github.com/apache/arrow/issues/41584 - [ ] https://github.com/apache/arrow/issues/41585 ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #41287 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: Vibhatha Lakmal Abeykoon Signed-off-by: David Li --- .../binder/ColumnBinderArrowTypeVisitor.java | 5 + .../arrow/c/BufferImportTypeVisitor.java | 6 + .../jdbc/utils/AvaticaParameterBinder.java | 5 + .../arrow/driver/jdbc/utils/ConvertUtils.java | 5 + .../src/main/codegen/data/ArrowTypes.tdd | 5 + .../codegen/templates/UnionListWriter.java | 24 +- .../main/codegen/templates/UnionReader.java | 2 +- .../org/apache/arrow/vector/BufferLayout.java | 13 +- .../org/apache/arrow/vector/TypeLayout.java | 20 +- .../complex/BaseRepeatedValueViewVector.java | 405 ++++ .../arrow/vector/complex/ListViewVector.java | 872 +++++++++ .../vector/complex/impl/PromotableWriter.java | 30 + .../org/apache/arrow/vector/types/Types.java | 21 + .../arrow/vector/TestListViewVector.java | 1651 +++++++++++++++++ 14 files changed, 3059 insertions(+), 5 deletions(-) create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java create mode 100644 java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java create mode 100644 java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 7d50676688e0f..7420a8c23dd48 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -256,4 +256,9 @@ public ColumnBinder visit(ArrowType.Interval type) { public ColumnBinder visit(ArrowType.Duration type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); } + + @Override + public ColumnBinder visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("No column binder implemented for type " + type); + } } diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index bc6139cc84c54..99873dadad242 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -53,6 +53,7 @@ import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** @@ -328,4 +329,9 @@ public List visit(ArrowType.Interval type) { public List visit(ArrowType.Duration type) { return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); } + + @Override + public List visit(ListView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index fd9127c226910..70a58ff440ed4 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ 
b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -254,6 +254,11 @@ public Boolean visit(ArrowType.Interval type) { public Boolean visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + + @Override + public Boolean visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("Binding is not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 93b5faaef32c7..6ec33fafcfa46 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -274,6 +274,11 @@ public AvaticaParameter visit(ArrowType.Interval type) { public AvaticaParameter visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).createParameter(field); } + + @Override + public AvaticaParameter visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("AvaticaParameter not yet supported for type " + type); + } } } diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 9fe40f2319bfd..72df4779793f0 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -129,6 +129,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false + }, + { + name: "ListView", + fields: [], + complex: true } ] } diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 5c0565ee27175..eeb964c055f71 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -26,7 +26,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList"] as listName> +<#list ["List", "ListView", "LargeList"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> @@ -59,6 +59,10 @@ public class Union${listName}Writer extends AbstractFieldWriter { private static final int OFFSET_WIDTH = 4; + <#if listName = "ListView"> + private static final long SIZE_WIDTH = 4; + + public Union${listName}Writer(${listName}Vector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } @@ -193,6 +197,24 @@ public void endList() { setPosition(idx() + 1); listStarted = false; } + <#elseif listName == "ListView"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endList() { + int sizeUptoIdx = 0; + for (int i = 0; i < idx(); i++) { + sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); + } + vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); + setPosition(idx() + 1); + listStarted = false; + } <#else> @Override public void startList() { diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 956bc91e9185c..243bd832255c2 100644 --- 
a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 48; + private static final int NUM_SUPPORTED_TYPES = 49; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java index 9725693348a48..4eeb92a0c9199 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java @@ -28,12 +28,18 @@ public class BufferLayout { /** * Enumeration of the different logical types a buffer can have. + * Data buffer is common to most of the layouts. + * Offset buffer is used for variable width types. + * Validity buffer is used for nullable types. + * Type buffer is used for Union types. + * Size buffer is used for ListView and LargeListView types. */ public enum BufferType { DATA("DATA"), OFFSET("OFFSET"), VALIDITY("VALIDITY"), - TYPE("TYPE_ID"); + TYPE("TYPE_ID"), + SIZE("SIZE"); private final String name; @@ -57,6 +63,7 @@ public String getName() { private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); + private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32); public static BufferLayout typeBuffer() { return TYPE_BUFFER; @@ -70,6 +77,10 @@ public static BufferLayout largeOffsetBuffer() { return LARGE_OFFSET_BUFFER; } + public static BufferLayout sizeBuffer() { + return SIZE_BUFFER; + } + /** * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128 * inclusive. 
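The SIZE buffer introduced above is what distinguishes the ListView layout from the classic List layout: element `i` of the offset buffer says where list `i` starts in the child vector, and element `i` of the size buffer says how many child elements it spans, so lists may overlap or appear out of order. A minimal, self-contained sketch of that addressing, with plain Java arrays standing in for the Arrow buffers and hypothetical example values (illustrative only, not the vector API):

```java
// Sketch of ListView addressing: validity/offset/size buffers over one shared
// child vector. Plain arrays stand in for Arrow buffers; values are made up.
public final class ListViewAddressingSketch {
  public static void main(String[] args) {
    int[] child = {0, -127, 127, 50, 12, -7, 25};      // shared child data vector
    boolean[] validity = {true, false, true, true, true};
    int[] offsets = {4, 7, 0, 0, 3};                   // start of each list
    int[] sizes = {3, 0, 4, 0, 2};                     // length of each list

    for (int i = 0; i < validity.length; i++) {
      if (!validity[i]) {
        System.out.println(i + ": null");
        continue;
      }
      StringBuilder sb = new StringBuilder(i + ": [");
      for (int j = 0; j < sizes[i]; j++) {
        if (j > 0) {
          sb.append(", ");
        }
        sb.append(child[offsets[i] + j]);              // ranges may overlap
      }
      System.out.println(sb.append("]"));
    }
    // Prints: [12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]
  }
}
```

Note that lists 2 and 4 read overlapping ranges of the same child vector (both touch index 3), something the offset-only List layout cannot express.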
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 18032528c86d8..ea92efdc55f61 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -101,7 +101,7 @@ public TypeLayout visit(Timestamp type) { } @Override - public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public TypeLayout visit(ArrowType.List type) { List vectors = asList( BufferLayout.validityVector(), BufferLayout.offsetBuffer() @@ -109,6 +109,16 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.ListView type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer(), + BufferLayout.sizeBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(ArrowType.LargeList type) { List vectors = asList( @@ -312,11 +322,17 @@ public Integer visit(Timestamp type) { } @Override - public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public Integer visit(ArrowType.List type) { // validity buffer + offset buffer return 2; } + @Override + public Integer visit(ArrowType.ListView type) { + // validity buffer + offset buffer + size buffer + return 3; + } + @Override public Integer visit(ArrowType.LargeList type) { // validity buffer + offset buffer diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java new file mode 100644 index 0000000000000..73a25738854f3 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector.complex; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; + +import java.util.Collections; +import java.util.Iterator; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.DensityAwareVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; + +public abstract class BaseRepeatedValueViewVector extends BaseValueVector + implements RepeatedValueVector, BaseListVector { + + public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public static final String DATA_VECTOR_NAME = "$data$"; + + public static final byte OFFSET_WIDTH = 4; + public static final byte SIZE_WIDTH = 4; + protected ArrowBuf offsetBuffer; + protected ArrowBuf sizeBuffer; + protected FieldVector vector; + protected final CallBack repeatedCallBack; + protected int valueCount; + protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; + + protected BaseRepeatedValueViewVector(String name, BufferAllocator allocator, CallBack callBack) { + this(name, allocator, DEFAULT_DATA_VECTOR, callBack); + } + + protected BaseRepeatedValueViewVector( + String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { + super(allocator); + this.name = name; + this.offsetBuffer = allocator.getEmpty(); + this.sizeBuffer = allocator.getEmpty(); + this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); + this.repeatedCallBack = callBack; + this.valueCount = 0; + } + + @Override + public String getName() { + return name; + } + + @Override + public boolean allocateNewSafe() { + boolean dataAlloc = false; + try { + allocateBuffers(); + dataAlloc = vector.allocateNewSafe(); + } catch (Exception e) { + clear(); + return false; + } finally { + if (!dataAlloc) { + clear(); + } + } + return dataAlloc; + } + + private void allocateBuffers() { + offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); + sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); + } + + private ArrowBuf allocateBuffers(final long size) { + final int curSize = (int) size; + ArrowBuf buffer = allocator.buffer(curSize); + buffer.readerIndex(0); + buffer.setZero(0, buffer.capacity()); + return buffer; + } + + @Override + public void reAlloc() { + reallocateBuffers(); + vector.reAlloc(); + } + + protected void reallocateBuffers() { + reallocOffsetBuffer(); + reallocSizeBuffer(); + } + + private void reallocOffsetBuffer() { + final long currentBufferCapacity = offsetBuffer.capacity(); + long newAllocationSize = currentBufferCapacity 
* 2; + if (newAllocationSize == 0) { + if (offsetAllocationSizeInBytes > 0) { + newAllocationSize = offsetAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + offsetBuffer.getReferenceManager().release(1); + offsetBuffer = newBuf; + offsetAllocationSizeInBytes = newAllocationSize; + } + + private void reallocSizeBuffer() { + final long currentBufferCapacity = sizeBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (sizeAllocationSizeInBytes > 0) { + newAllocationSize = sizeAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + sizeBuffer.getReferenceManager().release(1); + sizeBuffer = newBuf; + sizeAllocationSizeInBytes = newAllocationSize; + } + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public void setInitialCapacity(int numRecords) { + offsetAllocationSizeInBytes = (numRecords) * OFFSET_WIDTH; + sizeAllocationSizeInBytes = (numRecords) * SIZE_WIDTH; + if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } + } + + @Override + public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= Integer.MAX_VALUE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + + int innerValueCapacity = Math.max((int) (numRecords * density), 1); + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } + } + + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. 
+ * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. + */ + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + vector.setInitialCapacity(totalNumberOfElements); + } + + @Override + public int getValueCapacity() { + throw new UnsupportedOperationException( + "Get value capacity is not supported in RepeatedValueVector"); + } + + protected int getOffsetBufferValueCapacity() { + return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); + } + + protected int getSizeBufferValueCapacity() { + return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); + } + + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); + } + + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + + int innerVectorValueCount = 0; + + for (int i = 0; i < valueCount; i++) { + innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); + } + + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + + vector.getBufferSizeFor(innerVectorValueCount); + } + + @Override + public Iterator iterator() { + return Collections.singleton(getDataVector()).iterator(); + } + + @Override + public void clear() { + offsetBuffer = releaseBuffer(offsetBuffer); + sizeBuffer = releaseBuffer(sizeBuffer); + vector.clear(); + valueCount = 0; + super.clear(); + } + + @Override + public void reset() { + offsetBuffer.setZero(0, offsetBuffer.capacity()); + sizeBuffer.setZero(0, sizeBuffer.capacity()); + vector.reset(); + valueCount = 0; + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return new ArrowBuf[0]; + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + while (valueCount > getOffsetBufferValueCapacity()) { + reallocateBuffers(); + } + final int childValueCount = valueCount == 0 ? 
0 : getLengthOfChildVector(); + vector.setValueCount(childValueCount); + } + + protected int getLengthOfChildVector() { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < valueCount; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + protected int getLengthOfChildVectorByIndex(int index) { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < index; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + /** + * Initialize the data vector (and execute callback) if it hasn't already been done, + * returns the data vector. + */ + public AddOrGetResult addOrGetVector(FieldType fieldType) { + boolean created = false; + if (vector instanceof NullVector) { + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); + // returned vector must have the same field + created = true; + if (repeatedCallBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { + repeatedCallBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + + return new AddOrGetResult<>((T) vector, created); + } + + protected void replaceDataVector(FieldVector v) { + vector.clear(); + vector = v; + } + + public abstract boolean isEmpty(int index); + + /** + * Start a new value at the given index. + * @param index the index to start the new value at + * @return the offset in the data vector where the new value starts + */ + public int startNewValue(int index) { + while (index >= getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + while (index >= getSizeBufferValueCapacity()) { + reallocSizeBuffer(); + } + + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); + } + + setValueCount(index + 1); + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + @Override + @Deprecated + public UInt4Vector getOffsetVector() { + throw new UnsupportedOperationException("There is no inner offset vector"); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java new file mode 100644 index 0000000000000..b19691e7aaab7 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -0,0 +1,872 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkArgument; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * A list view vector contains lists of a specific type of elements. + * Its structure contains four elements. + *
+ * <ol>
+ *   <li>A validity buffer.</li>
+ *   <li>An offset buffer that denotes where each list starts.</li>
+ *   <li>A size buffer that denotes the length of each list.</li>
+ *   <li>A child data vector that contains the elements of the lists.</li>
+ * </ol>
+ * The latter three are managed by its superclass. + */ + +/* +* TODO: consider merging the functionality in `BaseRepeatedValueVector` into this class. +*/ +public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { + + protected ArrowBuf validityBuffer; + protected UnionListReader reader; + private CallBack callBack; + protected Field field; + protected int validityAllocationSizeInBytes; + + public static ListViewVector empty(String name, BufferAllocator allocator) { + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); + } + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ + public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + this(new Field(name, fieldType, null), allocator, callBack); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.validityBuffer = allocator.getEmpty(); + this.field = field; + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + } + + @Override + public void initializeChildrenFromFields(List children) { + checkArgument(children.size() == 1, + "ListViews have one child Field. Found: %s", children.isEmpty() ? "none" : children); + + Field field = children.get(0); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); + checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); + + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); + } + + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); + } + + /** + * Specialized version of setInitialCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListViewVector. + * Density is the average size of a list per position in the ListViewVector. + * For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1, and + * the remaining positions are null (no lists) or empty lists. 
+ * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords, density); + } + + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. + */ + @Override + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialTotalCapacity(numRecords, totalNumberOfElements); + } + + @Override + public List getChildrenFromFields() { + return singletonList(getDataVector()); + } + + /** + * Load the buffers associated with this Field. + * @param fieldNode the fieldNode + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 3) { + throw new IllegalArgumentException("Illegal buffer count, expected " + + 3 + ", got: " + ownBuffers.size()); + } + + ArrowBuf bitBuffer = ownBuffers.get(0); + ArrowBuf offBuffer = ownBuffers.get(1); + ArrowBuf szBuffer = ownBuffers.get(2); + + validityBuffer.getReferenceManager().release(); + validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); + offsetBuffer.getReferenceManager().release(); + offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); + sizeBuffer.getReferenceManager().release(); + sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator); + + validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); + offsetAllocationSizeInBytes = offsetBuffer.capacity(); + sizeAllocationSizeInBytes = sizeBuffer.capacity(); + + valueCount = fieldNode.getLength(); + } + + /** + * Set the reader and writer indexes for the inner buffers. + */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + offsetBuffer.readerIndex(0); + sizeBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); + sizeBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH); + sizeBuffer.writerIndex(valueCount * SIZE_WIDTH); + } + } + + @Override + public List getFieldBuffers() { + List result = new ArrayList<>(2); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(offsetBuffer); + result.add(sizeBuffer); + + return result; + } + + /** + * Export the buffers of the fields for C Data Interface. 
+ * This method traverses the buffers and export buffer and buffer's memory address into a list of + * buffers and a pointer to the list of buffers. + */ + @Override + public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { + throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet"); + } + + @Override + public void allocateNew() throws OutOfMemoryException { + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } + } + + @Override + public boolean allocateNewSafe() { + boolean success = false; + try { + /* release the current buffers, hence this is a new allocation + * Note that, the `clear` method call below is releasing validityBuffer + * calling the superclass clear method which is releasing the associated buffers + * (sizeBuffer and offsetBuffer). + */ + clear(); + /* allocate validity buffer */ + allocateValidityBuffer(validityAllocationSizeInBytes); + /* allocate offset, data and sizes buffer */ + success = super.allocateNewSafe(); + } finally { + if (!success) { + clear(); + } + } + return success; + } + + protected void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + validityAllocationSizeInBytes = curSize; + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + @Override + public void reAlloc() { + /* reallocate the validity buffer */ + reallocValidityBuffer(); + /* reallocate the offset, size, and data */ + super.reAlloc(); + } + + protected void reallocValidityAndSizeAndOffsetBuffers() { + reallocateBuffers(); + reallocValidityBuffer(); + } + + private void reallocValidityBuffer() { + final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); + long newAllocationSize = getNewAllocationSize(currentBufferCapacity); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + validityBuffer.getReferenceManager().release(1); + validityBuffer = newBuf; + validityAllocationSizeInBytes = (int) newAllocationSize; + } + + private long getNewAllocationSize(int currentBufferCapacity) { + long newAllocationSize = currentBufferCapacity * 2L; + if (newAllocationSize == 0) { + if (validityAllocationSizeInBytes > 0) { + newAllocationSize = validityAllocationSizeInBytes; + } else { + newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; + } + } + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + return newAllocationSize; + } + + @Override + public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFromSafe operation yet."); + } + + @Override + public void copyFrom(int inIndex, int outIndex, ValueVector from) { + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFrom operation yet."); + } + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return 
getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(String, BufferAllocator, CallBack) yet"); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet"); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support makeTransferPair(ValueVector) yet"); + } + + @Override + public long getValidityBufferAddress() { + return validityBuffer.memoryAddress(); + } + + @Override + public long getDataBufferAddress() { + throw new UnsupportedOperationException(); + } + + @Override + public long getOffsetBufferAddress() { + return offsetBuffer.memoryAddress(); + } + + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + @Override + public ArrowBuf getDataBuffer() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowBuf getOffsetBuffer() { + return offsetBuffer; + } + + public ArrowBuf getSizeBuffer() { + return sizeBuffer; + } + + public long getSizeBufferAddress() { + return sizeBuffer.memoryAddress(); + } + + /** + * Get the hash code for the element at the given index. + * @param index position of the element + * @return hash code for the element at the given index + */ + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + /** + * Get the hash code for the element at the given index. + * @param index position of the element + * @param hasher hasher to use + * @return hash code for the element at the given index + */ + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + if (isSet(index) == 0) { + return ArrowBufPointer.NULL_HASH_CODE; + } + int hash = 0; + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); + for (int i = start; i < end; i++) { + hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); + } + return hash; + } + + @Override + public OUT accept(VectorVisitor visitor, IN value) { + throw new UnsupportedOperationException(); + } + + @Override + protected FieldReader getReaderImpl() { + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReaderImpl operation yet."); + } + + @Override + public UnionListReader getReader() { + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReader operation yet."); + } + + /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. + * @return size of underlying buffers. 
+ */ + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + final int offsetBufferSize = valueCount * OFFSET_WIDTH; + final int sizeBufferSize = valueCount * SIZE_WIDTH; + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); + } + + /** + * Get the size (number of bytes) of underlying buffers used by this. + * @param valueCount the number of values to assume this vector contains + * @return size of underlying buffers. + */ + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + + return super.getBufferSizeFor(valueCount) + validityBufferSize; + } + + /** + * Get the field associated with the list view vector. + * @return the field + */ + @Override + public Field getField() { + if (field.getChildren().contains(getDataVector().getField())) { + return field; + } + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); + return field; + } + + /** + * Get the minor type for the vector. + * @return the minor type + */ + @Override + public MinorType getMinorType() { + return MinorType.LISTVIEW; + } + + /** + * Clear the vector data. + */ + @Override + public void clear() { + // calling superclass clear method which is releasing the sizeBufer and offsetBuffer + super.clear(); + validityBuffer = releaseBuffer(validityBuffer); + } + + /** + * Release the buffers associated with this vector. + */ + @Override + public void reset() { + super.reset(); + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + /** + * Return the underlying buffers associated with this vector. Note that this doesn't + * impact the reference counts for this buffer, so it only should be used for in-context + * access. Also note that this buffer changes regularly, thus + * external classes shouldn't hold a reference to it (unless they change it). + * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this + * vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + setReaderAndWriterIndex(); + final ArrowBuf[] buffers; + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + List list = new ArrayList<>(); + // the order must be validity, offset and size buffers + list.add(validityBuffer); + list.add(offsetBuffer); + list.add(sizeBuffer); + list.addAll(Arrays.asList(vector.getBuffers(false))); + buffers = list.toArray(new ArrowBuf[list.size()]); + } + if (clear) { + for (ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; + } + + /** + * Get the element in the list view vector at a particular index. + * @param index position of the element + * @return Object at given position + */ + @Override + public List getObject(int index) { + if (isSet(index) == 0) { + return null; + } + final List vals = new JsonStringArrayList<>(); + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); + final ValueVector vv = getDataVector(); + for (int i = start; i < end; i++) { + vals.add(vv.getObject(i)); + } + + return vals; + } + + /** + * Check if an element at given index is null. 
+ * + * @param index position of an element + * @return true if an element at given index is null, false otherwise + */ + @Override + public boolean isNull(int index) { + return (isSet(index) == 0); + } + + /** + * Check if an element at given index is an empty list. + * @param index position of an element + * @return true if an element at given index is an empty list or NULL, false otherwise + */ + @Override + public boolean isEmpty(int index) { + if (isNull(index)) { + return true; + } else { + return sizeBuffer.getInt(index * SIZE_WIDTH) == 0; + } + } + + /** + * Same as {@link #isNull(int)}. + * + * @param index position of the element + * @return 1 if element at given index is not null, 0 otherwise + */ + public int isSet(int index) { + final int byteIndex = index >> 3; + final byte b = validityBuffer.getByte(byteIndex); + final int bitIndex = index & 7; + return (b >> bitIndex) & 0x01; + } + + /** + * Get the number of elements that are null in the vector. + * + * @return the number of null elements. + */ + @Override + public int getNullCount() { + return BitVectorHelper.getNullCount(validityBuffer, valueCount); + } + + /** + * Get the value capacity by considering validity and offset capacity. + * Note that the size buffer capacity is not considered here since it has + * the same capacity as the offset buffer. + * + * @return the value capacity + */ + @Override + public int getValueCapacity() { + return getValidityAndOffsetValueCapacity(); + } + + private int getValidityAndSizeValueCapacity() { + final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); + final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0); + return Math.min(offsetValueCapacity, sizeValueCapacity); + } + + private int getValidityAndOffsetValueCapacity() { + final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0); + return Math.min(offsetValueCapacity, getValidityBufferValueCapacity()); + } + + private int getValidityBufferValueCapacity() { + return capAtMaxInt(validityBuffer.capacity() * 8); + } + + /** + * Set the element at the given index to null. + * @param index the value to change + */ + @Override + public void setNull(int index) { + while (index >= getValidityAndSizeValueCapacity()) { + reallocValidityAndSizeAndOffsetBuffers(); + } + + offsetBuffer.setInt(index * OFFSET_WIDTH, 0); + sizeBuffer.setInt(index * SIZE_WIDTH, 0); + BitVectorHelper.unsetBit(validityBuffer, index); + } + + /** + * Start new value in the ListView vector. + * + * @param index index of the value to start + * @return offset of the new value + */ + @Override + public int startNewValue(int index) { + while (index >= getValidityAndSizeValueCapacity()) { + reallocValidityAndSizeAndOffsetBuffers(); + } + + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); + } + + BitVectorHelper.setBit(validityBuffer, index); + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + /** + * Validate the invariants of the offset and size buffers. 
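+ * Unlike a ListVector's offsets, these offsets are not required to be
+ * monotonically increasing; each entry only has to satisfy: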
+ * 0 <= offsets[i] <= length of the child array + * 0 <= offsets[i] + size[i] <= length of the child array + * @param offset the offset at a given index + * @param size the size at a given index + */ + private void validateInvariants(int offset, int size) { + if (offset < 0) { + throw new IllegalArgumentException("Offset cannot be negative"); + } + + if (size < 0) { + throw new IllegalArgumentException("Size cannot be negative"); + } + + // 0 <= offsets[i] <= length of the child array + if (offset > this.vector.getValueCount()) { + throw new IllegalArgumentException("Offset is out of bounds."); + } + + // 0 <= offsets[i] + size[i] <= length of the child array + if (offset + size > this.vector.getValueCount()) { + throw new IllegalArgumentException("Offset + size <= length of the child array."); + } + } + + /** + * Set the offset at the given index. + * Make sure to use this function after updating `field` vector and using `setValidity` + * @param index index of the value to set + * @param value value to set + */ + public void setOffset(int index, int value) { + validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH)); + + offsetBuffer.setInt(index * OFFSET_WIDTH, value); + } + + /** + * Set the size at the given index. + * Make sure to use this function after using `setOffset`. + * @param index index of the value to set + * @param value value to set + */ + public void setSize(int index, int value) { + validateInvariants(offsetBuffer.getInt(index * SIZE_WIDTH), value); + + sizeBuffer.setInt(index * SIZE_WIDTH, value); + } + + /** + * Set the validity at the given index. + * @param index index of the value to set + * @param value value to set (0 for unset and 1 for a set) + */ + public void setValidity(int index, int value) { + if (value == 0) { + BitVectorHelper.unsetBit(validityBuffer, index); + } else { + BitVectorHelper.setBit(validityBuffer, index); + } + } + + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + if (valueCount > 0) { + while (valueCount > getValidityAndSizeValueCapacity()) { + /* check if validity and offset buffers need to be re-allocated */ + reallocValidityAndSizeAndOffsetBuffers(); + } + } + /* valueCount for the data vector is the current end offset */ + final int childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector(); + /* set the value count of data vector and this will take care of + * checking whether data buffer needs to be reallocated. + */ + vector.setValueCount(childValueCount); + } + + @Override + public int getElementStartIndex(int index) { + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + @Override + public int getElementEndIndex(int index) { + return sizeBuffer.getInt(index * OFFSET_WIDTH); + } + + @Override + public AddOrGetResult addOrGetVector(FieldType fieldType) { + AddOrGetResult result = super.addOrGetVector(fieldType); + invalidateReader(); + return result; + } + + @Override + public UnionVector promoteToUnion() { + UnionVector vector = new UnionVector("$data$", allocator, /* field type*/ null, callBack); + replaceDataVector(vector); + invalidateReader(); + if (callBack != null) { + callBack.doWork(); + } + return vector; + } + + private void invalidateReader() { + reader = null; + } + + @Deprecated + @Override + public List getFieldInnerVectors() { + throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); + } + + public UnionListViewWriter getWriter() { + return new UnionListViewWriter(this); + } + + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getLengthOfChildVector(); + return totalListSize / valueCount; + } + + /** + * Validating ListViewVector creation based on the specification guideline. + */ + @Override + public void validate() { + for (int i = 0; i < valueCount; i++) { + final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); + final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + validateInvariants(offset, size); + } + } + + /** + * End the current value. + * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * SIZE_WIDTH, size); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 7f724829ef1eb..c59b997286d2d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -54,6 +55,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractStructVector parentContainer; private final ListVector listVector; + private final ListViewVector listViewVector; private final FixedSizeListVector fixedListVector; private final LargeListVector largeListVector; private final NullableStructWriterFactory nullableStructWriterFactory; @@ -94,6 +96,7 @@ public PromotableWriter( NullableStructWriterFactory nullableStructWriterFactory) { this.parentContainer = parentContainer; this.listVector = null; + this.listViewVector = null; this.fixedListVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; @@ -142,6 +145,27 @@ public PromotableWriter( ListVector listVector, NullableStructWriterFactory nullableStructWriterFactory) { this.listVector = listVector; + this.listViewVector = null; + this.parentContainer = null; + this.fixedListVector = null; + this.largeListVector = null; + this.nullableStructWriterFactory = nullableStructWriterFactory; + init(v); + } + + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param listViewVector The vector that serves as a parent of v. + * @param nullableStructWriterFactory The factory to create the delegate writer. 
+ */ + public PromotableWriter( + ValueVector v, + ListViewVector listViewVector, + NullableStructWriterFactory nullableStructWriterFactory) { + this.listViewVector = listViewVector; + this.listVector = null; this.parentContainer = null; this.fixedListVector = null; this.largeListVector = null; @@ -163,6 +187,7 @@ public PromotableWriter( this.fixedListVector = fixedListVector; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); @@ -183,6 +208,7 @@ public PromotableWriter( this.fixedListVector = null; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -280,6 +306,8 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { v = listVector.addOrGetVector(fieldType).getVector(); } else if (fixedListVector != null) { v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else if (listViewVector != null) { + v = listViewVector.addOrGetVector(fieldType).getVector(); } else { v = largeListVector.addOrGetVector(fieldType).getVector(); } @@ -322,6 +350,8 @@ private FieldWriter promoteToUnion() { unionVector = fixedListVector.promoteToUnion(); } else if (largeListVector != null) { unionVector = largeListVector.promoteToUnion(); + } else if (listViewVector != null) { + unionVector = listViewVector.promoteToUnion(); } unionVector.addVector((FieldVector) tp.getTo()); writer = new UnionWriter(unionVector, nullableStructWriterFactory); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index 89d8441d42aa9..e10a65e3b2c53 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -71,6 +71,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -136,6 +137,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary; import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8; import org.apache.arrow.vector.types.pojo.ArrowType.List; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.types.pojo.ArrowType.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Null; import org.apache.arrow.vector.types.pojo.ArrowType.Struct; @@ -692,6 +694,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new UnionListWriter((ListVector) vector); } }, + LISTVIEW(ListView.INSTANCE) { + @Override + public FieldVector getNewVector( + Field field, + BufferAllocator allocator, + CallBack schemaChangeCallback) { + return new ListViewVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback); + } + + @Override + public FieldWriter getNewFieldWriter(ValueVector vector) { + return new UnionListWriter((ListVector) vector); + } + }, LARGELIST(ArrowType.LargeList.INSTANCE) { @Override public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) { @@ -1064,6 +1080,11 @@ public MinorType visit(Duration type) { return 
MinorType.DURATION; } + @Override + public MinorType visit(ListView type) { + return MinorType.LISTVIEW; + } + @Override public MinorType visit(ExtensionType type) { return MinorType.EXTENSIONTYPE; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java new file mode 100644 index 0000000000000..e64ed77b1eb9f --- /dev/null +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java @@ -0,0 +1,1651 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.holders.DurationHolder; +import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestListViewVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @AfterEach + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testBasicListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + /* the second list at index 1 is null (we are not 
setting any)*/ + + /* write the third list at index 2 */ + listViewWriter.setPosition(2); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(0); + listViewWriter.bigInt().writeBigInt(-127); + listViewWriter.bigInt().writeBigInt(127); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.endList(); + + /* write the fourth list at index 3 (empty list) */ + listViewWriter.setPosition(3); + listViewWriter.startList(); + listViewWriter.endList(); + + /* write the fifth list at index 4 */ + listViewWriter.setPosition(4); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(1); + listViewWriter.bigInt().writeBigInt(2); + listViewWriter.bigInt().writeBigInt(3); + listViewWriter.bigInt().writeBigInt(4); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + // check value count + assertEquals(5, listViewVector.getValueCount()); + + /* get vector at index 0 -- the value is a BigIntVector*/ + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final FieldVector dataVec = listViewVector.getDataVector(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check data vector + assertEquals(12, ((BigIntVector) dataVec).get(0)); + assertEquals(-7, ((BigIntVector) dataVec).get(1)); + assertEquals(25, ((BigIntVector) dataVec).get(2)); + assertEquals(0, ((BigIntVector) dataVec).get(3)); + assertEquals(-127, ((BigIntVector) dataVec).get(4)); + assertEquals(127, ((BigIntVector) dataVec).get(5)); + assertEquals(50, ((BigIntVector) dataVec).get(6)); + assertEquals(1, ((BigIntVector) dataVec).get(7)); + assertEquals(2, ((BigIntVector) dataVec).get(8)); + assertEquals(3, ((BigIntVector) dataVec).get(9)); + assertEquals(4, ((BigIntVector) dataVec).get(10)); + + listViewVector.validate(); + } + } + + @Test + public void testImplicitNullVectors() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + /* allocate memory */ + listViewWriter.allocate(); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + // after the first list is written, + // the initial offset 
must be 0, + // the size must be 3 (as there are 3 elements in the array), + // the lastSet must be 0 since, the first list is written at index 0. + + assertEquals(0, offSet0); + assertEquals(3, size0); + + listViewWriter.setPosition(5); + listViewWriter.startList(); + + // writing the 6th list at index 5, + // and the list items from index 1 through 4 are not populated. + // but since there is a gap between the 0th and 5th list, in terms + // of buffer allocation, the offset and size buffers must be updated + // to reflect the implicit null vectors. + + for (int i = 1; i < 5; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + // Since the list is not written, the offset and size must equal to child vector's size + // i.e., 3, and size should be 0 as the list is not written. + // And the last set value is the value currently being written, which is 5. + assertEquals(0, offSet); + assertEquals(0, size); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + assertEquals(3, offSet5); + assertEquals(2, size5); + + listViewWriter.setPosition(10); + listViewWriter.startList(); + + // writing the 11th list at index 10, + // and the list items from index 6 through 10 are not populated. + // but since there is a gap between the 5th and 11th list, in terms + // of buffer allocation, the offset and size buffers must be updated + // to reflect the implicit null vectors. + for (int i = 6; i < 10; i++) { + int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH); + // Since the list is not written, the offset and size must equal to 0 + // and size should be 0 as the list is not written. + // And the last set value is the value currently being written, which is 10. + assertEquals(0, offSet); + assertEquals(0, size); + } + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.endList(); + + int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + assertEquals(5, offSet11); + assertEquals(1, size11); + + listViewVector.setValueCount(11); + + listViewVector.validate(); + } + } + + @Test + public void testNestedListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* the dataVector that backs a listVector will also be a + * listVector for this test. 
+ */ + + /* write one or more inner lists at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(50); + listViewWriter.list().bigInt().writeBigInt(100); + listViewWriter.list().bigInt().writeBigInt(200); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(75); + listViewWriter.list().bigInt().writeBigInt(125); + listViewWriter.list().bigInt().writeBigInt(150); + listViewWriter.list().bigInt().writeBigInt(175); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + /* write one or more inner lists at index 1 */ + listViewWriter.setPosition(1); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(10); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(15); + listViewWriter.list().bigInt().writeBigInt(20); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(25); + listViewWriter.list().bigInt().writeBigInt(30); + listViewWriter.list().bigInt().writeBigInt(35); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testNestedListVector() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LISTVIEW; + MinorType scalarType = MinorType.BIGINT; + + listViewVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList1 = (ListViewVector) listViewVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList2 = (ListViewVector) innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList3 = (ListViewVector) innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList4 = (ListViewVector) innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList5 = (ListViewVector) innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList6 = (ListViewVector) innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listViewVector.setInitialCapacity(128); + + listViewVector.validate(); + } + } + + private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { + for (int i = 0; i < bufValues.length; i++) { + buffer.setInt(i * bufWidth, bufValues[i]); + } + } + + /* + * Setting up the buffers directly needs to be validated with the base method used in + * the ListVector class where we use the approach of startList(), + * write to the child vector and endList(). + *
+ * To support this, we have to consider the following scenarios:
+ *
+ * 1. Only using directly buffer-based inserts. + * 2. Default list insertion followed by buffer-based inserts. + * 3. Buffer-based inserts followed by default list insertion. + */ + + /* Setting up buffers directly would require the following steps to be taken + * 0. Allocate buffers in listViewVector by calling `allocateNew` method. + * 1. Initialize the child vector using `initializeChildrenFromFields` method. + * 2. Set values in the child vector. + * 3. Set validity, offset and size buffers using `setValidity`, + * `setOffset` and `setSize` methods. + * 4. Set value count using `setValueCount` method. + */ + @Test + public void testBasicListViewSet() { + + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + // Set value count using `setValueCount` method. 
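+ // Note that setValueCount must run only after the offsets and sizes are in
+ // place: it derives the child value count as max(offset + size) - min(offset)
+ // over all view entries (see getLengthOfChildVector).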
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetNested() { + // Expected listview + // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] + + // Setting child vector + // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.List(), + null, null); + FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field childField = new Field("child-vector", childFieldType, null); + List children = new ArrayList<>(); + children.add(childField); + Field field = new Field("child-vector", fieldType, children); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. 
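+ // The child of this listview is itself a ListVector, so its five inner
+ // lists are written with the regular startList()/endList() writer API
+ // before the view's validity, offsets, and sizes are wired up by hand.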
+ FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + ListVector childVector = (ListVector) fieldVector; + UnionListWriter listWriter = childVector.getWriter(); + listWriter.allocate(); + + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); + + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(75); + listWriter.bigInt().writeBigInt(125); + listWriter.bigInt().writeBigInt(150); + listWriter.bigInt().writeBigInt(175); + + listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(10); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(3); + + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(20); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(4); + + listWriter.bigInt().writeBigInt(25); + listWriter.bigInt().writeBigInt(30); + listWriter.bigInt().writeBigInt(35); + + listWriter.endList(); + + childVector.setValueCount(5); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 2); + + listViewVector.setSize(0, 2); + listViewVector.setSize(1, 3); + + // Set value count using `setValueCount` method. + listViewVector.setValueCount(2); + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + 
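+ // With the two views configured above, the expected physical layout is
+ // offsets = [0, 2] and sizes = [2, 3] over the five-list child vector,
+ // which the assertions below verify directly against the buffers.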
+ // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetWithListViewWriter() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + // Set value count using `setValueCount` method. 
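+ // setValueCount finalizes how many top-level elements readers will see;
+ // in this manual-population pattern it is the last step, after the
+ // validity, offset and size buffers above have been filled in.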
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + listViewVector.validate(); + } + } + + @Test + public void testGetBufferAddress() throws 
Exception { + try (ListViewVector listViewVector = ListViewVector.empty("vector", allocator)) { + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + boolean error = false; + + listViewWriter.allocate(); + + listViewWriter.setPosition(0); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.bigInt().writeBigInt(100); + listViewWriter.bigInt().writeBigInt(200); + listViewWriter.endList(); + + listViewWriter.setPosition(1); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(250); + listViewWriter.bigInt().writeBigInt(300); + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + /* check listVector contents */ + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(100), resultSet.get(1)); + assertEquals(Long.valueOf(200), resultSet.get(2)); + + result = listViewVector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(250), resultSet.get(0)); + assertEquals(Long.valueOf(300), resultSet.get(1)); + + List buffers = listViewVector.getFieldBuffers(); + + long bitAddress = listViewVector.getValidityBufferAddress(); + long offsetAddress = listViewVector.getOffsetBufferAddress(); + long sizeAddress = listViewVector.getSizeBufferAddress(); + + try { + listViewVector.getDataBufferAddress(); + } catch (UnsupportedOperationException ue) { + error = true; + } finally { + assertTrue(error); + } + + assertEquals(3, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + assertEquals(sizeAddress, buffers.get(2).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listViewVector.getDensity(), 0); + listViewVector.validate(); + } + } + + @Test + public void testConsistentChildName() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + String emptyListStr = listViewVector.getField().toString(); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + + listViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + String emptyVectorStr = listViewVector.getField().toString(); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + } + } + + @Test + public void testSetInitialCapacity() { + try (final ListViewVector vector = ListViewVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512); + + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); + + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); + + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); + + vector.setInitialCapacity(5, 0.1); + vector.allocateNew(); + assertEquals(8, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); + + vector.validate(); + } + } + + @Test + 
public void testClearAndReuse() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + BigIntVector bigIntVector = + (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); + vector.setInitialCapacity(10); + vector.allocateNew(); + + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + // Clear and release the buffers to trigger a realloc when adding next value + vector.clear(); + + // The list vector should reuse a buffer when reallocating the offset buffer + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + result = vector.getObject(0); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + vector.validate(); + } + } + + @Test + public void testWriterGetField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testWriterUsingHolderGetTimestampMilliTZField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); + holder.timezone = "SomeFakeTimeZone"; + writer.startList(); + holder.value = 12341234L; + writer.timeStampMilliTZ().write(holder); + holder.value = 55555L; + writer.timeStampMilliTZ().write(holder); + + // Writing with a different timezone should throw + holder.timezone = "AsdfTimeZone"; + holder.value = 77777; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.timeStampMilliTZ().write(holder)); + assertEquals( + "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", + ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void 
testWriterGetDurationField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + DurationHolder durationHolder = new DurationHolder(); + durationHolder.unit = TimeUnit.MILLISECOND; + + writer.startList(); + durationHolder.value = 812374L; + writer.duration().write(durationHolder); + durationHolder.value = 143451L; + writer.duration().write(durationHolder); + + // Writing with a different unit should throw + durationHolder.unit = TimeUnit.SECOND; + durationHolder.value = 8888888; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.duration().write(durationHolder)); + assertEquals( + "holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), null); + Field expectedField = new Field(vector.getName(), + FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testClose() throws Exception { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + assertTrue(vector.getBufferSize() > 0); + assertTrue(vector.getDataVector().getBufferSize() > 0); + + writer.close(); + assertEquals(0, vector.getBufferSize()); + assertEquals(0, vector.getDataVector().getBufferSize()); + + vector.validate(); + } + } + + @Test + public void testGetBufferSizeFor() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeIntValues(writer, new int[] {1, 2}); + writeIntValues(writer, new int[] {3, 4}); + writeIntValues(writer, new int[] {5, 6}); + writeIntValues(writer, new int[] {7, 8, 9, 10}); + writeIntValues(writer, new int[] {11, 12, 13, 14}); + writer.setValueCount(5); + + IntVector dataVector = (IntVector) vector.getDataVector(); + int[] indices = new int[] {0, 2, 4, 6, 10, 14}; + + for (int valueCount = 1; valueCount <= 5; valueCount++) { + int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); + int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH; + int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH; + + int expectedSize = validityBufferSize + offsetBufferSize + sizeBufferSize + + dataVector.getBufferSizeFor(indices[valueCount]); + assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); + } + vector.validate(); + } + } + + @Test + public void testIsEmpty() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + // set values [1,2], null, [], [5,6] + writeIntValues(writer, new int[] {1, 2}); + writer.setPosition(2); + writeIntValues(writer, new int[] {}); + writeIntValues(writer, new int[] {5, 6}); + writer.setValueCount(4); + + assertFalse(vector.isEmpty(0)); + 
assertTrue(vector.isNull(1)); + assertTrue(vector.isEmpty(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isEmpty(2)); + assertFalse(vector.isEmpty(3)); + + vector.validate(); + } + } + + @Test + public void testTotalCapacity() { + // adopted from ListVector test cases + final FieldType type = FieldType.nullable(MinorType.INT.getType()); + try (final ListViewVector vector = new ListViewVector("listview", allocator, type, null)) { + // Force the child vector to be allocated based on the type + // (this is a bad API: we have to track and repeat the type twice) + vector.addOrGetVector(type); + + // Specify the allocation size but do not allocate + vector.setInitialTotalCapacity(10, 100); + + // Finally, actually do the allocation + vector.allocateNewSafe(); + + // Note: allocator rounds up and can be greater than the requested allocation. + assertTrue(vector.getValueCapacity() >= 10); + assertTrue(vector.getDataVector().getValueCapacity() >= 100); + + vector.validate(); + } + } + + @Test + public void testSetNull1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.endList(); + + vector.setNull(1); + + writer.setPosition(2); + writer.startList(); + writer.bigInt().writeBigInt(30); + writer.bigInt().writeBigInt(40); + writer.endList(); + + vector.setNull(3); + vector.setNull(4); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(50); + writer.bigInt().writeBigInt(60); + writer.endList(); + + vector.setValueCount(6); + + assertFalse(vector.isNull(0)); + assertTrue(vector.isNull(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(0); + ArrayList resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + + result = vector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(30), resultSet.get(0)); + assertEquals(Long.valueOf(40), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(60), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testSetNull2() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting nulls first and then writing values + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testSetNull3() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting values first and then writing nulls + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), 
resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testOverWrite1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOverwriteWithNull() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(0); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(1); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + assertTrue(vector.isNull(0)); + assertTrue(vector.isNull(1)); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + assertEquals(2, offsetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setValueCount(2); + + assertFalse(vector.isNull(0)); + assertFalse(vector.isNull(1)); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOutOfOrderOffset1() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + + FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + SmallIntVector childVector = (SmallIntVector) fieldVector; + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + listViewVector.setValidity(4, 1); + + listViewVector.setOffset(0, 4); + listViewVector.setOffset(1, 7); + listViewVector.setOffset(2, 0); + listViewVector.setOffset(3, 0); + listViewVector.setOffset(4, 3); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + listViewVector.setSize(4, 2); + + // Set value count using `setValueCount` method. 
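+ // (The out-of-order offsets set above are legal for ListView: each
+ // (offset, size) pair only has to stay within the child vector's value
+ // count, as the validate() call at the end of this test confirms.)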
+ listViewVector.setValueCount(5); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); + + // check values + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Short.valueOf("12"), resultSet.get(0)); + assertEquals(Short.valueOf("-7"), resultSet.get(1)); + assertEquals(Short.valueOf("25"), resultSet.get(2)); + + assertTrue(listViewVector.isNull(1)); + + result = listViewVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Short.valueOf("0"), resultSet.get(0)); + assertEquals(Short.valueOf("-127"), resultSet.get(1)); + assertEquals(Short.valueOf("127"), resultSet.get(2)); + assertEquals(Short.valueOf("50"), resultSet.get(3)); + + assertTrue(listViewVector.isEmpty(3)); + + result = listViewVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Short.valueOf("50"), resultSet.get(0)); + assertEquals(Short.valueOf("12"), resultSet.get(1)); + + listViewVector.validate(); + } + } + + private void writeIntValues(UnionListViewWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } + +} From 07a30d9a5784852187d100660325b8c12b4ff6c8 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 16 May 2024 03:30:14 -0800 Subject: [PATCH 118/261] GH-41611: [Docs][CI] Enable most sphinx-lint rules for documentation (#41612) ### Rationale for this change https://github.com/apache/arrow/issues/41611 ### What changes are included in this PR? - Update to pre-commit config to enable all checks except `dangling-hyphen`, `line-too-long` by default - Associated fix docs ### Are these changes tested? Yes, by building and looking at the docs locally. ### Are there any user-facing changes? Just docs. 
* GitHub Issue: #41611

Authored-by: Bryce Mecum
Signed-off-by: AlenkaF
---
 .pre-commit-config.yaml                       | 10 +++++--
 docs/source/conf.py                           |  2 +-
 docs/source/cpp/acero/developer_guide.rst     | 10 +++----
 docs/source/cpp/acero/overview.rst            | 26 +++++++++----------
 docs/source/cpp/acero/user_guide.rst          |  8 +++---
 docs/source/cpp/build_system.rst              |  2 +-
 docs/source/cpp/compute.rst                   | 18 ++++++-------
 docs/source/developers/cpp/building.rst       |  2 +-
 docs/source/developers/documentation.rst      |  2 +-
 .../guide/step_by_step/arrow_codebase.rst     |  4 +--
 .../developers/guide/step_by_step/set_up.rst  |  8 +++---
 docs/source/developers/java/development.rst   |  2 +-
 docs/source/developers/release.rst            |  4 +--
 docs/source/format/CanonicalExtensions.rst    |  4 +--
 docs/source/format/Columnar.rst               |  6 ++---
 docs/source/format/FlightSql.rst              |  2 +-
 docs/source/format/Integration.rst            |  2 +-
 docs/source/java/algorithm.rst                |  2 +-
 docs/source/java/flight_sql_jdbc_driver.rst   |  2 +-
 docs/source/java/install.rst                  |  2 +-
 docs/source/java/ipc.rst                      |  2 +-
 docs/source/java/quickstartguide.rst          | 16 ++++++------
 docs/source/java/substrait.rst                | 20 +++++++-------
 docs/source/java/table.rst                    | 16 ++++++------
 docs/source/python/api/compute.rst            |  2 +-
 docs/source/python/data.rst                   |  4 +--
 docs/source/python/extending_types.rst        |  2 +-
 docs/source/python/filesystems.rst            |  4 +--
 docs/source/python/install.rst                |  2 +-
 docs/source/python/integration/extending.rst  |  2 +-
 docs/source/python/memory.rst                 |  2 +-
 docs/source/python/timestamps.rst             |  2 +-
 32 files changed, 99 insertions(+), 93 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index bf5ca08d53c32..7dcc1c9816d12 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -136,5 +136,11 @@ repos:
     rev: v0.9.1
     hooks:
       - id: sphinx-lint
-        files: ^docs/
-        args: ['--disable', 'all', '--enable', 'trailing-whitespace,missing-final-newline', 'docs']
+        files: ^docs/source
+        exclude: ^docs/source/python/generated
+        args: [
+          '--enable',
+          'all',
+          '--disable',
+          'dangling-hyphen,line-too-long',
+        ]
diff --git a/docs/source/conf.py b/docs/source/conf.py
index b487200555a09..1e6c113e33188 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -535,7 +535,7 @@
 #
 # latex_appendices = []
 
-# It false, will not define \strong, \code, itleref, \crossref ... but only
+# If false, will not define \strong, \code, \titleref, \crossref ... but only
 # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added
 # packages.
 #
diff --git a/docs/source/cpp/acero/developer_guide.rst b/docs/source/cpp/acero/developer_guide.rst
index 80ca68556fc40..7dd08fe3ce2ce 100644
--- a/docs/source/cpp/acero/developer_guide.rst
+++ b/docs/source/cpp/acero/developer_guide.rst
@@ -327,8 +327,8 @@ An engine could choose to create a thread task for every execution of a node. H
 this leads to problems with cache locality. For example, let's assume we have a basic plan consisting of three
 exec nodes, scan, project, and then filter (this is a very common use case). Now let's assume there are 100 batches.
 In a task-per-operator model we would have tasks like "Scan Batch 5", "Project Batch 5", and "Filter Batch 5". Each
-of those tasks is potentially going to access the same data. For example, maybe the `project` and `filter` nodes need
-to read the same column. A column which is intially created in a decode phase of the `scan` node. To maximize cache
+of those tasks is potentially going to access the same data. For example, maybe the ``project`` and ``filter`` nodes need
+to read the same column.
A column which is initially created in a decode phase of the ``scan`` node. To maximize cache
 utilization we would need to carefully schedule our tasks to ensure that all three of those tasks are run consecutively
 and assigned to the same CPU core.
 
@@ -412,7 +412,7 @@ Ordered Execution
 =================
 
 Some nodes either establish an ordering to their outgoing batches or they need to be able to process batches in order.
-Acero handles ordering using the `batch_index` property on an ExecBatch. If a node has a deterministic output order
+Acero handles ordering using the ``batch_index`` property on an ExecBatch. If a node has a deterministic output order
 then it should apply a batch index on batches that it emits. For example, the OrderByNode applies a new ordering to
 batches (regardless of the incoming ordering). The scan node is able to attach an implicit ordering to batches which
 reflects the order of the rows in the files being scanned.
@@ -461,8 +461,8 @@ Acero's tracing is currently half-implemented and there are major gaps in profil
 effort at tracing with open telemetry and most of the necessary pieces are in place. The main thing currently lacking
 is some kind of effective visualization of the tracing results.
 
-In order to use the tracing that is present today you will need to build with Arrow with `ARROW_WITH_OPENTELEMETRY=ON`.
-Then you will need to set the environment variable `ARROW_TRACING_BACKEND=otlp_http`. This will configure open telemetry
+In order to use the tracing that is present today you will need to build Arrow with ``ARROW_WITH_OPENTELEMETRY=ON``.
+Then you will need to set the environment variable ``ARROW_TRACING_BACKEND=otlp_http``. This will configure open telemetry
 to export trace results (as OTLP) to the HTTP endpoint http://localhost:4318/v1/traces. You will need to configure an
 open telemetry collector to collect results on that endpoint and you will need to configure a trace viewer of some
 kind such as Jaeger: https://www.jaegertracing.io/docs/1.21/opentelemetry/
diff --git a/docs/source/cpp/acero/overview.rst b/docs/source/cpp/acero/overview.rst
index 8be4cbc1b1772..34e0b143bc2ce 100644
--- a/docs/source/cpp/acero/overview.rst
+++ b/docs/source/cpp/acero/overview.rst
@@ -209,16 +209,16 @@ must have the same length. There are a few key differences from ExecBatch:
 
   Both the record batch and the exec batch have strong ownership of the arrays & buffers
 
-* An `ExecBatch` does not have a schema. This is because an `ExecBatch` is assumed to be
+* An ``ExecBatch`` does not have a schema. This is because an ``ExecBatch`` is assumed to be
   part of a stream of batches and the stream is assumed to have a consistent schema. So
-  the schema for an `ExecBatch` is typically stored in the ExecNode.
-* Columns in an `ExecBatch` are either an `Array` or a `Scalar`. When a column is a `Scalar`
-  this means that the column has a single value for every row in the batch. An `ExecBatch`
+  the schema for an ``ExecBatch`` is typically stored in the ExecNode.
+* Columns in an ``ExecBatch`` are either an ``Array`` or a ``Scalar``. When a column is a ``Scalar``
+  this means that the column has a single value for every row in the batch. An ``ExecBatch``
   also has a length property which describes how many rows are in a batch. So another way to
-  view a `Scalar` is a constant array with `length` elements.
-* An `ExecBatch` contains additional information used by the exec plan. For example, an
-  `index` can be used to describe a batch's position in an ordered stream.
We expect - that `ExecBatch` will also evolve to contain additional fields such as a selection vector. + view a ``Scalar`` is a constant array with ``length`` elements. +* An ``ExecBatch`` contains additional information used by the exec plan. For example, an + ``index`` can be used to describe a batch's position in an ordered stream. We expect + that ``ExecBatch`` will also evolve to contain additional fields such as a selection vector. .. figure:: scalar_vs_array.svg @@ -231,8 +231,8 @@ only zero copy if there are no scalars in the exec batch. .. note:: Both Acero and the compute module have "lightweight" versions of batches and arrays. - In the compute module these are called `BatchSpan`, `ArraySpan`, and `BufferSpan`. In - Acero the concept is called `KeyColumnArray`. These types were developed concurrently + In the compute module these are called ``BatchSpan``, ``ArraySpan``, and ``BufferSpan``. In + Acero the concept is called ``KeyColumnArray``. These types were developed concurrently and serve the same purpose. They aim to provide an array container that can be completely stack allocated (provided the data type is non-nested) in order to avoid heap allocation overhead. Ideally these two concepts will be merged someday. @@ -247,9 +247,9 @@ execution of the nodes. Both ExecPlan and ExecNode are tied to the lifecycle of They have state and are not expected to be restartable. .. warning:: - The structures within Acero, including `ExecBatch`, are still experimental. The `ExecBatch` - class should not be used outside of Acero. Instead, an `ExecBatch` should be converted to - a more standard structure such as a `RecordBatch`. + The structures within Acero, including ``ExecBatch``, are still experimental. The ``ExecBatch`` + class should not be used outside of Acero. Instead, an ``ExecBatch`` should be converted to + a more standard structure such as a ``RecordBatch``. Similarly, an ExecPlan is an internal concept. Users creating plans should be using Declaration objects. APIs for consuming and executing plans should abstract away the details of the underlying diff --git a/docs/source/cpp/acero/user_guide.rst b/docs/source/cpp/acero/user_guide.rst index adcc17216e5ae..0271be2180e99 100644 --- a/docs/source/cpp/acero/user_guide.rst +++ b/docs/source/cpp/acero/user_guide.rst @@ -455,8 +455,8 @@ can be selected from :ref:`this list of aggregation functions will be added which should alleviate this constraint. The aggregation can provide results as a group or scalar. For instances, -an operation like `hash_count` provides the counts per each unique record -as a grouped result while an operation like `sum` provides a single record. +an operation like ``hash_count`` provides the counts per each unique record +as a grouped result while an operation like ``sum`` provides a single record. Scalar Aggregation example: @@ -490,7 +490,7 @@ caller will repeatedly call this function until the generator function is exhaus will accumulate in memory. An execution plan should only have one "terminal" node (one sink node). An :class:`ExecPlan` can terminate early due to cancellation or an error, before the output is fully consumed. However, the plan can be safely destroyed independently -of the sink, which will hold the unconsumed batches by `exec_plan->finished()`. +of the sink, which will hold the unconsumed batches by ``exec_plan->finished()``. 
As a part of the Source Example, the Sink operation is also included; @@ -515,7 +515,7 @@ The consuming function may be called before a previous invocation has completed. function does not run quickly enough then many concurrent executions could pile up, blocking the CPU thread pool. The execution plan will not be marked finished until all consuming function callbacks have been completed. -Once all batches have been delivered the execution plan will wait for the `finish` future to complete +Once all batches have been delivered the execution plan will wait for the ``finish`` future to complete before marking the execution plan finished. This allows for workflows where the consumption function converts batches into async tasks (this is currently done internally for the dataset write node). diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 0c94d7e5ce5dc..e80bca4c949dc 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -167,7 +167,7 @@ file into an executable linked with the Arrow C++ shared library: .. code-block:: makefile my_example: my_example.cc - $(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow) + $(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow) Many build systems support pkg-config. For example: diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 546b6e5716df7..701c7d573ac0e 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -514,8 +514,8 @@ Mixed time resolution temporal inputs will be cast to finest input resolution. +------------+---------------------------------------------+ It's compatible with Redshift's decimal promotion rules. All decimal digits - are preserved for `add`, `subtract` and `multiply` operations. The result - precision of `divide` is at least the sum of precisions of both operands with + are preserved for ``add``, ``subtract`` and ``multiply`` operations. The result + precision of ``divide`` is at least the sum of precisions of both operands with enough scale kept. Error is returned if the result precision is beyond the decimal value range. @@ -1029,7 +1029,7 @@ These functions trim off characters on both sides (trim), or the left (ltrim) or +--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ * \(1) Only characters specified in :member:`TrimOptions::characters` will be - trimmed off. Both the input string and the `characters` argument are + trimmed off. Both the input string and the ``characters`` argument are interpreted as ASCII characters. * \(2) Only trim off ASCII whitespace characters (``'\t'``, ``'\n'``, ``'\v'``, @@ -1570,7 +1570,7 @@ is the same, even though the UTC years would be different. Timezone handling ~~~~~~~~~~~~~~~~~ -`assume_timezone` function is meant to be used when an external system produces +``assume_timezone`` function is meant to be used when an external system produces "timezone-naive" timestamps which need to be converted to "timezone-aware" timestamps (see for example the `definition `__ @@ -1581,11 +1581,11 @@ Input timestamps are assumed to be relative to the timezone given in UTC-relative timestamps with the timezone metadata set to the above value. An error is returned if the timestamps already have the timezone metadata set. 
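For example, applying ``assume_timezone`` with timezone ``Europe/Paris`` to the naive timestamp
``2024-01-15 10:00:00`` produces the UTC-relative instant ``2024-01-15 09:00:00`` with
``Europe/Paris`` set as the timezone metadata (Paris observes UTC+1 in January).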
-`local_timestamp` function converts UTC-relative timestamps to local "timezone-naive" +``local_timestamp`` function converts UTC-relative timestamps to local "timezone-naive" timestamps. The timezone is taken from the timezone metadata of the input -timestamps. This function is the inverse of `assume_timezone`. Please note: +timestamps. This function is the inverse of ``assume_timezone``. Please note: **all temporal functions already operate on timestamps as if they were in local -time of the metadata provided timezone**. Using `local_timestamp` is only meant to be +time of the metadata provided timezone**. Using ``local_timestamp`` is only meant to be used when an external system expects local timestamps. +-----------------+-------+-------------+---------------+---------------------------------+-------+ @@ -1649,8 +1649,8 @@ overflow is detected. * \(1) CumulativeOptions has two optional parameters. The first parameter :member:`CumulativeOptions::start` is a starting value for the running - accumulation. It has a default value of 0 for `sum`, 1 for `prod`, min of - input type for `max`, and max of input type for `min`. Specified values of + accumulation. It has a default value of 0 for ``sum``, 1 for ``prod``, min of + input type for ``max``, and max of input type for ``min``. Specified values of ``start`` must be castable to the input type. The second parameter :member:`CumulativeOptions::skip_nulls` is a boolean. When set to false (the default), the first encountered null is propagated. When set to diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 7b80d2138c33e..b052b856c9bd5 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -312,7 +312,7 @@ depends on ``python`` being available). On some Linux distributions, running the test suite might require setting an explicit locale. If you see any locale-related errors, try setting the -environment variable (which requires the `locales` package or equivalent): +environment variable (which requires the ``locales`` package or equivalent): .. code-block:: diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst index 8b1ea28c0f54b..a479065f6297e 100644 --- a/docs/source/developers/documentation.rst +++ b/docs/source/developers/documentation.rst @@ -259,7 +259,7 @@ Build the docs in the target directory: sphinx-build ./source/developers ./source/developers/_build -c ./source -D master_doc=temp_index This builds everything in the target directory to a folder inside of it -called ``_build`` using the config file in the `source` directory. +called ``_build`` using the config file in the ``source`` directory. Once you have verified the HTML documents, you can remove temporary index file: diff --git a/docs/source/developers/guide/step_by_step/arrow_codebase.rst b/docs/source/developers/guide/step_by_step/arrow_codebase.rst index 0beece991b197..0c194ab3a3f70 100644 --- a/docs/source/developers/guide/step_by_step/arrow_codebase.rst +++ b/docs/source/developers/guide/step_by_step/arrow_codebase.rst @@ -99,8 +99,8 @@ can be called from a function in another language. After a function is defined C++ we must create the binding manually to use it in that implementation. .. note:: - There is much you can learn by checking **Pull Requests** - and **unit tests** for similar issues. + There is much you can learn by checking **Pull Requests** + and **unit tests** for similar issues. .. 
tab-set::

diff --git a/docs/source/developers/guide/step_by_step/set_up.rst b/docs/source/developers/guide/step_by_step/set_up.rst
index 9a2177568d6f5..9c808ceee7be6 100644
--- a/docs/source/developers/guide/step_by_step/set_up.rst
+++ b/docs/source/developers/guide/step_by_step/set_up.rst
@@ -118,10 +118,10 @@ Should give you a result similar to this:
 
 .. code:: console
 
-   origin https://github.com//arrow.git (fetch)
-   origin https://github.com//arrow.git (push)
-   upstream https://github.com/apache/arrow (fetch)
-   upstream https://github.com/apache/arrow (push)
+   origin https://github.com//arrow.git (fetch)
+   origin https://github.com//arrow.git (push)
+   upstream https://github.com/apache/arrow (fetch)
+   upstream https://github.com/apache/arrow (push)
 
 If you did everything correctly, you should now have a copy of the code in
 the ``arrow`` directory and two remotes that refer to your own GitHub
diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst
index 17d47c324ce12..3f0ff6cdd0103 100644
--- a/docs/source/developers/java/development.rst
+++ b/docs/source/developers/java/development.rst
@@ -118,7 +118,7 @@ This checks the code style of all source code under the current directory or fro
 
     $ mvn checkstyle:check
 
-Maven `pom.xml` style is enforced with Spotless using `Apache Maven pom.xml guidelines`_
+Maven ``pom.xml`` style is enforced with Spotless using `Apache Maven pom.xml guidelines`_
 You can also just check the style without building the project.
 This checks the style of all pom.xml files under the current directory or from within an individual module.
diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst
index 0b3a83dc5aabe..d903cc71bd5c4 100644
--- a/docs/source/developers/release.rst
+++ b/docs/source/developers/release.rst
@@ -106,7 +106,7 @@ If there is consensus and there is a Release Manager willing to take the effort
 the release a patch release can be created.
 
 Committers can tag issues that should be included on the next patch release using the
-`backport-candidate` label. Is the responsability of the author or the committer to add the
+``backport-candidate`` label. It is the responsibility of the author or the committer to add the
 label to the issue to help the Release Manager identify the issues that should be backported.
 
 If a specific issue is identified as the reason to create a patch release the Release Manager
@@ -117,7 +117,7 @@ Be sure to go through on the following checklist:
 
 #. Create milestone
 #. Create maintenance branch
 #. Include issue that was requested as requiring new patch release
-#. Add new milestone to issues with `backport-candidate` label
+#. Add new milestone to issues with ``backport-candidate`` label
 #. cherry-pick issues into maintenance branch
 
 Creating a Release Candidate
diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst
index c60f095dd354d..c258f889dc6ac 100644
--- a/docs/source/format/CanonicalExtensions.rst
+++ b/docs/source/format/CanonicalExtensions.rst
@@ -77,7 +77,7 @@ Official List
 Fixed shape tensor
 ==================
 
-* Extension name: `arrow.fixed_shape_tensor`.
+* Extension name: ``arrow.fixed_shape_tensor``.
 
 * The storage type of the extension: ``FixedSizeList`` where:
 
@@ -153,7 +153,7 @@ Fixed shape tensor
 Variable shape tensor
 =====================
 
-* Extension name: `arrow.variable_shape_tensor`.
+* Extension name: ``arrow.variable_shape_tensor``.
* The storage type of the extension is: ``StructArray`` where struct is composed of **data** and **shape** fields describing a single diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index ec6a7fa5e334a..7c853de7829be 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -312,7 +312,7 @@ Each value in this layout consists of 0 or more bytes. While primitive arrays have a single values buffer, variable-size binary have an **offsets** buffer and **data** buffer. -The offsets buffer contains `length + 1` signed integers (either +The offsets buffer contains ``length + 1`` signed integers (either 32-bit or 64-bit, depending on the logical type), which encode the start position of each slot in the data buffer. The length of the value in each slot is computed using the difference between the offset @@ -374,7 +374,7 @@ locations are indicated using a **views** buffer, which may point to one of potentially several **data** buffers or may contain the characters inline. -The views buffer contains `length` view structures with the following layout: +The views buffer contains ``length`` view structures with the following layout: :: @@ -394,7 +394,7 @@ should be interpreted. In the short string case the string's bytes are inlined — stored inside the view itself, in the twelve bytes which follow the length. Any remaining bytes -after the string itself are padded with `0`. +after the string itself are padded with ``0``. In the long string case, a buffer index indicates which data buffer stores the data bytes and an offset indicates where in that buffer the diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index 9c3523755f3ae..b4b85e77a2e5f 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -193,7 +193,7 @@ in the ``app_metadata`` field of the Flight RPC ``PutResult`` returned. When used with DoPut: load the stream of Arrow record batches into the specified target table and return the number of rows ingested - via a `DoPutUpdateResult` message. + via a ``DoPutUpdateResult`` message. Flight Server Session Management -------------------------------- diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index c800255687796..436747989acf3 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -501,7 +501,7 @@ integration testing actually tests. There are two types of integration test cases: the ones populated on the fly by the data generator in the Archery utility, and *gold* files that exist -in the `arrow-testing ` +in the `arrow-testing `_ repository. Data Generator Tests diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst index 06ed32bd48cf7..d4838967d614f 100644 --- a/docs/source/java/algorithm.rst +++ b/docs/source/java/algorithm.rst @@ -82,7 +82,7 @@ for fixed width and variable width vectors, respectively. Both algorithms run in 3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer vector, which correspond to indices of vector elements in sorted order. With the index vector, one can -easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th +easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k`` th smallest value in the vector. 
Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index cc8822247b007..f95c2ac755d97 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -162,7 +162,7 @@ the Flight SQL service as gRPC headers. For example, the following URI :: This will connect without authentication or encryption, to a Flight SQL service running on ``localhost`` on port 12345. Each request will -also include a `database=mydb` gRPC header. +also include a ``database=mydb`` gRPC header. Connection parameters may also be supplied using the Properties object when using the JDBC Driver Manager to connect. When supplying using diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index a551edc36c477..dc6a55c87fcd6 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -63,7 +63,7 @@ Modifying the command above for Flight: Otherwise, you may see errors like ``java.lang.IllegalAccessError: superclass access check failed: class org.apache.arrow.flight.ArrowMessage$ArrowBufRetainingCompositeByteBuf (in module org.apache.arrow.flight.core) cannot access class io.netty.buffer.CompositeByteBuf (in unnamed module ...) because module -org.apache.arrow.flight.core does not read unnamed module ... +org.apache.arrow.flight.core does not read unnamed module ...`` Finally, if you are using arrow-dataset, you'll also need to report that JDK internals need to be exposed. Modifying the command above for arrow-memory: diff --git a/docs/source/java/ipc.rst b/docs/source/java/ipc.rst index 01341ff2cc391..f5939179177d5 100644 --- a/docs/source/java/ipc.rst +++ b/docs/source/java/ipc.rst @@ -81,7 +81,7 @@ Here we used an in-memory stream, but this could have been a socket or some othe writer.end(); Note that, since the :class:`VectorSchemaRoot` in the writer is a container that can hold batches, batches flow through -:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before `writeBatch`, so that later batches +:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before ``writeBatch``, so that later batches could overwrite previous ones. Now the :class:`ByteArrayOutputStream` contains the complete stream which contains 5 record batches. diff --git a/docs/source/java/quickstartguide.rst b/docs/source/java/quickstartguide.rst index a71ddc5b5e55f..1f3ec861d3f46 100644 --- a/docs/source/java/quickstartguide.rst +++ b/docs/source/java/quickstartguide.rst @@ -195,10 +195,10 @@ Example: Create a dataset of names (strings) and ages (32-bit signed integers). .. code-block:: shell VectorSchemaRoot created: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary Interprocess Communication (IPC) @@ -306,10 +306,10 @@ Example: Read the dataset from the previous example from an Arrow IPC file (rand Record batches in file: 1 VectorSchemaRoot read: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary More examples available at `Arrow Java Cookbook`_. diff --git a/docs/source/java/substrait.rst b/docs/source/java/substrait.rst index c5857dcc23f75..fa20dbd61dbfb 100644 --- a/docs/source/java/substrait.rst +++ b/docs/source/java/substrait.rst @@ -100,9 +100,9 @@ Here is an example of a Java program that queries a Parquet file using Java Subs .. 
code-block:: text // Results example: - FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) - 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai - 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon + FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) + 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai + 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon Executing Projections and Filters Using Extended Expressions ============================================================ @@ -189,13 +189,13 @@ This Java program: .. code-block:: text - column-1 column-2 - 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account - 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely - 12 VIETNAM - hely enticingly express accounts. even, final - 13 RUSSIA - requests against the platelets use never according to the quickly regular pint - 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull - 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be + column-1 column-2 + 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account + 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely + 12 VIETNAM - hely enticingly express accounts. even, final + 13 RUSSIA - requests against the platelets use never according to the quickly regular pint + 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull + 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be .. _`Substrait`: https://substrait.io/ .. _`Substrait Java`: https://github.com/substrait-io/substrait-java diff --git a/docs/source/java/table.rst b/docs/source/java/table.rst index 603910f51694f..5aa95e153cea0 100644 --- a/docs/source/java/table.rst +++ b/docs/source/java/table.rst @@ -75,7 +75,7 @@ Tables are created from a ``VectorSchemaRoot`` as shown below. The memory buffer Table t = new Table(someVectorSchemaRoot); -If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of `ValueVector#setSafe()`), it would reflect those changes, but the values in table *t* are unchanged. +If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of ``ValueVector#setSafe()``), it would reflect those changes, but the values in table *t* are unchanged. Creating a Table from FieldVectors ********************************** @@ -243,7 +243,7 @@ It is important to recognize that rows are NOT reified as objects, but rather op Getting a row ************* -Calling `immutableRow()` on any table instance returns a new ``Row`` instance. +Calling ``immutableRow()`` on any table instance returns a new ``Row`` instance. .. code-block:: Java @@ -262,7 +262,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: // do something useful here } -``Table`` implements `Iterable` so you can access rows directly from a table in an enhanced *for* loop: +``Table`` implements ``Iterable`` so you can access rows directly from a table in an enhanced *for* loop: .. code-block:: Java @@ -272,7 +272,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: ... 
} -Finally, while rows are usually iterated in the order of the underlying data vectors, but they are also positionable using the `Row#setPosition()` method, so you can skip to a specific row. Row numbers are 0-based. +Finally, while rows are usually iterated in the order of the underlying data vectors, but they are also positionable using the ``Row#setPosition()`` method, so you can skip to a specific row. Row numbers are 0-based. .. code-block:: Java @@ -281,7 +281,7 @@ Finally, while rows are usually iterated in the order of the underlying data vec Any changes to position are applied to all the columns in the table. -Note that you must call `next()`, or `setPosition()` before accessing values via a row. Failure to do so results in a runtime exception. +Note that you must call ``next()``, or ``setPosition()`` before accessing values via a row. Failure to do so results in a runtime exception. Read operations using rows ************************** @@ -304,7 +304,7 @@ You can also get value using a nullable ``ValueHolder``. For example: This can be used to retrieve values without creating a new Object for each. -In addition to getting values, you can check if a value is null using `isNull()`. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. +In addition to getting values, you can check if a value is null using ``isNull()``. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. .. code-block:: Java @@ -352,13 +352,13 @@ Working with the C-Data interface The ability to work with native code is required for many Arrow features. This section describes how tables can be be exported for use with native code -Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the `exportTable()` methods in the `Data`_ class avoids this concern. +Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the ``exportTable()`` methods in the `Data`_ class avoids this concern. .. code-block:: Java Data.exportTable(bufferAllocator, table, dictionaryProvider, outArrowArray); -If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to `exportTable()` can be omitted and the table's provider attribute will be used: +If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to ``exportTable()`` can be omitted and the table's provider attribute will be used: .. code-block:: Java diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index f2ac6bd1e1226..5423eebfbab40 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -173,7 +173,7 @@ variants which detect domain errors where appropriate. Comparisons ----------- -These functions expect two inputs of the same type. If one of the inputs is `null` +These functions expect two inputs of the same type. If one of the inputs is ``null`` they return ``null``. .. 
autosummary:: diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 9156157fcd0c2..f17475138c9a4 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -76,7 +76,7 @@ We use the name **logical type** because the **physical** storage may be the same for one or more types. For example, ``int64``, ``float64``, and ``timestamp[ms]`` all occupy 64 bits per value. -These objects are `metadata`; they are used for describing the data in arrays, +These objects are ``metadata``; they are used for describing the data in arrays, schemas, and record batches. In Python, they can be used in functions where the input data (e.g. Python objects) may be coerced to more than one Arrow type. @@ -99,7 +99,7 @@ types' children. For example, we can define a list of int32 values with: t6 = pa.list_(t1) t6 -A `struct` is a collection of named fields: +A ``struct`` is a collection of named fields: .. ipython:: python diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index 8df0ef0b1fe99..83fce84f47c08 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -101,7 +101,7 @@ define the ``__arrow_array__`` method to return an Arrow array:: import pyarrow return pyarrow.array(..., type=type) -The ``__arrow_array__`` method takes an optional `type` keyword which is passed +The ``__arrow_array__`` method takes an optional ``type`` keyword which is passed through from :func:`pyarrow.array`. The method is allowed to return either a :class:`~pyarrow.Array` or a :class:`~pyarrow.ChunkedArray`. diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst index 22f983a60c349..23d10aaaad720 100644 --- a/docs/source/python/filesystems.rst +++ b/docs/source/python/filesystems.rst @@ -182,7 +182,7 @@ Example how you can read contents from a S3 bucket:: Note that it is important to configure :class:`S3FileSystem` with the correct -region for the bucket being used. If `region` is not set, the AWS SDK will +region for the bucket being used. If ``region`` is not set, the AWS SDK will choose a value, defaulting to 'us-east-1' if the SDK version is <1.8. Otherwise it will try to use a variety of heuristics (environment variables, configuration profile, EC2 metadata server) to resolve the region. @@ -277,7 +277,7 @@ load time, since the library may not be in your LD_LIBRARY_PATH), and relies on some environment variables. * ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has - `lib/native/libhdfs.so`. + ``lib/native/libhdfs.so``. * ``JAVA_HOME``: the location of your Java SDK installation. diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index 4b966e6d2653d..12555c93067f9 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -83,7 +83,7 @@ While Arrow uses the OS-provided timezone database on Linux and macOS, it requir user-provided database on Windows. To download and extract the text version of the IANA timezone database follow the instructions in the C++ :ref:`download-timezone-database` or use pyarrow utility function -`pyarrow.util.download_tzdata_on_windows()` that does the same. +``pyarrow.util.download_tzdata_on_windows()`` that does the same. By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. 
If the database has been downloaded in a different location, you will need to set diff --git a/docs/source/python/integration/extending.rst b/docs/source/python/integration/extending.rst index b380fea7e902c..d4d099bcf43c8 100644 --- a/docs/source/python/integration/extending.rst +++ b/docs/source/python/integration/extending.rst @@ -474,7 +474,7 @@ Toolchain Compatibility (Linux) The Python wheels for Linux are built using the `PyPA manylinux images `_ which use -the CentOS `devtoolset-9`. In addition to the other notes +the CentOS ``devtoolset-9``. In addition to the other notes above, if you are compiling C++ using these shared libraries, you will need to make sure you use a compatible toolchain as well or you might see a segfault during runtime. diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 23474b923718d..7b49d48ab20fa 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -46,7 +46,7 @@ parent-child relationships. There are many implementations of ``arrow::Buffer``, but they all provide a standard interface: a data pointer and length. This is similar to Python's -built-in `buffer protocol` and ``memoryview`` objects. +built-in ``buffer protocol`` and ``memoryview`` objects. A :class:`Buffer` can be created from any Python object implementing the buffer protocol by calling the :func:`py_buffer` function. Let's consider diff --git a/docs/source/python/timestamps.rst b/docs/source/python/timestamps.rst index cecbd5b595bc7..80a1b7280cbfa 100644 --- a/docs/source/python/timestamps.rst +++ b/docs/source/python/timestamps.rst @@ -24,7 +24,7 @@ Arrow/Pandas Timestamps Arrow timestamps are stored as a 64-bit integer with column metadata to associate a time unit (e.g. milliseconds, microseconds, or nanoseconds), and an -optional time zone. Pandas (`Timestamp`) uses a 64-bit integer representing +optional time zone. Pandas (``Timestamp``) uses a 64-bit integer representing nanoseconds and an optional time zone. Python/Pandas timestamp types without a associated time zone are referred to as "Time Zone Naive". Python/Pandas timestamp types with an associated time zone are From 1c546fb3c130fc6a4f3e06ad31dc49d923785104 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 16 May 2024 14:15:57 +0200 Subject: [PATCH 119/261] GH-41480: [Python] Building PyArrow: enable/disable python components by default based on availability in Arrow C++ (#41494) ### Rationale for this change Currently, when building pyarrow from source, one needs to manually enable the optional components through setting `PYARROW_WITH_...` environment variables. However, we could also make a default choice of components based on which ones were enabled in the Arrow C++ build. ### What changes are included in this PR? Set defaults for the various `PYARROW_BUILD_` options based on the corresponding `ARROW_` setting. Keep the current `PYARROW_WITH_` environment variables working to allow overriding this default. ### Are there any user-facing changes? 
No * GitHub Issue: #41480 Lead-authored-by: Joris Van den Bossche Co-authored-by: Sutou Kouhei Signed-off-by: Joris Van den Bossche --- ci/appveyor-cpp-build.bat | 1 - python/CMakeLists.txt | 115 +++++++++++++++++++++++--------- python/setup.py | 134 +++++++++++--------------------------- 3 files changed, 123 insertions(+), 127 deletions(-) diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 8cfa67c437264..f688fbb63a9ad 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -129,7 +129,6 @@ set PYARROW_WITH_ORC=%ARROW_ORC% set PYARROW_WITH_PARQUET=ON set PYARROW_WITH_PARQUET_ENCRYPTION=ON set PYARROW_WITH_S3=%ARROW_S3% -set PYARROW_WITH_STATIC_BOOST=ON set PYARROW_WITH_SUBSTRAIT=ON set ARROW_HOME=%CONDA_PREFIX%\Library diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 212862357ace2..07acb9e31a731 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -108,25 +108,6 @@ if(UNIX) endif() endif() -# Top level cmake dir -if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") - option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF) - option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF) - option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF) - option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF) - option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF) - option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF) - option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) - option(PYARROW_BUILD_PARQUET_ENCRYPTION - "Build the PyArrow Parquet encryption integration" OFF) - option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) - option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) - option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) - set(PYARROW_CXXFLAGS - "" - CACHE STRING "Compiler flags to append when compiling Arrow") -endif() - find_program(CCACHE_FOUND ccache) if(CCACHE_FOUND AND NOT CMAKE_C_COMPILER_LAUNCHER @@ -265,11 +246,70 @@ message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}") include(UseCython) -# PyArrow C++ +# Arrow C++ and set default PyArrow build options include(GNUInstallDirs) - find_package(Arrow REQUIRED) +macro(define_option name description arrow_option) + set("PYARROW_${name}" + "AUTO" + CACHE STRING ${description}) + + if("${PYARROW_${name}}" STREQUAL "AUTO") + # by default, first check if env variable exists, otherwise use Arrow C++ config + set(env_variable "PYARROW_WITH_${name}") + if(DEFINED ENV{${env_variable}}) + if($ENV{${env_variable}}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + else() + if(${arrow_option}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() + else() + if("${PYARROW_${name}}") + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() +endmacro() + +define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO) +define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA) +define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET) +define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT) +define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA) +define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC) +define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET) 
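+# How these defaults resolve, for illustration (hypothetical environment):
+# assuming Arrow C++ was built with ARROW_PARQUET=ON and no PYARROW_WITH_PARQUET
+# environment variable is set, the PARQUET option above resolves
+# PYARROW_BUILD_PARQUET to ON; exporting PYARROW_WITH_PARQUET=0 before running
+# CMake would override that derived default to OFF.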
+define_option(PARQUET_ENCRYPTION "Build the PyArrow Parquet encryption integration" + PARQUET_REQUIRE_ENCRYPTION) +define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT) +define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE) +define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS) +define_option(S3 "Build the PyArrow S3 integration" ARROW_S3) +define_option(HDFS "Build the PyArrow HDFS integration" ARROW_HDFS) +option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) +option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) +option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) +set(PYARROW_CXXFLAGS + "" + CACHE STRING "Compiler flags to append when compiling PyArrow C++") + +# enforce module dependencies +if(PYARROW_BUILD_SUBSTRAIT) + set(PYARROW_BUILD_DATASET ON) +endif() +if(PYARROW_BUILD_DATASET) + set(PYARROW_BUILD_ACERO ON) +endif() + +# PyArrow C++ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -305,6 +345,7 @@ set(PYARROW_CPP_LINK_LIBS "") # Check all the options from Arrow and PyArrow C++ to be in line if(PYARROW_BUILD_DATASET) + message(STATUS "Building PyArrow with Dataset") if(NOT ARROW_DATASET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON") endif() @@ -317,6 +358,7 @@ if(PYARROW_BUILD_DATASET) endif() if(PYARROW_BUILD_ACERO) + message(STATUS "Building PyArrow with Acero") if(NOT ARROW_ACERO) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON") endif() @@ -329,18 +371,13 @@ if(PYARROW_BUILD_ACERO) endif() if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) + message(STATUS "Building PyArrow with Parquet") if(NOT ARROW_PARQUET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON") endif() find_package(Parquet REQUIRED) endif() -if(PYARROW_BUILD_HDFS) - if(NOT ARROW_HDFS) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") - endif() -endif() - # Check for only Arrow C++ options if(ARROW_CSV) list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/csv.cc) @@ -400,6 +437,7 @@ endif() set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) if(PYARROW_BUILD_FLIGHT) + message(STATUS "Building PyArrow with Flight") if(NOT ARROW_FLIGHT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON") endif() @@ -555,23 +593,39 @@ set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE) set(LINK_LIBS arrow_python) if(PYARROW_BUILD_AZURE) + message(STATUS "Building PyArrow with Azure") + if(NOT ARROW_AZURE) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON") + endif() list(APPEND CYTHON_EXTENSIONS _azurefs) endif() if(PYARROW_BUILD_GCS) + message(STATUS "Building PyArrow with GCS") + if(NOT ARROW_GCS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _gcsfs) endif() if(PYARROW_BUILD_S3) + message(STATUS "Building PyArrow with S3") + if(NOT ARROW_S3) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") + endif() list(APPEND CYTHON_EXTENSIONS _s3fs) endif() if(PYARROW_BUILD_HDFS) + message(STATUS "Building PyArrow with HDFS") + if(NOT ARROW_HDFS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _hdfs) endif() if(PYARROW_BUILD_CUDA) - # Arrow CUDA + message(STATUS "Building PyArrow with CUDA") if(NOT ARROW_CUDA) message(FATAL_ERROR "You must 
build Arrow C++ with ARROW_CUDA=ON") endif() @@ -646,8 +700,9 @@ if(PYARROW_BUILD_PARQUET) endif() endif() +# ORC if(PYARROW_BUILD_ORC) - # ORC + message(STATUS "Building PyArrow with ORC") if(NOT ARROW_ORC) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON") endif() @@ -679,6 +734,7 @@ endif() # Substrait if(PYARROW_BUILD_SUBSTRAIT) + message(STATUS "Building PyArrow with Substrait") if(NOT ARROW_SUBSTRAIT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") endif() @@ -696,6 +752,7 @@ endif() # Gandiva if(PYARROW_BUILD_GANDIVA) + message(STATUS "Building PyArrow with Gandiva") if(NOT ARROW_GANDIVA) message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON") endif() diff --git a/python/setup.py b/python/setup.py index 6f3dddb29d248..ed2b7961e5fbb 100755 --- a/python/setup.py +++ b/python/setup.py @@ -152,32 +152,20 @@ def initialize_options(self): if not hasattr(sys, 'gettotalrefcount'): self.build_type = 'release' - self.with_azure = strtobool( - os.environ.get('PYARROW_WITH_AZURE', '0')) - self.with_gcs = strtobool( - os.environ.get('PYARROW_WITH_GCS', '0')) - self.with_s3 = strtobool( - os.environ.get('PYARROW_WITH_S3', '0')) - self.with_hdfs = strtobool( - os.environ.get('PYARROW_WITH_HDFS', '0')) - self.with_cuda = strtobool( - os.environ.get('PYARROW_WITH_CUDA', '0')) - self.with_substrait = strtobool( - os.environ.get('PYARROW_WITH_SUBSTRAIT', '0')) - self.with_flight = strtobool( - os.environ.get('PYARROW_WITH_FLIGHT', '0')) - self.with_acero = strtobool( - os.environ.get('PYARROW_WITH_ACERO', '0')) - self.with_dataset = strtobool( - os.environ.get('PYARROW_WITH_DATASET', '0')) - self.with_parquet = strtobool( - os.environ.get('PYARROW_WITH_PARQUET', '0')) - self.with_parquet_encryption = strtobool( - os.environ.get('PYARROW_WITH_PARQUET_ENCRYPTION', '0')) - self.with_orc = strtobool( - os.environ.get('PYARROW_WITH_ORC', '0')) - self.with_gandiva = strtobool( - os.environ.get('PYARROW_WITH_GANDIVA', '0')) + self.with_azure = None + self.with_gcs = None + self.with_s3 = None + self.with_hdfs = None + self.with_cuda = None + self.with_substrait = None + self.with_flight = None + self.with_acero = None + self.with_dataset = None + self.with_parquet = None + self.with_parquet_encryption = None + self.with_orc = None + self.with_gandiva = None + self.generate_coverage = strtobool( os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) self.bundle_arrow_cpp = strtobool( @@ -185,15 +173,6 @@ def initialize_options(self): self.bundle_cython_cpp = strtobool( os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - self.with_parquet_encryption = (self.with_parquet_encryption and - self.with_parquet) - - # enforce module dependencies - if self.with_substrait: - self.with_dataset = True - if self.with_dataset: - self.with_acero = True - CYTHON_MODULE_NAMES = [ 'lib', '_fs', @@ -270,23 +249,30 @@ def append_cmake_bool(value, varname): cmake_options.append('-D{0}={1}'.format( varname, 'on' if value else 'off')) + def append_cmake_component(flag, varname): + # only pass this to cmake is the user pass the --with-component + # flag to setup.py build_ext + if flag is not None: + append_cmake_bool(flag, varname) + if self.cmake_generator: cmake_options += ['-G', self.cmake_generator] - append_cmake_bool(self.with_cuda, 'PYARROW_BUILD_CUDA') - append_cmake_bool(self.with_substrait, 'PYARROW_BUILD_SUBSTRAIT') - append_cmake_bool(self.with_flight, 'PYARROW_BUILD_FLIGHT') - append_cmake_bool(self.with_gandiva, 'PYARROW_BUILD_GANDIVA') - 
append_cmake_bool(self.with_acero, 'PYARROW_BUILD_ACERO') - append_cmake_bool(self.with_dataset, 'PYARROW_BUILD_DATASET') - append_cmake_bool(self.with_orc, 'PYARROW_BUILD_ORC') - append_cmake_bool(self.with_parquet, 'PYARROW_BUILD_PARQUET') - append_cmake_bool(self.with_parquet_encryption, - 'PYARROW_BUILD_PARQUET_ENCRYPTION') - append_cmake_bool(self.with_azure, 'PYARROW_BUILD_AZURE') - append_cmake_bool(self.with_gcs, 'PYARROW_BUILD_GCS') - append_cmake_bool(self.with_s3, 'PYARROW_BUILD_S3') - append_cmake_bool(self.with_hdfs, 'PYARROW_BUILD_HDFS') + append_cmake_component(self.with_cuda, 'PYARROW_CUDA') + append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') + append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') + append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') + append_cmake_component(self.with_acero, 'PYARROW_ACERO') + append_cmake_component(self.with_dataset, 'PYARROW_DATASET') + append_cmake_component(self.with_orc, 'PYARROW_ORC') + append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') + append_cmake_component(self.with_parquet_encryption, + 'PYARROW_PARQUET_ENCRYPTION') + append_cmake_component(self.with_azure, 'PYARROW_AZURE') + append_cmake_component(self.with_gcs, 'PYARROW_GCS') + append_cmake_component(self.with_s3, 'PYARROW_S3') + append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') + append_cmake_bool(self.bundle_arrow_cpp, 'PYARROW_BUNDLE_ARROW_CPP') append_cmake_bool(self.bundle_cython_cpp, @@ -329,54 +315,8 @@ def append_cmake_bool(value, varname): self._found_names = [] for name in self.CYTHON_MODULE_NAMES: built_path = pjoin(install_prefix, name + ext_suffix) - if not os.path.exists(built_path): - print(f'Did not find {built_path}') - if self._failure_permitted(name): - print(f'Cython module {name} failure permitted') - continue - raise RuntimeError('PyArrow C-extension failed to build:', - os.path.abspath(built_path)) - - self._found_names.append(name) - - def _failure_permitted(self, name): - if name == '_parquet' and not self.with_parquet: - return True - if name == '_parquet_encryption' and not self.with_parquet_encryption: - return True - if name == '_orc' and not self.with_orc: - return True - if name == '_flight' and not self.with_flight: - return True - if name == '_substrait' and not self.with_substrait: - return True - if name == '_azurefs' and not self.with_azure: - return True - if name == '_gcsfs' and not self.with_gcs: - return True - if name == '_s3fs' and not self.with_s3: - return True - if name == '_hdfs' and not self.with_hdfs: - return True - if name == '_dataset' and not self.with_dataset: - return True - if name == '_acero' and not self.with_acero: - return True - if name == '_exec_plan' and not self.with_acero: - return True - if name == '_dataset_orc' and not ( - self.with_orc and self.with_dataset - ): - return True - if name == '_dataset_parquet' and not ( - self.with_parquet and self.with_dataset - ): - return True - if name == '_cuda' and not self.with_cuda: - return True - if name == 'gandiva' and not self.with_gandiva: - return True - return False + if os.path.exists(built_path): + self._found_names.append(name) def _get_build_dir(self): # Get the package directory from build_py From 74f7578f77adca6b0fd79f7d37e28941330221eb Mon Sep 17 00:00:00 2001 From: Noam Ross Date: Fri, 17 May 2024 02:18:07 +0300 Subject: [PATCH 120/261] GH-40361: [C++] Make flatbuffers serialization more deterministic (#40392) ### Rationale for this change This is the start of a PR to address #40361, and in turn #40202, to make 
the metadata in Parquet files written by Arrow identical irrespective of the platform configuration. This is limited, as platform-specific differences in R or Python versions or compression libraries could still result in differences. ### What changes are included in this PR? So far I have only made a partial change to part of the metadata serialization. I need to look at whether other calls to flatbuffers require similar treatment. ### Are these changes tested? Not yet; this is a draft PR. ### Are there any user-facing changes? No * GitHub Issue: #40361 Lead-authored-by: Noam Ross Co-authored-by: Bryce Mecum Co-authored-by: Sutou Kouhei Co-authored-by: Antoine Pitrou Signed-off-by: Bryce Mecum --- cpp/src/arrow/ipc/CMakeLists.txt | 4 +- cpp/src/arrow/ipc/message_internal_test.cc | 81 ++++++++++++++++++++++ cpp/src/arrow/ipc/metadata_internal.cc | 4 +- 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 cpp/src/arrow/ipc/message_internal_test.cc diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 9fd71361d9b76..2d3eb0f6c589d 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an @@ -39,6 +39,7 @@ endfunction() add_arrow_test(feather_test) add_arrow_ipc_test(json_simple_test) +add_arrow_ipc_test(message_internal_test) add_arrow_ipc_test(read_write_test) add_arrow_ipc_test(tensor_test) @@ -56,6 +57,7 @@ if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) add_executable(arrow-stream-to-file stream_to_file.cc) target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) + if(ARROW_BUILD_UTILITIES) install(TARGETS arrow-file-to-stream arrow-stream-to-file ${INSTALL_IS_OPTIONAL} DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/cpp/src/arrow/ipc/message_internal_test.cc b/cpp/src/arrow/ipc/message_internal_test.cc new file mode 100644 index 0000000000000..112240f08d552 --- /dev/null +++ b/cpp/src/arrow/ipc/message_internal_test.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
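+
+// The fix under test (drawn from the metadata_internal.cc hunk below):
+// AppendKeyValue() previously built both strings inline in the call
+//
+//   return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key),
+//                                  fbb.CreateString(value));
+//
+// and now hoists them into named locals, so the key and value are written
+// to the flatbuffer in a fixed order rather than in whatever order the
+// compiler happens to evaluate the call arguments.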
+ +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/ipc/dictionary.h" +#include "arrow/ipc/metadata_internal.h" +#include "arrow/ipc/options.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +namespace arrow::ipc::internal { + +using FBB = flatbuffers::FlatBufferBuilder; + +// GH-40361: Test that Flatbuffer serialization matches a known output +// byte-for-byte. +// +// Our Flatbuffers code should not depend on argument evaluation order as it's +// undefined (https://en.cppreference.com/w/cpp/language/eval_order) and may +// lead to unnecessary platform- or toolchain-specific differences in +// serialization. +TEST(TestMessageInternal, TestByteIdentical) { + FBB fbb; + flatbuffers::Offset fb_schema; + DictionaryFieldMapper mapper; + + // Create a simple Schema with just two metadata KVPs + auto f0 = field("f0", int64()); + auto f1 = field("f1", int64()); + std::vector> fields = {f0, f1}; + std::shared_ptr metadata = + KeyValueMetadata::Make({"key_1", "key_2"}, {"key_1_value", "key_2_value"}); + auto schema = ::arrow::schema({f0}, metadata); + + // Serialize the Schema to a Buffer + std::shared_ptr out_buffer; + ASSERT_OK( + WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), &out_buffer)); + + // This is example output from macOS+ARM+LLVM + const uint8_t expected[] = { + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00, + 0x08, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0A, 0x00, + 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x18, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, + 0x32, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, + 0x79, 0x5F, 0x32, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x04, 0x00, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, + 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00, 0x08, 0x00, 0x06, 0x00, + 0x07, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30, 0x00, 0x00, 0x08, 0x00, + 0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x40, 0x00, 0x00, 0x00}; + Buffer expected_buffer(expected, sizeof(expected)); + + AssertBufferEqual(expected_buffer, *out_buffer); +} +} // namespace arrow::ipc::internal diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index e20b352d18d95..eed426d9337dd 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -478,7 +478,9 @@ static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr& fiel static KeyValueOffset AppendKeyValue(FBB& fbb, const std::string& key, const std::string& value) { - return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)); + auto fbb_key = fbb.CreateString(key); + auto fbb_value = fbb.CreateString(value); + return flatbuf::CreateKeyValue(fbb, fbb_key, 
fbb_value); } static void AppendKeyValueMetadata(FBB& fbb, const KeyValueMetadata& metadata, From f5ac05cca21bd07d888ce51050cebef64e65757c Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 16 May 2024 15:20:47 -0800 Subject: [PATCH 121/261] GH-41105: [Python][Docs] Update PyArrow installation docs for conda package split (#41135) Do not merge until some discussion is had about how to time this relative to https://github.com/conda-forge/arrow-cpp-feedstock/pull/1376. Additionally, consider hot-patching this into arrow-site if appropriate. ### What changes are included in this PR? Updates to the [Python installation docs](https://arrow.apache.org/docs/python/install.html) to reflect the in-progress change splitting PyArrow on conda-forge into three separate packages. Specifically: 1. Add a note in the conda section highlighting that there are three packages and linking to a new section (2) in order to provide more information 2. Add a new section, linked from (1), providing a comparison of each package as a table ### Are these changes tested? These are just docs changes. I have built them locally and they look fine. ### Are there any user-facing changes? Just docs. * GitHub Issue: #41105 Lead-authored-by: Bryce Mecum Co-authored-by: Joris Van den Bossche Signed-off-by: Bryce Mecum --- docs/source/python/flight.rst | 1 + docs/source/python/install.rst | 89 ++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/docs/source/python/flight.rst b/docs/source/python/flight.rst index f07b9511ccf68..b63d256547de0 100644 --- a/docs/source/python/flight.rst +++ b/docs/source/python/flight.rst @@ -17,6 +17,7 @@ .. currentmodule:: pyarrow.flight .. highlight:: python +.. _flight: ================ Arrow Flight RPC diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index 12555c93067f9..84d6253691f09 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -39,6 +39,13 @@ Install the latest version of PyArrow from conda install -c conda-forge pyarrow +.. note:: + + While the ``pyarrow`` `conda-forge `_ package is + the right choice for most users, both a minimal and maximal variant of the + package exist, either of which may be better for your use case. See + :ref:`python-conda-differences`. + Using Pip --------- @@ -93,3 +100,85 @@ a custom path to the database from Python: >>> import pyarrow as pa >>> pa.set_timezone_db_path("custom_path") + + +.. _python-conda-differences: + +Differences between conda-forge packages +---------------------------------------- + +On `conda-forge `_, PyArrow is published as three +separate packages, each providing varying levels of functionality. This is in +contrast to PyPi, where only a single PyArrow package is provided. + +The purpose of this split is to minimize the size of the installed package for +most users (``pyarrow``), provide a smaller, minimal package for specialized use +cases (``pyarrow-core``), while still providing a complete package for users who +require it (``pyarrow-all``). What was historically ``pyarrow`` on +`conda-forge `_ is now ``pyarrow-all``, though most +users can continue using ``pyarrow``. + +The ``pyarrow-core`` package includes the following functionality: + +- :ref:`data` +- :ref:`compute` (i.e., ``pyarrow.compute``) +- :ref:`io` +- :ref:`ipc` (i.e., ``pyarrow.ipc``) +- :ref:`filesystem` (i.e., ``pyarrow.fs``. 
Note: It's planned to move cloud filesystems (i.e., :ref:`S3`, :ref:`GCS`, etc) into ``pyarrow`` in a future release, though :ref:`filesystem-localfs` will remain in ``pyarrow-core``.) +- File formats: :ref:`Arrow/Feather`, :ref:`JSON`, :ref:`CSV`, :ref:`ORC` (but not Parquet) + +The ``pyarrow`` package adds the following: + +- Acero (i.e., ``pyarrow.acero``) +- :ref:`dataset` (i.e., ``pyarrow.dataset``) +- :ref:`Parquet` (i.e., ``pyarrow.parquet``) +- Substrait (i.e., ``pyarrow.substrait``) + +Finally, ``pyarrow-all`` adds: + +- :ref:`flight` and Flight SQL (i.e., ``pyarrow.flight``) +- Gandiva (i.e., ``pyarrow.gandiva``) + +The following table lists the functionality provided by each package and may be +useful when deciding to use one package over another or when +:ref:`python-conda-custom-selection`. + ++------------+---------------------+--------------+---------+-------------+ +| Component | Package | pyarrow-core | pyarrow | pyarrow-all | ++------------+---------------------+--------------+---------+-------------+ +| Core | pyarrow-core | ✓ | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Parquet | libparquet | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Dataset | libarrow-dataset | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Acero | libarrow-acero | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Substrait | libarrow-substrait | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight | libarrow-flight | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight SQL | libarrow-flight-sql | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Gandiva | libarrow-gandiva | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ + +.. _python-conda-custom-selection: + +Creating A Custom Selection +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you know which components you need and want to control what's installed, you +can create a custom selection of packages to include only the extra features you +need. For example, to install ``pyarrow-core`` and add support for reading and +writing Parquet, install ``libparquet`` alongside ``pyarrow-core``: + +.. code-block:: shell + + conda install -c conda-forge pyarrow-core libparquet + +Or if you wish to use ``pyarrow`` but need support for Flight RPC: + +.. code-block:: shell + + conda install -c conda-forge pyarrow libarrow-flight From dc973c2bde2d8a4cf789805e758070290c2669b9 Mon Sep 17 00:00:00 2001 From: Bryce Mecum Date: Thu, 16 May 2024 21:06:21 -0800 Subject: [PATCH 122/261] MINOR: [C++] Revert change in ipc/CMakeLists.txt (#41701) ### Rationale for this change https://github.com/apache/arrow/pull/40392 introduced a mistake in the associated CMakeLists.txt, noticed by @ kou in https://github.com/apache/arrow/pull/40392#discussion_r1604344494. ### What changes are included in this PR? A reversion of that change. ### Are these changes tested? No. ### Are there any user-facing changes? No. 
Authored-by: Bryce Mecum Signed-off-by: Bryce Mecum --- cpp/src/arrow/ipc/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 2d3eb0f6c589d..2fc9b145ccc98 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -6,7 +6,7 @@ # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an From 6a9e2d53b5cdd0f387bfcd44e9549f122fac93e5 Mon Sep 17 00:00:00 2001 From: Jacob Hayes Date: Fri, 17 May 2024 03:07:02 -0400 Subject: [PATCH 123/261] GH-38575: [Python] Include metadata when creating pa.schema from PyCapsule (#41538) ### Rationale for this change Fixes the dropped `pa.schema` metadata reported in #38575, a regression introduced in #37797. ### What changes are included in this PR? Passes through the `metadata` to the short-circuited `Schema` created with `_import_from_c_capsule`. ### Are these changes tested? Yes - added `metadata` to the existing test. ### Are there any user-facing changes? I'm not sure this quite rises to the `(b) a bug that caused incorrect or invalid data to be produced,` condition, but I added that note to be safe since the resulting schema is "incorrect" (and broke some round-trip tests on my end after a pyarrow update): **This PR contains a "Critical Fix".** * GitHub Issue: #38575 Lead-authored-by: Jacob Hayes Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/tests/test_types.py | 5 ++++- python/pyarrow/types.pxi | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 4f66a6f41672d..f7b6040f510af 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -1331,10 +1331,13 @@ def __init__(self, schema): def __arrow_c_schema__(self): return self.schema.__arrow_c_schema__() - schema = pa.schema([pa.field("field_name", pa.int32())]) + schema = pa.schema([pa.field("field_name", pa.int32())], metadata={"a": "b"}) + assert schema.metadata == {b"a": b"b"} wrapped_schema = Wrapper(schema) assert pa.schema(wrapped_schema) == schema + assert pa.schema(wrapped_schema).metadata == {b"a": b"b"} + assert pa.schema(wrapped_schema, metadata={"a": "c"}).metadata == {b"a": b"c"} def test_field_import_c_schema_interface(): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 018099ae7e659..480f19c81dfb9 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -5332,7 +5332,10 @@ def schema(fields, metadata=None): if isinstance(fields, Mapping): fields = fields.items() elif hasattr(fields, "__arrow_c_schema__"): - return Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + result = Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + if metadata is not None: + result = result.with_metadata(metadata) + return result for item in fields: if isinstance(item, tuple): From 2dbc5e26dcbc6826b4eb7a330fa8090836f6b727 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 17 May 2024 04:24:56 -0400 Subject: [PATCH 124/261] MINOR: [Python][Docs] Use CMake presets to simplify Python build installation (#41500) ### Rationale for this change This should reduce the number of steps 
users have to go through to get a working Python installation from source Authored-by: Will Ayd Signed-off-by: Joris Van den Bossche --- docs/source/developers/python.rst | 29 ++++++++++++++++++++--------- docs/source/python/data.rst | 2 +- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index be9fac067cb52..e84cd25201e08 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -302,10 +302,24 @@ created above (stored in ``$ARROW_HOME``): .. code-block:: - $ mkdir arrow/cpp/build - $ pushd arrow/cpp/build - $ cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ - -DCMAKE_INSTALL_LIBDIR=lib \ + $ cmake -S arrow/cpp -B arrow/cpp/build \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + --preset ninja-release-python + $ cmake --build arrow/cpp/build --target install + +``ninja-release-python`` is not the only preset available; if you would like a +build with more features like CUDA, Flight and Gandiva support, you may opt for +the ``ninja-release-python-maximal`` preset. If you want fewer features (e.g. +removing ORC and dataset support), you could opt for +``ninja-release-python-minimal``. Changing the word ``release`` to ``debug`` +with any of the aforementioned presets will generate a debug build of Arrow. + +The presets are provided as a convenience, but you may instead opt to +specify the individual components: + +.. code-block:: + $ cmake -S arrow/cpp -B arrow/cpp/build \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DCMAKE_BUILD_TYPE=Debug \ -DARROW_BUILD_TESTS=ON \ -DARROW_COMPUTE=ON \ @@ -321,11 +335,8 @@ created above (stored in ``$ARROW_HOME``): -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=ON \ -DARROW_WITH_ZSTD=ON \ - -DPARQUET_REQUIRE_ENCRYPTION=ON \ - .. - $ make -j4 - $ make install - $ popd + -DPARQUET_REQUIRE_ENCRYPTION=ON + $ cmake --build arrow/cpp/build --target install -j4 There are a number of optional components that can be switched ON by adding flags with ``ON``: diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index f17475138c9a4..598c8c125fb83 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -561,7 +561,7 @@ schema without having to get any of the batches.:: It can also be sent between languages using the :ref:`C stream interface `. -Conversion of RecordBatch do Tensor +Conversion of RecordBatch to Tensor ----------------------------------- Each array of the ``RecordBatch`` has it's own contiguous memory that is not necessarily From 14b8ca53171435113a0f0f0c4ff1063d12543bc4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 17 May 2024 14:35:02 +0200 Subject: [PATCH 125/261] GH-41688: [Dev] Include all relevant CMakeLists.txt files in cmake-format precommit hook (#41689) ### Rationale for this change Some CMakeLists.txt files are not included in the pre-commit hook (causing failures on CI through archery if you rely on the pre-commit hook locally) ### What changes are included in this PR? Include all CMakeLists.txt files by default anywhere in the repo, and explicitly exclude the ones we don't want (vendored files). In practice, compared to the current set of files covered by the hook, these new files are now included in the search: 'cpp/CMakeLists.txt', 'java/CMakeLists.txt', 'matlab/CMakeLists.txt', 'python/CMakeLists.txt' ### Are these changes tested? 
Yes * GitHub Issue: #41688 Authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- .pre-commit-config.yaml | 7 ++----- dev/archery/archery/utils/lint.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7dcc1c9816d12..1e4b91e27ee8a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -116,17 +116,14 @@ repos: name: CMake Format files: >- ( + ?.*CMakeLists\.txt$| ?^ci/.*/.*\.cmake$| ?^cpp/.*/.*\.cmake\.in$| ?^cpp/.*/.*\.cmake$| - ?^cpp/.*/CMakeLists\.txt$| - ?^go/.*/CMakeLists\.txt$| - ?^java/.*/CMakeLists\.txt$| - ?^matlab/.*/CMakeLists\.txt$| - ?^python/.*/CMakeLists\.txt$| ) exclude: >- ( + ?^ci/conan/all/.*CMakeLists\.txt$| ?^cpp/cmake_modules/FindNumPy\.cmake$| ?^cpp/cmake_modules/FindPythonLibsNew\.cmake$| ?^cpp/cmake_modules/UseCython\.cmake$| diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index 108c9ded361e7..92b7f79fc1017 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -157,7 +157,7 @@ def cmake_linter(src, fix=False): 'go/**/CMakeLists.txt', 'java/**/CMakeLists.txt', 'matlab/**/CMakeLists.txt', - 'python/CMakeLists.txt', + 'python/**/CMakeLists.txt', ], exclude_patterns=[ 'cpp/cmake_modules/FindNumPy.cmake', From 8d687b0cfc77609e1e66c7ad500638016d41709b Mon Sep 17 00:00:00 2001 From: Dane Pitkin Date: Fri, 17 May 2024 12:25:09 -0400 Subject: [PATCH 126/261] GH-41620: [Docs] Document merge.conf usage (#41621) ### Rationale for this change As a new committer, I found that the usage of `merge.conf` was not documented and that a placeholder Jira token is still required, even though Arrow no longer uses Jira. ### What changes are included in this PR? * Document merge.conf usage ### Are these changes tested? n/a ### Are there any user-facing changes? No * GitHub Issue: #41620 Authored-by: Dane Pitkin Signed-off-by: Dane Pitkin --- dev/README.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/dev/README.md b/dev/README.md index db9a10d527334..b04dd35b1c1ff 100644 --- a/dev/README.md +++ b/dev/README.md @@ -48,17 +48,32 @@ After installed, it runs the merge script. you'll have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py` directly.) +The merge script requires tokens for access control. There are two options +for configuring your tokens: environment variables or a configuration file. + +> Note: Arrow only requires a GitHub token. Parquet can use GitHub or +JIRA tokens. + +#### Pass tokens via Environment Variables + The merge script uses the GitHub REST API. You must set a -`ARROW_GITHUB_API_TOKEN` environment variable to use a -[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). +`ARROW_GITHUB_API_TOKEN` environment variable to use a +[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). You need to add `workflow` scope to the Personal Access Token. -You can specify the +You can specify the [Personal Access Token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) -of your JIRA account in the +of your JIRA account in the `APACHE_JIRA_TOKEN` environment variable. If the variable is not set, the script will ask you for it. 
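+For example, you can export the GitHub token in your shell before invoking the
+merge script (the value below is a placeholder, not a real token):
+
+```
+export ARROW_GITHUB_API_TOKEN=<your-personal-access-token>
+```
+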
+#### Pass tokens via configuration file + +``` +cp ./merge.conf.sample ~/.config/arrow/merge.conf +``` +Update your new `merge.conf` file with your Personal Access Tokens. + Example output: ```text From a04339a49e08bec8c559a32c01eacc9469ef4196 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Fri, 17 May 2024 18:40:53 -0300 Subject: [PATCH 127/261] GH-41711: [C++] macros.h: Fix ARROW_FORCE_INLINE for MSVC (#41712) ### Rationale for this change Define the macro correctly. Nothing is currently broken, because the macro is not yet used anywhere within Arrow. ### What changes are included in this PR? Correct definition of the macro. ### Are these changes tested? Yes, by including this commit in other PRs that pass CI tests on Windows. * GitHub Issue: #41711 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Sutou Kouhei --- cpp/src/arrow/util/macros.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index d0c05a7908256..484df3400d92d 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -102,7 +102,7 @@ #elif defined(_MSC_VER) // MSVC #define ARROW_NORETURN __declspec(noreturn) #define ARROW_NOINLINE __declspec(noinline) -#define ARROW_FORCE_INLINE __declspec(forceinline) +#define ARROW_FORCE_INLINE __forceinline #define ARROW_PREDICT_FALSE(x) (x) #define ARROW_PREDICT_TRUE(x) (x) #define ARROW_PREFETCH(addr) From 7aff9d572d57c3e29702db9b1511c00f04926007 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sat, 18 May 2024 07:34:08 +0900 Subject: [PATCH 128/261] GH-41558: [C++] Improve fixed_width_test_util.h (#41575) ### Rationale for this change Improve `fixed_width_test_util.h`. ### What changes are included in this PR? - Move `fixed_width_test_util.h` to `arrow/testing` - Split `fixed_width_test_util` into `.cc` and `.h` files - Remove unused headers ### Are these changes tested? Yes ### Are there any user-facing changes?
No * GitHub Issue: #41558 Authored-by: Hyunseok Seo Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/CMakeLists.txt | 1 + cpp/src/arrow/compute/kernels/test_util.cc | 1 + .../compute/kernels/vector_selection_test.cc | 2 +- .../arrow/testing/fixed_width_test_util.cc | 181 ++++++++++++++++ cpp/src/arrow/testing/fixed_width_test_util.h | 76 +++++++ cpp/src/arrow/util/fixed_width_test_util.h | 203 ------------------ 6 files changed, 260 insertions(+), 204 deletions(-) create mode 100644 cpp/src/arrow/testing/fixed_width_test_util.cc create mode 100644 cpp/src/arrow/testing/fixed_width_test_util.h delete mode 100644 cpp/src/arrow/util/fixed_width_test_util.h diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 0f4824ec99daa..57a0b383a677a 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -637,6 +637,7 @@ endif() set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc + testing/fixed_width_test_util.cc testing/gtest_util.cc testing/random.cc testing/generator.cc diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc index 23d0fd18d578a..2217787663a63 100644 --- a/cpp/src/arrow/compute/kernels/test_util.cc +++ b/cpp/src/arrow/compute/kernels/test_util.cc @@ -31,6 +31,7 @@ #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/table.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 4c7d85b103f36..6261fa2daec5f 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -30,10 +30,10 @@ #include "arrow/compute/kernels/test_util.h" #include "arrow/table.h" #include "arrow/testing/builder.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" -#include "arrow/util/fixed_width_test_util.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/testing/fixed_width_test_util.cc b/cpp/src/arrow/testing/fixed_width_test_util.cc new file mode 100644 index 0000000000000..9c305ed1df97c --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.cc @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include +#include +#include + +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_nested.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/testing/fixed_width_test_util.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" + +namespace arrow::util::internal { + +namespace { +template +inline Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) { + using NumericBuilder = ::arrow::NumericBuilder; + using value_type = typename NumericBuilder::value_type; + auto* numeric_builder = ::arrow::internal::checked_cast(builder); + auto cast_next_value = + static_cast(*next_value % std::numeric_limits::max()); + RETURN_NOT_OK(numeric_builder->Append(cast_next_value)); + *next_value += 1; + return Status::OK(); +} +} // namespace + +std::shared_ptr NestedListGenerator::NestedFSLType( + const std::shared_ptr& inner_type, const std::vector& sizes) { + auto type = inner_type; + for (auto it = sizes.rbegin(); it != sizes.rend(); it++) { + type = fixed_size_list(type, *it); + } + return type; +} + +std::shared_ptr NestedListGenerator::NestedListType( + const std::shared_ptr& inner_type, size_t depth) { + auto list_type = list(inner_type); + for (size_t i = 1; i < depth; i++) { + list_type = list(std::move(list_type)); + } + return list_type; +} + +Result> NestedListGenerator::NestedFSLArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedFSLType(inner_type, list_sizes); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +Result> NestedListGenerator::NestedListArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedListType(inner_type, list_sizes.size()); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +void NestedListGenerator::VisitAllNestedListConfigurations( + const std::vector>& inner_value_types, + const std::function&, const std::vector&)>& + visit, + int max_depth, int max_power_of_2_size) { + for (int depth = 1; depth <= max_depth; depth++) { + for (auto& type : inner_value_types) { + assert(is_fixed_width(*type)); + int value_width = type->byte_width(); + + std::vector list_sizes; // stack of list sizes + auto pop = [&]() { // pop the list_sizes stack + assert(!list_sizes.empty()); + value_width /= list_sizes.back(); + list_sizes.pop_back(); + }; + auto next = [&]() { // double the top of the stack + assert(!list_sizes.empty()); + value_width *= 2; + list_sizes.back() *= 2; + return value_width; + }; + auto push_1s = [&]() { // fill the stack with 1s + while (list_sizes.size() < static_cast(depth)) { + list_sizes.push_back(1); + } + }; + + // Loop invariants: + // value_width == product(list_sizes) * type->byte_width() + // value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32) + push_1s(); + do { + // for (auto x : list_sizes) printf("%d * ", x); + // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(), + // value_width); + visit(type, list_sizes); + while (!list_sizes.empty()) { + if (next() <= max_power_of_2_size) { + push_1s(); + break; + } + pop(); + } + } while (!list_sizes.empty()); + } + } +} + +Status NestedListGenerator::AppendNestedList(ArrayBuilder* nested_builder, + const int* list_sizes, + int64_t* next_inner_value) { + using ::arrow::internal::checked_cast; + ArrayBuilder* builder = 
nested_builder; + auto type = builder->type(); + if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) { + const int list_size = *list_sizes; + if (type->id() == Type::FIXED_SIZE_LIST) { + auto* fsl_builder = checked_cast(builder); + assert(list_size == checked_cast(*type).list_size()); + RETURN_NOT_OK(fsl_builder->Append()); + builder = fsl_builder->value_builder(); + } else { // type->id() == Type::LIST) + auto* list_builder = checked_cast(builder); + RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size)); + builder = list_builder->value_builder(); + } + list_sizes++; + for (int i = 0; i < list_size; i++) { + RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value)); + } + } else { + switch (type->id()) { + case Type::INT8: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT16: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT32: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT64: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + default: + return Status::NotImplemented("Unsupported type: ", *type); + } + } + return Status::OK(); +} + +Result> NestedListGenerator::NestedListArray( + ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length) { + int64_t next_inner_value = 0; + for (int64_t i = 0; i < length; i++) { + RETURN_NOT_OK(AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value)); + } + return nested_builder->Finish(); +} + +} // namespace arrow::util::internal diff --git a/cpp/src/arrow/testing/fixed_width_test_util.h b/cpp/src/arrow/testing/fixed_width_test_util.h new file mode 100644 index 0000000000000..9e5e6fa68509e --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.h @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "arrow/testing/visibility.h" +#include "arrow/type.h" +#include "arrow/type_fwd.h" + +namespace arrow::util::internal { + +class ARROW_TESTING_EXPORT NestedListGenerator { + public: + /// \brief Create a nested FixedSizeListType. + /// + /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])` + static std::shared_ptr NestedFSLType( + const std::shared_ptr& inner_type, const std::vector& sizes); + + /// \brief Create a nested FixedListType. 
+ /// + /// \return `list(list(...))` + static std::shared_ptr NestedListType( + const std::shared_ptr& inner_type, size_t depth); + + static Result> NestedFSLArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length); + + static Result> NestedListArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length); + + /// \brief Generate all possible nested list configurations of depth 1 to max_depth. + /// + /// Each configuration consists of a single inner value type and a list of sizes. + /// Both can be used with NestedFSLArray and NestedListArray to generate test data. + /// + /// The product of the list sizes and the size of the inner value type is always a power + /// of 2 no greater than max_power_of_2_size. For max_depth=3 and + /// max_power_of_2_size=32, this generates 108 configurations. + static void VisitAllNestedListConfigurations( + const std::vector>& inner_value_types, + const std::function&, + const std::vector&)>& visit, + int max_depth = 3, int max_power_of_2_size = 32); + + private: + // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...]) + static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes, + int64_t* next_inner_value); + + static Result> NestedListArray( + ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length); +}; + +} // namespace arrow::util::internal diff --git a/cpp/src/arrow/util/fixed_width_test_util.h b/cpp/src/arrow/util/fixed_width_test_util.h deleted file mode 100644 index ca141b7ca2c4d..0000000000000 --- a/cpp/src/arrow/util/fixed_width_test_util.h +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include "arrow/array/builder_primitive.h" -#include "arrow/builder.h" -#include "arrow/type.h" -#include "arrow/util/checked_cast.h" - -namespace arrow::util::internal { - -class NestedListGenerator { - public: - /// \brief Create a nested FixedSizeListType. - /// - /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])` - static std::shared_ptr NestedFSLType( - const std::shared_ptr& inner_type, const std::vector& sizes) { - auto type = inner_type; - for (auto it = sizes.rbegin(); it != sizes.rend(); it++) { - type = fixed_size_list(std::move(type), *it); - } - return type; - } - - /// \brief Create a nested FixedListType. 
- /// - /// \return `list(list(...))` - static std::shared_ptr NestedListType( - const std::shared_ptr& inner_type, size_t depth) { - auto list_type = list(inner_type); - for (size_t i = 1; i < depth; i++) { - list_type = list(std::move(list_type)); - } - return list_type; - } - - private: - template - static Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) { - using NumericBuilder = ::arrow::NumericBuilder; - using value_type = typename NumericBuilder::value_type; - auto* numeric_builder = ::arrow::internal::checked_cast(builder); - auto cast_next_value = - static_cast(*next_value % std::numeric_limits::max()); - RETURN_NOT_OK(numeric_builder->Append(cast_next_value)); - *next_value += 1; - return Status::OK(); - } - - // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...]) - static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes, - int64_t* next_inner_value) { - using ::arrow::internal::checked_cast; - ArrayBuilder* builder = nested_builder; - auto type = builder->type(); - if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) { - const int list_size = *list_sizes; - if (type->id() == Type::FIXED_SIZE_LIST) { - auto* fsl_builder = checked_cast(builder); - assert(list_size == checked_cast(*type).list_size()); - RETURN_NOT_OK(fsl_builder->Append()); - builder = fsl_builder->value_builder(); - } else { // type->id() == Type::LIST) - auto* list_builder = checked_cast(builder); - RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size)); - builder = list_builder->value_builder(); - } - list_sizes++; - for (int i = 0; i < list_size; i++) { - RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value)); - } - } else { - switch (type->id()) { - case Type::INT8: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT16: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT32: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT64: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - default: - return Status::NotImplemented("Unsupported type: ", *type); - } - } - return Status::OK(); - } - - static Result> NestedListArray( - ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length) { - int64_t next_inner_value = 0; - for (int64_t i = 0; i < length; i++) { - RETURN_NOT_OK( - AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value)); - } - return nested_builder->Finish(); - } - - public: - static Result> NestedFSLArray( - const std::shared_ptr& inner_type, const std::vector& list_sizes, - int64_t length) { - auto nested_type = NestedFSLType(inner_type, list_sizes); - ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); - return NestedListArray(builder.get(), list_sizes, length); - } - - static Result> NestedListArray( - const std::shared_ptr& inner_type, const std::vector& list_sizes, - int64_t length) { - auto nested_type = NestedListType(inner_type, list_sizes.size()); - ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); - return NestedListArray(builder.get(), list_sizes, length); - } - - /// \brief Generate all possible nested list configurations of depth 1 to max_depth. - /// - /// Each configuration consists of a single inner value type and a list of sizes. - /// Both can be used with NestedFSLArray and NestedListArray to generate test data. 
- /// - /// The product of the list sizes and the size of the inner value type is always a power - /// of 2 no greater than max_power_of_2_size. For max_depth=3 and - /// max_power_of_2_size=32, this generates 108 configurations. - /// - /// \tparam Visit a function type with signature - /// void(const std::shared_ptr& inner_type, - /// const std::vector& list_sizes) - template - static void VisitAllNestedListConfigurations( - const std::vector>& inner_value_types, Visit&& visit, - int max_depth = 3, int max_power_of_2_size = 32) { - for (int depth = 1; depth <= max_depth; depth++) { - for (auto& type : inner_value_types) { - assert(is_fixed_width(*type)); - int value_width = type->byte_width(); - - std::vector list_sizes; // stack of list sizes - auto pop = [&]() { // pop the list_sizes stack - assert(!list_sizes.empty()); - value_width /= list_sizes.back(); - list_sizes.pop_back(); - }; - auto next = [&]() { // double the top of the stack - assert(!list_sizes.empty()); - value_width *= 2; - list_sizes.back() *= 2; - return value_width; - }; - auto push_1s = [&]() { // fill the stack with 1s - while (list_sizes.size() < static_cast(depth)) { - list_sizes.push_back(1); - } - }; - - // Loop invariants: - // value_width == product(list_sizes) * type->byte_width() - // value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32) - push_1s(); - do { - // for (auto x : list_sizes) printf("%d * ", x); - // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(), - // value_width); - visit(type, list_sizes); - // Advance to the next test case - while (!list_sizes.empty()) { - if (next() <= max_power_of_2_size) { - push_1s(); - break; - } - pop(); - } - } while (!list_sizes.empty()); - } - } - } -}; - -} // namespace arrow::util::internal From dcdf4e6953b7fdab6078c444c8d07a606750fec1 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Sat, 18 May 2024 11:21:32 +0800 Subject: [PATCH 129/261] GH-41460: [C++] Use ASAN to poison temp vector stack memory (#41695) ### Rationale for this change See #41460. This also reduces the overhead of the current manual poisoning (filling the entire stack space with `0xFF`s), which happens even in release mode. ### What changes are included in this PR? Use the ASAN API to replace the current manual poisoning of the temp vector stack memory. ### Are these changes tested? I wanted to add cases asserting that ASAN poison/unpoison is functioning, but found it too tricky to catch an ASAN error in a test, because ASAN reports failures via signals that are hard to interoperate with from C++/gtest. Instead, I manually verified locally that poisoning works, e.g. by intentionally not unpoisoning the allocated buffer and observing ASAN complain. Existing cases that use the temp stack, such as the Acero tests, should cover this change well. ### Are there any user-facing changes? None.
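The poisoning discipline, reduced to a minimal standalone C++ sketch (a hypothetical `PoisonedStack`, not Arrow's actual `TempVectorStack`, keyed on the same `ADDRESS_SANITIZER` define the patch uses): the whole buffer starts poisoned, allocations unpoison only the region handed out, and releases re-poison it so stale reads trip ASAN.

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

#ifdef ADDRESS_SANITIZER
#include <sanitizer/asan_interface.h>
#else
// No-op fallbacks so the sketch still compiles without ASAN.
#define ASAN_POISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#define ASAN_UNPOISON_MEMORY_REGION(addr, size) ((void)(addr), (void)(size))
#endif

// Hypothetical stack allocator, for illustration only.
class PoisonedStack {
 public:
  explicit PoisonedStack(size_t size) : data_(size) {
    // Everything starts poisoned: any read before Alloc() trips ASAN.
    ASAN_POISON_MEMORY_REGION(data_.data(), data_.size());
  }
  ~PoisonedStack() {
    // Unpoison before the vector frees its storage.
    ASAN_UNPOISON_MEMORY_REGION(data_.data(), data_.size());
  }
  uint8_t* Alloc(size_t n) {
    uint8_t* p = data_.data() + top_;
    ASAN_UNPOISON_MEMORY_REGION(p, n);  // only the handed-out region is readable
    top_ += n;
    return p;
  }
  void Release(size_t n) {
    top_ -= n;
    // Re-poison so later reads of stale data fail fast.
    ASAN_POISON_MEMORY_REGION(data_.data() + top_, n);
  }

 private:
  std::vector<uint8_t> data_;
  size_t top_ = 0;
};
```

With this in place, reads of unallocated stack regions abort immediately under ASAN instead of silently observing filler bytes, and release builds pay no memset cost at all.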
* GitHub Issue: #41460 Authored-by: Ruoxi Sun Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/util_internal.cc | 27 +++++++++++++++++++++++--- cpp/src/arrow/compute/util_internal.h | 10 +++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) diff --git a/cpp/src/arrow/compute/util_internal.cc b/cpp/src/arrow/compute/util_internal.cc index cc26982fef110..9780d1b2f3c2a 100644 --- a/cpp/src/arrow/compute/util_internal.cc +++ b/cpp/src/arrow/compute/util_internal.cc @@ -20,16 +20,29 @@ #include "arrow/compute/util.h" #include "arrow/memory_pool.h" +#ifdef ADDRESS_SANITIZER +#include +#endif + namespace arrow { namespace util { +TempVectorStack::~TempVectorStack() { +#ifdef ADDRESS_SANITIZER + if (buffer_) { + ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data(), buffer_size_); + } +#endif +} + Status TempVectorStack::Init(MemoryPool* pool, int64_t size) { num_vectors_ = 0; top_ = 0; buffer_size_ = EstimatedAllocationSize(size); ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); - // Ensure later operations don't accidentally read uninitialized memory. - std::memset(buffer->mutable_data(), 0xFF, size); +#ifdef ADDRESS_SANITIZER + ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), size); +#endif buffer_ = std::move(buffer); return Status::OK(); } @@ -53,12 +66,17 @@ void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size << " on top of " << top_ << " in stack of size " << buffer_size_; - *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); +#ifdef ADDRESS_SANITIZER + ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data() + top_, estimated_alloc_size); +#endif + *data = buffer_->mutable_data() + top_ + /*one guard*/ sizeof(uint64_t); +#ifndef NDEBUG // We set 8 bytes before the beginning of the allocated range and // 8 bytes after the end to check for stack overflow (which would // result in those known bytes being corrupted). 
reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; +#endif *id = num_vectors_++; top_ = new_top; } @@ -72,6 +90,9 @@ void TempVectorStack::release(int id, uint32_t num_bytes) { top_ -= size; ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[0] == kGuard1); +#ifdef ADDRESS_SANITIZER + ASAN_POISON_MEMORY_REGION(buffer_->mutable_data() + top_, size); +#endif --num_vectors_; } diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 043ff118062e4..f6c5ac1f83ac4 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -20,6 +20,7 @@ #include "arrow/status.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" namespace arrow { namespace util { @@ -38,13 +39,20 @@ class ARROW_EXPORT TempVectorStack { friend class TempVectorHolder; public: + TempVectorStack() = default; + ~TempVectorStack(); + + ARROW_DISALLOW_COPY_AND_ASSIGN(TempVectorStack); + + ARROW_DEFAULT_MOVE_AND_ASSIGN(TempVectorStack); + Status Init(MemoryPool* pool, int64_t size); int64_t AllocatedSize() const { return top_; } private: static int64_t EstimatedAllocationSize(int64_t size) { - return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); + return PaddedAllocationSize(size) + /*two guards*/ 2 * sizeof(uint64_t); } static int64_t PaddedAllocationSize(int64_t num_bytes); From cc3e2db300947aaf777f1814ce5ee61f42410d4e Mon Sep 17 00:00:00 2001 From: Joel Lubinitsky <33523178+joellubi@users.noreply.github.com> Date: Mon, 20 May 2024 09:14:50 -0400 Subject: [PATCH 130/261] GH-41697: [Go][Parquet] Release BufferWriter when BufferedPageWriter is closed (#41698) ### Rationale for this change A small buffer gets reallocated after calling `Finish()`, causing that memory to leak. ### What changes are included in this PR? Release the buffer when the pagewriter is closed. ### Are these changes tested? Yes ### Are there any user-facing changes? Memory will not leak on this code path. 
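The shape of the fix, sketched as a hypothetical C++ analogue rather than the actual Go `BufferWriter`: drop the buffer when the writer is closed, and have reset lazily recreate it instead of reallocating into storage that nothing will ever free.

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Illustrative only; the real code operates on Arrow Go's resizable buffers.
class PageBufferWriter {
 public:
  void Reset(size_t initial) {
    if (buffer_) {
      buffer_->clear();  // keep and reuse the existing allocation
    } else {
      // Lazily recreate the buffer released by Close().
      buffer_ = std::make_unique<std::vector<uint8_t>>();
    }
    pos_ = 0;
    if (initial > 0) {
      buffer_->reserve(initial);  // only reserve when a size was requested
    }
  }

  void Close() { buffer_.reset(); }  // release memory instead of leaking it

 private:
  std::unique_ptr<std::vector<uint8_t>> buffer_;
  size_t pos_ = 0;
};
```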
* GitHub Issue: #41697 --- go/parquet/internal/encoding/types.go | 7 ++++- go/parquet/pqarrow/file_writer_test.go | 42 ++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 147c1746c515a..2d7a5d6b1d166 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -361,11 +361,16 @@ func (b *BufferWriter) Truncate() { func (b *BufferWriter) Reset(initial int) { if b.buffer != nil { b.buffer.Release() + } else { + b.buffer = memory.NewResizableBuffer(b.mem) } b.pos = 0 b.offset = 0 - b.Reserve(initial) + + if initial > 0 { + b.Reserve(initial) + } } // Reserve ensures that there is at least enough capacity to write nbytes diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go index 425e4479f6d5c..fc965279a928d 100644 --- a/go/parquet/pqarrow/file_writer_test.go +++ b/go/parquet/pqarrow/file_writer_test.go @@ -18,6 +18,7 @@ package pqarrow_test import ( "bytes" + "math" "strings" "testing" @@ -87,3 +88,44 @@ func TestFileWriterNumRows(t *testing.T) { require.NoError(t, writer.Close()) assert.Equal(t, 4, writer.NumRows()) } + +func TestFileWriterBuffered(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + + alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + + record, _, err := array.RecordFromJSON(alloc, schema, strings.NewReader(data)) + require.NoError(t, err) + defer record.Release() + + output := &bytes.Buffer{} + writer, err := pqarrow.NewFileWriter( + schema, + output, + parquet.NewWriterProperties( + parquet.WithAllocator(alloc), + // Ensure enough space so we can close the writer with rows still buffered + parquet.WithMaxRowGroupLength(math.MaxInt64), + ), + pqarrow.NewArrowWriterProperties( + pqarrow.WithAllocator(alloc), + ), + ) + require.NoError(t, err) + + require.NoError(t, writer.WriteBuffered(record)) + + require.NoError(t, writer.Close()) + assert.Equal(t, 4, writer.NumRows()) +} From 5809dafb0f25b4bbf65387b0206d0f94b391c0b9 Mon Sep 17 00:00:00 2001 From: Kevin Gurney Date: Mon, 20 May 2024 13:53:21 -0400 Subject: [PATCH 131/261] GH-41656: [MATLAB] Add C Data Interface format import/export functionality for `arrow.array.Array` (#41737) ### Rationale for this change Now that #41653 and #41654 have been addressed, we should add MATLAB APIs for importing/exporting `arrow.array.Array` objects using the C Data Interface format. This pull request adds two new APIs for importing and exporting `arrow.array.Array` objects using the C Data Interface format. 
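For reference, the C++ bridge calls that these MATLAB proxies wrap (`arrow::ExportArray` and `arrow::ImportArray` from `arrow/c/bridge.h`) can be exercised directly; a minimal round-trip sketch:

```cpp
#include <arrow/api.h>
#include <arrow/c/bridge.h>

// Round-trips a Float64 array through the C Data Interface structs.
arrow::Status RoundTripThroughC() {
  arrow::DoubleBuilder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1.0, 2.0, 3.0}));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> array, builder.Finish());

  ArrowArray c_array;
  ArrowSchema c_schema;
  ARROW_RETURN_NOT_OK(arrow::ExportArray(*array, &c_array, &c_schema));

  // Importing takes ownership of the C structs and releases them.
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> imported,
                        arrow::ImportArray(&c_array, &c_schema));
  return imported->Equals(*array) ? arrow::Status::OK()
                                  : arrow::Status::Invalid("round trip mismatch");
}
```

The MATLAB workflow below performs the same exchange through proxy objects that hold the addresses of the two C structs.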
#### Example ```matlab >> expected = arrow.array([1, 2, 3]) expected = Float64Array with 3 elements and 0 null values: 1 | 2 | 3 >> cArray = arrow.c.Array() cArray = Array with properties: Address: 140341875084944 >> cSchema = arrow.c.Schema() cSchema = Schema with properties: Address: 140341880022320 % Export the Array to C Data Interface Format >> expected.export(cArray.Address, cSchema.Address) % Import the Array from C Data Interface Format >> actual = arrow.array.Array.import(cArray, cSchema) actual = Float64Array with 3 elements and 0 null values: 1 | 2 | 3 % The Array is the same after round-tripping to C Data Interface format >> isequal(actual, expected) ans = logical 1 ``` ### What changes are included in this PR? 1. Added new `arrow.array.Array.export(cArrowArrayAddress, cArrowSchemaAddress)` method for exporting `Array` objects to C Data Interface format. 2. Added new static `arrow.array.Array.import(cArray, cSchema)` method for importing `Array`s from C Data Interface format. 3. Added new internal `arrow.c.internal.ArrayImporter` class for importing `Array` objects from C Data Interface format. ### Are these changes tested? Yes. 1. Added new test file `matlab/test/arrow/c/tRoundTrip.m` with basic round-trip tests for importing/exporting `Array` objects using the C Data Interface format. ### Are there any user-facing changes? Yes. 1. There are now two new user-facing APIs added to the `arrow.array.Array` class. These are `arrow.array.Array.export(cArrowArrayAddress, cArrowSchemaAddress)` and `arrow.array.Array.import(cArray, cSchema)`. These APIs can be used to import/export `Array` objects using the C Data Interface format. ### Future Directions 1. Add integration tests for sharing data between MATLAB/mlarrow and Python/pyarrow running in the same process using the [MATLAB interface to Python](https://www.mathworks.com/help/matlab/call-python-libraries.html). 2. Add support for exporting/importing `arrow.tabular.RecordBatch` objects using the C Data Interface format. 3. Add support for the Arrow [C stream interface format](https://arrow.apache.org/docs/format/CStreamInterface.html). ### Notes 1. Thanks @ sgilmore10 for your help with this pull request! * GitHub Issue: #41656 Lead-authored-by: Kevin Gurney Co-authored-by: Kevin Gurney Co-authored-by: Sutou Kouhei Signed-off-by: Kevin Gurney --- .../src/cpp/arrow/matlab/array/proxy/array.cc | 18 ++ .../src/cpp/arrow/matlab/array/proxy/array.h | 2 + .../arrow/matlab/c/proxy/array_importer.cc | 69 +++++++ .../cpp/arrow/matlab/c/proxy/array_importer.h | 37 ++++ matlab/src/cpp/arrow/matlab/error/error.h | 2 + matlab/src/cpp/arrow/matlab/proxy/factory.cc | 2 + matlab/src/matlab/+arrow/+array/Array.m | 24 +++ .../+arrow/+c/+internal/ArrayImporter.m | 50 +++++ matlab/test/arrow/c/tRoundTrip.m | 182 ++++++++++++++++++ .../cmake/BuildMatlabArrowInterface.cmake | 1 + 10 files changed, 387 insertions(+) create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h create mode 100644 matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m create mode 100644 matlab/test/arrow/c/tRoundTrip.m diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index b8f85b08632a3..1eb6de74fec65 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include "arrow/c/bridge.h" #include "arrow/util/utf8.h" #include "arrow/matlab/array/proxy/array.h" @@ -40,6 +41,7 @@ Array::Array(std::shared_ptr array) : array{std::move(array)} { REGISTER_METHOD(Array, getType); REGISTER_METHOD(Array, isEqual); REGISTER_METHOD(Array, slice); + REGISTER_METHOD(Array, exportToC); } std::shared_ptr Array::unwrap() { return array; } @@ -178,4 +180,20 @@ void Array::slice(libmexclass::proxy::method::Context& context) { output[0]["TypeID"] = factory.createScalar(type_id); context.outputs[0] = output; } + +void Array::exportToC(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::StructArray opts = context.inputs[0]; + const mda::TypedArray array_address_mda = opts[0]["ArrowArrayAddress"]; + const mda::TypedArray schema_address_mda = opts[0]["ArrowSchemaAddress"]; + + auto arrow_array = reinterpret_cast(uint64_t(array_address_mda[0])); + auto arrow_schema = + reinterpret_cast(uint64_t(schema_address_mda[0])); + + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + arrow::ExportArray(*array, arrow_array, arrow_schema), context, + error::C_EXPORT_FAILED); +} + } // namespace arrow::matlab::array::proxy diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index 61ba06a503bc4..c249693ac2797 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -45,6 +45,8 @@ class Array : public libmexclass::proxy::Proxy { void slice(libmexclass::proxy::method::Context& context); + void exportToC(libmexclass::proxy::method::Context& context); + std::shared_ptr array; }; diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc new file mode 100644 index 0000000000000..b6f68332d1757 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/array.h" +#include "arrow/c/bridge.h" + +#include "arrow/matlab/array/proxy/wrap.h" +#include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/error/error.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::c::proxy { + +ArrayImporter::ArrayImporter() { REGISTER_METHOD(ArrayImporter, import); } + +libmexclass::proxy::MakeResult ArrayImporter::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void ArrayImporter::import(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray arrow_array_address_mda = args[0]["ArrowArrayAddress"]; + const mda::TypedArray arrow_schema_address_mda = + args[0]["ArrowSchemaAddress"]; + + const auto arrow_array_address = uint64_t(arrow_array_address_mda[0]); + const auto arrow_schema_address = uint64_t(arrow_schema_address_mda[0]); + + auto arrow_array = reinterpret_cast(arrow_array_address); + auto arrow_schema = reinterpret_cast(arrow_schema_address); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array, + arrow::ImportArray(arrow_array, arrow_schema), + context, error::C_IMPORT_FAILED); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array_proxy, + arrow::matlab::array::proxy::wrap(array), context, + error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + + mda::ArrayFactory factory; + const auto array_proxy_id = ProxyManager::manageProxy(array_proxy); + const auto array_proxy_id_mda = factory.createScalar(array_proxy_id); + const auto array_type_id_mda = + factory.createScalar(static_cast(array->type_id())); + + context.outputs[0] = array_proxy_id_mda; + context.outputs[1] = array_type_id_mda; +} + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h new file mode 100644 index 0000000000000..6459393058737 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class ArrayImporter : public libmexclass::proxy::Proxy { + public: + ArrayImporter(); + + ~ArrayImporter() = default; + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void import(libmexclass::proxy::method::Context& context); +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index db8b715141ee8..58c43d8843e4b 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -240,5 +240,7 @@ static const char* ARRAY_SLICE_NON_POSITIVE_OFFSET = static const char* ARRAY_SLICE_NEGATIVE_LENGTH = "arrow:array:slice:NegativeLength"; static const char* ARRAY_SLICE_FAILED_TO_CREATE_ARRAY_PROXY = "arrow:array:slice:FailedToCreateArrayProxy"; +static const char* C_EXPORT_FAILED = "arrow:c:export:ExportFailed"; +static const char* C_IMPORT_FAILED = "arrow:c:import:ImportFailed"; } // namespace arrow::matlab::error diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index d7a8fa9ac2e74..9b95fcf128090 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -26,6 +26,7 @@ #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/buffer/proxy/buffer.h" #include "arrow/matlab/c/proxy/array.h" +#include "arrow/matlab/c/proxy/array_importer.h" #include "arrow/matlab/c/proxy/schema.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" @@ -102,6 +103,7 @@ libmexclass::proxy::MakeResult Factory::make_proxy( REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); + REGISTER_PROXY(arrow.c.proxy.ArrayImporter , arrow::matlab::c::proxy::ArrayImporter); REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema); // clang-format on diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 4402055932b60..01bacdf5755dc 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ b/matlab/src/matlab/+arrow/+array/Array.m @@ -97,6 +97,19 @@ function displayScalarObject(obj) % Invoke isEqual proxy object method tf = obj.Proxy.isEqual(proxyIDs); end + + function export(obj, cArrowArrayAddress, cArrowSchemaAddress) + arguments + obj(1, 1) arrow.array.Array + cArrowArrayAddress(1, 1) uint64 + cArrowSchemaAddress(1, 1) uint64 + end + args = struct(... + ArrowArrayAddress=cArrowArrayAddress,... + ArrowSchemaAddress=cArrowSchemaAddress... 
+ ); + obj.Proxy.exportToC(args); + end end methods (Hidden) @@ -108,4 +121,15 @@ function displayScalarObject(obj) array = traits.ArrayConstructor(proxy); end end + + methods (Static) + function array = import(cArray, cSchema) + arguments + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + importer = arrow.c.internal.ArrayImporter(); + array = importer.import(cArray, cSchema); + end + end end diff --git a/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m new file mode 100644 index 0000000000000..3f2f7445b3d6d --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m @@ -0,0 +1,50 @@ +%ARRAYIMPORTER Imports Arrow Array using the C Data Interface Format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef ArrayImporter < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + + function obj = ArrayImporter() + proxyName = "arrow.c.proxy.ArrayImporter"; + proxy = arrow.internal.proxy.create(proxyName, struct()); + obj.Proxy = proxy; + end + + function array = import(obj, cArray, cSchema) + arguments + obj(1, 1) arrow.c.internal.ArrayImporter + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + args = struct(... + ArrowArrayAddress=cArray.Address,... + ArrowSchemaAddress=cSchema.Address... + ); + [proxyID, typeID] = obj.Proxy.import(args); + traits = arrow.type.traits.traits(arrow.type.ID(typeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=proxyID); + array = traits.ArrayConstructor(proxy); + end + + end + +end + diff --git a/matlab/test/arrow/c/tRoundTrip.m b/matlab/test/arrow/c/tRoundTrip.m new file mode 100644 index 0000000000000..a72dbe2679a2d --- /dev/null +++ b/matlab/test/arrow/c/tRoundTrip.m @@ -0,0 +1,182 @@ +%TROUNDTRIP Tests for roundtripping using the C Data Interface format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+classdef tRoundTrip < matlab.unittest.TestCase + + methods (Test) + + function EmptyArray(testCase) + expected = arrow.array(double.empty(0, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ArrayWithNulls(testCase) + % Scalar null + expected = arrow.array(double(NaN)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector with nulls + expected = arrow.array([1, NaN, 3, NaN, 5]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector all nulls + expected = arrow.array([NaN, NaN, NaN]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function Float64Array(testCase) + % Scalar + expected = arrow.array(double(1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([1, 2, 3]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function StringArray(testCase) + % Scalar + expected = arrow.array("A"); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array(["A", "B", "C"]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function TimestampArray(testCase) + % Scalar + expected = arrow.array(datetime(2024, 1, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([... + datetime(2024, 1, 1),... + datetime(2024, 1, 2),... + datetime(2024, 1, 3)... 
+ ]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ExportErrorWrongInputTypes(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("cArray.Address", "cSchema.Address"); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ExportTooFewInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ExportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorWrongInputTypes(testCase) + cArray = "arrow.c.Array"; + cSchema = "arrow.c.Schema"; + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ImportTooFewInputs(testCase) + fcn = @() arrow.array.Array.import(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ImportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() arrow.array.Array.import("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorImportFailed(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Array and arrow.c.Schema were + % never populated previously from an exported Array. + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + end + +end diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 8f37bef77b859..92e9f59145acc 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -77,6 +77,7 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc") From 11decbc10776bbe0361fe7b8bfebcf3e47ae9c6c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Mon, 20 May 2024 17:17:38 -0400 Subject: [PATCH 132/261] GH-41735: [CI][Archery] Update archery to be compatible with pygit2 1.15 API change (#41739) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change pygit2 updated how they expose some of the `GIT_OBJ` variables to use `GIT_OBJECT` prefix here: https://github.com/libgit2/pygit2/commit/8b3861b9092d1e3517b11f6ab06434ea866dd051 ### What changes are included in this PR? Update code to make it compatible with both possible APIs. ### Are these changes tested? Via archery ### Are there any user-facing changes? 
No * GitHub Issue: #41735 Authored-by: Raúl Cumplido Signed-off-by: Sutou Kouhei --- dev/archery/archery/crossbow/core.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index c85f1f754b997..0b5d242bbaccf 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -427,8 +427,14 @@ def create_branch(self, branch_name, files, parents=None, message='', return branch def create_tag(self, tag_name, commit_id, message=''): + git_object_commit = ( + pygit2.GIT_OBJECT_COMMIT + if hasattr(pygit2, 'GIT_OBJECT_COMMIT') + else pygit2.GIT_OBJ_COMMIT + ) tag_id = self.repo.create_tag(tag_name, commit_id, - pygit2.GIT_OBJ_COMMIT, self.signature, + git_object_commit, + self.signature, message) # append to the pushable references From 1cd28729a34a0e761c7dba2612e7f9ec6f4ea31c Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Mon, 20 May 2024 23:56:48 +0100 Subject: [PATCH 133/261] GH-41717: [Java][Vector] fix issue with ByteBuffer rewind in MessageSerializer (#41718) ### Rationale for this change Since Java 9, `ByteBuffer.rewind()` has a covariant `ByteBuffer` return type, so bytecode compiled with a newer JDK can fail with `NoSuchMethodError` when run on Java 8 unless the call is made through `java.nio.Buffer`. ### What changes are included in this PR? #41717 describes the issue and the change ### Are these changes tested? CI build ### Are there any user-facing changes? * GitHub Issue: #41717 Authored-by: PJ Fanning Signed-off-by: David Li --- .../org/apache/arrow/vector/ipc/message/MessageSerializer.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java index 9deb42c498cbb..099103cd178f8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java @@ -701,7 +701,8 @@ public static MessageMetadataResult readMessage(ReadChannel in) throws IOExcepti throw new IOException( "Unexpected end of stream trying to read message."); } - messageBuffer.rewind(); + // see https://github.com/apache/arrow/issues/41717 for the reason why we cast to java.nio.Buffer + ByteBuffer rewindBuffer = (ByteBuffer) ((java.nio.Buffer) messageBuffer).rewind(); // Load the message. Message message = Message.getRootAsMessage(messageBuffer); From 66580441ad674c15050710fc1228f3090c855196 Mon Sep 17 00:00:00 2001 From: Rossi Sun Date: Tue, 21 May 2024 08:41:20 +0800 Subject: [PATCH 134/261] GH-41738: [C++] Fix the issue that temp vector stack may be under sized (#41746) ### Rationale for this change See #41738. ### What changes are included in this PR? Allocate the underlying buffer of the temp vector stack using the padded size. ### Are these changes tested? UT included. ### Are there any user-facing changes? None.
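A sketch of the arithmetic (the 64-byte alignment unit is an assumption for illustration; Arrow's actual `PaddedAllocationSize` may differ): each allocation is rounded up and bracketed by two 8-byte guard words, so `Init()` must size the buffer by `EstimatedAllocationSize(size)` rather than `size`.

```cpp
#include <cstdint>

// Hypothetical constants and functions mirroring the patch's sizing logic.
constexpr int64_t kPadding = 64;  // assumed alignment unit, for illustration

constexpr int64_t PaddedAllocationSize(int64_t num_bytes) {
  return (num_bytes + kPadding - 1) / kPadding * kPadding;
}

constexpr int64_t EstimatedAllocationSize(int64_t num_bytes) {
  // Two guard words bracket every allocation to detect stack overflow.
  return PaddedAllocationSize(num_bytes) + 2 * static_cast<int64_t>(sizeof(uint64_t));
}

// Requesting `size` bytes needs strictly more than `size` bytes of backing
// storage, which is exactly what allocating the raw `size` failed to provide.
static_assert(EstimatedAllocationSize(1) == 64 + 16);
static_assert(EstimatedAllocationSize(65535) == 65536 + 16);
```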
* GitHub Issue: #41738 Authored-by: Ruoxi Sun Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/CMakeLists.txt | 3 +- cpp/src/arrow/compute/util_internal.cc | 4 +- cpp/src/arrow/compute/util_internal.h | 2 + cpp/src/arrow/compute/util_internal_test.cc | 52 +++++++++++++++++++++ 4 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 cpp/src/arrow/compute/util_internal_test.cc diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index fb778be113029..0a8018cd580cf 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -91,7 +91,8 @@ add_arrow_test(internals_test registry_test.cc key_hash_test.cc row/compare_test.cc - row/grouper_test.cc) + row/grouper_test.cc + util_internal_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) diff --git a/cpp/src/arrow/compute/util_internal.cc b/cpp/src/arrow/compute/util_internal.cc index 9780d1b2f3c2a..7a7875162c434 100644 --- a/cpp/src/arrow/compute/util_internal.cc +++ b/cpp/src/arrow/compute/util_internal.cc @@ -39,9 +39,9 @@ Status TempVectorStack::Init(MemoryPool* pool, int64_t size) { num_vectors_ = 0; top_ = 0; buffer_size_ = EstimatedAllocationSize(size); - ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(buffer_size_, pool)); #ifdef ADDRESS_SANITIZER - ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), size); + ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), buffer_size_); #endif buffer_ = std::move(buffer); return Status::OK(); diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index f6c5ac1f83ac4..5e5b15a5ff600 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -66,6 +66,8 @@ class ARROW_EXPORT TempVectorStack { int64_t top_; std::unique_ptr buffer_; int64_t buffer_size_; + + friend class TempVectorStackTest; }; template diff --git a/cpp/src/arrow/compute/util_internal_test.cc b/cpp/src/arrow/compute/util_internal_test.cc new file mode 100644 index 0000000000000..fbf34f2228488 --- /dev/null +++ b/cpp/src/arrow/compute/util_internal_test.cc @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "arrow/buffer.h" +#include "arrow/compute/util_internal.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow { +namespace util { + +class TempVectorStackTest : public ::testing::Test { + protected: + static const uint8_t* BufferData(const TempVectorStack& stack) { + return stack.buffer_->data(); + } + + static int64_t BufferCapacity(const TempVectorStack& stack) { + return stack.buffer_->capacity(); + } +}; + +// GH-41738: Test the underlying buffer capacity is sufficient to hold the requested +// vector. +TEST_F(TempVectorStackTest, BufferCapacitySufficiency) { + for (uint32_t stack_size : {1, 7, 8, 63, 64, 65535, 65536}) { + ARROW_SCOPED_TRACE("stack_size = ", stack_size); + TempVectorStack stack; + ASSERT_OK(stack.Init(default_memory_pool(), stack_size)); + + TempVectorHolder v(&stack, stack_size); + ASSERT_LE(v.mutable_data() + stack_size, BufferData(stack) + BufferCapacity(stack)); + } +} + +} // namespace util +} // namespace arrow From b2e8c33c86c819b167a1cbca834da3c9047a9350 Mon Sep 17 00:00:00 2001 From: Tai Le Manh <49281946+tlm365@users.noreply.github.com> Date: Tue, 21 May 2024 15:23:38 +0700 Subject: [PATCH 135/261] GH-41699: [Python][Parquet] Implement to_dict method on SortingColumn (#41704) ### Rationale for this change Resolves #41699 . ### What changes are included in this PR? Add `to_dict` method and test case ### Are these changes tested? Yes ### Are there any user-facing changes? No * GitHub Issue: #41699 Authored-by: Tai Le Manh Signed-off-by: AlenkaF --- python/pyarrow/_parquet.pyx | 16 ++++++++++++++ python/pyarrow/tests/parquet/test_metadata.py | 22 +++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 7bc68a288aa78..f7724b9b1fdc7 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -705,6 +705,22 @@ cdef class SortingColumn: """Whether null values appear before valid values (bool).""" return self.nulls_first + def to_dict(self): + """ + Get dictionary representation of the SortingColumn. + + Returns + ------- + dict + Dictionary with a key for each attribute of this class. 
+ """ + d = dict( + column_index=self.column_index, + descending=self.descending, + nulls_first=self.nulls_first + ) + return d + cdef class RowGroupMetaData(_Weakrefable): """Metadata for a single row group.""" diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index bf186bd923c4f..1eb0598b5c58f 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir): def test_parquet_sorting_column(): sorting_col = pq.SortingColumn(10) - assert sorting_col.column_index == 10 - assert sorting_col.descending is False - assert sorting_col.nulls_first is False + assert sorting_col.to_dict() == { + 'column_index': 10, + 'descending': False, + 'nulls_first': False + } sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True) - assert sorting_col.column_index == 0 - assert sorting_col.descending is True - assert sorting_col.nulls_first is True + assert sorting_col.to_dict() == { + 'column_index': 0, + 'descending': True, + 'nulls_first': True + } schema = pa.schema([('a', pa.int64()), ('b', pa.int64())]) sorting_cols = ( @@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns(): # Can retrieve sorting columns from metadata metadata = pq.read_metadata(reader) - assert metadata.num_row_groups == 1 assert sorting_columns == metadata.row_group(0).sorting_columns + metadata_dict = metadata.to_dict() + assert metadata_dict.get('num_columns') == 2 + assert metadata_dict.get('num_rows') == 3 + assert metadata_dict.get('num_row_groups') == 1 + def test_field_id_metadata(): # ARROW-7080 From e3cd0ae3ea648ce5b78a2d9c9e21c54d772cb385 Mon Sep 17 00:00:00 2001 From: David Li Date: Tue, 21 May 2024 19:11:52 +0900 Subject: [PATCH 136/261] GH-41571: [Java] Revert GH-41307 (#41309) (#41628) ### Rationale for this change The commit in question caused a lot of CI issues ### Are these changes tested? N/A ### Are there any user-facing changes? 
N/A * GitHub Issue: #41571 Authored-by: David Li Signed-off-by: David Li --- java/adapter/avro/pom.xml | 9 + java/adapter/jdbc/pom.xml | 7 + java/adapter/orc/pom.xml | 17 -- java/bom/pom.xml | 21 +- java/c/pom.xml | 1 + java/dataset/pom.xml | 2 +- java/flight/flight-core/pom.xml | 27 ++- java/flight/flight-integration-tests/pom.xml | 2 + java/flight/flight-sql-jdbc-core/pom.xml | 10 + java/flight/flight-sql-jdbc-driver/pom.xml | 1 + java/flight/flight-sql/pom.xml | 5 + java/format/pom.xml | 2 + java/gandiva/pom.xml | 19 +- .../module-info-compiler-maven-plugin/pom.xml | 28 ++- java/maven/pom.xml | 75 ++++--- java/memory/memory-core/pom.xml | 22 +- java/performance/pom.xml | 40 ++++ java/pom.xml | 207 +++++++++++------- java/tools/pom.xml | 22 +- java/vector/pom.xml | 91 +++++++- 20 files changed, 432 insertions(+), 176 deletions(-) diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 645e8c4ff2e60..9ddc150253874 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,27 +25,36 @@ http://maven.apache.org + + org.apache.arrow arrow-memory-core + + org.apache.arrow arrow-memory-netty runtime + + org.apache.arrow arrow-vector + org.immutables value + org.apache.avro avro ${dep.avro.version} + diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 33360c64b13b6..5f72729bb76e7 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -26,17 +26,20 @@ + org.apache.arrow arrow-memory-core + org.apache.arrow arrow-memory-netty runtime + org.apache.arrow arrow-vector @@ -48,6 +51,7 @@ value + com.h2database h2 @@ -90,6 +94,9 @@ jdk11+ [11,] + + !m2e.version + diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index ec6f73a3e9e40..f6aadca6de4d3 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -134,22 +134,5 @@ - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - - - - org.apache.arrow:arrow-format - - - - - - diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 0af50c638055e..12b9950ad80fc 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 31 + 18 org.apache.arrow @@ -27,19 +27,6 @@ - - 1.8 - 1.8 - 3.11.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - 3.2.2 - 3.6.3 - 3.5.0 @@ -151,9 +138,11 @@ ${project.version} + + @@ -167,10 +156,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 com.diffplug.spotless @@ -197,10 +188,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 diff --git a/java/c/pom.xml b/java/c/pom.xml index 43a62a8303bfe..1095e99bbdd3f 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -83,4 +83,5 @@ + diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 2121119af398e..dd0c76523d0f8 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -201,7 +201,7 @@ org.apache.maven.plugins maven-surefire-plugin - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 4c1002ae75f04..9832850108c50 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 
17.0.0-SNAPSHOT + ../pom.xml flight-core @@ -150,6 +151,13 @@ org.apache.maven.plugins maven-shade-plugin + + 3.2.4 shade-main @@ -236,6 +244,7 @@ org.apache.maven.plugins maven-dependency-plugin + 3.3.0 analyze @@ -255,6 +264,7 @@ org.codehaus.mojo build-helper-maven-plugin + 1.9.1 add-generated-sources-to-classpath @@ -272,6 +282,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies @@ -288,6 +299,13 @@ + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + @@ -295,14 +313,18 @@ jdk11+ [11,] + + !m2e.version + org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + false ${project.basedir}/../../../testing/data @@ -312,4 +334,5 @@ + diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index cd2c28ba8959f..74016d81e91e5 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-integration-tests @@ -62,6 +63,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index ef3f2469b73dd..fbab69df3b305 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql-jdbc-core @@ -46,17 +47,20 @@ + org.apache.arrow arrow-memory-core + org.apache.arrow arrow-memory-netty runtime + org.apache.arrow arrow-vector @@ -132,6 +136,11 @@ + + + src/main/resources + + maven-surefire-plugin @@ -145,6 +154,7 @@ org.codehaus.mojo properties-maven-plugin + 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 4456270e7b347..b3afbe1defdba 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index f5926d6e68485..d5366ae988d57 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql @@ -118,6 +119,9 @@ jdk11+ [11,] + + !m2e.version + @@ -132,4 +136,5 @@ + diff --git a/java/format/pom.xml b/java/format/pom.xml index 4483047e20960..e9eded79de660 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,6 +31,7 @@ + @@ -41,5 +42,6 @@ + diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index c5703c62dfe23..00acb89f1d7cf 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,12 +22,13 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. 
- + 1.8 + 1.8 + 3.25.1 true ../../../cpp/release-build - org.apache.arrow @@ -50,6 +51,7 @@ com.google.protobuf protobuf-java + ${protobuf.version} com.google.guava @@ -60,7 +62,6 @@ slf4j-api - @@ -87,6 +88,14 @@ + + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + @@ -96,6 +105,7 @@ org.apache.maven.plugins maven-source-plugin + 2.2.1 attach-sources @@ -108,6 +118,7 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 attach-javadocs @@ -120,6 +131,7 @@ org.apache.maven.plugins maven-gpg-plugin + 3.2.2 sign-artifacts @@ -134,4 +146,5 @@ + diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 5909b6b3484fc..6589020d6ecb5 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,14 +64,39 @@ org.apache.maven.plugin-tools maven-plugin-annotations - ${maven.plugin.tools.version} + 3.11.0 provided + + + maven-clean-plugin + 3.3.2 + + + maven-plugin-plugin + 3.12.0 + + + maven-jar-plugin + 3.3.0 + + + maven-install-plugin + 3.1.1 + + + maven-deploy-plugin + 3.1.1 + + + maven-invoker-plugin + 3.1.0 + com.gradle gradle-enterprise-maven-extension @@ -93,6 +118,7 @@ org.apache.maven.plugins maven-plugin-plugin + 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 558532012a1ae..f290ded2e2913 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,13 +15,6 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. --> - - org.apache - apache - 31 - - - org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -34,38 +27,25 @@ true - - 1.8 - 1.8 - 3.12.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - 3.2.2 - 3.6.3 - 3.5.0 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 - pl.project13.maven - git-commit-id-plugin - 4.0.5 + org.apache.maven.plugins + maven-site-plugin + 3.12.1 - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 @@ -139,6 +119,11 @@ **/logback.xml + true + + true + true + org.apache.arrow ${username} @@ -158,17 +143,43 @@ + + org.apache.maven.plugins + maven-resources-plugin + + UTF-8 + + org.apache.maven.plugins maven-compiler-plugin + UTF-8 + 1.8 + 1.8 2048m + false true maven-enforcer-plugin + + validate_java_and_maven_version + + enforce + + verify + false + + + + [3.3.0,4) + + + + avoid_bad_dependencies @@ -194,6 +205,8 @@ pl.project13.maven git-commit-id-plugin + 4.0.5 + dd.MM.yyyy '@' HH:mm:ss z false @@ -235,6 +248,7 @@ org.apache.maven.plugins maven-checkstyle-plugin + 3.1.0 ../dev/checkstyle/checkstyle.xml ../dev/checkstyle/checkstyle.license @@ -274,6 +288,7 @@ org.cyclonedx cyclonedx-maven-plugin + 2.7.11 @@ -338,10 +353,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 8e39ae43d116f..ca5bc603bd4dc 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -61,6 +61,9 @@ jdk11+ [11,] + + !m2e.version + @@ -89,6 +92,7 @@ org.apache.maven.plugins maven-surefire-plugin + opens-tests @@ -97,9 +101,12 @@ test - - - + + -Dfoo=bar + + + **/TestArrowBuf.java + **/TestOpens.java @@ -122,6 +129,9 @@ org.apache.maven.plugins maven-compiler-plugin + 8 + 8 + UTF-8 -Xmaxerrs @@ -140,6 +150,12 @@ 
${checker.framework.version} + + + org.immutables.value.internal.$processor$.$Processor + + org.checkerframework.checker.nullness.NullnessChecker + diff --git a/java/performance/pom.xml b/java/performance/pom.xml index e9023ece080a3..765b6a58cd8f0 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,7 +22,9 @@ JMH Performance benchmarks for other Arrow libraries. + UTF-8 1.37 + 1.8 benchmarks true .* @@ -81,6 +83,42 @@ + + + + maven-clean-plugin + 3.3.2 + + + maven-deploy-plugin + 3.1.1 + + + maven-install-plugin + 3.1.1 + + + maven-jar-plugin + 3.3.0 + + + maven-javadoc-plugin + 3.6.3 + + + maven-resources-plugin + 3.3.1 + + + maven-source-plugin + 2.2.1 + + + maven-surefire-plugin + 3.2.5 + + + org.apache.maven.plugins @@ -128,6 +166,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 ${skip.perf.benchmarks} test @@ -164,4 +203,5 @@ + diff --git a/java/pom.xml b/java/pom.xml index f3639858d7818..16564ae828b0f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 31 + 18 org.apache.arrow @@ -85,7 +85,7 @@ 33.0.0-jre 4.1.108.Final 1.63.0 - 3.25.1 + 3.23.1 2.17.0 3.4.0 23.5.26 @@ -95,28 +95,10 @@ true 9+181-r4173-1 2.24.0 + 3.12.1 5.11.0 5.2.0 3.42.0 - none - -Xdoclint:none - - 1.8 - 1.8 - 3.11.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - - 3.2.2 - 3.6.3 - 3.5.0 @@ -286,16 +268,40 @@ 8.3.0 test + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + 3.1.2 + + + org.apache.rat + apache-rat-plugin + 0.16.1 + + + org.apache.maven.plugins + maven-resources-plugin + 3.3.1 + org.apache.maven.plugins maven-compiler-plugin + ${maven-compiler-plugin.version} - true **/module-info.java **/module-info.java false @@ -308,8 +314,18 @@ + + maven-enforcer-plugin + 3.4.1 + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + maven-surefire-plugin + 3.2.5 true true @@ -324,9 +340,22 @@ 1048576 + + + org.junit.jupiter + junit-jupiter-engine + ${dep.junit.jupiter.version} + + + org.apache.maven.surefire + surefire-junit-platform + 3.2.5 + + maven-failsafe-plugin + 3.2.5 ${project.build.directory} @@ -415,22 +444,6 @@ - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - @@ -438,7 +451,9 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 + 8 **/module-info.java @@ -449,6 +464,16 @@ module-info-compiler-maven-plugin ${project.version} + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.12.1 + com.gradle gradle-enterprise-maven-extension @@ -496,36 +521,6 @@ spotless-maven-plugin 2.30.0 - - org.codehaus.mojo - build-helper-maven-plugin - 1.9.1 - - - org.codehaus.mojo - properties-maven-plugin - 1.2.1 - - - org.codehaus.mojo - exec-maven-plugin - 3.2.0 - - - pl.project13.maven - git-commit-id-plugin - 4.0.5 - - - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 - - - org.apache.drill.tools - drill-fmpp-maven-plugin - 1.21.1 - @@ -599,6 +594,11 @@ **/logback.xml + true + + true + true + org.apache.arrow ${username} @@ -618,17 +618,42 @@ + + org.apache.maven.plugins + maven-resources-plugin + + UTF-8 + + org.apache.maven.plugins maven-compiler-plugin + 1.8 + 1.8 2048m + false true maven-enforcer-plugin + + validate_java_and_maven_version + + enforce + + verify + false + + + + [3.3.0,4) + + + + avoid_bad_dependencies @@ -654,6 +679,8 @@ pl.project13.maven git-commit-id-plugin + 4.0.5 + dd.MM.yyyy '@' HH:mm:ss z false @@ -695,6 +722,7 @@ org.apache.maven.plugins maven-checkstyle-plugin + 3.1.0 **/module-info.java 
dev/checkstyle/checkstyle.xml @@ -758,6 +786,7 @@ org.cyclonedx cyclonedx-maven-plugin + 2.7.11 @@ -788,10 +817,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 com.diffplug.spotless @@ -826,6 +857,7 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 **/module-info.java @@ -853,15 +885,28 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 + + java-nodoclint + + [1.8,) + + + none + -Xdoclint:none + + + arrow-c-data @@ -909,6 +954,7 @@ org.apache.maven.plugins maven-compiler-plugin + true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -941,6 +987,9 @@ org.apache.maven.plugins maven-compiler-plugin + 8 + 8 + UTF-8 -XDcompilePolicy=simple -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.* @@ -964,16 +1013,6 @@ - - - - - jdk11+ - - [11,] - - - org.apache.maven.plugins maven-surefire-plugin @@ -981,13 +1020,6 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - org.apache.maven.plugins - maven-failsafe-plugin - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - @@ -1028,6 +1060,7 @@ org.jacoco jacoco-maven-plugin + 0.8.11 @@ -1073,6 +1106,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 cdata-cmake @@ -1129,6 +1163,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 jni-cpp-cmake @@ -1235,6 +1270,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 jni-cpp-cmake @@ -1324,4 +1360,5 @@ + diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 58b790c9f027f..b1507cd301f31 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -54,11 +54,6 @@ 1.3.14 test - com.fasterxml.jackson.core jackson-core @@ -90,6 +85,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies @@ -105,21 +101,7 @@ - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - verify - - - com.fasterxml.jackson.core:* - - - - - + diff --git a/java/vector/pom.xml b/java/vector/pom.xml index ca932ae6f26f9..07af93a499907 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -76,7 +76,64 @@ + + + + true + + + false + + apache + apache + https://repo.maven.apache.org/maven2/ + + + + + + + codegen + + ${basedir}/src/main/codegen + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.drill.tools + drill-fmpp-maven-plugin + [1.0,) + + generate + + + + + false + true + + + + + + + + + + org.apache.maven.plugins @@ -106,10 +163,33 @@ + + maven-resources-plugin + + + + copy-fmpp-resources + + copy-resources + + initialize + + ${project.build.directory}/codegen + + + src/main/codegen + false + + + + + + org.apache.drill.tools drill-fmpp-maven-plugin + 1.21.1 generate-fmpp @@ -120,7 +200,7 @@ src/main/codegen/config.fmpp ${project.build.directory}/generated-sources - src/main/codegen/templates + ${project.build.directory}/codegen/templates @@ -128,6 +208,13 @@ org.apache.maven.plugins maven-shade-plugin + + 3.2.4 @@ -156,6 +243,7 @@ + @@ -188,4 +276,5 @@ + From 1f07404dac920bf81f852f834622f2fc30f8dcfc Mon Sep 17 00:00:00 2001 From: mwish Date: Tue, 21 May 2024 18:38:17 +0800 Subject: [PATCH 137/261] GH-41321: [C++][Parquet] More strict Parquet level checking (#41346) ### Rationale for this change In https://github.com/apache/arrow/issues/41321 , user reports a corrupt when reading from a corrupt parquet file. This is because we lost some checking. Current code works on reading a normal parquet file. But when reading a corrupt file, this need to be more strict. 
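To make the failure mode concrete, here is a minimal, hypothetical reader-side sketch (not part of this patch; the file path, column type, and batch size are placeholders, and the first column is assumed to be INT32) showing where the stricter check surfaces:

```cpp
#include <memory>
#include <vector>

#include "parquet/api/reader.h"

// Illustrative scan of the first column of a possibly-corrupt file.
// "corrupt.parquet" is a placeholder path.
void ScanFirstColumn() {
  std::unique_ptr<parquet::ParquetFileReader> reader =
      parquet::ParquetFileReader::OpenFile("corrupt.parquet");
  std::shared_ptr<parquet::ColumnReader> column = reader->RowGroup(0)->Column(0);
  auto* typed = static_cast<parquet::Int32Reader*>(column.get());

  std::vector<int16_t> def_levels(1024);
  std::vector<int16_t> rep_levels(1024);
  std::vector<int32_t> values(1024);
  int64_t values_read = 0;
  while (typed->HasNext()) {
    // With this patch, a data page whose decoded def/rep level count disagrees
    // with num_values in the page header throws ParquetException with
    // "Number of decoded rep / def levels do not match num_values in page header"
    // instead of silently decoding garbage values.
    typed->ReadBatch(1024, def_levels.data(), rep_levels.data(), values.data(),
                     &values_read);
  }
}
```

On a well-formed file the loop behaves exactly as before; the new checks only reject pages whose level counts are inconsistent with the page header.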
**Currently this patch only strengthens the checks on Parquet levels; the corresponding value checks will be added in later patches.** ### What changes are included in this PR? Stricter Parquet checks on levels. ### Are these changes tested? Covered by existing tests; we could also add a corrupt Parquet file as a test fixture. ### Are there any user-facing changes? Stricter checks. * GitHub Issue: #41321 Lead-authored-by: mwish Co-authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/column_reader.cc | 109 ++++++++++++++++---------- cpp/src/parquet/column_reader.h | 2 +- cpp/src/parquet/column_reader_test.cc | 76 +++++++++++++++++- 3 files changed, 143 insertions(+), 44 deletions(-) diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index a4794c564733a..cfd2fea3746f4 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -101,6 +101,10 @@ inline void CheckNumberDecoded(int64_t number_decoded, int64_t expected) { std::to_string(expected)); } } + +constexpr std::string_view kErrorRepDefLevelNotMatchesNumValues = + "Number of decoded rep / def levels do not match num_values in page header"; + } // namespace LevelDecoder::LevelDecoder() : num_values_remaining_(0) {} @@ -907,6 +911,8 @@ class ColumnReaderImplBase { static_cast(data_size)); } + // Available values in the current data page, value includes repeated values + // and nulls. int64_t available_values_current_page() const { return num_buffered_values_ - num_decoded_values_; } @@ -933,7 +939,7 @@ class ColumnReaderImplBase { int64_t num_buffered_values_; // The number of values from the current data page that have been decoded - // into memory + // into memory or skipped over. int64_t num_decoded_values_; ::arrow::MemoryPool* pool_; @@ -1026,28 +1032,36 @@ class TypedColumnReaderImpl : public TypedColumnReader, // Read definition and repetition levels. Also return the number of definition levels // and number of values to read. This function is called before reading values. + // + // ReadLevels will throw exception when any num-levels read is not equal to the number + // of the levels can be read. void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - int64_t* num_def_levels, int64_t* values_to_read) { - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t* num_def_levels, int64_t* non_null_values_to_read) { + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0 && def_levels != nullptr) { *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(*num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // TODO(wesm): this tallying of values-to-decode can be performed with better // cache-efficiency if fused with the level decoding.
- *values_to_read += + *non_null_values_to_read += std::count(def_levels, def_levels + *num_def_levels, this->max_def_level_); } else { // Required field, read all values - *values_to_read = batch_size; + if (num_def_levels != nullptr) { + *num_def_levels = 0; + } + *non_null_values_to_read = batch_size; } // Not present for non-repeated fields if (this->max_rep_level_ > 0 && rep_levels != nullptr) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (def_levels != nullptr && *num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (batch_size != num_rep_levels) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } } @@ -1090,8 +1104,7 @@ int64_t TypedColumnReaderImpl::ReadBatchWithDictionary( *indices_read = ReadDictionaryIndices(indices_to_read, indices); int64_t total_indices = std::max(num_def_levels, *indices_read); // Some callers use a batch size of 0 just to get the dictionary. - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_indices == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; @@ -1106,7 +1119,8 @@ template int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *values_read = 0; return 0; @@ -1115,20 +1129,31 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished int64_t num_def_levels = 0; - int64_t values_to_read = 0; - ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read); - - *values_read = this->ReadValues(values_to_read, values); + // Number of non-null values to read within `num_def_levels`. + int64_t non_null_values_to_read = 0; + ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, + &non_null_values_to_read); + // Should not return more values than available in the current data page, + // since currently, ReadLevels would only consume level from current + // data page. + if (ARROW_PREDICT_FALSE(num_def_levels > this->available_values_current_page())) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (non_null_values_to_read != 0) { + *values_read = this->ReadValues(non_null_values_to_read, values); + } else { + *values_read = 0; + } + // Adjust total_values, since if max_def_level_ == 0, num_def_levels would + // be 0 and `values_read` would adjust to `available_values_current_page()`. 
int64_t total_values = std::max(num_def_levels, *values_read); - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_values == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; ParquetException::EofException(ss.str()); } this->ConsumeBufferedValues(total_values); - return total_values; } @@ -1137,7 +1162,8 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count_out) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *levels_read = 0; *values_read = 0; @@ -1145,21 +1171,24 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( return 0; } + // Number of non-null values to read int64_t total_values; // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0) { int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // Not present for non-repeated fields if (this->max_rep_level_ > 0) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (ARROW_PREDICT_FALSE(num_def_levels != num_rep_levels)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } @@ -1401,26 +1430,21 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - // Not present for non-repeated fields - int64_t levels_read = 0; + if (ARROW_PREDICT_FALSE(this->ReadDefinitionLevels(batch_size, def_levels) != + batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } if (this->max_rep_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + int64_t rep_levels_read = this->ReadRepetitionLevels(batch_size, rep_levels); + if (ARROW_PREDICT_FALSE(rep_levels_read != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - } else if (this->max_def_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); } - // Exhausted column chunk - if (levels_read == 0) { - break; - } - - levels_written_ += levels_read; + levels_written_ += batch_size; records_read += ReadRecordData(num_records - records_read); } else { - // No repetition or definition levels + // No repetition and definition levels, we can read values directly batch_size = std::min(num_records - records_read, batch_size); records_read += ReadRecordData(batch_size); } @@ -1574,13 +1598,14 @@ class 
TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - int64_t levels_read = 0; - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (this->ReadDefinitionLevels(batch_size, def_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (this->ReadRepetitionLevels(batch_size, rep_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - levels_written_ += levels_read; + levels_written_ += batch_size; int64_t remaining_records = num_records - skipped_records; // This updates at_record_start_. skipped_records += DelimitAndSkipRecordsInBuffer(remaining_records); diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 086f6c0e55806..29e1b2a25e437 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -197,7 +197,7 @@ class PARQUET_EXPORT ColumnReader { template class TypedColumnReader : public ColumnReader { public: - typedef typename DType::c_type T; + using T = typename DType::c_type; // Read a batch of repetition levels, definition levels, and values from the // column. diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index a48573966a905..9096f195687fb 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -415,7 +415,7 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { &descr, values, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, /*rep_levels=*/{}, - /*max_rep_level=*/0); + /*max_rep_level=*/max_rep_level_); pages_.push_back(data_page); InitReader(&descr); auto reader = static_cast(reader_.get()); @@ -431,6 +431,80 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { ParquetException); } +// GH-41321: When max_def_level > 0 or max_rep_level > 0, and +// Page has more or less levels than the `num_values` in +// PageHeader. We should detect and throw exception. 
+TEST_F(TestPrimitiveReader, DefRepLevelNotExpected) { + auto do_check = [&](const NodePtr& type, const std::vector& input_def_levels, + const std::vector& input_rep_levels, int num_values) { + std::vector values(num_values, false); + const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); + + // The data page falls back to plain encoding + std::shared_ptr dummy = AllocateBuffer(); + std::shared_ptr data_page = MakeDataPage( + &descr, values, /*num_values=*/num_values, Encoding::PLAIN, /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, + /*rep_levels=*/input_rep_levels, + /*max_rep_level=*/max_rep_level_); + pages_.push_back(data_page); + InitReader(&descr); + auto reader = static_cast(reader_.get()); + ASSERT_TRUE(reader->HasNext()); + + constexpr int batch_size = 10; + std::vector def_levels(batch_size, 0); + std::vector rep_levels(batch_size, 0); + bool values_out[batch_size]; + int64_t values_read; + EXPECT_THROW_THAT( + [&]() { + reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(), values_out, + &values_read); + }, + ParquetException, + ::testing::Property(&ParquetException::what, + ::testing::HasSubstr("Number of decoded rep / def levels do " + "not match num_values in page header"))); + }; + // storing def-levels less than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(1, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/3); + } + // storing def-levels more than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(2, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } + // storing rep-levels less than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(3, 0); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/4); + } + // storing rep-levels more than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(2, 1); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } +} + // Repetition level byte length reported in Page but Max Repetition level // is zero for the column. TEST_F(TestPrimitiveReader, TestRepetitionLvlBytesWithMaxRepetitionZero) { From e254c43c095bd6e33d07129257e11760f885f299 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Tue, 21 May 2024 22:19:58 +0900 Subject: [PATCH 138/261] GH-41389: [Python] Expose byte_width and bit_width of ExtensionType in terms of the storage type (#41413) ### Rationale for this change This update aligns the Python API with Arrow C++ by exposing the actual byte and bit widths of extension types from their storage type. ### What changes are included in this PR? - Expose byte_width and bit_width properties for ExtensionType in Python, reflecting the underlying storage type. - Add unit tests to verify these properties ### Are these changes tested? Yes ### Are there any user-facing changes? 
Yes * GitHub Issue: #41389 Lead-authored-by: Hyunseok Seo Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/includes/libarrow.pxd | 2 ++ python/pyarrow/tests/test_extension_type.py | 30 +++++++++++++++++++-- python/pyarrow/types.pxi | 18 +++++++++++++ 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index f461513e8b3cf..8bfc31edc747d 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2801,6 +2801,8 @@ cdef extern from "arrow/extension_type.h" namespace "arrow": cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType): c_string extension_name() shared_ptr[CDataType] storage_type() + int byte_width() + int bit_width() @staticmethod shared_ptr[CArray] WrapArray(shared_ptr[CDataType] ext_type, diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index fe38bf651baae..9863d96058947 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -251,14 +251,14 @@ def test_ext_type_repr(): assert repr(ty) == "IntegerType(DataType(int64))" -def test_ext_type__lifetime(): +def test_ext_type_lifetime(): ty = UuidType() wr = weakref.ref(ty) del ty assert wr() is None -def test_ext_type__storage_type(): +def test_ext_type_storage_type(): ty = UuidType() assert ty.storage_type == pa.binary(16) assert ty.__class__ is UuidType @@ -267,6 +267,32 @@ def test_ext_type__storage_type(): assert ty.__class__ is ParamExtType +def test_ext_type_byte_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.byte_width == 16 + ty = ParamExtType(5) + assert ty.byte_width == 5 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.byte_width + + +def test_ext_type_bit_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.bit_width == 128 + ty = ParamExtType(5) + assert ty.bit_width == 40 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.bit_width + + def test_ext_type_as_py(): ty = UuidType() expected = uuid4() diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 480f19c81dfb9..5113df36557f4 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1519,6 +1519,24 @@ cdef class BaseExtensionType(DataType): """ return pyarrow_wrap_data_type(self.ext_type.storage_type()) + @property + def byte_width(self): + """ + The byte width of the extension type. + """ + if self.ext_type.byte_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.byte_width() + + @property + def bit_width(self): + """ + The bit width of the extension type. + """ + if self.ext_type.bit_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.bit_width() + def wrap_array(self, storage): """ Wrap the given storage array as an extension array. 
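A short usage sketch of the new properties (illustrative only, not part of the patch; it mirrors the fixed-width `UuidType` used in the tests above, with "example.uuid" as a placeholder extension name):

```python
import pyarrow as pa


class UuidType(pa.ExtensionType):
    """Toy extension type with fixed-width storage, as in the tests above."""

    def __init__(self):
        super().__init__(pa.binary(16), "example.uuid")

    def __arrow_ext_serialize__(self):
        return b""

    @classmethod
    def __arrow_ext_deserialize__(cls, storage_type, serialized):
        return cls()


ty = UuidType()
assert ty.byte_width == 16   # width of the fixed_size_binary(16) storage
assert ty.bit_width == 128
```

For extension types with variable-width storage (like the `utf8`-backed `LabelType` in the tests), both properties raise `ValueError("Non-fixed width type")`.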
From 34f042762061f4e302e133c2d378ea444505049e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 21 May 2024 11:25:44 -0400 Subject: [PATCH 139/261] MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.21.1 to 2.22.0 in /go (#41743) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/hamba/avro/v2](https://github.com/hamba/avro) from 2.21.1 to 2.22.0.

Release notes

Sourced from github.com/hamba/avro/v2's releases.

v2.22.0

What's Changed

New Contributors

Full Changelog: https://github.com/hamba/avro/compare/v2.21.1...v2.22.0

Commits
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/hamba/avro/v2&package-manager=go_modules&previous-version=2.21.1&new-version=2.22.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Matt Topol --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 8fdfea3dbe5eb..e846c61033f47 100644 --- a/go/go.mod +++ b/go/go.mod @@ -47,7 +47,7 @@ require ( require ( github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.21.1 + github.com/hamba/avro/v2 v2.22.0 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 ) diff --git a/go/go.sum b/go/go.sum index c2db1a72ccf2d..6bceb4e5877ca 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.21.1 h1:400/jTdLWQ3ib58y83VXlTJKijRouYQszY1SO0cMGt4= -github.com/hamba/avro/v2 v2.21.1/go.mod h1:ouJ4PkiAEP49u0lAtQyd5Gv04MehKj+7lXwD3zpLpY0= +github.com/hamba/avro/v2 v2.22.0 h1:IaBMFv5xmjo38f0oaP9jZiJFXg+lmHPPg7d9YotMnPg= +github.com/hamba/avro/v2 v2.22.0/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= From 28ab4afef423613c20cbe4171c29f9dad258b136 Mon Sep 17 00:00:00 2001 From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com> Date: Tue, 21 May 2024 11:56:25 -0400 Subject: [PATCH 140/261] GH-41035: [C++] Add a grouper benchmark for preventing performance regression (#41036) ### Rationale for this change Add a grouper benchmark for preventing performance regression . https://github.com/apache/arrow/pull/40998#issuecomment-2039204161. ### What changes are included in this PR? Added a benchmark. ### Are these changes tested? Needn't. ### Are there any user-facing changes? No * GitHub Issue: #41035 Authored-by: ZhangHuiGui Signed-off-by: Antoine Pitrou --- cpp/src/arrow/compute/row/CMakeLists.txt | 2 + .../arrow/compute/row/grouper_benchmark.cc | 156 ++++++++++++++++++ 2 files changed, 158 insertions(+) create mode 100644 cpp/src/arrow/compute/row/grouper_benchmark.cc diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index 6ae982dbaf3a7..ef03c767f974e 100644 --- a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -19,3 +19,5 @@ # in a row-major order. arrow_install_all_headers("arrow/compute/row") + +add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/row/grouper_benchmark.cc b/cpp/src/arrow/compute/row/grouper_benchmark.cc new file mode 100644 index 0000000000000..1e1a16d579009 --- /dev/null +++ b/cpp/src/arrow/compute/row/grouper_benchmark.cc @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/util/key_value_metadata.h" +#include "arrow/util/string.h" + +#include "arrow/compute/row/grouper.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/util/benchmark_util.h" + +namespace arrow { +namespace compute { + +constexpr auto kSeed = 0x0ff1ce; +constexpr int64_t kRound = 16; +constexpr double true_and_unique_probability = 0.2; + +static ExecBatch MakeRandomExecBatch(const DataTypeVector& types, int64_t num_rows, + double null_probability, + int64_t alignment = kDefaultBufferAlignment, + MemoryPool* memory_pool = nullptr) { + random::RandomArrayGenerator rng(kSeed); + auto num_types = static_cast(types.size()); + + // clang-format off + // For unique probability: + // The proportion of Unique determines the number of groups. + // 1. In most scenarios, unique has a small proportion and exists + // 2. In GroupBy/HashJoin are sometimes used for deduplication and + // in that use case the key is mostly unique + auto metadata = key_value_metadata( + { + "null_probability", + "true_probability", // for boolean type + "unique" // for string type + }, + { + internal::ToChars(null_probability), + internal::ToChars(true_and_unique_probability), + internal::ToChars(static_cast(num_rows * + true_and_unique_probability)) + }); + // clang-format on + + std::vector values; + values.resize(num_types); + for (int i = 0; i < num_types; ++i) { + auto field = ::arrow::field("", types[i], metadata); + values[i] = rng.ArrayOf(*field, num_rows, alignment, memory_pool); + } + + return ExecBatch(std::move(values), num_rows); +} + +static void GrouperBenchmark(benchmark::State& state, const ExecSpan& span, + ExecContext* ctx = nullptr) { + uint32_t num_groups = 0; + for (auto _ : state) { + ASSIGN_OR_ABORT(auto grouper, Grouper::Make(span.GetTypes(), ctx)); + for (int i = 0; i < kRound; ++i) { + ASSIGN_OR_ABORT(auto group_ids, grouper->Consume(span)); + } + num_groups = grouper->num_groups(); + } + + state.SetItemsProcessed(state.iterations() * kRound * span.length); + state.counters["num_groups"] = num_groups; + state.counters["uniqueness"] = static_cast(num_groups) / (kRound * span.length); +} + +static void GrouperWithMultiTypes(benchmark::State& state, const DataTypeVector& types) { + auto ctx = default_exec_context(); + + RegressionArgs args(state, false); + const int64_t num_rows = args.size; + const double null_proportion = args.null_proportion; + + auto exec_batch = MakeRandomExecBatch(types, num_rows, null_proportion, + kDefaultBufferAlignment, ctx->memory_pool()); + ExecSpan exec_span(exec_batch); + ASSIGN_OR_ABORT(auto grouper, Grouper::Make(exec_span.GetTypes(), ctx)); + GrouperBenchmark(state, exec_span, ctx); +} + +void SetArgs(benchmark::internal::Benchmark* bench) { + BenchmarkSetArgsWithSizes(bench, {1 << 10, 1 << 12}); +} + +// This benchmark is mainly to ensure that the construction of our underlying +// RowTable and the performance of the comparison operations in the lower-level +// compare_internal can be tracked (we have not systematically tested these +// underlying operations before). 
+// +// It mainly covers: +// 1. Basics types, including the impact of null ratio on performance (comparison +// operations will compare null values separately.) +// +// 2. Combination types which will break the CPU-pipeline in column comparision. +// Examples: https://github.com/apache/arrow/pull/41036#issuecomment-2048721547 +// +// 3. Combination types requiring column resorted. These combinations are +// essentially to test the impact of RowTableEncoder's sorting function on +// input columns on the performance of CompareColumnsToRows +// Examples: https://github.com/apache/arrow/pull/40998#issuecomment-2039204161 + +// basic types +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean}", {boolean()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32}", {int32()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64}", {int64()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{utf8}", {utf8()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{fixed_size_binary(32)}", + {fixed_size_binary(32)}) + ->Apply(SetArgs); + +// combination types +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, utf8}", {boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int32}", {int32(), int32()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64, int32}", {int64(), int32()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, int64, utf8}", + {boolean(), int64(), utf8()}) + ->Apply(SetArgs); + +// combination types requiring column resorted +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, boolean, utf8}", + {int32(), boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int64, boolean, utf8}", + {int32(), int64(), boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, + "{utf8, int32, int64, fixed_size_binary(32), boolean}", + {utf8(), int32(), int64(), fixed_size_binary(32), boolean()}) + ->Apply(SetArgs); + +} // namespace compute +} // namespace arrow From f0678ec6031225d6ac939c68295813de8a628b28 Mon Sep 17 00:00:00 2001 From: Jacob Wujciak-Jens Date: Tue, 21 May 2024 20:26:27 +0100 Subject: [PATCH 141/261] GH-41426: [R][CI] Install CRAN style openssl on gh runners. (#41629) ### Rationale for this change See issue. ### What changes are included in this PR? Enforce usage of binary and install of cran openssl version on intel and arm macos. ### Are these changes tested? Crossbow * GitHub Issue: #41426 Authored-by: Jacob Wujciak-Jens Signed-off-by: Jacob Wujciak-Jens --- dev/tasks/r/github.packages.yml | 18 ++++++++++++++---- r/inst/build_arrow_static.sh | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 41d8b230f8bf4..0539eae6cc9d9 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -227,11 +227,20 @@ jobs: working-directory: 'arrow' extra-packages: cpp11 - name: Set CRAN like openssl - if: contains(matrix.platform.name, 'arm64') + if: contains(matrix.platform.name, 'macOS') + # The -E forwards the GITHUB_* envvars + shell: sudo -E Rscript {0} run: | - # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the - # default setting of the brew --prefix as root dir to avoid version conflicts. 
- echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV + # get the mac-recipes version of openssl from CRAN + source("https://mac.R-project.org/bin/install.R") + install.libs("openssl") + + # override our cmakes default setting of the brew --prefix as root dir to avoid version conflicts. + if (Sys.info()[["machine"]] == "arm64"){ + cat("OPENSSL_ROOT_DIR=/opt/R/arm64\n", file=Sys.getenv("GITHUB_ENV"), append = TRUE) + } else { + cat("OPENSSL_ROOT_DIR=/opt/R/x86_64\n", file=Sys.getenv("GITHUB_ENV"), append = TRUE) + } - name: Build Binary id: build shell: Rscript {0} @@ -239,6 +248,7 @@ jobs: NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated + LIBARROW_BUILD: "false" ARROW_R_ENFORCE_CHECKSUM: "true" ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" run: | diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index d28cbcb08fbec..825a230e78e5e 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -99,6 +99,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} \ -Dre2_SOURCE=${re2_SOURCE:-BUNDLED} \ -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ -Dzstd_SOURCE=${zstd_SOURCE:-} \ From 8169d6e719453acd0e7ca1b6f784d800cca4f113 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 21 May 2024 15:40:16 -0400 Subject: [PATCH 142/261] GH-40078: [C++] Import/Export ArrowDeviceArrayStream (#40807) ### Rationale for this change The original PRs for adding support for importing and exporting the new C Device interface (#36488 / #36489) only added support for the Arrays themselves, not for the stream structure. We should support both. ### What changes are included in this PR? Adding parallel functions for Import/Export of streams that accept `ArrowDeviceArrayStream`. ### Are these changes tested? Test writing in progress, wanted to get this up for review while I write tests. ### Are there any user-facing changes? No, only new functions have been added. * GitHub Issue: #40078 Lead-authored-by: Matt Topol Co-authored-by: Felipe Oliveira Carvalho Co-authored-by: Benjamin Kietzman Co-authored-by: Antoine Pitrou Signed-off-by: Matt Topol --- cpp/src/arrow/array/array_base.h | 8 + cpp/src/arrow/array/array_test.cc | 5 + cpp/src/arrow/array/data.cc | 36 +++ cpp/src/arrow/array/data.h | 21 ++ cpp/src/arrow/array/util.cc | 2 +- cpp/src/arrow/c/bridge.cc | 278 ++++++++++++---- cpp/src/arrow/c/bridge.h | 61 ++++ cpp/src/arrow/c/bridge_test.cc | 516 ++++++++++++++++++++++++++++++ cpp/src/arrow/c/helpers.h | 49 +++ cpp/src/arrow/c/util_internal.h | 22 ++ cpp/src/arrow/record_batch.cc | 107 +++++-- cpp/src/arrow/record_batch.h | 43 ++- python/pyarrow/tests/test_cffi.py | 2 +- 13 files changed, 1051 insertions(+), 99 deletions(-) diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 6411aebf80442..716ae0722069e 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -224,6 +224,14 @@ class ARROW_EXPORT Array { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this array's data is allocated on + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. 
+ /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return data_->device_type(); } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 7e25ad61fa2ea..32806d9d2edb3 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_EQ(array->type(), type); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); if (is_union(type->id())) { ASSERT_EQ(array->null_count(), 0); ASSERT_EQ(array->ComputeLogicalNullCount(), length); @@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); // test case for ARROW-13321 for (int64_t i : {int64_t{0}, length / 2, length - 1}) { @@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) { auto sliced = array->Slice(1, 4); ASSERT_EQ(sliced->length(), 4); ASSERT_EQ(sliced->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); ARROW_EXPECT_OK(sliced->ValidateFull()); } } @@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 4); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); for (int i = 0; i < 4; i++) { ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i)); @@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) { ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type)); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 0); + CheckSpanRoundTrip(*array); } } diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index ac828a9c35c67..76a43521394c1 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -224,6 +224,42 @@ int64_t ArrayData::ComputeLogicalNullCount() const { return ArraySpan(*this).ComputeLogicalNullCount(); } +DeviceAllocationType ArrayData::device_type() const { + // we're using 0 as a sentinel value for NOT YET ASSIGNED + // there is explicitly no constant DeviceAllocationType to represent + // the "UNASSIGNED" case as it is invalid for data to not have an + // assigned device type. If it's still 0 at the end, then we return + // CPU as the allocation device type + int type = 0; + for (const auto& buf : buffers) { + if (!buf) continue; + if (type == 0) { + type = static_cast(buf->device_type()); + } else { + DCHECK_EQ(type, static_cast(buf->device_type())); + } + } + + for (const auto& child : child_data) { + if (!child) continue; + if (type == 0) { + type = static_cast(child->device_type()); + } else { + DCHECK_EQ(type, static_cast(child->device_type())); + } + } + + if (dictionary) { + if (type == 0) { + type = static_cast(dictionary->device_type()); + } else { + DCHECK_EQ(type, static_cast(dictionary->device_type())); + } + } + + return type == 0 ? 
DeviceAllocationType::kCPU : static_cast(type); +} + // ---------------------------------------------------------------------- // Methods for ArraySpan diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index beec29789ad1e..0c49f36229a40 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -101,6 +101,11 @@ struct ARROW_EXPORT ArrayData { int64_t null_count = kUnknownNullCount, int64_t offset = 0) : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers are on the same device + ARROW_UNUSED(this->device_type()); +#endif } ArrayData(std::shared_ptr type, int64_t length, @@ -110,6 +115,12 @@ struct ARROW_EXPORT ArrayData { : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); this->child_data = std::move(child_data); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers (including children) + // are on the same device + ARROW_UNUSED(this->device_type()); +#endif } static std::shared_ptr Make(std::shared_ptr type, int64_t length, @@ -358,6 +369,16 @@ struct ARROW_EXPORT ArrayData { /// \see GetNullCount int64_t ComputeLogicalNullCount() const; + /// \brief Returns the device_type of the underlying buffers and children + /// + /// If there are no buffers in this ArrayData object, it just returns + /// DeviceAllocationType::kCPU as a default. We also assume that all buffers + /// should be allocated on the same device type and perform DCHECKs to confirm + /// this in debug mode. + /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const; + std::shared_ptr type; int64_t length = 0; mutable std::atomic null_count{0}; diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index bdba92c9a11fb..41cd6a1c0b260 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -548,7 +548,7 @@ class NullArrayFactory { } Status Visit(const StructType& type) { - for (int i = 0; i < type_->num_fields(); ++i) { + for (int i = 0; i < type.num_fields(); ++i) { ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, length_)); } return Status::OK(); diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 8a530b3798d41..8c5e3637b6e86 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1448,6 +1448,7 @@ namespace { // The ArrowArray is released on destruction. struct ImportedArrayData { struct ArrowArray array_; + DeviceAllocationType device_type_; std::shared_ptr device_sync_; ImportedArrayData() { @@ -1514,6 +1515,7 @@ struct ArrayImporter { recursion_level_ = 0; import_ = std::make_shared(); c_struct_ = &import_->array_; + import_->device_type_ = device_type_; ArrowArrayMove(src, c_struct_); return DoImport(); } @@ -1541,7 +1543,8 @@ struct ArrayImporter { "cannot be imported as RecordBatch"); } return RecordBatch::Make(std::move(schema), data_->length, - std::move(data_->child_data)); + std::move(data_->child_data), import_->device_type_, + import_->device_sync_); } Status ImportChild(const ArrayImporter* parent, struct ArrowArray* src) { @@ -2041,6 +2044,23 @@ Status ExportStreamNext(const std::shared_ptr& src, int64_t i } } +// the int64_t i input here is unused, but exists simply to allow utilizing the +// overload of this with the version for ChunkedArrays. 
If we removed the int64_t
+// from the signature despite it being unused, we wouldn't be able to leverage the
+// overloading in the templated exporters.
+Status ExportStreamNext(const std::shared_ptr<RecordBatchReader>& src, int64_t i,
+ struct ArrowDeviceArray* out_array) {
+ std::shared_ptr<RecordBatch> batch;
+ RETURN_NOT_OK(src->ReadNext(&batch));
+ if (batch == nullptr) {
+ // End of stream
+ ArrowArrayMarkReleased(&out_array->array);
+ return Status::OK();
+ } else {
+ return ExportDeviceRecordBatch(*batch, batch->GetSyncEvent(), out_array);
+ }
+}
+
 Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
 struct ArrowArray* out_array) {
 if (i >= src->num_chunks()) {
@@ -2052,8 +2072,27 @@ Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
 }
 }
-template <typename T>
+Status ExportStreamNext(const std::shared_ptr<ChunkedArray>& src, int64_t i,
+ struct ArrowDeviceArray* out_array) {
+ if (i >= src->num_chunks()) {
+ // End of stream
+ ArrowArrayMarkReleased(&out_array->array);
+ return Status::OK();
+ } else {
+ return ExportDeviceArray(*src->chunk(static_cast<int>(i)), nullptr, out_array);
+ }
+}
+
+template <typename T, bool IsDevice>
 class ExportedArrayStream {
+ using StreamTraits =
+ std::conditional_t<IsDevice, internal::ArrayDeviceStreamExportTraits,
+ internal::ArrayStreamExportTraits>;
+ using StreamType = typename StreamTraits::CType;
+ using ArrayTraits = std::conditional_t<IsDevice, internal::ArrayDeviceExportTraits,
+ internal::ArrayExportTraits>;
+ using ArrayType = typename ArrayTraits::CType;
+
 public:
 struct PrivateData {
 explicit PrivateData(std::shared_ptr<T> reader)
@@ -2067,13 +2106,13 @@ class ExportedArrayStream {
 ARROW_DISALLOW_COPY_AND_ASSIGN(PrivateData);
 };
- explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {}
+ explicit ExportedArrayStream(StreamType* stream) : stream_(stream) {}
 Status GetSchema(struct ArrowSchema* out_schema) {
 return ExportStreamSchema(reader(), out_schema);
 }
- Status GetNext(struct ArrowArray* out_array) {
+ Status GetNext(ArrayType* out_array) {
 return ExportStreamNext(reader(), next_batch_num(), out_array);
 }
@@ -2083,38 +2122,35 @@ class ExportedArrayStream {
 }
 void Release() {
- if (ArrowArrayStreamIsReleased(stream_)) {
+ if (StreamTraits::IsReleasedFunc(stream_)) {
 return;
 }
+ DCHECK_NE(private_data(), nullptr);
 delete private_data();
- ArrowArrayStreamMarkReleased(stream_);
+ StreamTraits::MarkReleased(stream_);
 }
 // C-compatible callbacks
- static int StaticGetSchema(struct ArrowArrayStream* stream,
- struct ArrowSchema* out_schema) {
+ static int StaticGetSchema(StreamType* stream, struct ArrowSchema* out_schema) {
 ExportedArrayStream self{stream};
 return self.ToCError(self.GetSchema(out_schema));
 }
- static int StaticGetNext(struct ArrowArrayStream* stream,
- struct ArrowArray* out_array) {
+ static int StaticGetNext(StreamType* stream, ArrayType* out_array) {
 ExportedArrayStream self{stream};
 return self.ToCError(self.GetNext(out_array));
 }
- static void StaticRelease(struct ArrowArrayStream* stream) {
- ExportedArrayStream{stream}.Release();
- }
+ static void StaticRelease(StreamType* stream) { ExportedArrayStream{stream}.Release(); }
- static const char* StaticGetLastError(struct ArrowArrayStream* stream) {
+ static const char* StaticGetLastError(StreamType* stream) {
 return ExportedArrayStream{stream}.GetLastError();
 }
- static Status Make(std::shared_ptr<T> reader, struct ArrowArrayStream* out) {
+ static Status Make(std::shared_ptr<T> reader, StreamType* out) {
 out->get_schema = ExportedArrayStream::StaticGetSchema;
 out->get_next = ExportedArrayStream::StaticGetNext;
 out->get_last_error = ExportedArrayStream::StaticGetLastError;
@@ -2150,19 +2186,36 @@ class ExportedArrayStream {
 int64_t next_batch_num() { return private_data()->batch_num_++; }
- struct ArrowArrayStream* stream_;
+ StreamType* stream_;
 };
 } // namespace
 Status ExportRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
 struct ArrowArrayStream* out) {
- return ExportedArrayStream<RecordBatchReader>::Make(std::move(reader), out);
+ memset(out, 0, sizeof(struct ArrowArrayStream));
+ return ExportedArrayStream<RecordBatchReader, false>::Make(std::move(reader), out);
 }
 Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
 struct ArrowArrayStream* out) {
- return ExportedArrayStream<ChunkedArray>::Make(std::move(chunked_array), out);
+ memset(out, 0, sizeof(struct ArrowArrayStream));
+ return ExportedArrayStream<ChunkedArray, false>::Make(std::move(chunked_array), out);
+}
+
+Status ExportDeviceRecordBatchReader(std::shared_ptr<RecordBatchReader> reader,
+ struct ArrowDeviceArrayStream* out) {
+ memset(out, 0, sizeof(struct ArrowDeviceArrayStream));
+ out->device_type = static_cast<ArrowDeviceType>(reader->device_type());
+ return ExportedArrayStream<RecordBatchReader, true>::Make(std::move(reader), out);
+}
+
+Status ExportDeviceChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
+ DeviceAllocationType device_type,
+ struct ArrowDeviceArrayStream* out) {
+ memset(out, 0, sizeof(struct ArrowDeviceArrayStream));
+ out->device_type = static_cast<ArrowDeviceType>(device_type);
+ return ExportedArrayStream<ChunkedArray, true>::Make(std::move(chunked_array), out);
 }
 //////////////////////////////////////////////////////////////////////////
@@ -2170,33 +2223,65 @@ Status ExportChunkedArray(std::shared_ptr<ChunkedArray> chunked_array,
 namespace {
+template <bool IsDevice>
 class ArrayStreamReader {
+ protected:
+ using StreamTraits =
+ std::conditional_t<IsDevice, internal::ArrayDeviceStreamExportTraits,
+ internal::ArrayStreamExportTraits>;
+ using StreamType = typename StreamTraits::CType;
+ using ArrayTraits = std::conditional_t<IsDevice, internal::ArrayDeviceExportTraits,
+ internal::ArrayExportTraits>;
+ using ArrayType = typename ArrayTraits::CType;
+
 public:
- explicit ArrayStreamReader(struct ArrowArrayStream* stream) {
- ArrowArrayStreamMove(stream, &stream_);
- DCHECK(!ArrowArrayStreamIsReleased(&stream_));
+ explicit ArrayStreamReader(StreamType* stream,
+ const DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper)
+ : mapper_{std::move(mapper)} {
+ StreamTraits::MoveFunc(stream, &stream_);
+ DCHECK(!StreamTraits::IsReleasedFunc(&stream_));
 }
 ~ArrayStreamReader() { ReleaseStream(); }
 void ReleaseStream() {
- if (!ArrowArrayStreamIsReleased(&stream_)) {
- ArrowArrayStreamRelease(&stream_);
- }
- DCHECK(ArrowArrayStreamIsReleased(&stream_));
+ // all our trait release funcs check IsReleased so we don't
+ // need to repeat it here
+ StreamTraits::ReleaseFunc(&stream_);
+ DCHECK(StreamTraits::IsReleasedFunc(&stream_));
 }
 protected:
- Status ReadNextArrayInternal(struct ArrowArray* array) {
- ArrowArrayMarkReleased(array);
+ Status ReadNextArrayInternal(ArrayType* array) {
+ ArrayTraits::MarkReleased(array);
 Status status = StatusFromCError(stream_.get_next(&stream_, array));
- if (!status.ok() && !ArrowArrayIsReleased(array)) {
- ArrowArrayRelease(array);
+ if (!status.ok()) {
+ ArrayTraits::ReleaseFunc(array);
 }
 return status;
 }
+ Result<std::shared_ptr<RecordBatch>> ImportRecordBatchInternal(
+ struct ArrowArray* array, std::shared_ptr<Schema> schema) {
+ return ImportRecordBatch(array, schema);
+ }
+
+ Result<std::shared_ptr<RecordBatch>> ImportRecordBatchInternal(
+ struct ArrowDeviceArray* array, std::shared_ptr<Schema> schema) {
+ return ImportDeviceRecordBatch(array, schema, mapper_);
+ }
+
+ Result<std::shared_ptr<Array>> ImportArrayInternal(
+ struct ArrowArray* array, std::shared_ptr<DataType> type) {
+ return ImportArray(array, type);
+ }
+
+ Result<std::shared_ptr<Array>> ImportArrayInternal(
+ struct ArrowDeviceArray* array, std::shared_ptr<DataType> type) {
+ return ImportDeviceArray(array, type, mapper_);
+ }
+
 Result<std::shared_ptr<Schema>> ReadSchema() {
 struct ArrowSchema c_schema = {};
 ARROW_RETURN_NOT_OK(
@@ -2214,19 +2299,19 @@ class ArrayStreamReader {
 }
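The `IsDevice` traits indirection above lets a single implementation back both the plain `ArrowArrayStream` ABI and the new `ArrowDeviceArrayStream` ABI. As a reading aid, here is a minimal consumer-side sketch of the round trip these functions enable, assuming a `reader` whose batches are CPU-resident (the default `device_type()`); it uses only APIs added in this patch plus the standard Arrow status macros:

```cpp
#include "arrow/c/bridge.h"
#include "arrow/record_batch.h"

arrow::Status RoundTripDeviceStream(std::shared_ptr<arrow::RecordBatchReader> reader) {
  struct ArrowDeviceArrayStream c_stream;
  // Export: the C struct owns the reader until its release callback runs.
  ARROW_RETURN_NOT_OK(
      arrow::ExportDeviceRecordBatchReader(std::move(reader), &c_stream));

  // Import: moves the struct's contents into a private object held by the new
  // reader; buffers are resolved via DefaultDeviceMemoryMapper (CPU-only).
  ARROW_ASSIGN_OR_RAISE(auto imported,
                        arrow::ImportDeviceRecordBatchReader(&c_stream));

  std::shared_ptr<arrow::RecordBatch> batch;
  do {
    // A null batch signals end-of-stream, matching ExportStreamNext above.
    ARROW_RETURN_NOT_OK(imported->ReadNext(&batch));
  } while (batch != nullptr);
  return arrow::Status::OK();
}
```

`ExportDeviceChunkedArray` and `ImportDeviceChunkedArray` follow the same shape for chunked data, with the device type passed explicitly on export.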
Status CheckNotReleased() { - if (ArrowArrayStreamIsReleased(&stream_)) { + if (StreamTraits::IsReleasedFunc(&stream_)) { return Status::Invalid( "Attempt to read from a stream that has already been closed"); - } else { - return Status::OK(); } + + return Status::OK(); } Status StatusFromCError(int errno_like) const { return StatusFromCError(&stream_, errno_like); } - static Status StatusFromCError(struct ArrowArrayStream* stream, int errno_like) { + static Status StatusFromCError(StreamType* stream, int errno_like) { if (ARROW_PREDICT_TRUE(errno_like == 0)) { return Status::OK(); } @@ -2250,70 +2335,102 @@ class ArrayStreamReader { return {code, last_error ? std::string(last_error) : ""}; } + DeviceAllocationType get_device_type() const { + if constexpr (IsDevice) { + return static_cast(stream_.device_type); + } else { + return DeviceAllocationType::kCPU; + } + } + private: - mutable struct ArrowArrayStream stream_; + mutable StreamType stream_; + const DeviceMemoryMapper mapper_; }; -class ArrayStreamBatchReader : public RecordBatchReader, public ArrayStreamReader { +template +class ArrayStreamBatchReader : public RecordBatchReader, + public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamBatchReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(schema_, ReadSchema()); + ARROW_ASSIGN_OR_RAISE(schema_, this->ReadSchema()); return Status::OK(); } std::shared_ptr schema() const override { return schema_; } Status ReadNext(std::shared_ptr* batch) override { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream batch->reset(); return Status::OK(); } else { - return ImportRecordBatch(&c_array, schema_).Value(batch); + return this->ImportRecordBatchInternal(&c_array, schema_).Value(batch); } } Status Close() override { - ReleaseStream(); + this->ReleaseStream(); return Status::OK(); } + DeviceAllocationType device_type() const override { return this->get_device_type(); } + private: std::shared_ptr schema_; }; -class ArrayStreamArrayReader : public ArrayStreamReader { +template +class ArrayStreamArrayReader : public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamArrayReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamArrayReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(field_, ReadField()); + ARROW_ASSIGN_OR_RAISE(field_, this->ReadField()); return Status::OK(); } std::shared_ptr data_type() const { return field_->type(); } Status ReadNext(std::shared_ptr* array) { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + 
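    // (Added note: the body below mirrors ArrayStreamBatchReader::ReadNext --
    // mark the C struct released, pull the next chunk from the wrapped stream,
    // treat a still-released struct as end-of-stream, and otherwise route the
    // import through the CPU or device overload selected at compile time.)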
ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream array->reset(); return Status::OK(); } else { - return ImportArray(&c_array, field_->type()).Value(array); + return this->ImportArrayInternal(&c_array, field_->type()).Value(array); } } @@ -2321,30 +2438,35 @@ class ArrayStreamArrayReader : public ArrayStreamReader { std::shared_ptr field_; }; -} // namespace - -Result> ImportRecordBatchReader( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportReader( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); return reader; } -Result> ImportChunkedArray( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportChunked( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); - std::shared_ptr data_type = reader->data_type(); - + auto data_type = reader->data_type(); ArrayVector chunks; std::shared_ptr chunk; while (true) { @@ -2360,4 +2482,26 @@ Result> ImportChunkedArray( return ChunkedArray::Make(std::move(chunks), std::move(data_type)); } +} // namespace + +Result> ImportRecordBatchReader( + struct ArrowArrayStream* stream) { + return ImportReader(stream); +} + +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportReader(stream, mapper); +} + +Result> ImportChunkedArray( + struct ArrowArrayStream* stream) { + return ImportChunked(stream); +} + +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportChunked(stream, mapper); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index 74a302be4c27d..45367e4f93062 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -321,6 +321,31 @@ ARROW_EXPORT Status ExportChunkedArray(std::shared_ptr chunked_array, struct ArrowArrayStream* out); +/// \brief Export C++ RecordBatchReader using the C device stream interface +/// +/// The resulting ArrowDeviceArrayStream struct keeps the record batch reader +/// alive until its release callback is called by the consumer. The device +/// type is determined by calling device_type() on the RecordBatchReader. +/// +/// \param[in] reader RecordBatchReader object to export +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceRecordBatchReader(std::shared_ptr reader, + struct ArrowDeviceArrayStream* out); + +/// \brief Export C++ ChunkedArray using the C device data interface format. 
+/// +/// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers +/// alive until its release callback is called by the consumer. +/// +/// \param[in] chunked_array ChunkedArray object to export +/// \param[in] device_type the device type the data is located on +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceChunkedArray(std::shared_ptr chunked_array, + DeviceAllocationType device_type, + struct ArrowDeviceArrayStream* out); + /// \brief Import C++ RecordBatchReader from the C stream interface. /// /// The ArrowArrayStream struct has its contents moved to a private object @@ -343,6 +368,42 @@ Result> ImportRecordBatchReader( ARROW_EXPORT Result> ImportChunkedArray(struct ArrowArrayStream* stream); +/// \brief Import C++ RecordBatchReader from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object +/// held alive by the resulting record batch reader. +/// +/// \note If there was a required sync event, sync events are accessible by individual +/// buffers of columns. We are not yet bubbling the sync events from the buffers up to +/// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future +/// update. +/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported RecordBatchReader object +ARROW_EXPORT +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + +/// \brief Import C++ ChunkedArray from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object, +/// is consumed in its entirety, and released before returning all chunks as a +/// ChunkedArray. +/// +/// \note Any chunks that require synchronization for their device memory will have +/// the SyncEvent objects available by checking the individual buffers of each chunk. +/// These SyncEvents should be checked before accessing the data in those buffers. 
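// (Aside: the chunked-data pairing mirrors the reader round trip sketched
//  earlier; assuming a populated `chunked_array` and a `mapper`:
//    ARROW_RETURN_NOT_OK(ExportDeviceChunkedArray(chunked_array, device_type, &c_stream));
//    ARROW_ASSIGN_OR_RAISE(auto imported, ImportDeviceChunkedArray(&c_stream, mapper));
//  the import consumes and releases the stream before returning all chunks.)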
+/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported ChunkedArray object +ARROW_EXPORT +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + /// @} } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index d64fe67accde0..0ecfb5a957760 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -53,11 +53,15 @@ namespace arrow { +using internal::ArrayDeviceExportTraits; +using internal::ArrayDeviceStreamExportTraits; using internal::ArrayExportGuard; using internal::ArrayExportTraits; using internal::ArrayStreamExportGuard; using internal::ArrayStreamExportTraits; using internal::checked_cast; +using internal::DeviceArrayExportGuard; +using internal::DeviceArrayStreamExportGuard; using internal::SchemaExportGuard; using internal::SchemaExportTraits; using internal::Zip; @@ -4746,4 +4750,516 @@ TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtripEmpty) { }); } +//////////////////////////////////////////////////////////////////////////// +// Array device stream export tests + +class TestArrayDeviceStreamExport : public BaseArrayStreamTest { + public: + void AssertStreamSchema(struct ArrowDeviceArrayStream* c_stream, + const Schema& expected) { + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream->get_schema(c_stream, &c_schema)); + + SchemaExportGuard schema_guard(&c_schema); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(expected, *schema, /*check_metadata=*/true); + } + + void AssertStreamEnd(struct ArrowDeviceArrayStream* c_stream) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_TRUE(ArrowDeviceArrayIsReleased(&c_array)); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, + const RecordBatch& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto batch, + ImportDeviceRecordBatch(&c_array, expected.schema(), + TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(expected, *batch); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, const Array& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array, expected.type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + AssertArraysEqual(expected, *array); + } + + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const 
std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } +}; + +TEST_F(TestArrayDeviceStreamExport, Empty) { + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {}); + ASSERT_OK_AND_ASSIGN( + auto reader, + RecordBatchReader::Make(batches, schema, + static_cast(kMyDeviceType))); + + struct ArrowDeviceArrayStream c_stream; + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamSchema(&c_stream, *schema); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + AssertStreamSchema(&c_stream, *schema); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamNext(&c_stream, *batches[0]); + AssertStreamNext(&c_stream, *batches[1]); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, ArrayLifetime) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray c_array0, c_array1; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_schema, ImportSchema(&c_schema)); + AssertSchemaEqual(*schema, *got_schema, /*check_metadata=*/true); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), 
orig_allocated_); + ASSERT_OK_AND_ASSIGN( + auto batch, + ImportDeviceRecordBatch(&c_array1, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[1], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); + ASSERT_OK_AND_ASSIGN( + batch, + ImportDeviceRecordBatch(&c_array0, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[0], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); +} + +TEST_F(TestArrayDeviceStreamExport, Errors) { + auto reader = + std::make_shared(Status::Invalid("some example error")); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(schema, arrow::schema({}), /*check_metadata=*/true); + } + + struct ArrowDeviceArray c_array; + ASSERT_EQ(EINVAL, c_stream.get_next(&c_stream, &c_array)); +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExportEmpty) { + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({}, int32())); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + + ASSERT_OK(ExportDeviceChunkedArray( + chunked_array, static_cast(kMyDeviceType), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + AssertStreamEnd(&c_stream); + } + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExport) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({arr1, arr2})); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray c_array0, c_array1; + + ASSERT_OK(ExportDeviceChunkedArray(chunked_array, device->device_type(), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), 
orig_allocated_); + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array0, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(0), *array); + ASSERT_OK_AND_ASSIGN(array, ImportDeviceArray(&c_array1, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(1), *array); +} + +//////////////////////////////////////////////////////////////////////////// +// Array device stream roundtrip tests + +class TestArrayDeviceStreamRoundtrip : public BaseArrayStreamTest { + public: + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } + + void Roundtrip(std::shared_ptr* reader, + struct ArrowDeviceArrayStream* c_stream) { + ASSERT_OK(ExportDeviceRecordBatchReader(*reader, c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(c_stream)); + + ASSERT_OK_AND_ASSIGN( + auto got_reader, + ImportDeviceRecordBatchReader(c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + *reader = std::move(got_reader); + } + + void Roundtrip( + std::shared_ptr reader, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // NOTE: ReleaseCallback<> is not immediately usable with ArrowDeviceArayStream + // because get_next and get_schema need the original private_data. 
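    // (Added note: the weak_ptr below acts as a lifetime probe -- exporting
    // hands the only shared_ptr to the C stream's private_data, so observing
    // expiry once the imported reader is destroyed proves the release callback
    // ran and freed the wrapped reader.)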
+ std::weak_ptr weak_reader(reader); + ASSERT_EQ(weak_reader.use_count(), 1); // Expiration check will fail otherwise + + ASSERT_OK(ExportDeviceRecordBatchReader(std::move(reader), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + { + ASSERT_OK_AND_ASSIGN(auto new_reader, + ImportDeviceRecordBatchReader( + &c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // stream was moved + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_FALSE(weak_reader.expired()); + + check_func(new_reader); + } + // Stream was released when `new_reader` was destroyed + ASSERT_TRUE(weak_reader.expired()); + } + + void Roundtrip(std::shared_ptr src, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // One original copy to compare the result, one copy held by the stream + std::weak_ptr weak_src(src); + int64_t initial_use_count = weak_src.use_count(); + + ASSERT_OK(ExportDeviceChunkedArray( + std::move(src), static_cast(kMyDeviceType), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + { + ASSERT_OK_AND_ASSIGN( + auto dst, + ImportDeviceChunkedArray(&c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // Stream was moved, consumed, and released + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + // Stream was released by ImportDeviceChunkedArray but original copy remains + ASSERT_EQ(weak_src.use_count(), initial_use_count - 1); + + check_func(dst); + } + } + + void AssertReaderNext(const std::shared_ptr& reader, + const RecordBatch& expected) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_NE(batch, nullptr); + ASSERT_EQ(static_cast(kMyDeviceType), batch->device_type()); + AssertBatchesEqual(expected, *batch); + } + + void AssertReaderEnd(const std::shared_ptr& reader) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_EQ(batch, nullptr); + } + + void AssertReaderClosed(const std::shared_ptr& reader) { + ASSERT_THAT(reader->Next(), + Raises(StatusCode::Invalid, ::testing::HasSubstr("already been closed"))); + } + + void AssertReaderClose(const std::shared_ptr& reader) { + ASSERT_OK(reader->Close()); + AssertReaderClosed(reader); + } +}; + +TEST_F(TestArrayDeviceStreamRoundtrip, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertSchemaEqual(*orig_schema, *reader->schema(), /*check_metadata=*/true); + AssertReaderNext(reader, *batches[0]); + AssertReaderNext(reader, *batches[1]); + AssertReaderEnd(reader); + AssertReaderEnd(reader); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, CloseEarly) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + 
ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertReaderNext(reader, *batches[0]); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, Errors) { + auto reader = std::make_shared( + Status::Invalid("roundtrip error example")); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + EXPECT_THAT(reader->Next(), Raises(StatusCode::Invalid, + ::testing::HasSubstr("roundtrip error example"))); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, SchemaError) { + struct ArrowDeviceArrayStream stream = {}; + stream.get_last_error = [](struct ArrowDeviceArrayStream* stream) { + return "Expected error"; + }; + stream.get_schema = [](struct ArrowDeviceArrayStream* stream, + struct ArrowSchema* schema) { return EIO; }; + stream.get_next = [](struct ArrowDeviceArrayStream* stream, + struct ArrowDeviceArray* array) { return EINVAL; }; + stream.release = [](struct ArrowDeviceArrayStream* stream) { + *static_cast(stream->private_data) = true; + std::memset(stream, 0, sizeof(*stream)); + }; + bool released = false; + stream.private_data = &released; + + EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, ::testing::HasSubstr("Expected error"), + ImportDeviceRecordBatchReader(&stream)); + ASSERT_TRUE(released); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtrip) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({arr1, arr2})); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtripEmpty) { + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({}, int32())); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/helpers.h b/cpp/src/arrow/c/helpers.h index a24f272feac81..6e4df17f43ebf 100644 --- a/cpp/src/arrow/c/helpers.h +++ b/cpp/src/arrow/c/helpers.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -70,9 +71,17 @@ inline int ArrowArrayIsReleased(const struct ArrowArray* array) { return array->release == NULL; } +inline int ArrowDeviceArrayIsReleased(const struct ArrowDeviceArray* array) { + return ArrowArrayIsReleased(&array->array); +} + /// Mark the C array released (for use in release callbacks) inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } +inline void ArrowDeviceArrayMarkReleased(struct ArrowDeviceArray* array) { + ArrowArrayMarkReleased(&array->array); +} + /// Move the C array from `src` to `dest` /// /// Note `dest` must *not* point to a valid array already, otherwise there @@ -84,6 +93,14 @@ inline 
void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { ArrowArrayMarkReleased(src); } +inline void ArrowDeviceArrayMove(struct ArrowDeviceArray* src, + struct ArrowDeviceArray* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArray)); + ArrowDeviceArrayMarkReleased(src); +} + /// Release the C array, if necessary, by calling its release callback inline void ArrowArrayRelease(struct ArrowArray* array) { if (!ArrowArrayIsReleased(array)) { @@ -93,16 +110,32 @@ inline void ArrowArrayRelease(struct ArrowArray* array) { } } +inline void ArrowDeviceArrayRelease(struct ArrowDeviceArray* array) { + if (!ArrowDeviceArrayIsReleased(array)) { + array->array.release(&array->array); + ARROW_C_ASSERT(ArrowDeviceArrayIsReleased(array), + "ArrowDeviceArrayRelease did not cleanup release callback"); + } +} + /// Query whether the C array stream is released inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { return stream->release == NULL; } +inline int ArrowDeviceArrayStreamIsReleased(const struct ArrowDeviceArrayStream* stream) { + return stream->release == NULL; +} + /// Mark the C array stream released (for use in release callbacks) inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { stream->release = NULL; } +inline void ArrowDeviceArrayStreamMarkReleased(struct ArrowDeviceArrayStream* stream) { + stream->release = NULL; +} + /// Move the C array stream from `src` to `dest` /// /// Note `dest` must *not* point to a valid stream already, otherwise there @@ -115,6 +148,14 @@ inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, ArrowArrayStreamMarkReleased(src); } +inline void ArrowDeviceArrayStreamMove(struct ArrowDeviceArrayStream* src, + struct ArrowDeviceArrayStream* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArrayStream)); + ArrowDeviceArrayStreamMarkReleased(src); +} + /// Release the C array stream, if necessary, by calling its release callback inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { if (!ArrowArrayStreamIsReleased(stream)) { @@ -124,6 +165,14 @@ inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { } } +inline void ArrowDeviceArrayStreamRelease(struct ArrowDeviceArrayStream* stream) { + if (!ArrowDeviceArrayStreamIsReleased(stream)) { + stream->release(stream); + ARROW_C_ASSERT(ArrowDeviceArrayStreamIsReleased(stream), + "ArrowDeviceArrayStreamRelease did not cleanup release callback"); + } +} + #ifdef __cplusplus } #endif diff --git a/cpp/src/arrow/c/util_internal.h b/cpp/src/arrow/c/util_internal.h index 6a33be9b0da8e..dc0e25710e987 100644 --- a/cpp/src/arrow/c/util_internal.h +++ b/cpp/src/arrow/c/util_internal.h @@ -32,12 +32,32 @@ struct ArrayExportTraits { typedef struct ArrowArray CType; static constexpr auto IsReleasedFunc = &ArrowArrayIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayRelease; + static constexpr auto MoveFunc = &ArrowArrayMove; + static constexpr auto MarkReleased = &ArrowArrayMarkReleased; +}; + +struct ArrayDeviceExportTraits { + typedef struct ArrowDeviceArray CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayMarkReleased; }; struct ArrayStreamExportTraits { typedef struct 
ArrowArrayStream CType; static constexpr auto IsReleasedFunc = &ArrowArrayStreamIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowArrayStreamMove; + static constexpr auto MarkReleased = &ArrowArrayStreamMarkReleased; +}; + +struct ArrayDeviceStreamExportTraits { + typedef struct ArrowDeviceArrayStream CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayStreamIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayStreamMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayStreamMarkReleased; }; // A RAII-style object to release a C Array / Schema struct at block scope exit. @@ -79,7 +99,9 @@ class ExportGuard { using SchemaExportGuard = ExportGuard; using ArrayExportGuard = ExportGuard; +using DeviceArrayExportGuard = ExportGuard; using ArrayStreamExportGuard = ExportGuard; +using DeviceArrayStreamExportGuard = ExportGuard; } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 8521d500f5c05..351f72f52365b 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -59,17 +59,31 @@ int RecordBatch::num_columns() const { return schema_->num_fields(); } class SimpleRecordBatch : public RecordBatch { public: SimpleRecordBatch(std::shared_ptr schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), boxed_columns_(std::move(columns)) { + std::vector> columns, + std::shared_ptr sync_event = nullptr) + : RecordBatch(std::move(schema), num_rows), + boxed_columns_(std::move(columns)), + device_type_(DeviceAllocationType::kCPU), + sync_event_(std::move(sync_event)) { + if (boxed_columns_.size() > 0) { + device_type_ = boxed_columns_[0]->device_type(); + } + columns_.resize(boxed_columns_.size()); for (size_t i = 0; i < columns_.size(); ++i) { columns_[i] = boxed_columns_[i]->data(); + DCHECK_EQ(device_type_, columns_[i]->device_type()); } } SimpleRecordBatch(const std::shared_ptr& schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), columns_(std::move(columns)) { + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = nullptr) + : RecordBatch(std::move(schema), num_rows), + columns_(std::move(columns)), + device_type_(device_type), + sync_event_(std::move(sync_event)) { boxed_columns_.resize(schema_->num_fields()); } @@ -99,6 +113,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return Status::TypeError("Column data type ", field->type()->name(), @@ -113,7 +128,8 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::AddVectorElement(columns_, i, column->data())); + internal::AddVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> SetColumn( @@ -121,6 +137,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return 
Status::TypeError("Column data type ", field->type()->name(), @@ -135,19 +152,22 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::ReplaceVectorElement(columns_, i, column->data())); + internal::ReplaceVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> RemoveColumn(int i) const override { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::DeleteVectorElement(columns_, i)); + internal::DeleteVectorElement(columns_, i), device_type_, + sync_event_); } std::shared_ptr ReplaceSchemaMetadata( const std::shared_ptr& metadata) const override { auto new_schema = schema_->WithMetadata(metadata); - return RecordBatch::Make(std::move(new_schema), num_rows_, columns_); + return RecordBatch::Make(std::move(new_schema), num_rows_, columns_, device_type_, + sync_event_); } std::shared_ptr Slice(int64_t offset, int64_t length) const override { @@ -157,7 +177,8 @@ class SimpleRecordBatch : public RecordBatch { arrays.emplace_back(field->Slice(offset, length)); } int64_t num_rows = std::min(num_rows_ - offset, length); - return std::make_shared(schema_, num_rows, std::move(arrays)); + return std::make_shared(schema_, num_rows, std::move(arrays), + device_type_, sync_event_); } Status Validate() const override { @@ -167,11 +188,22 @@ class SimpleRecordBatch : public RecordBatch { return RecordBatch::Validate(); } + const std::shared_ptr& GetSyncEvent() const override { + return sync_event_; + } + + DeviceAllocationType device_type() const override { return device_type_; } + private: std::vector> columns_; // Caching boxed array data mutable std::vector> boxed_columns_; + + // the type of device that the buffers for columns are allocated on. + // all columns should be on the same type of device. 
+ DeviceAllocationType device_type_; + std::shared_ptr sync_event_; }; RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows) @@ -179,18 +211,21 @@ RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), std::move(sync_event)); } std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, DeviceAllocationType device_type, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), device_type, + std::move(sync_event)); } Result> RecordBatch::MakeEmpty( @@ -466,6 +501,10 @@ bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata, return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->Equals(other.column(i), opts)) { return false; @@ -480,6 +519,10 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opt return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->ApproxEquals(other.column(i), opts)) { return false; @@ -505,7 +548,7 @@ Result> RecordBatch::ReplaceSchema( ", did not match new schema field type: ", replace_type->ToString()); } } - return RecordBatch::Make(std::move(schema), num_rows(), columns()); + return RecordBatch::Make(std::move(schema), num_rows(), columns(), GetSyncEvent()); } std::vector RecordBatch::ColumnNames() const { @@ -534,7 +577,7 @@ Result> RecordBatch::RenameColumns( } return RecordBatch::Make(::arrow::schema(std::move(fields)), num_rows(), - std::move(columns)); + std::move(columns), GetSyncEvent()); } Result> RecordBatch::SelectColumns( @@ -555,7 +598,8 @@ Result> RecordBatch::SelectColumns( auto new_schema = std::make_shared(std::move(fields), schema()->metadata()); - return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns)); + return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns), + GetSyncEvent()); } std::shared_ptr RecordBatch::Slice(int64_t offset) const { @@ -647,12 +691,16 @@ Result> RecordBatchReader::ToTable() { class SimpleRecordBatchReader : public RecordBatchReader { public: SimpleRecordBatchReader(Iterator> it, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(std::move(it)) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : schema_(std::move(schema)), it_(std::move(it)), device_type_(device_type) {} SimpleRecordBatchReader(std::vector> batches, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(MakeVectorIterator(std::move(batches))) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : schema_(std::move(schema)), + it_(MakeVectorIterator(std::move(batches))), + device_type_(device_type) {} Status ReadNext(std::shared_ptr* batch) override { return it_.Next().Value(batch); @@ -660,13 +708,17 @@ class SimpleRecordBatchReader : public RecordBatchReader { std::shared_ptr schema() const override { return schema_; } + DeviceAllocationType 
device_type() const override { return device_type_; } + protected: std::shared_ptr schema_; Iterator> it_; + DeviceAllocationType device_type_; }; Result> RecordBatchReader::Make( - std::vector> batches, std::shared_ptr schema) { + std::vector> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { if (batches.size() == 0 || batches[0] == nullptr) { return Status::Invalid("Cannot infer schema from empty vector or nullptr"); @@ -675,16 +727,19 @@ Result> RecordBatchReader::Make( schema = batches[0]->schema(); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } Result> RecordBatchReader::MakeFromIterator( - Iterator> batches, std::shared_ptr schema) { + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { return Status::Invalid("Schema cannot be nullptr"); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } RecordBatchReader::~RecordBatchReader() { @@ -701,6 +756,10 @@ Result> ConcatenateRecordBatches( int cols = batches[0]->num_columns(); auto schema = batches[0]->schema(); for (size_t i = 0; i < batches.size(); ++i) { + if (auto sync = batches[i]->GetSyncEvent()) { + ARROW_RETURN_NOT_OK(sync->Wait()); + } + length += batches[i]->num_rows(); if (!schema->Equals(batches[i]->schema())) { return Status::Invalid( diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index cd647a88abd97..b03cbf2251f47 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -23,6 +23,7 @@ #include #include "arrow/compare.h" +#include "arrow/device.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -45,9 +46,12 @@ class ARROW_EXPORT RecordBatch { /// \param[in] num_rows length of fields in the record batch. Each array /// should have the same length as num_rows /// \param[in] columns the record batch fields as vector of arrays - static std::shared_ptr Make(std::shared_ptr schema, - int64_t num_rows, - std::vector> columns); + /// \param[in] sync_event optional synchronization event for non-CPU device + /// memory used by buffers + static std::shared_ptr Make( + std::shared_ptr schema, int64_t num_rows, + std::vector> columns, + std::shared_ptr sync_event = NULLPTR); /// \brief Construct record batch from vector of internal data structures /// \since 0.5.0 @@ -58,9 +62,15 @@ class ARROW_EXPORT RecordBatch { /// \param num_rows the number of semantic rows in the record batch. This /// should be equal to the length of each field /// \param columns the data for the batch's columns + /// \param device_type the type of the device that the Arrow columns are + /// allocated on + /// \param sync_event optional synchronization event for non-CPU device + /// memory used by buffers static std::shared_ptr Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns); + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = NULLPTR); /// \brief Create an empty RecordBatch of a given schema /// @@ -260,6 +270,18 @@ class ARROW_EXPORT RecordBatch { /// \return Status virtual Status ValidateFull() const; + /// \brief EXPERIMENTAL: Return a top-level sync event object for this record batch + /// + /// If all of the data for this record batch is in CPU memory, then this + /// will return null. 
If the data for this batch is + /// on a device, then if synchronization is needed before accessing the + /// data the returned sync event will allow for it. + /// + /// \return null or a Device::SyncEvent + virtual const std::shared_ptr& GetSyncEvent() const = 0; + + virtual DeviceAllocationType device_type() const = 0; + protected: RecordBatch(const std::shared_ptr& schema, int64_t num_rows); @@ -306,6 +328,11 @@ class ARROW_EXPORT RecordBatchReader { /// \brief finalize reader virtual Status Close() { return Status::OK(); } + /// \brief EXPERIMENTAL: Get the device type for record batches this reader produces + /// + /// default implementation is to return DeviceAllocationType::kCPU + virtual DeviceAllocationType device_type() const { return DeviceAllocationType::kCPU; } + class RecordBatchReaderIterator { public: using iterator_category = std::input_iterator_tag; @@ -379,15 +406,19 @@ class ARROW_EXPORT RecordBatchReader { /// \param[in] batches the vector of RecordBatch to read from /// \param[in] schema schema to conform to. Will be inferred from the first /// element if not provided. + /// \param[in] device_type the type of device that the batches are allocated on static Result> Make( - RecordBatchVector batches, std::shared_ptr schema = NULLPTR); + RecordBatchVector batches, std::shared_ptr schema = NULLPTR, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); /// \brief Create a RecordBatchReader from an Iterator of RecordBatch. /// /// \param[in] batches an iterator of RecordBatch to read from. /// \param[in] schema schema that each record batch in iterator will conform to. + /// \param[in] device_type the type of device that the batches are allocated on static Result> MakeFromIterator( - Iterator> batches, std::shared_ptr schema); + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); }; /// \brief Concatenate record batches diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 5bf41c3c14b6e..45a3db9b66fc5 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -45,7 +45,7 @@ ValueError, match="Cannot import released ArrowArray") assert_stream_released = pytest.raises( - ValueError, match="Cannot import released ArrowArrayStream") + ValueError, match="Cannot import released Arrow Stream") def PyCapsule_IsValid(capsule, name): From 37e5240e2430564b1c2dfa5d1e6a7a6b58576f83 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Wed, 22 May 2024 12:47:04 +1200 Subject: [PATCH 143/261] GH-41681: [GLib] Generate separate version macros for each GLib library (#41721) ### Rationale for this change This is to support later using the `*_AVAILABLE_IN_*` macros to add `dllexport/dllimport` attributes required for building these libraries with MSVC (#41134) ### What changes are included in this PR? * Add a Python script that generates `DEPRECATED_IN` and `AVAILABLE_IN` macros for each GLib library * Add missing `AVAILABLE_IN` annotations to some methods in the GLib libraries (except the main arrow-glib library as this is being done in #41599) ### Are these changes tested? This doesn't include any behaviour change that can be unit tested. ### Are there any user-facing changes? 
No * GitHub Issue: #41681 Lead-authored-by: Adam Reeve Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .pre-commit-config.yaml | 2 +- c_glib/arrow-cuda-glib/arrow-cuda-glib.h | 2 + c_glib/arrow-cuda-glib/cuda.h | 38 ++ c_glib/arrow-cuda-glib/meson.build | 9 +- c_glib/arrow-cuda-glib/version.h.in | 157 +++++ .../arrow-dataset-glib/arrow-dataset-glib.h | 2 + .../arrow-dataset-glib/dataset-definition.h | 3 + c_glib/arrow-dataset-glib/dataset-factory.h | 20 +- c_glib/arrow-dataset-glib/dataset.h | 12 +- c_glib/arrow-dataset-glib/file-format.h | 28 +- c_glib/arrow-dataset-glib/fragment.h | 6 +- c_glib/arrow-dataset-glib/meson.build | 15 +- c_glib/arrow-dataset-glib/partitioning.h | 25 +- c_glib/arrow-dataset-glib/scanner.h | 13 +- c_glib/arrow-dataset-glib/version.h.in | 157 +++++ c_glib/arrow-flight-glib/arrow-flight-glib.h | 2 + c_glib/arrow-flight-glib/client.h | 26 +- c_glib/arrow-flight-glib/common.h | 66 ++- c_glib/arrow-flight-glib/meson.build | 8 + c_glib/arrow-flight-glib/server.h | 41 +- c_glib/arrow-flight-glib/version.h.in | 157 +++++ .../arrow-flight-sql-glib.h | 2 + c_glib/arrow-flight-sql-glib/client.h | 30 +- c_glib/arrow-flight-sql-glib/meson.build | 8 + c_glib/arrow-flight-sql-glib/server.h | 59 +- c_glib/arrow-flight-sql-glib/version.h.in | 157 +++++ c_glib/arrow-glib/meson.build | 16 +- c_glib/arrow-glib/version.h.in | 542 +----------------- c_glib/gandiva-glib/expression.h | 5 + c_glib/gandiva-glib/filter.h | 4 + c_glib/gandiva-glib/function-registry.h | 9 +- c_glib/gandiva-glib/function-signature.h | 14 + c_glib/gandiva-glib/meson.build | 14 +- c_glib/gandiva-glib/native-function.h | 14 + c_glib/gandiva-glib/node.h | 70 +++ c_glib/gandiva-glib/projector.h | 5 + c_glib/gandiva-glib/selection-vector.h | 4 + c_glib/gandiva-glib/version.h.in | 71 +-- c_glib/meson.build | 4 + c_glib/parquet-glib/arrow-file-reader.h | 15 +- c_glib/parquet-glib/arrow-file-writer.h | 36 +- c_glib/parquet-glib/meson.build | 9 +- c_glib/parquet-glib/metadata.h | 49 +- c_glib/parquet-glib/parquet-glib.h | 2 + c_glib/parquet-glib/statistics.h | 52 +- c_glib/parquet-glib/version.h.in | 157 +++++ c_glib/tool/generate-version-header.py | 151 +++++ dev/release/post-11-bump-versions-test.rb | 6 + dev/release/utils-prepare.sh | 10 + 49 files changed, 1492 insertions(+), 812 deletions(-) create mode 100644 c_glib/arrow-cuda-glib/version.h.in create mode 100644 c_glib/arrow-dataset-glib/version.h.in create mode 100644 c_glib/arrow-flight-glib/version.h.in create mode 100644 c_glib/arrow-flight-sql-glib/version.h.in create mode 100644 c_glib/parquet-glib/version.h.in create mode 100755 c_glib/tool/generate-version-header.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1e4b91e27ee8a..863fd918e5911 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: hooks: - id: flake8 name: Python Format - files: ^(python|dev|integration)/ + files: ^(python|dev|c_glib|integration)/ types: - file - python diff --git a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h index b3c7f21087669..572ff92ed9b43 100644 --- a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h +++ b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h @@ -21,4 +21,6 @@ #include +#include + #include diff --git a/c_glib/arrow-cuda-glib/cuda.h b/c_glib/arrow-cuda-glib/cuda.h index 863743a620bf8..f04a3381259bb 100644 --- a/c_glib/arrow-cuda-glib/cuda.h +++ b/c_glib/arrow-cuda-glib/cuda.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define 
GARROW_CUDA_TYPE_DEVICE_MANAGER (garrow_cuda_device_manager_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDADeviceManager, garrow_cuda_device_manager, GARROW_CUDA, @@ -35,6 +38,7 @@ struct _GArrowCUDADeviceManagerClass }; #define GARROW_CUDA_TYPE_CONTEXT (garrow_cuda_context_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GArrowCUDAContext, garrow_cuda_context, GARROW_CUDA, CONTEXT, GObject) struct _GArrowCUDAContextClass @@ -43,6 +47,7 @@ struct _GArrowCUDAContextClass }; #define GARROW_CUDA_TYPE_BUFFER (garrow_cuda_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GArrowCUDABuffer, garrow_cuda_buffer, GARROW_CUDA, BUFFER, GArrowBuffer) struct _GArrowCUDABufferClass @@ -51,6 +56,7 @@ struct _GArrowCUDABufferClass }; #define GARROW_CUDA_TYPE_HOST_BUFFER (garrow_cuda_host_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAHostBuffer, garrow_cuda_host_buffer, GARROW_CUDA, @@ -62,6 +68,7 @@ struct _GArrowCUDAHostBufferClass }; #define GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE (garrow_cuda_ipc_memory_handle_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAIPCMemoryHandle, garrow_cuda_ipc_memory_handle, GARROW_CUDA, @@ -73,6 +80,7 @@ struct _GArrowCUDAIPCMemoryHandleClass }; #define GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM (garrow_cuda_buffer_input_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferInputStream, garrow_cuda_buffer_input_stream, GARROW_CUDA, @@ -85,6 +93,7 @@ struct _GArrowCUDABufferInputStreamClass #define GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM \ (garrow_cuda_buffer_output_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferOutputStream, garrow_cuda_buffer_output_stream, GARROW_CUDA, @@ -95,71 +104,100 @@ struct _GArrowCUDABufferOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDADeviceManager * garrow_cuda_device_manager_new(GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, gint gpu_number, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gsize garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new(GArrowCUDAContext *context, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, GArrowCUDAIPCMemoryHandle *handle, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, GArrowRecordBatch *record_batch, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GBytes * garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, gint64 position, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, const guint8 *data, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowRecordBatch * garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, GArrowSchema *schema, 
GArrowReadOptions *options, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAHostBuffer * garrow_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_ipc_memory_handle_new(const guint8 *data, gsize size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowBuffer * garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferInputStream * garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferOutputStream * garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream); + +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream); diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 88029e6dc2073..86d536dcd2494 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -31,10 +31,17 @@ cpp_headers = files( 'cuda.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW_CUDA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-cuda-glib') - dependencies = [ arrow_cuda, arrow_glib, diff --git a/c_glib/arrow-cuda-glib/version.h.in b/c_glib/arrow-cuda-glib/version.h.in new file mode 100644 index 0000000000000..27d070d19dc9c --- /dev/null +++ b/c_glib/arrow-cuda-glib/version.h.in @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-cuda-glib/arrow-cuda-glib.h + * + * Apache Arrow CUDA GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GARROW_CUDA_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GARROW_CUDA_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GARROW_CUDA_VERSION_MICRO: + * + * The micro version. 
+ * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GARROW_CUDA_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_TAG "@VERSION_TAG@" + +/** + * GARROW_CUDA_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_CHECK(major, minor, micro) \ + (GARROW_CUDA_VERSION_MAJOR > (major) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR > (minor)) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR == (minor) && \ + GARROW_CUDA_VERSION_MICRO >= (micro))) + +/** + * GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS +# define GARROW_CUDA_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) +#else +# define GARROW_CUDA_DEPRECATED G_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GARROW_CUDA_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GARROW_CUDA_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MIN_REQUIRED +# define GARROW_CUDA_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GARROW_CUDA_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GARROW_CUDA_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. 
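To make the placeholders above concrete, here is a rough sketch of what generate-version-header.py substitutes for `@ENCODED_VERSIONS@` and `@AVAILABILITY_MACROS@`, assuming it mirrors the hand-written arrow-glib pattern; the version numbers are illustrative, and the real list is emitted by the script for every release:

```c
/* Illustrative sketch only: approximately what the generated output looks
 * like, assuming the script follows the existing arrow-glib conventions.
 * G_ENCODE_VERSION comes from GLib's version macros. */
#define GARROW_CUDA_VERSION_16_0 G_ENCODE_VERSION(16, 0)
#define GARROW_CUDA_VERSION_17_0 G_ENCODE_VERSION(17, 0)

/* Deprecation warnings fire only for APIs deprecated at or before the
 * minimum version the consumer declared support for. */
#if GARROW_CUDA_VERSION_MIN_REQUIRED >= GARROW_CUDA_VERSION_16_0
#  define GARROW_CUDA_DEPRECATED_IN_16_0 GARROW_CUDA_DEPRECATED
#  define GARROW_CUDA_DEPRECATED_IN_16_0_FOR(function) \
     GARROW_CUDA_DEPRECATED_FOR(function)
#else
#  define GARROW_CUDA_DEPRECATED_IN_16_0
#  define GARROW_CUDA_DEPRECATED_IN_16_0_FOR(function)
#endif

/* Availability warnings fire for APIs newer than the maximum version the
 * consumer allows. */
#if GARROW_CUDA_VERSION_MAX_ALLOWED < GARROW_CUDA_VERSION_16_0
#  define GARROW_CUDA_AVAILABLE_IN_16_0 GARROW_CUDA_UNAVAILABLE(16, 0)
#else
#  define GARROW_CUDA_AVAILABLE_IN_16_0
#endif
```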
+ * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MAX_ALLOWED +# define GARROW_CUDA_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h index 58f4e216cc715..7ebf36ddd2b78 100644 --- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h +++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h @@ -21,6 +21,8 @@ #include +#include + #include #include #include diff --git a/c_glib/arrow-dataset-glib/dataset-definition.h b/c_glib/arrow-dataset-glib/dataset-definition.h index f278b05a135f5..bc52d6d3663a3 100644 --- a/c_glib/arrow-dataset-glib/dataset-definition.h +++ b/c_glib/arrow-dataset-glib/dataset-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_DATASET (gadataset_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDataset, gadataset_dataset, GADATASET, DATASET, GObject) struct _GADatasetDatasetClass { diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h index 1dc875837fe21..e7d3bc27aea8f 100644 --- a/c_glib/arrow-dataset-glib/dataset-factory.h +++ b/c_glib/arrow-dataset-glib/dataset-factory.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GADATASET_TYPE_FINISH_OPTIONS (gadataset_finish_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFinishOptions, gadataset_finish_options, GADATASET, FINISH_OPTIONS, GObject) struct _GADatasetFinishOptionsClass @@ -31,11 +32,12 @@ struct _GADatasetFinishOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetFinishOptions * gadataset_finish_options_new(void); #define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetDatasetFactory, gadataset_dataset_factory, GADATASET, DATASET_FACTORY, GObject) struct _GADatasetDatasetFactoryClass @@ -43,7 +45,7 @@ struct _GADatasetDatasetFactoryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetDataset * gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, GADatasetFinishOptions *options, @@ -51,6 +53,7 @@ gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, #define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \ (gadataset_file_system_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory, gadataset_file_system_dataset_factory, GADATASET, @@ -61,32 +64,33 @@ struct _GADatasetFileSystemDatasetFactoryClass GADatasetDatasetFactoryClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDatasetFactory * gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system( GADatasetFileSystemDatasetFactory *factory, GArrowFileSystem *file_system, GError **error); +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system_uri( GADatasetFileSystemDatasetFactory *factory, const gchar *uri, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_path(GADatasetFileSystemDatasetFactory *factory, const gchar *path, GError **error); /* -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean 
gadataset_file_system_dataset_factory_add_file( GADatasetFileSystemDatasetFactory *factory, GArrowFileInfo *file, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_selector( GADatasetFileSystemDatasetFactory *factory, @@ -94,7 +98,7 @@ gadataset_file_system_dataset_factory_add_selector( GError **error); */ -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDataset * gadataset_file_system_dataset_factory_finish(GADatasetFileSystemDatasetFactory *factory, GADatasetFinishOptions *options, diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h index 57f6c7729f073..657de330e6c49 100644 --- a/c_glib/arrow-dataset-glib/dataset.h +++ b/c_glib/arrow-dataset-glib/dataset.h @@ -25,18 +25,19 @@ G_BEGIN_DECLS -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_dataset_begin_scan(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gchar * gadataset_dataset_get_type_name(GADatasetDataset *dataset); #define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \ (gadataset_file_system_dataset_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetWriteOptions, gadataset_file_system_dataset_write_options, GADATASET, @@ -47,11 +48,12 @@ struct _GADatasetFileSystemDatasetWriteOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileSystemDatasetWriteOptions * gadataset_file_system_dataset_write_options_new(void); #define GADATASET_TYPE_FILE_SYSTEM_DATASET (gadataset_file_system_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset, gadataset_file_system_dataset, GADATASET, @@ -62,7 +64,7 @@ struct _GADatasetFileSystemDatasetClass GADatasetDatasetClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_system_dataset_write_scanner( GADatasetScanner *scanner, diff --git a/c_glib/arrow-dataset-glib/file-format.h b/c_glib/arrow-dataset-glib/file-format.h index 29487e59d70dd..f70523597e7c6 100644 --- a/c_glib/arrow-dataset-glib/file-format.h +++ b/c_glib/arrow-dataset-glib/file-format.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_FILE_WRITE_OPTIONS (gadataset_file_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriteOptions, gadataset_file_write_options, GADATASET, @@ -35,6 +38,7 @@ struct _GADatasetFileWriteOptionsClass }; #define GADATASET_TYPE_FILE_WRITER (gadataset_file_writer_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileWriter, gadataset_file_writer, GADATASET, FILE_WRITER, GObject) struct _GADatasetFileWriterClass @@ -42,21 +46,22 @@ struct _GADatasetFileWriterClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer, GArrowRecordBatch *record_batch, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_finish(GADatasetFileWriter *writer, 
GError **error); #define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileFormat, gadataset_file_format, GADATASET, FILE_FORMAT, GObject) struct _GADatasetFileFormatClass @@ -64,13 +69,13 @@ struct _GADatasetFileFormatClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gchar * gadataset_file_format_get_type_name(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriteOptions * gadataset_file_format_get_default_write_options(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriter * gadataset_file_format_open_writer(GADatasetFileFormat *format, GArrowOutputStream *destination, @@ -80,12 +85,13 @@ gadataset_file_format_open_writer(GADatasetFileFormat *format, GADatasetFileWriteOptions *options, GError **error); -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gboolean gadataset_file_format_equal(GADatasetFileFormat *format, GADatasetFileFormat *other_format); #define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat, gadataset_csv_file_format, GADATASET, @@ -96,11 +102,12 @@ struct _GADatasetCSVFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetCSVFileFormat * gadataset_csv_file_format_new(void); #define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat, gadataset_ipc_file_format, GADATASET, @@ -111,11 +118,12 @@ struct _GADatasetIPCFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetIPCFileFormat * gadataset_ipc_file_format_new(void); #define GADATASET_TYPE_PARQUET_FILE_FORMAT (gadataset_parquet_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat, gadataset_parquet_file_format, GADATASET, @@ -126,7 +134,7 @@ struct _GADatasetParquetFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetParquetFileFormat * gadataset_parquet_file_format_new(void); diff --git a/c_glib/arrow-dataset-glib/fragment.h b/c_glib/arrow-dataset-glib/fragment.h index 49acc360a3679..80eb9e19df3cc 100644 --- a/c_glib/arrow-dataset-glib/fragment.h +++ b/c_glib/arrow-dataset-glib/fragment.h @@ -21,11 +21,14 @@ #include +#include + G_BEGIN_DECLS /* arrow::dataset::Fragment */ #define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFragment, gadataset_fragment, GADATASET, FRAGMENT, GObject) struct _GADatasetFragmentClass @@ -36,6 +39,7 @@ struct _GADatasetFragmentClass /* arrow::dataset::InMemoryFragment */ #define GADATASET_TYPE_IN_MEMORY_FRAGMENT (gadataset_in_memory_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment, gadataset_in_memory_fragment, GADATASET, @@ -46,7 +50,7 @@ struct _GADatasetInMemoryFragmentClass GADatasetFragmentClass parent_class; }; -GARROW_AVAILABLE_IN_4_0 +GADATASET_AVAILABLE_IN_4_0 GADatasetInMemoryFragment * gadataset_in_memory_fragment_new(GArrowSchema *schema, GArrowRecordBatch **record_batches, diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 0c869a4183efa..686129dbe2fc0 100644 --- 
a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -17,6 +17,8 @@ # specific language governing permissions and limitations # under the License. +project_name = 'arrow-dataset-glib' + sources = files( 'dataset-factory.cpp', 'dataset.cpp', @@ -47,20 +49,27 @@ cpp_headers = files( 'scanner.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GADATASET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + enums = gnome.mkenums('enums', sources: c_headers, identifier_prefix: 'GADataset', symbol_prefix: 'gadataset', c_template: 'enums.c.template', h_template: 'enums.h.template', - install_dir: join_paths(include_dir, 'arrow-dataset-glib'), + install_dir: join_paths(include_dir, project_name), install_header: true) enums_source = enums[0] enums_header = enums[1] - headers = c_headers + cpp_headers -install_headers(headers, subdir: 'arrow-dataset-glib') +install_headers(headers, subdir: project_name) dependencies = [ arrow_dataset, diff --git a/c_glib/arrow-dataset-glib/partitioning.h b/c_glib/arrow-dataset-glib/partitioning.h index ccf476272e429..7671958d88e61 100644 --- a/c_glib/arrow-dataset-glib/partitioning.h +++ b/c_glib/arrow-dataset-glib/partitioning.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS /** @@ -39,6 +41,7 @@ typedef enum { #define GADATASET_TYPE_PARTITIONING_FACTORY_OPTIONS \ (gadataset_partitioning_factory_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioningFactoryOptions, gadataset_partitioning_factory_options, GADATASET, @@ -49,11 +52,12 @@ struct _GADatasetPartitioningFactoryOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetPartitioningFactoryOptions * gadataset_partitioning_factory_options_new(void); #define GADATASET_TYPE_PARTITIONING (gadataset_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetPartitioning, gadataset_partitioning, GADATASET, PARTITIONING, GObject) struct _GADatasetPartitioningClass @@ -61,16 +65,17 @@ struct _GADatasetPartitioningClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gchar * gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning); -GARROW_AVAILABLE_IN_12_0 +GADATASET_AVAILABLE_IN_12_0 GADatasetPartitioning * gadataset_partitioning_create_default(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING_OPTIONS \ (gadataset_key_value_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioningOptions, gadataset_key_value_partitioning_options, GADATASET, @@ -81,12 +86,13 @@ struct _GADatasetKeyValuePartitioningOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetKeyValuePartitioningOptions * gadataset_key_value_partitioning_options_new(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING \ (gadataset_key_value_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioning, gadataset_key_value_partitioning, GADATASET, @@ -99,6 +105,7 @@ struct _GADatasetKeyValuePartitioningClass #define GADATASET_TYPE_DIRECTORY_PARTITIONING \ (gadataset_directory_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDirectoryPartitioning, gadataset_directory_partitioning, GADATASET, @@ -109,7 +116,7 @@ struct 
_GADatasetDirectoryPartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetDirectoryPartitioning * gadataset_directory_partitioning_new(GArrowSchema *schema, GList *dictionaries, @@ -118,6 +125,7 @@ gadataset_directory_partitioning_new(GArrowSchema *schema, #define GADATASET_TYPE_HIVE_PARTITIONING_OPTIONS \ (gadataset_hive_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioningOptions, gadataset_hive_partitioning_options, GADATASET, @@ -128,11 +136,12 @@ struct _GADatasetHivePartitioningOptionsClass GADatasetKeyValuePartitioningOptionsClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioningOptions * gadataset_hive_partitioning_options_new(void); #define GADATASET_TYPE_HIVE_PARTITIONING (gadataset_hive_partitioning_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioning, gadataset_hive_partitioning, GADATASET, @@ -143,13 +152,13 @@ struct _GADatasetHivePartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioning * gadataset_hive_partitioning_new(GArrowSchema *schema, GList *dictionaries, GADatasetHivePartitioningOptions *options, GError **error); -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 gchar * gadataset_hive_partitioning_get_null_fallback(GADatasetHivePartitioning *partitioning); diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h index 3c7432fb268e4..ad462391568a3 100644 --- a/c_glib/arrow-dataset-glib/scanner.h +++ b/c_glib/arrow-dataset-glib/scanner.h @@ -21,21 +21,24 @@ #include #include +#include G_BEGIN_DECLS #define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, gadataset_scanner, GADATASET, SCANNER, GObject) struct _GADatasetScannerClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error); #define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetScannerBuilder, gadataset_scanner_builder, GADATASET, SCANNER_BUILDER, GObject) struct _GADatasetScannerBuilderClass @@ -43,20 +46,20 @@ struct _GADatasetScannerBuilderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetScannerBuilder * gadataset_scanner_builder_new_record_batch_reader(GArrowRecordBatchReader *reader); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder, GArrowExpression *expression, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScanner * gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, GError **error); diff --git a/c_glib/arrow-dataset-glib/version.h.in b/c_glib/arrow-dataset-glib/version.h.in new file mode 100644 index 0000000000000..47d726c5b7896 --- /dev/null +++ b/c_glib/arrow-dataset-glib/version.h.in @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * Apache Arrow Dataset GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GADATASET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GADATASET_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GADATASET_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GADATASET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_TAG "@VERSION_TAG@" + +/** + * GADATASET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_CHECK(major, minor, micro) \ + (GADATASET_VERSION_MAJOR > (major) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR > (minor)) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR == (minor) && \ + GADATASET_VERSION_MICRO >= (micro))) + +/** + * GADATASET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GADATASET_DISABLE_DEPRECATION_WARNINGS +# define GADATASET_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) +#else +# define GADATASET_DEPRECATED G_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GADATASET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GADATASET_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. 
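For illustration, a hypothetical consumer pins its supported API range before the first include so that calls to newer entry points produce compile-time warnings; the versions named here are examples, not part of this patch:

```c
/* Hypothetical consumer code: declare the API range this program supports
 * before including the header. GADATASET_VERSION_16_0 is one of the
 * generated encoded-version macros; the choice of 16.0 is only an example. */
#define GADATASET_VERSION_MIN_REQUIRED GADATASET_VERSION_16_0
#define GADATASET_VERSION_MAX_ALLOWED GADATASET_VERSION_16_0
#include <arrow-dataset-glib/arrow-dataset-glib.h>

#if GADATASET_VERSION_CHECK(17, 0, 0)
/* Code that relies on 17.0.0 APIs would be guarded here. */
#endif
```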
+ * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MIN_REQUIRED +# define GADATASET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GADATASET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GADATASET_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MAX_ALLOWED +# define GADATASET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.h b/c_glib/arrow-flight-glib/arrow-flight-glib.h index 6fc8f43d8406e..7e973dd125dd4 100644 --- a/c_glib/arrow-flight-glib/arrow-flight-glib.h +++ b/c_glib/arrow-flight-glib/arrow-flight-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index f67d58371d583..a91bbe55e3c04 100644 --- a/c_glib/arrow-flight-glib/client.h +++ b/c_glib/arrow-flight-glib/client.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_STREAM_READER (gaflight_stream_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader, gaflight_stream_reader, GAFLIGHT, @@ -35,6 +36,7 @@ struct _GAFlightStreamReaderClass }; #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightCallOptions, gaflight_call_options, GAFLIGHT, CALL_OPTIONS, GObject) struct _GAFlightCallOptionsClass @@ -42,25 +44,26 @@ struct _GAFlightCallOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCallOptions * gaflight_call_options_new(void); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_add_header(GAFlightCallOptions *options, const gchar *name, const gchar *value); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_clear_headers(GAFlightCallOptions *options); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_foreach_header(GAFlightCallOptions *options, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightClientOptions, gaflight_client_options, GAFLIGHT, CLIENT_OPTIONS, GObject) struct _GAFlightClientOptionsClass @@ -68,28 +71,29 @@ struct _GAFlightClientOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) struct _GAFlightClientClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClient * gaflight_client_new(GAFlightLocation *location, GAFlightClientOptions *options, GError **error); -GARROW_AVAILABLE_IN_8_0 +GAFLIGHT_AVAILABLE_IN_8_0 gboolean gaflight_client_close(GAFlightClient *client, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_client_authenticate_basic_token(GAFlightClient *client, 
const gchar *user, @@ -99,21 +103,21 @@ gaflight_client_authenticate_basic_token(GAFlightClient *client, gchar **bearer_value, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_client_list_flights(GAFlightClient *client, GAFlightCriteria *criteria, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_client_get_flight_info(GAFlightClient *client, GAFlightDescriptor *descriptor, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamReader * gaflight_client_do_get(GAFlightClient *client, GAFlightTicket *ticket, diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h index fcb23b1885ea7..b1d89f79c357e 100644 --- a/c_glib/arrow-flight-glib/common.h +++ b/c_glib/arrow-flight-glib/common.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS typedef void (*GAFlightHeaderFunc)(const gchar *name, @@ -28,40 +30,43 @@ typedef void (*GAFlightHeaderFunc)(const gchar *name, gpointer user_data); #define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria, gaflight_criteria, GAFLIGHT, CRITERIA, GObject) struct _GAFlightCriteriaClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCriteria * gaflight_criteria_new(GBytes *expression); #define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightLocation, gaflight_location, GAFLIGHT, LOCATION, GObject) struct _GAFlightLocationClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightLocation * gaflight_location_new(const gchar *uri, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_location_to_string(GAFlightLocation *location); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_location_get_scheme(GAFlightLocation *location); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_location_equal(GAFlightLocation *location, GAFlightLocation *other_location); #define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightDescriptor, gaflight_descriptor, GAFLIGHT, DESCRIPTOR, GObject) struct _GAFlightDescriptorClass @@ -69,16 +74,17 @@ struct _GAFlightDescriptorClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_descriptor_to_string(GAFlightDescriptor *descriptor); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_descriptor_equal(GAFlightDescriptor *descriptor, GAFlightDescriptor *other_descriptor); #define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor, gaflight_path_descriptor, GAFLIGHT, @@ -89,15 +95,16 @@ struct _GAFlightPathDescriptorClass GAFlightDescriptorClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightPathDescriptor * gaflight_path_descriptor_new(const gchar **paths, gsize n_paths); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar ** gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor); #define GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor, gaflight_command_descriptor, GAFLIGHT, @@ -108,56 
+115,59 @@ struct _GAFlightCommandDescriptorClass GAFlightDescriptorClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCommandDescriptor * gaflight_command_descriptor_new(const gchar *command); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor); #define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightTicket, gaflight_ticket, GAFLIGHT, TICKET, GObject) struct _GAFlightTicketClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightTicket * gaflight_ticket_new(GBytes *data); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_ticket_equal(GAFlightTicket *ticket, GAFlightTicket *other_ticket); #define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint, gaflight_endpoint, GAFLIGHT, ENDPOINT, GObject) struct _GAFlightEndpointClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightEndpoint * gaflight_endpoint_new(GAFlightTicket *ticket, GList *locations); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_endpoint_equal(GAFlightEndpoint *endpoint, GAFlightEndpoint *other_endpoint); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint); #define GAFLIGHT_TYPE_INFO (gaflight_info_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightInfo, gaflight_info, GAFLIGHT, INFO, GObject) struct _GAFlightInfoClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightInfo * gaflight_info_new(GArrowSchema *schema, GAFlightDescriptor *descriptor, @@ -166,27 +176,28 @@ gaflight_info_new(GArrowSchema *schema, gint64 total_bytes, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_info_equal(GAFlightInfo *info, GAFlightInfo *other_info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GArrowSchema * gaflight_info_get_schema(GAFlightInfo *info, GArrowReadOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightDescriptor * gaflight_info_get_descriptor(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_info_get_endpoints(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_records(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_bytes(GAFlightInfo *info); #define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightStreamChunk, gaflight_stream_chunk, GAFLIGHT, STREAM_CHUNK, GObject) struct _GAFlightStreamChunkClass @@ -194,14 +205,15 @@ struct _GAFlightStreamChunkClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowRecordBatch * gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowBuffer * gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk); #define GAFLIGHT_TYPE_RECORD_BATCH_READER (gaflight_record_batch_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader, gaflight_record_batch_reader, GAFLIGHT, @@ -212,11 +224,11 @@ struct _GAFlightRecordBatchReaderClass GObjectClass parent_class; }; 
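A minimal usage sketch for the reader API annotated below, assuming end-of-stream is reported as a NULL chunk with the error left unset and that the chunk accessors return full references:

```c
/* Sketch: drain a GAFlightRecordBatchReader (for example, one obtained from
 * a DoGet call) chunk by chunk. gaflight_record_batch_reader_read_all() is
 * the one-shot alternative that collects everything into a GArrowTable. */
#include <arrow-flight-glib/arrow-flight-glib.h>

static gboolean
print_stream(GAFlightRecordBatchReader *reader, GError **error)
{
  while (TRUE) {
    GAFlightStreamChunk *chunk =
      gaflight_record_batch_reader_read_next(reader, error);
    if (!chunk) {
      /* Assumption: NULL with *error unset marks a cleanly finished stream. */
      return *error == NULL;
    }
    GArrowRecordBatch *batch = gaflight_stream_chunk_get_data(chunk);
    gchar *text = garrow_record_batch_to_string(batch, error);
    if (text) {
      g_print("%s\n", text);
      g_free(text);
    }
    g_object_unref(batch);
    g_object_unref(chunk);
  }
}
```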
-GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamChunk * gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowTable * gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error); diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build index 70db7400b124a..2d684a4ee361e 100644 --- a/c_glib/arrow-flight-glib/meson.build +++ b/c_glib/arrow-flight-glib/meson.build @@ -37,6 +37,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHT', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-glib') diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h index 89f5a0a596e9e..7e594febb172f 100644 --- a/c_glib/arrow-flight-glib/server.h +++ b/c_glib/arrow-flight-glib/server.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_DATA_STREAM (gaflight_data_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightDataStream, gaflight_data_stream, GAFLIGHT, DATA_STREAM, GObject) struct _GAFlightDataStreamClass @@ -32,6 +33,7 @@ struct _GAFlightDataStreamClass }; #define GAFLIGHT_TYPE_RECORD_BATCH_STREAM (gaflight_record_batch_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream, gaflight_record_batch_stream, GAFLIGHT, @@ -42,12 +44,13 @@ struct _GAFlightRecordBatchStreamClass GAFlightDataStreamClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightRecordBatchStream * gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, GArrowWriteOptions *options); #define GAFLIGHT_TYPE_MESSAGE_READER (gaflight_message_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightMessageReader, gaflight_message_reader, GAFLIGHT, @@ -58,11 +61,12 @@ struct _GAFlightMessageReaderClass GAFlightRecordBatchReaderClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 GAFlightDescriptor * gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader); #define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT (gaflight_server_call_context_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, gaflight_server_call_context, GAFLIGHT, @@ -73,13 +77,14 @@ struct _GAFlightServerCallContextClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 void gaflight_server_call_context_foreach_incoming_header(GAFlightServerCallContext *context, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_SERVER_AUTH_SENDER (gaflight_server_auth_sender_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthSender, gaflight_server_auth_sender, GAFLIGHT, @@ -90,13 +95,14 @@ struct _GAFlightServerAuthSenderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_server_auth_sender_write(GAFlightServerAuthSender *sender, GBytes *message, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_READER (gaflight_server_auth_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthReader, gaflight_server_auth_reader, GAFLIGHT, @@ -107,11 +113,12 @@ struct 
_GAFlightServerAuthReaderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * gaflight_server_auth_reader_read(GAFlightServerAuthReader *reader, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_HANDLER (gaflight_server_auth_handler_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthHandler, gaflight_server_auth_handler, GAFLIGHT, @@ -124,6 +131,7 @@ struct _GAFlightServerAuthHandlerClass #define GAFLIGHT_TYPE_SERVER_CUSTOM_AUTH_HANDLER \ (gaflight_server_custom_auth_handler_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCustomAuthHandler, gaflight_server_custom_auth_handler, GAFLIGHT, @@ -152,7 +160,7 @@ struct _GAFlightServerCustomAuthHandlerClass GError **error); }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 void gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -160,7 +168,7 @@ gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler GAFlightServerAuthReader *reader, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -168,6 +176,7 @@ gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *ha GError **error); #define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightServerOptions, gaflight_server_options, GAFLIGHT, SERVER_OPTIONS, GObject) struct _GAFlightServerOptionsClass @@ -175,14 +184,16 @@ struct _GAFlightServerOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightServerOptions * gaflight_server_options_new(GAFlightLocation *location); #define GAFLIGHT_TYPE_SERVABLE (gaflight_servable_get_type()) +GAFLIGHT_AVAILABLE_IN_9_0 G_DECLARE_INTERFACE(GAFlightServable, gaflight_servable, GAFLIGHT, SERVABLE, GObject) #define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServer, gaflight_server, GAFLIGHT, SERVER, GObject) /** * GAFlightServerClass: @@ -209,34 +220,34 @@ struct _GAFlightServerClass GError **error); }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_listen(GAFlightServer *server, GAFlightServerOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint gaflight_server_get_port(GAFlightServer *server); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_shutdown(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_wait(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_server_list_flights(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightCriteria *criteria, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_server_get_flight_info(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightDescriptor *request, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightDataStream * gaflight_server_do_get(GAFlightServer *server, GAFlightServerCallContext *context, diff --git a/c_glib/arrow-flight-glib/version.h.in b/c_glib/arrow-flight-glib/version.h.in new file mode 100644 index 0000000000000..45e0437ab1e71 
--- /dev/null +++ b/c_glib/arrow-flight-glib/version.h.in @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * Apache Arrow Flight GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GAFLIGHT_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GAFLIGHT_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GAFLIGHT_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GAFLIGHT_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_TAG "@VERSION_TAG@" + +/** + * GAFLIGHT_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_CHECK(major, minor, micro) \ + (GAFLIGHT_VERSION_MAJOR > (major) || \ + (GAFLIGHT_VERSION_MAJOR == (major) && \ + GAFLIGHT_VERSION_MINOR > (minor)) || \ + (GAFLIGHT_VERSION_MAJOR == (major) && \ + GAFLIGHT_VERSION_MINOR == (minor) && \ + GAFLIGHT_VERSION_MICRO >= (micro))) + +/** + * GAFLIGHT_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GAFLIGHT_DISABLE_DEPRECATION_WARNINGS +# define GAFLIGHT_DEPRECATED +# define GAFLIGHT_DEPRECATED_FOR(function) +# define GAFLIGHT_UNAVAILABLE(major, minor) +#else +# define GAFLIGHT_DEPRECATED G_DEPRECATED +# define GAFLIGHT_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GAFLIGHT_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GAFLIGHT_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHT_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GAFLIGHT_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. 
+ * + * You must define this macro before including the + * arrow-flight-glib/arrow-flight-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GAFLIGHT_VERSION_MIN_REQUIRED +# define GAFLIGHT_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GAFLIGHT_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHT_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GAFLIGHT_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-glib/arrow-flight-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GAFLIGHT_VERSION_MAX_ALLOWED +# define GAFLIGHT_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h index 8ebe39aee57a8..94e72d06f2b47 100644 --- a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h +++ b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h @@ -19,5 +19,7 @@ #pragma once +#include + #include #include diff --git a/c_glib/arrow-flight-sql-glib/client.h b/c_glib/arrow-flight-sql-glib/client.h index 9a5a8987f7195..b9e9baf41a59f 100644 --- a/c_glib/arrow-flight-sql-glib/client.h +++ b/c_glib/arrow-flight-sql-glib/client.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT (gaflightsql_prepared_statement_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatement, gaflightsql_prepared_statement, GAFLIGHTSQL, @@ -34,52 +37,53 @@ struct _GAFlightSQLPreparedStatementClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightInfo * gaflightsql_prepared_statement_execute(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gint64 gaflightsql_prepared_statement_execute_update(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_prepared_statement_get_parameter_schema( GAFlightSQLPreparedStatement *statement); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_prepared_statement_get_dataset_schema( GAFlightSQLPreparedStatement *statement); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_set_record_batch(GAFlightSQLPreparedStatement *statement, GArrowRecordBatch *record_batch, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_set_record_batch_reader( GAFlightSQLPreparedStatement *statement, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_close(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_is_closed(GAFlightSQLPreparedStatement *statement); #define GAFLIGHTSQL_TYPE_CLIENT (gaflightsql_client_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLClient, gaflightsql_client, GAFLIGHTSQL, CLIENT, GObject) struct _GAFlightSQLClientClass @@ -87,32 +91,32 @@ struct 
_GAFlightSQLClientClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightSQLClient * gaflightsql_client_new(GAFlightClient *client); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_client_execute(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_client_execute_update(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightStreamReader * gaflightsql_client_do_get(GAFlightSQLClient *client, GAFlightTicket *ticket, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLPreparedStatement * gaflightsql_client_prepare(GAFlightSQLClient *client, const gchar *query, diff --git a/c_glib/arrow-flight-sql-glib/meson.build b/c_glib/arrow-flight-sql-glib/meson.build index e7abc605bb819..6a7c89224b303 100644 --- a/c_glib/arrow-flight-sql-glib/meson.build +++ b/c_glib/arrow-flight-sql-glib/meson.build @@ -34,6 +34,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHTSQL', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-sql-glib') diff --git a/c_glib/arrow-flight-sql-glib/server.h b/c_glib/arrow-flight-sql-glib/server.h index d6fd7e4d10394..8cf0aace77644 100644 --- a/c_glib/arrow-flight-sql-glib/server.h +++ b/c_glib/arrow-flight-sql-glib/server.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GAFLIGHTSQL_TYPE_COMMAND (gaflightsql_command_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLCommand, gaflightsql_command, GAFLIGHTSQL, COMMAND, GObject) struct _GAFlightSQLCommandClass @@ -32,6 +35,7 @@ struct _GAFlightSQLCommandClass }; #define GAFLIGHTSQL_TYPE_STATEMENT_QUERY (gaflightsql_statement_query_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQuery, gaflightsql_statement_query, GAFLIGHTSQL, @@ -42,11 +46,12 @@ struct _GAFlightSQLStatementQueryClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 const gchar * gaflightsql_statement_query_get_query(GAFlightSQLStatementQuery *command); #define GAFLIGHTSQL_TYPE_STATEMENT_UPDATE (gaflightsql_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_13_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementUpdate, gaflightsql_statement_update, GAFLIGHTSQL, @@ -57,12 +62,13 @@ struct _GAFlightSQLStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 const gchar * gaflightsql_statement_update_get_query(GAFlightSQLStatementUpdate *command); #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT_UPDATE \ (gaflightsql_prepared_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatementUpdate, gaflightsql_prepared_statement_update, GAFLIGHTSQL, @@ -73,13 +79,14 @@ struct _GAFlightSQLPreparedStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_prepared_statement_update_get_handle( GAFlightSQLPreparedStatementUpdate *command); #define 
GAFLIGHTSQL_TYPE_STATEMENT_QUERY_TICKET \ (gaflightsql_statement_query_ticket_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQueryTicket, gaflightsql_statement_query_ticket, GAFLIGHTSQL, @@ -90,15 +97,16 @@ struct _GAFlightSQLStatementQueryTicketClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * gaflightsql_statement_query_ticket_generate_handle(const gchar *query, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * gaflightsql_statement_query_ticket_get_handle(GAFlightSQLStatementQueryTicket *command); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_create_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementRequest, gaflightsql_create_prepared_statement_request, GAFLIGHTSQL, @@ -109,18 +117,19 @@ struct _GAFlightSQLCreatePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_query( GAFlightSQLCreatePreparedStatementRequest *request); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_transaction_id( GAFlightSQLCreatePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_RESULT \ (gaflightsql_create_prepared_statement_result_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementResult, gaflightsql_create_prepared_statement_result, GAFLIGHTSQL, @@ -131,36 +140,37 @@ struct _GAFlightSQLCreatePreparedStatementResultClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_create_prepared_statement_result_new(void); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_handle( GAFlightSQLCreatePreparedStatementResult *result, GBytes *handle); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_create_prepared_statement_result_get_handle( GAFlightSQLCreatePreparedStatementResult *result); #define GAFLIGHTSQL_TYPE_CLOSE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_close_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLClosePreparedStatementRequest, gaflightsql_close_prepared_statement_request, GAFLIGHTSQL, @@ -171,12 +181,13 @@ struct _GAFlightSQLClosePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * 
gaflightsql_close_prepared_statement_request_get_handle( GAFlightSQLClosePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_SERVER (gaflightsql_server_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLServer, gaflightsql_server, GAFLIGHTSQL, SERVER, GAFlightServer) /** @@ -231,27 +242,27 @@ struct _GAFlightSQLServerClass GError **error); }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_server_get_flight_info_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQuery *command, GAFlightDescriptor *descriptor, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightDataStream * gaflightsql_server_do_get_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQueryTicket *ticket, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_server_do_put_command_statement_update(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementUpdate *command, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 gint64 gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLServer *server, @@ -259,16 +270,16 @@ gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLPreparedStatementUpdate *command, GAFlightMessageReader *reader, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_server_create_prepared_statement( GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLCreatePreparedStatementRequest *request, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_server_close_prepared_statement( GAFlightSQLServer *server, diff --git a/c_glib/arrow-flight-sql-glib/version.h.in b/c_glib/arrow-flight-sql-glib/version.h.in new file mode 100644 index 0000000000000..3ff707983b307 --- /dev/null +++ b/c_glib/arrow-flight-sql-glib/version.h.in @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include <arrow-glib/version.h> + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-flight-sql-glib/arrow-flight-sql-glib.h + * + * Apache Arrow Flight SQL GLib provides macros that can be used by the C preprocessor. + * They are useful to check version-related things at compile time. + */ + +/** + * GAFLIGHTSQL_VERSION_MAJOR: + * + * The major version. 
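+ * + * For example, together with %GAFLIGHTSQL_VERSION_MINOR and %GAFLIGHTSQL_VERSION_MICRO you can branch on the version at compile time (an illustrative sketch; the guarded call is only a placeholder): + * + * |[<!-- language="C" --> + * #if GAFLIGHTSQL_VERSION_MAJOR >= 17 + * g_print("built against Apache Arrow Flight SQL GLib 17.0.0 or later\n"); + * #endif + * ]|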
+ * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GAFLIGHTSQL_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GAFLIGHTSQL_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GAFLIGHTSQL_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for a snapshot version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_TAG "@VERSION_TAG@" + +/** + * GAFLIGHTSQL_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in the C preprocessor. + * + * Returns: %TRUE if the compile time Apache Arrow Flight SQL GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_CHECK(major, minor, micro) \ + (GAFLIGHTSQL_VERSION_MAJOR > (major) || \ + (GAFLIGHTSQL_VERSION_MAJOR == (major) && \ + GAFLIGHTSQL_VERSION_MINOR > (minor)) || \ + (GAFLIGHTSQL_VERSION_MAJOR == (major) && \ + GAFLIGHTSQL_VERSION_MINOR == (minor) && \ + GAFLIGHTSQL_VERSION_MICRO >= (micro))) + +/** + * GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS +# define GAFLIGHTSQL_DEPRECATED +# define GAFLIGHTSQL_DEPRECATED_FOR(function) +# define GAFLIGHTSQL_UNAVAILABLE(major, minor) +#else +# define GAFLIGHTSQL_DEPRECATED G_DEPRECATED +# define GAFLIGHTSQL_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GAFLIGHTSQL_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GAFLIGHTSQL_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHTSQL_VERSION_0_10. + * + * If you use any functions that are deprecated in + * %GAFLIGHTSQL_VERSION_MIN_REQUIRED or an earlier version, deprecation + * warnings are produced at compile time. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GAFLIGHTSQL_VERSION_MIN_REQUIRED +# define GAFLIGHTSQL_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GAFLIGHTSQL_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHTSQL_VERSION_0_10. + * + * If you use any functions that are defined in a newer version than + * %GAFLIGHTSQL_VERSION_MAX_ALLOWED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header. 
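+ * + * For example, a program that must keep building against the 16.0 API can pin both ends of the allowed range before the first include (an illustrative sketch; any predefined version macro works here): + * + * |[<!-- language="C" --> + * #define GAFLIGHTSQL_VERSION_MIN_REQUIRED GAFLIGHTSQL_VERSION_16_0 + * #define GAFLIGHTSQL_VERSION_MAX_ALLOWED GAFLIGHTSQL_VERSION_16_0 + * #include <arrow-flight-sql-glib/arrow-flight-sql-glib.h> + * ]|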
+ * + * Since: 17.0.0 + */ +#ifndef GAFLIGHTSQL_VERSION_MAX_ALLOWED +# define GAFLIGHTSQL_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 67909ff22c428..11fe8f61a3c2d 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -205,14 +205,12 @@ cpp_internal_headers = files( 'internal-index.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GARROW_VERSION_MAJOR', version_major) -version_h_conf.set('GARROW_VERSION_MINOR', version_minor) -version_h_conf.set('GARROW_VERSION_MICRO', version_micro) -version_h_conf.set('GARROW_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + c_headers += version_h enums = gnome.mkenums('enums', @@ -226,11 +224,9 @@ enums = gnome.mkenums('enums', enums_source = enums[0] enums_header = enums[1] - headers = c_headers + cpp_headers install_headers(headers, subdir: meson.project_name()) - gobject = dependency('gobject-2.0') gobject_libdir = gobject.get_variable(pkgconfig: 'libdir') # This is for Homebrew. "pkg-config --cflags gio-2.0" includes the diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index a83c68a2a16dc..fe90afb223903 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -19,6 +19,8 @@ #pragma once +#include + /** * SECTION: version * @section_id: version-macros @@ -36,7 +38,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MAJOR (@GARROW_VERSION_MAJOR@) +#define GARROW_VERSION_MAJOR (@VERSION_MAJOR@) /** * GARROW_VERSION_MINOR: @@ -45,7 +47,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MINOR (@GARROW_VERSION_MINOR@) +#define GARROW_VERSION_MINOR (@VERSION_MINOR@) /** * GARROW_VERSION_MICRO: @@ -54,7 +56,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MICRO (@GARROW_VERSION_MICRO@) +#define GARROW_VERSION_MICRO (@VERSION_MICRO@) /** * GARROW_VERSION_TAG: @@ -64,7 +66,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_TAG "@GARROW_VERSION_TAG@" +#define GARROW_VERSION_TAG "@VERSION_TAG@" /** * GARROW_VERSION_CHECK: @@ -108,212 +110,7 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif -/** - * GARROW_VERSION_16_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 16.0.0 - */ -#define GARROW_VERSION_16_0 G_ENCODE_VERSION(16, 0) - -/** - * GARROW_VERSION_15_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 15.0.0 - */ -#define GARROW_VERSION_15_0 G_ENCODE_VERSION(15, 0) - -/** - * GARROW_VERSION_14_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 14.0.0 - */ -#define GARROW_VERSION_14_0 G_ENCODE_VERSION(14, 0) - -/** - * GARROW_VERSION_13_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 13.0.0 - */ -#define GARROW_VERSION_13_0 G_ENCODE_VERSION(13, 0) - -/** - * GARROW_VERSION_12_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 12.0.0 - */ -#define GARROW_VERSION_12_0 G_ENCODE_VERSION(12, 0) - -/** - * GARROW_VERSION_11_0: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 11.0.0 - */ -#define GARROW_VERSION_11_0 G_ENCODE_VERSION(11, 0) - -/** - * GARROW_VERSION_10_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 10.0.0 - */ -#define GARROW_VERSION_10_0 G_ENCODE_VERSION(10, 0) - -/** - * GARROW_VERSION_9_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 9.0.0 - */ -#define GARROW_VERSION_9_0 G_ENCODE_VERSION(9, 0) - -/** - * GARROW_VERSION_8_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 8.0.0 - */ -#define GARROW_VERSION_8_0 G_ENCODE_VERSION(8, 0) - -/** - * GARROW_VERSION_7_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 7.0.0 - */ -#define GARROW_VERSION_7_0 G_ENCODE_VERSION(7, 0) - -/** - * GARROW_VERSION_6_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 6.0.0 - */ -#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0) - -/** - * GARROW_VERSION_5_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 5.0.0 - */ -#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0) - -/** - * GARROW_VERSION_4_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 4.0.0 - */ -#define GARROW_VERSION_4_0 G_ENCODE_VERSION(4, 0) - -/** - * GARROW_VERSION_3_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 3.0.0 - */ -#define GARROW_VERSION_3_0 G_ENCODE_VERSION(3, 0) - -/** - * GARROW_VERSION_2_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 2.0.0 - */ -#define GARROW_VERSION_2_0 G_ENCODE_VERSION(2, 0) - -/** - * GARROW_VERSION_1_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 1.0.0 - */ -#define GARROW_VERSION_1_0 G_ENCODE_VERSION(1, 0) - -/** - * GARROW_VERSION_0_17: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.17.0 - */ -#define GARROW_VERSION_0_17 G_ENCODE_VERSION(0, 17) - -/** - * GARROW_VERSION_0_16: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.16.0 - */ -#define GARROW_VERSION_0_16 G_ENCODE_VERSION(0, 16) - -/** - * GARROW_VERSION_0_15: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.15.0 - */ -#define GARROW_VERSION_0_15 G_ENCODE_VERSION(0, 15) - -/** - * GARROW_VERSION_0_14: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.14.0 - */ -#define GARROW_VERSION_0_14 G_ENCODE_VERSION(0, 14) - -/** - * GARROW_VERSION_0_13: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.13.0 - */ -#define GARROW_VERSION_0_13 G_ENCODE_VERSION(0, 13) - -/** - * GARROW_VERSION_0_12: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.12.0 - */ -#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12) - -/** - * GARROW_VERSION_0_10: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 0.10.0 - */ -#define GARROW_VERSION_0_10 G_ENCODE_VERSION(0, 10) +@ENCODED_VERSIONS@ /** * GARROW_VERSION_MIN_REQUIRED: @@ -359,327 +156,4 @@ G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR) #endif - -#define GARROW_AVAILABLE_IN_ALL - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_16_0 -# define GARROW_DEPRECATED_IN_16_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_16_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_16_0 -# define GARROW_DEPRECATED_IN_16_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_16_0 -# define GARROW_AVAILABLE_IN_16_0 GARROW_UNAVAILABLE(16, 0) -#else -# define GARROW_AVAILABLE_IN_16_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_15_0 -# define GARROW_DEPRECATED_IN_15_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_15_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_15_0 -# define GARROW_DEPRECATED_IN_15_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_15_0 -# define GARROW_AVAILABLE_IN_15_0 GARROW_UNAVAILABLE(15, 0) -#else -# define GARROW_AVAILABLE_IN_15_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_14_0 -# define GARROW_DEPRECATED_IN_14_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_14_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_14_0 -# define GARROW_DEPRECATED_IN_14_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_14_0 -# define GARROW_AVAILABLE_IN_14_0 GARROW_UNAVAILABLE(14, 0) -#else -# define GARROW_AVAILABLE_IN_14_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_13_0 -# define GARROW_DEPRECATED_IN_13_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_13_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_13_0 -# define GARROW_DEPRECATED_IN_13_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_13_0 -# define GARROW_AVAILABLE_IN_13_0 GARROW_UNAVAILABLE(13, 0) -#else -# define GARROW_AVAILABLE_IN_13_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_12_0 -# define GARROW_DEPRECATED_IN_12_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_12_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_12_0 -# define GARROW_DEPRECATED_IN_12_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_12_0 -# define GARROW_AVAILABLE_IN_12_0 GARROW_UNAVAILABLE(12, 0) -#else -# define GARROW_AVAILABLE_IN_12_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_11_0 -# define GARROW_DEPRECATED_IN_11_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_11_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_11_0 -# define GARROW_DEPRECATED_IN_11_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_11_0 -# define GARROW_AVAILABLE_IN_11_0 GARROW_UNAVAILABLE(11, 0) -#else -# define GARROW_AVAILABLE_IN_11_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_10_0 -# define GARROW_DEPRECATED_IN_10_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_10_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_10_0 -# define GARROW_DEPRECATED_IN_10_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_10_0 -# define GARROW_AVAILABLE_IN_10_0 GARROW_UNAVAILABLE(10, 0) -#else -# define GARROW_AVAILABLE_IN_10_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_9_0 -# 
define GARROW_DEPRECATED_IN_9_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_9_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_9_0 -# define GARROW_DEPRECATED_IN_9_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_9_0 -# define GARROW_AVAILABLE_IN_9_0 GARROW_UNAVAILABLE(9, 0) -#else -# define GARROW_AVAILABLE_IN_9_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_8_0 -# define GARROW_DEPRECATED_IN_8_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_8_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_8_0 -# define GARROW_DEPRECATED_IN_8_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0 -# define GARROW_AVAILABLE_IN_8_0 GARROW_UNAVAILABLE(8, 0) -#else -# define GARROW_AVAILABLE_IN_8_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_7_0 -# define GARROW_DEPRECATED_IN_7_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_7_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_7_0 -# define GARROW_DEPRECATED_IN_7_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_7_0 -# define GARROW_AVAILABLE_IN_7_0 GARROW_UNAVAILABLE(7, 0) -#else -# define GARROW_AVAILABLE_IN_7_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0 -# define GARROW_DEPRECATED_IN_6_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_6_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_6_0 -# define GARROW_DEPRECATED_IN_6_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0 -# define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0) -#else -# define GARROW_AVAILABLE_IN_6_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0 -# define GARROW_DEPRECATED_IN_5_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_5_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_5_0 -# define GARROW_DEPRECATED_IN_5_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0 -# define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0) -#else -# define GARROW_AVAILABLE_IN_5_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0 -# define GARROW_DEPRECATED_IN_4_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_4_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_4_0 -# define GARROW_DEPRECATED_IN_4_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_4_0 -# define GARROW_AVAILABLE_IN_4_0 GARROW_UNAVAILABLE(4, 0) -#else -# define GARROW_AVAILABLE_IN_4_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_3_0 -# define GARROW_DEPRECATED_IN_3_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_3_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_3_0 -# define GARROW_DEPRECATED_IN_3_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_3_0 -# define GARROW_AVAILABLE_IN_3_0 GARROW_UNAVAILABLE(3, 0) -#else -# define GARROW_AVAILABLE_IN_3_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_2_0 -# define GARROW_DEPRECATED_IN_2_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_2_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_2_0 -# define GARROW_DEPRECATED_IN_2_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_2_0 -# define GARROW_AVAILABLE_IN_2_0 GARROW_UNAVAILABLE(2, 0) -#else -# define 
GARROW_AVAILABLE_IN_2_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_1_0 -# define GARROW_DEPRECATED_IN_1_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_1_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_1_0 -# define GARROW_DEPRECATED_IN_1_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_1_0 -# define GARROW_AVAILABLE_IN_1_0 GARROW_UNAVAILABLE(1, 0) -#else -# define GARROW_AVAILABLE_IN_1_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_17 -# define GARROW_DEPRECATED_IN_0_17 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_17_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_17 -# define GARROW_DEPRECATED_IN_0_17_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_17 -# define GARROW_AVAILABLE_IN_0_17 GARROW_UNAVAILABLE(0, 17) -#else -# define GARROW_AVAILABLE_IN_0_17 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_16 -# define GARROW_DEPRECATED_IN_0_16 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_16_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_16 -# define GARROW_DEPRECATED_IN_0_16_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_16 -# define GARROW_AVAILABLE_IN_0_16 GARROW_UNAVAILABLE(0, 16) -#else -# define GARROW_AVAILABLE_IN_0_16 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_15 -# define GARROW_DEPRECATED_IN_0_15 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_15_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_15 -# define GARROW_DEPRECATED_IN_0_15_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_15 -# define GARROW_AVAILABLE_IN_0_15 GARROW_UNAVAILABLE(0, 15) -#else -# define GARROW_AVAILABLE_IN_0_15 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_14 -# define GARROW_DEPRECATED_IN_0_14 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_14_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_14 -# define GARROW_DEPRECATED_IN_0_14_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_14 -# define GARROW_AVAILABLE_IN_0_14 GARROW_UNAVAILABLE(0, 14) -#else -# define GARROW_AVAILABLE_IN_0_14 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_13 -# define GARROW_DEPRECATED_IN_0_13 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_13_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_13 -# define GARROW_DEPRECATED_IN_0_13_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_13 -# define GARROW_AVAILABLE_IN_0_13 GARROW_UNAVAILABLE(0, 13) -#else -# define GARROW_AVAILABLE_IN_0_13 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12 -# define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_12 -# define GARROW_DEPRECATED_IN_0_12_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12 -# define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12) -#else -# define GARROW_AVAILABLE_IN_0_12 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10 -# define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_10 -# define GARROW_DEPRECATED_IN_0_10_FOR(function) -#endif - 
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_10 -# define GARROW_AVAILABLE_IN_0_10 GARROW_UNAVAILABLE(0, 10) -#else -# define GARROW_AVAILABLE_IN_0_10 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/gandiva-glib/expression.h b/c_glib/gandiva-glib/expression.h index f8f061ceb08fa..bb7eb22ac01dc 100644 --- a/c_glib/gandiva-glib/expression.h +++ b/c_glib/gandiva-glib/expression.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_EXPRESSION (ggandiva_expression_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaExpression, ggandiva_expression, GGANDIVA, EXPRESSION, GObject) @@ -34,12 +35,16 @@ struct _GGandivaExpressionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaExpression * ggandiva_expression_new(GGandivaNode *root_node, GArrowField *result_field); + +GGANDIVA_AVAILABLE_IN_0_12 gchar * ggandiva_expression_to_string(GGandivaExpression *expression); #define GGANDIVA_TYPE_CONDITION (ggandiva_condition_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaCondition, ggandiva_condition, GGANDIVA, CONDITION, GGandivaExpression) diff --git a/c_glib/gandiva-glib/filter.h b/c_glib/gandiva-glib/filter.h index b95981198e0c4..0a2199ccfa106 100644 --- a/c_glib/gandiva-glib/filter.h +++ b/c_glib/gandiva-glib/filter.h @@ -25,6 +25,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FILTER (ggandiva_filter_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFilter, ggandiva_filter, GGANDIVA, FILTER, GObject) struct _GGandivaFilterClass @@ -32,8 +33,11 @@ struct _GGandivaFilterClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_4_0 GGandivaFilter * ggandiva_filter_new(GArrowSchema *schema, GGandivaCondition *condition, GError **error); + +GGANDIVA_AVAILABLE_IN_4_0 gboolean ggandiva_filter_evaluate(GGandivaFilter *filter, GArrowRecordBatch *record_batch, diff --git a/c_glib/gandiva-glib/function-registry.h b/c_glib/gandiva-glib/function-registry.h index ed21e120a2533..e13f4b36d28dc 100644 --- a/c_glib/gandiva-glib/function-registry.h +++ b/c_glib/gandiva-glib/function-registry.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_REGISTRY (ggandiva_function_registry_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionRegistry, ggandiva_function_registry, GGANDIVA, @@ -35,14 +36,20 @@ struct _GGandivaFunctionRegistryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_15_0 +GGANDIVA_AVAILABLE_IN_15_0 GGandivaFunctionRegistry * ggandiva_function_registry_default(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaFunctionRegistry * ggandiva_function_registry_new(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaNativeFunction * ggandiva_function_registry_lookup(GGandivaFunctionRegistry *function_registry, GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_registry_get_native_functions( GGandivaFunctionRegistry *function_registry); diff --git a/c_glib/gandiva-glib/function-signature.h b/c_glib/gandiva-glib/function-signature.h index ef6834ea85723..4fd8cc8a7e761 100644 --- a/c_glib/gandiva-glib/function-signature.h +++ b/c_glib/gandiva-glib/function-signature.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_SIGNATURE (ggandiva_function_signature_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionSignature, ggandiva_function_signature, GGANDIVA, @@ -35,20 +38,31 @@ struct _GGandivaFunctionSignatureClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 
GGandivaFunctionSignature * ggandiva_function_signature_new(const gchar *base_name, GList *parameter_types, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_function_signature_equal(GGandivaFunctionSignature *function_signature, GGandivaFunctionSignature *other_function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_to_string(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GArrowDataType * ggandiva_function_signature_get_return_type( GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_get_base_name(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_signature_get_param_types( GGandivaFunctionSignature *function_signature); diff --git a/c_glib/gandiva-glib/meson.build b/c_glib/gandiva-glib/meson.build index d5cab109dcf89..23f950ddb96dc 100644 --- a/c_glib/gandiva-glib/meson.build +++ b/c_glib/gandiva-glib/meson.build @@ -53,14 +53,12 @@ cpp_headers = files( 'selection-vector.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GGANDIVA_VERSION_MAJOR', version_major) -version_h_conf.set('GGANDIVA_VERSION_MINOR', version_minor) -version_h_conf.set('GGANDIVA_VERSION_MICRO', version_micro) -version_h_conf.set('GGANDIVA_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GGANDIVA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + c_headers += version_h enums = gnome.mkenums('enums', diff --git a/c_glib/gandiva-glib/native-function.h b/c_glib/gandiva-glib/native-function.h index 5ceef396ef40c..934d29ab7e33b 100644 --- a/c_glib/gandiva-glib/native-function.h +++ b/c_glib/gandiva-glib/native-function.h @@ -40,6 +40,7 @@ typedef enum { } GGandivaResultNullableType; #define GGANDIVA_TYPE_NATIVE_FUNCTION (ggandiva_native_function_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE( GGandivaNativeFunction, ggandiva_native_function, GGANDIVA, NATIVE_FUNCTION, GObject) @@ -48,20 +49,33 @@ struct _GGandivaNativeFunctionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_equal(GGandivaNativeFunction *native_function, GGandivaNativeFunction *other_native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_native_function_to_string(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaResultNullableType ggandiva_native_function_get_result_nullable_type( GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_context(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_function_holder(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_can_return_errors(GGandivaNativeFunction *native_function); diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h index 715a3d6ebaf18..49d5a8c43124e 100644 --- a/c_glib/gandiva-glib/node.h +++ b/c_glib/gandiva-glib/node.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_NODE (ggandiva_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 
G_DECLARE_DERIVABLE_TYPE(GGandivaNode, ggandiva_node, GGANDIVA, NODE, GObject) struct _GGandivaNodeClass @@ -33,10 +34,12 @@ struct _GGandivaNodeClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_16 gchar * ggandiva_node_to_string(GGandivaNode *node); #define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFieldNode, ggandiva_field_node, GGANDIVA, FIELD_NODE, GGandivaNode) struct _GGandivaFieldNodeClass @@ -44,10 +47,12 @@ struct _GGandivaFieldNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFieldNode * ggandiva_field_node_new(GArrowField *field); #define GGANDIVA_TYPE_FUNCTION_NODE (ggandiva_function_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFunctionNode, ggandiva_function_node, GGANDIVA, FUNCTION_NODE, GGandivaNode) struct _GGandivaFunctionNodeClass @@ -55,14 +60,18 @@ struct _GGandivaFunctionNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFunctionNode * ggandiva_function_node_new(const gchar *name, GList *parameters, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_function_node_get_parameters(GGandivaFunctionNode *node); #define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaLiteralNode, ggandiva_literal_node, GGANDIVA, LITERAL_NODE, GGandivaNode) struct _GGandivaLiteralNodeClass @@ -71,6 +80,7 @@ struct _GGandivaLiteralNodeClass }; #define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode, ggandiva_null_literal_node, GGANDIVA, @@ -81,10 +91,12 @@ struct _GGandivaNullLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaNullLiteralNode * ggandiva_null_literal_node_new(GArrowDataType *return_type, GError **error); #define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode, ggandiva_boolean_literal_node, GGANDIVA, @@ -95,12 +107,16 @@ struct _GGandivaBooleanLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBooleanLiteralNode * ggandiva_boolean_literal_node_new(gboolean value); + +GGANDIVA_AVAILABLE_IN_0_12 gboolean ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); #define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, ggandiva_int8_literal_node, GGANDIVA, @@ -111,12 +127,16 @@ struct _GGandivaInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt8LiteralNode * ggandiva_int8_literal_node_new(gint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint8 ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); #define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, ggandiva_uint8_literal_node, GGANDIVA, @@ -127,12 +147,16 @@ struct _GGandivaUInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt8LiteralNode * ggandiva_uint8_literal_node_new(guint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint8 ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); #define 
GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, ggandiva_int16_literal_node, GGANDIVA, @@ -143,12 +167,16 @@ struct _GGandivaInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt16LiteralNode * ggandiva_int16_literal_node_new(gint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint16 ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); #define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, ggandiva_uint16_literal_node, GGANDIVA, @@ -159,12 +187,16 @@ struct _GGandivaUInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt16LiteralNode * ggandiva_uint16_literal_node_new(guint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint16 ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); #define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, ggandiva_int32_literal_node, GGANDIVA, @@ -175,12 +207,16 @@ struct _GGandivaInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt32LiteralNode * ggandiva_int32_literal_node_new(gint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint32 ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); #define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, ggandiva_uint32_literal_node, GGANDIVA, @@ -191,12 +227,16 @@ struct _GGandivaUInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt32LiteralNode * ggandiva_uint32_literal_node_new(guint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint32 ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); #define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, ggandiva_int64_literal_node, GGANDIVA, @@ -207,12 +247,16 @@ struct _GGandivaInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt64LiteralNode * ggandiva_int64_literal_node_new(gint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint64 ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); #define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, ggandiva_uint64_literal_node, GGANDIVA, @@ -223,12 +267,16 @@ struct _GGandivaUInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt64LiteralNode * ggandiva_uint64_literal_node_new(guint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint64 ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); #define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, ggandiva_float_literal_node, GGANDIVA, @@ -239,12 +287,16 @@ struct _GGandivaFloatLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFloatLiteralNode * ggandiva_float_literal_node_new(gfloat value); + +GGANDIVA_AVAILABLE_IN_0_12 
gfloat ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); #define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, ggandiva_double_literal_node, GGANDIVA, @@ -255,12 +307,16 @@ struct _GGandivaDoubleLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaDoubleLiteralNode * ggandiva_double_literal_node_new(gdouble value); + +GGANDIVA_AVAILABLE_IN_0_12 gdouble ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); #define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, ggandiva_binary_literal_node, GGANDIVA, @@ -271,14 +327,20 @@ struct _GGandivaBinaryLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new(const guint8 *value, gsize size); + +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new_bytes(GBytes *value); + +GGANDIVA_AVAILABLE_IN_0_12 GBytes * ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); #define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, ggandiva_string_literal_node, GGANDIVA, @@ -289,12 +351,16 @@ struct _GGandivaStringLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaStringLiteralNode * ggandiva_string_literal_node_new(const gchar *value); + +GGANDIVA_AVAILABLE_IN_0_12 const gchar * ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); #define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaIfNode, ggandiva_if_node, GGANDIVA, IF_NODE, GGandivaNode) struct _GGandivaIfNodeClass @@ -302,6 +368,7 @@ struct _GGandivaIfNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaIfNode * ggandiva_if_node_new(GGandivaNode *condition_node, GGandivaNode *then_node, @@ -310,6 +377,7 @@ ggandiva_if_node_new(GGandivaNode *condition_node, GError **error); #define GGANDIVA_TYPE_BOOLEAN_NODE (ggandiva_boolean_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaBooleanNode, ggandiva_boolean_node, GGANDIVA, BOOLEAN_NODE, GGandivaNode) @@ -323,6 +391,7 @@ GList * ggandiva_boolean_node_get_children(GGandivaBooleanNode *node); #define GGANDIVA_TYPE_AND_NODE (ggandiva_and_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaAndNode, ggandiva_and_node, GGANDIVA, AND_NODE, GGandivaBooleanNode) struct _GGandivaAndNodeClass @@ -335,6 +404,7 @@ GGandivaAndNode * ggandiva_and_node_new(GList *children); #define GGANDIVA_TYPE_OR_NODE (ggandiva_or_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaOrNode, ggandiva_or_node, GGANDIVA, OR_NODE, GGandivaBooleanNode) struct _GGandivaOrNodeClass diff --git a/c_glib/gandiva-glib/projector.h b/c_glib/gandiva-glib/projector.h index e0afec5cb1ba1..5fbf9c290beab 100644 --- a/c_glib/gandiva-glib/projector.h +++ b/c_glib/gandiva-glib/projector.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_PROJECTOR (ggandiva_projector_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaProjector, ggandiva_projector, GGANDIVA, PROJECTOR, GObject) @@ -32,14 +33,18 @@ 
struct _GGandivaProjectorClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaProjector * ggandiva_projector_new(GArrowSchema *schema, GList *expressions, GError **error); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_projector_evaluate(GGandivaProjector *projector, GArrowRecordBatch *record_batch, GError **error); #define GGANDIVA_TYPE_SELECTABLE_PROJECTOR (ggandiva_selectable_projector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaSelectableProjector, ggandiva_selectable_projector, GGANDIVA, diff --git a/c_glib/gandiva-glib/selection-vector.h b/c_glib/gandiva-glib/selection-vector.h index 6d78192e35e28..558b9b950cf84 100644 --- a/c_glib/gandiva-glib/selection-vector.h +++ b/c_glib/gandiva-glib/selection-vector.h @@ -47,6 +47,7 @@ typedef enum { } GGandivaSelectionVectorMode; #define GGANDIVA_TYPE_SELECTION_VECTOR (ggandiva_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaSelectionVector, ggandiva_selection_vector, GGANDIVA, SELECTION_VECTOR, GObject) @@ -65,6 +66,7 @@ ggandiva_selection_vector_to_array(GGandivaSelectionVector *selection_vector); #define GGANDIVA_TYPE_UINT16_SELECTION_VECTOR \ (ggandiva_uint16_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16SelectionVector, ggandiva_uint16_selection_vector, GGANDIVA, @@ -82,6 +84,7 @@ ggandiva_uint16_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT32_SELECTION_VECTOR \ (ggandiva_uint32_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32SelectionVector, ggandiva_uint32_selection_vector, GGANDIVA, @@ -99,6 +102,7 @@ ggandiva_uint32_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT64_SELECTION_VECTOR \ (ggandiva_uint64_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64SelectionVector, ggandiva_uint64_selection_vector, GGANDIVA, diff --git a/c_glib/gandiva-glib/version.h.in b/c_glib/gandiva-glib/version.h.in index 3c9e87c9d52e1..8c7ebe0ba6c2e 100644 --- a/c_glib/gandiva-glib/version.h.in +++ b/c_glib/gandiva-glib/version.h.in @@ -38,7 +38,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MAJOR (@GGANDIVA_VERSION_MAJOR@) +#define GGANDIVA_VERSION_MAJOR (@VERSION_MAJOR@) /** * GGANDIVA_VERSION_MINOR: @@ -47,7 +47,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MINOR (@GGANDIVA_VERSION_MINOR@) +#define GGANDIVA_VERSION_MINOR (@VERSION_MINOR@) /** * GGANDIVA_VERSION_MICRO: @@ -56,7 +56,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MICRO (@GGANDIVA_VERSION_MICRO@) +#define GGANDIVA_VERSION_MICRO (@VERSION_MICRO@) /** * GGANDIVA_VERSION_TAG: @@ -66,7 +66,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_TAG "@GGANDIVA_VERSION_TAG@" +#define GGANDIVA_VERSION_TAG "@VERSION_TAG@" /** * GGANDIVA_VERSION_CHECK: @@ -110,23 +110,7 @@ # define GGANDIVA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif -/** - * GGANDIVA_VERSION_1_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 1.0.0 - */ -#define GGANDIVA_VERSION_1_0 G_ENCODE_VERSION(1, 0) - -/** - * GGANDIVA_VERSION_4_0: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 4.0.0 - */ -#define GGANDIVA_VERSION_4_0 G_ENCODE_VERSION(4, 0) +@ENCODED_VERSIONS@ /** * GGANDIVA_VERSION_MIN_REQUIRED: @@ -172,47 +156,4 @@ G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR) #endif - -#define GGANDIVA_AVAILABLE_IN_ALL - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_4_0 -# define GGANDIVA_AVAILABLE_IN_4_0 GGANDIVA_UNAVAILABLE(4, 0) -#else -# define GGANDIVA_AVAILABLE_IN_4_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_1_0 -# define GGANDIVA_AVAILABLE_IN_1_0 GGANDIVA_UNAVAILABLE(1, 0) -#else -# define GGANDIVA_AVAILABLE_IN_1_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_0_17 -# define GGANDIVA_AVAILABLE_IN_0_17 GGANDIVA_UNAVAILABLE(0, 17) -#else -# define GGANDIVA_AVAILABLE_IN_0_17 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/meson.build b/c_glib/meson.build index 08a9cd182e02e..06aa5b941e77c 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -171,6 +171,10 @@ if cxx.get_id() != 'msvc' endif add_project_arguments(cxx.get_supported_arguments(cxx_flags), language: 'cpp') +python = import('python') +python3 = python.find_installation('python3') +generate_version_header_py = project_source_root / 'tool' / 'generate-version-header.py' + subdir('arrow-glib') if arrow_cuda.found() subdir('arrow-cuda-glib') diff --git a/c_glib/parquet-glib/arrow-file-reader.h b/c_glib/parquet-glib/arrow-file-reader.h index 63c14ac71da86..52d7293bad0fa 100644 --- a/c_glib/parquet-glib/arrow-file-reader.h +++ b/c_glib/parquet-glib/arrow-file-reader.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_ARROW_FILE_READER (gparquet_arrow_file_reader_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileReader, gparquet_arrow_file_reader, GPARQUET, @@ -34,15 +35,19 @@ struct _GParquetArrowFileReaderClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_path(const gchar *path, GError **error); +GPARQUET_AVAILABLE_IN_0_11 GArrowTable * gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, GError **error); -GARROW_AVAILABLE_IN_1_0 +GPARQUET_AVAILABLE_IN_1_0 GArrowTable * gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, gint row_group_index, @@ -50,26 +55,30 @@ gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, gsize n_column_indices, GError **error); +GPARQUET_AVAILABLE_IN_0_12 GArrowSchema * 
gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, GError **error); +GPARQUET_AVAILABLE_IN_0_15 GArrowChunkedArray * gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, gint i, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gint gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader); -GARROW_AVAILABLE_IN_6_0 +GPARQUET_AVAILABLE_IN_6_0 gint64 gparquet_arrow_file_reader_get_n_rows(GParquetArrowFileReader *reader); +GPARQUET_AVAILABLE_IN_0_11 void gparquet_arrow_file_reader_set_use_threads(GParquetArrowFileReader *reader, gboolean use_threads); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetFileMetadata * gparquet_arrow_file_reader_get_metadata(GParquetArrowFileReader *reader); diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 592ea4ae3f1ba..71cbfa195e842 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -20,10 +20,12 @@ #pragma once #include +#include G_BEGIN_DECLS #define GPARQUET_TYPE_WRITER_PROPERTIES (gparquet_writer_properties_get_type()) +GPARQUET_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GParquetWriterProperties, gparquet_writer_properties, GPARQUET, @@ -34,61 +36,62 @@ struct _GParquetWriterPropertiesClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GParquetWriterProperties * gparquet_writer_properties_new(void); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_compression(GParquetWriterProperties *properties, GArrowCompressionType compression_type, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GArrowCompressionType gparquet_writer_properties_get_compression_path(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_enable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_disable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gboolean gparquet_writer_properties_is_dictionary_enabled(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_dictionary_page_size_limit( GParquetWriterProperties *properties, gint64 limit); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_dictionary_page_size_limit( GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_batch_size(GParquetWriterProperties *properties, gint64 batch_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_batch_size(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_max_row_group_length(GParquetWriterProperties *properties, gint64 length); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_max_row_group_length(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_data_page_size(GParquetWriterProperties *properties, gint64 data_page_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 
gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properties); #define GPARQUET_TYPE_ARROW_FILE_WRITER (gparquet_arrow_file_writer_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileWriter, gparquet_arrow_file_writer, GPARQUET, @@ -99,23 +102,28 @@ struct _GParquetArrowFileWriterClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, GArrowOutputStream *sink, GParquetWriterProperties *writer_properties, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_path(GArrowSchema *schema, const gchar *path, GParquetWriterProperties *writer_properties, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, guint64 chunk_size, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error); diff --git a/c_glib/parquet-glib/meson.build b/c_glib/parquet-glib/meson.build index 67de0bf2d91fb..22fbbbbae64ff 100644 --- a/c_glib/parquet-glib/meson.build +++ b/c_glib/parquet-glib/meson.build @@ -42,10 +42,17 @@ cpp_headers = files( 'parquet-glib.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GPARQUET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) - dependencies = [ arrow, parquet, diff --git a/c_glib/parquet-glib/metadata.h b/c_glib/parquet-glib/metadata.h index 1c9fce7cc778d..d79bf009751ca 100644 --- a/c_glib/parquet-glib/metadata.h +++ b/c_glib/parquet-glib/metadata.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_COLUMN_CHUNK_METADATA (gparquet_column_chunk_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetColumnChunkMetadata, gparquet_column_chunk_metadata, GPARQUET, @@ -34,28 +35,29 @@ struct _GParquetColumnChunkMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_equal(GParquetColumnChunkMetadata *metadata, GParquetColumnChunkMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_size(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_compressed_size( GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_file_offset(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_can_decompress(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetStatistics * gparquet_column_chunk_metadata_get_statistics(GParquetColumnChunkMetadata *metadata); #define GPARQUET_TYPE_ROW_GROUP_METADATA (gparquet_row_group_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetRowGroupMetadata, gparquet_row_group_metadata, GPARQUET, @@ -66,35 +68,36 @@ struct _GParquetRowGroupMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_equal(GParquetRowGroupMetadata 
*metadata, GParquetRowGroupMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_row_group_metadata_get_n_columns(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetColumnChunkMetadata * gparquet_row_group_metadata_get_column_chunk(GParquetRowGroupMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_n_rows(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_compressed_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_file_offset(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_can_decompress(GParquetRowGroupMetadata *metadata); #define GPARQUET_TYPE_FILE_METADATA (gparquet_file_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetFileMetadata, gparquet_file_metadata, GPARQUET, FILE_METADATA, GObject) struct _GParquetFileMetadataClass @@ -102,34 +105,34 @@ struct _GParquetFileMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_equal(GParquetFileMetadata *metadata, GParquetFileMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_columns(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_schema_elements(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_file_metadata_get_n_rows(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_row_groups(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetRowGroupMetadata * gparquet_file_metadata_get_row_group(GParquetFileMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 const gchar * gparquet_file_metadata_get_created_by(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 guint32 gparquet_file_metadata_get_size(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_can_decompress(GParquetFileMetadata *metadata); diff --git a/c_glib/parquet-glib/parquet-glib.h b/c_glib/parquet-glib/parquet-glib.h index 23659421ce3d8..308adb87a7ed2 100644 --- a/c_glib/parquet-glib/parquet-glib.h +++ b/c_glib/parquet-glib/parquet-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/parquet-glib/statistics.h b/c_glib/parquet-glib/statistics.h index f28e2a3713638..25e02df8774b2 100644 --- a/c_glib/parquet-glib/statistics.h +++ b/c_glib/parquet-glib/statistics.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GPARQUET_TYPE_STATISTICS (gparquet_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetStatistics, gparquet_statistics, GPARQUET, STATISTICS, GObject) struct _GParquetStatisticsClass @@ -31,30 +34,31 @@ struct _GParquetStatisticsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 
gboolean gparquet_statistics_equal(GParquetStatistics *statistics, GParquetStatistics *other_statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_min_max(GParquetStatistics *statistics); #define GPARQUET_TYPE_BOOLEAN_STATISTICS (gparquet_boolean_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetBooleanStatistics, gparquet_boolean_statistics, GPARQUET, @@ -65,14 +69,15 @@ struct _GParquetBooleanStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_min(GParquetBooleanStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_max(GParquetBooleanStatistics *statistics); #define GPARQUET_TYPE_INT32_STATISTICS (gparquet_int32_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt32Statistics, gparquet_int32_statistics, GPARQUET, @@ -83,14 +88,15 @@ struct _GParquetInt32StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_min(GParquetInt32Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_max(GParquetInt32Statistics *statistics); #define GPARQUET_TYPE_INT64_STATISTICS (gparquet_int64_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt64Statistics, gparquet_int64_statistics, GPARQUET, @@ -101,14 +107,15 @@ struct _GParquetInt64StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_min(GParquetInt64Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_max(GParquetInt64Statistics *statistics); #define GPARQUET_TYPE_FLOAT_STATISTICS (gparquet_float_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFloatStatistics, gparquet_float_statistics, GPARQUET, @@ -119,14 +126,15 @@ struct _GParquetFloatStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat gparquet_float_statistics_get_min(GParquetFloatStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat gparquet_float_statistics_get_max(GParquetFloatStatistics *statistics); #define GPARQUET_TYPE_DOUBLE_STATISTICS (gparquet_double_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetDoubleStatistics, gparquet_double_statistics, GPARQUET, @@ -137,14 +145,15 @@ struct _GParquetDoubleStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_min(GParquetDoubleStatistics *statistics); 
-GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_max(GParquetDoubleStatistics *statistics); #define GPARQUET_TYPE_BYTE_ARRAY_STATISTICS (gparquet_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetByteArrayStatistics, gparquet_byte_array_statistics, GPARQUET, @@ -155,15 +164,16 @@ struct _GParquetByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_min(GParquetByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_max(GParquetByteArrayStatistics *statistics); #define GPARQUET_TYPE_FIXED_LENGTH_BYTE_ARRAY_STATISTICS \ (gparquet_fixed_length_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFixedLengthByteArrayStatistics, gparquet_fixed_length_byte_array_statistics, GPARQUET, @@ -174,11 +184,11 @@ struct _GParquetFixedLengthByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_min( GParquetFixedLengthByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_max( GParquetFixedLengthByteArrayStatistics *statistics); diff --git a/c_glib/parquet-glib/version.h.in b/c_glib/parquet-glib/version.h.in new file mode 100644 index 0000000000000..4baef99c0eec9 --- /dev/null +++ b/c_glib/parquet-glib/version.h.in @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: parquet-glib/parquet-glib.h + * + * Parquet GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GPARQUET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GPARQUET_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GPARQUET_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GPARQUET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_TAG "@VERSION_TAG@" + +/** + * GPARQUET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. 
+ * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_CHECK(major, minor, micro) \ + (GPARQUET_VERSION_MAJOR > (major) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR > (minor)) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR == (minor) && \ + GPARQUET_VERSION_MICRO >= (micro))) + +/** + * GPARQUET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GPARQUET_DISABLE_DEPRECATION_WARNINGS +# define GPARQUET_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) +#else +# define GPARQUET_DEPRECATED G_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GPARQUET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GPARQUET_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MIN_REQUIRED +# define GPARQUET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GPARQUET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GPARQUET_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MAX_ALLOWED +# define GPARQUET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@AVAILABILITY_MACROS@ diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py new file mode 100755 index 0000000000000..bfc9979251416 --- /dev/null +++ b/c_glib/tool/generate-version-header.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
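+# This tool expands a version.h.in template: it replaces the @VERSION_MAJOR@,
+# @VERSION_MINOR@, @VERSION_MICRO@ and @VERSION_TAG@ placeholders with the
+# parsed --version value, and fills @ENCODED_VERSIONS@ and
+# @AVAILABILITY_MACROS@ with per-release macros (prefixed by --library,
+# e.g. GPARQUET) generated from the ALL_VERSIONS list below.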
+ + +import argparse +from io import TextIOBase +from pathlib import Path +import re + + +def main(): + parser = argparse.ArgumentParser( + description="Generate C header with version macros") + parser.add_argument( + "--library", + required=True, + help="The library name to use in macro prefixes") + parser.add_argument( + "--version", + required=True, + help="The library version number") + parser.add_argument( + "--input", + type=Path, + required=True, + help="Path to the input template file") + parser.add_argument( + "--output", + type=Path, + required=True, + help="Path to the output file to generate") + + args = parser.parse_args() + + with open(args.input, "r", encoding="utf-8") as input_file, \ + open(args.output, "w", encoding="utf-8") as output_file: + write_header( + input_file, output_file, args.library, args.version) + + +def write_header( + input_file: TextIOBase, + output_file: TextIOBase, + library_name: str, + version: str): + if "-" in version: + version, version_tag = version.split("-") + else: + version_tag = "" + version_major, version_minor, version_micro = [int(v) for v in version.split(".")] + + encoded_versions = generate_encoded_versions(library_name) + availability_macros = generate_availability_macros(library_name) + + replacements = { + "VERSION_MAJOR": str(version_major), + "VERSION_MINOR": str(version_minor), + "VERSION_MICRO": str(version_micro), + "VERSION_TAG": version_tag, + "ENCODED_VERSIONS": encoded_versions, + "AVAILABILITY_MACROS": availability_macros, + } + + output_file.write(re.sub( + r"@([A-Z_]+)@", lambda match: replacements[match[1]], input_file.read())) + + +def generate_encoded_versions(library: str) -> str: + macros = [] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""/** + * {library}_VERSION_{major_version}_{minor_version}: + * + * You can use this macro value for compile time API version check. 
+ * + * Since: {major_version}.{minor_version}.0 + */ +#define {library}_VERSION_{major_version}_{minor_version} G_ENCODE_VERSION({major_version}, {minor_version})""") # noqa: E501 + + return "\n\n".join(macros) + + +def generate_availability_macros(library: str) -> str: + macros = [f"""#define {library}_AVAILABLE_IN_ALL"""] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""#if {library}_VERSION_MIN_REQUIRED >= {library}_VERSION_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} {library}_DEPRECATED +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) {library}_DEPRECATED_FOR(function) +#else +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) +#endif + +#if {library}_VERSION_MAX_ALLOWED < {library}_VERSION_{major_version}_{minor_version} +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_UNAVAILABLE({major_version}, {minor_version}) +#else +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} +#endif""") # noqa: E501 + + return "\n\n".join(macros) + + +ALL_VERSIONS = [ + (17, 0), + (16, 0), + (15, 0), + (14, 0), + (13, 0), + (12, 0), + (11, 0), + (10, 0), + (9, 0), + (8, 0), + (7, 0), + (6, 0), + (5, 0), + (4, 0), + (3, 0), + (2, 0), + (1, 0), + (0, 17), + (0, 16), + (0, 15), + (0, 14), + (0, 13), + (0, 12), + (0, 11), + (0, 10), +] + + +if __name__ == '__main__': + main() diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 78d9320bfb312..df8ea408f8ea7 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -197,6 +197,12 @@ def test_version_post_tag ] if release_type == :major expected_changes += [ + { + path: "c_glib/tool/generate-version-header.py", + hunks: [ + ["+ (#{@next_major_version}, 0),"], + ], + }, { path: "docs/source/index.rst", hunks: [ diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 51367087228a4..c3a0b33e5a2a0 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -40,6 +40,16 @@ update_versions() { meson.build rm -f meson.build.bak git add meson.build + + # Add a new version entry only when the next release is a new major release + if [ "${type}" = "snapshot" -a \ + "${next_version}" = "${major_version}.0.0" ]; then + sed -i.bak -E -e \ + "s/^ALL_VERSIONS = \[$/&\\n (${major_version}, 0),/" \ + tool/generate-version-header.py + rm -f tool/generate-version-header.py.bak + git add tool/generate-version-header.py + fi popd pushd "${ARROW_DIR}/ci/scripts" From 065a6da8520bd65fb4f59b2e3e496fe1124ac685 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 22 May 2024 10:37:52 +0200 Subject: [PATCH 144/261] GH-41748: [Python][Parquet] Update BYTE_STREAM_SPLIT description in write_table() docstring (#41759) ### Rationale for this change In PR #40094 (issue GH-39978), we forgot to update the `write_table` docstring with an accurate description of the supported data types for BYTE_STREAM_SPLIT. ### Are these changes tested? No (only a doc change). ### Are there any user-facing changes? No. 
* GitHub Issue: #41748 Authored-by: Antoine Pitrou Signed-off-by: Joris Van den Bossche --- python/pyarrow/parquet/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index f54a203c8794c..81798b1544474 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -797,8 +797,9 @@ def _sanitize_table(table, new_schema, flavor): Specify if the byte_stream_split encoding should be used in general or only for some columns. If both dictionary and byte_stream_stream are enabled, then dictionary is preferred. - The byte_stream_split encoding is valid only for floating-point data types - and should be combined with a compression codec. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. column_encoding : string or dict, default None Specify the encoding scheme on a per column basis. Can only be used when ``use_dictionary`` is set to False, and From f3d46398d3c81d9575ffd77ce3b86d4b993a4888 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 22 May 2024 15:06:04 +0200 Subject: [PATCH 145/261] GH-41760: [C++][Parquet] Add file metadata read/write benchmark (#41761) Following the discussions on the Parquet ML (see [this thread](https://lists.apache.org/thread/5jyhzkwyrjk9z52g0b49g31ygnz73gxo) and [this thread](https://lists.apache.org/thread/vs3w2z5bk6s3c975rrkqdttr1dpsdn7h)), and the various complaints about poor Parquet metadata performance on wide schemas, this adds a benchmark to measure the overhead of Parquet file metadata parsing or serialization for different numbers of row groups and columns. Sample output: ``` ----------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations UserCounters... 
----------------------------------------------------------------------------------------------------------------------- WriteFileMetadataAndData/num_columns:1/num_row_groups:1 11743 ns 11741 ns 59930 data_size=54 file_size=290 items_per_second=85.1726k/s WriteFileMetadataAndData/num_columns:1/num_row_groups:100 843137 ns 842920 ns 832 data_size=5.4k file_size=20.486k items_per_second=1.18635k/s WriteFileMetadataAndData/num_columns:1/num_row_groups:1000 8232304 ns 8230294 ns 85 data_size=54k file_size=207.687k items_per_second=121.502/s WriteFileMetadataAndData/num_columns:10/num_row_groups:1 101214 ns 101190 ns 6910 data_size=540 file_size=2.11k items_per_second=9.8824k/s WriteFileMetadataAndData/num_columns:10/num_row_groups:100 8026185 ns 8024361 ns 87 data_size=54k file_size=193.673k items_per_second=124.621/s WriteFileMetadataAndData/num_columns:10/num_row_groups:1000 81370293 ns 81343455 ns 8 data_size=540k file_size=1.94392M items_per_second=12.2936/s WriteFileMetadataAndData/num_columns:100/num_row_groups:1 955862 ns 955528 ns 733 data_size=5.4k file_size=20.694k items_per_second=1.04654k/s WriteFileMetadataAndData/num_columns:100/num_row_groups:100 80115516 ns 80086117 ns 9 data_size=540k file_size=1.94729M items_per_second=12.4866/s WriteFileMetadataAndData/num_columns:100/num_row_groups:1000 856428565 ns 856065370 ns 1 data_size=5.4M file_size=19.7673M items_per_second=1.16814/s WriteFileMetadataAndData/num_columns:1000/num_row_groups:1 9330003 ns 9327439 ns 75 data_size=54k file_size=211.499k items_per_second=107.211/s WriteFileMetadataAndData/num_columns:1000/num_row_groups:100 834609159 ns 834354590 ns 1 data_size=5.4M file_size=19.9623M items_per_second=1.19853/s ReadFileMetadata/num_columns:1/num_row_groups:1 3824 ns 3824 ns 182381 data_size=54 file_size=290 items_per_second=261.518k/s ReadFileMetadata/num_columns:1/num_row_groups:100 88519 ns 88504 ns 7879 data_size=5.4k file_size=20.486k items_per_second=11.299k/s ReadFileMetadata/num_columns:1/num_row_groups:1000 849558 ns 849391 ns 825 data_size=54k file_size=207.687k items_per_second=1.17731k/s ReadFileMetadata/num_columns:10/num_row_groups:1 19918 ns 19915 ns 35449 data_size=540 file_size=2.11k items_per_second=50.2138k/s ReadFileMetadata/num_columns:10/num_row_groups:100 715822 ns 715667 ns 975 data_size=54k file_size=193.673k items_per_second=1.3973k/s ReadFileMetadata/num_columns:10/num_row_groups:1000 7017008 ns 7015432 ns 100 data_size=540k file_size=1.94392M items_per_second=142.543/s ReadFileMetadata/num_columns:100/num_row_groups:1 175988 ns 175944 ns 3958 data_size=5.4k file_size=20.694k items_per_second=5.68363k/s ReadFileMetadata/num_columns:100/num_row_groups:100 6814382 ns 6812781 ns 103 data_size=540k file_size=1.94729M items_per_second=146.783/s ReadFileMetadata/num_columns:100/num_row_groups:1000 77858645 ns 77822157 ns 9 data_size=5.4M file_size=19.7673M items_per_second=12.8498/s ReadFileMetadata/num_columns:1000/num_row_groups:1 1670001 ns 1669563 ns 419 data_size=54k file_size=211.499k items_per_second=598.959/s ReadFileMetadata/num_columns:1000/num_row_groups:100 77339599 ns 77292924 ns 9 data_size=5.4M file_size=19.9623M items_per_second=12.9378/s ``` * GitHub Issue: #41760 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/parquet/CMakeLists.txt | 1 + cpp/src/parquet/metadata_benchmark.cc | 156 ++++++++++++++++++++++++++ 2 files changed, 157 insertions(+) create mode 100644 cpp/src/parquet/metadata_benchmark.cc diff --git a/cpp/src/parquet/CMakeLists.txt 
b/cpp/src/parquet/CMakeLists.txt index 93f2e72d8d661..5ac5085a694c8 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -432,6 +432,7 @@ add_parquet_benchmark(column_reader_benchmark) add_parquet_benchmark(column_io_benchmark) add_parquet_benchmark(encoding_benchmark) add_parquet_benchmark(level_conversion_benchmark) +add_parquet_benchmark(metadata_benchmark) add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") diff --git a/cpp/src/parquet/metadata_benchmark.cc b/cpp/src/parquet/metadata_benchmark.cc new file mode 100644 index 0000000000000..97a99be798cbb --- /dev/null +++ b/cpp/src/parquet/metadata_benchmark.cc @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include + +#include + +#include "arrow/buffer.h" +#include "arrow/io/memory.h" +#include "arrow/util/logging.h" + +#include "parquet/column_writer.h" +#include "parquet/file_reader.h" +#include "parquet/file_writer.h" +#include "parquet/metadata.h" +#include "parquet/platform.h" +#include "parquet/schema.h" + +namespace parquet { + +using ::arrow::Buffer; +using ::arrow::io::BufferOutputStream; +using ::arrow::io::BufferReader; +using schema::GroupNode; +using schema::NodePtr; +using schema::NodeVector; + +class MetadataBenchmark { + public: + explicit MetadataBenchmark(benchmark::State* state) + : MetadataBenchmark(static_cast(state->range(0)), + static_cast(state->range(1))) {} + + MetadataBenchmark(int num_columns, int num_row_groups) + : num_columns_(num_columns), num_row_groups_(num_row_groups) { + NodeVector fields; + for (int i = 0; i < num_columns_; ++i) { + std::stringstream ss; + ss << "col" << i; + fields.push_back(parquet::schema::Int32(ss.str(), Repetition::REQUIRED)); + } + schema_root_ = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + WriterProperties::Builder prop_builder; + writer_properties_ = prop_builder.version(ParquetVersion::PARQUET_2_6) + ->disable_dictionary() + ->data_page_version(ParquetDataPageVersion::V2) + ->build(); + } + + std::shared_ptr WriteFile(benchmark::State* state) { + PARQUET_ASSIGN_OR_THROW(auto sink, BufferOutputStream::Create()); + + auto writer = ParquetFileWriter::Open(sink, schema_root_, writer_properties_); + std::vector int32_values(1, 42); + int64_t data_size = 0; + for (int rg = 0; rg < num_row_groups_; ++rg) { + auto row_group_writer = writer->AppendRowGroup(); + for (int col = 0; col < num_columns_; ++col) { + auto col_writer = row_group_writer->NextColumn(); + ARROW_CHECK_EQ(col_writer->type(), Type::INT32); + auto typed_col_writer = static_cast(col_writer); + typed_col_writer->WriteBatch( + 
/*num_values=*/static_cast<int64_t>(int32_values.size()),
+          /*def_levels=*/nullptr, /*rep_levels=*/nullptr, int32_values.data());
+      typed_col_writer->Close();
+    }
+    row_group_writer->Close();
+    data_size += row_group_writer->total_compressed_bytes_written();
+  }
+  writer->Close();
+  PARQUET_ASSIGN_OR_THROW(auto buf, sink->Finish());
+  state->counters["file_size"] = static_cast<double>(buf->size());
+  // Note that "data_size" includes the Thrift page headers
+  state->counters["data_size"] = static_cast<double>(data_size);
+  return buf;
+  }
+
+  void ReadFile(std::shared_ptr<Buffer> contents) {
+    auto source = std::make_shared<BufferReader>(contents);
+    ReaderProperties props;
+    auto reader = ParquetFileReader::Open(source, props);
+    auto metadata = reader->metadata();
+    ARROW_CHECK_EQ(metadata->num_columns(), num_columns_);
+    ARROW_CHECK_EQ(metadata->num_row_groups(), num_row_groups_);
+    // There should be one row per row group
+    ARROW_CHECK_EQ(metadata->num_rows(), num_row_groups_);
+    reader->Close();
+  }
+
+ private:
+  int num_columns_;
+  int num_row_groups_;
+  std::shared_ptr<GroupNode> schema_root_;
+  std::shared_ptr<WriterProperties> writer_properties_;
+};
+
+void WriteMetadataSetArgs(benchmark::internal::Benchmark* bench) {
+  bench->ArgNames({"num_columns", "num_row_groups"});
+
+  for (int num_columns : {1, 10, 100}) {
+    for (int num_row_groups : {1, 100, 1000}) {
+      bench->Args({num_columns, num_row_groups});
+    }
+  }
+  /* For larger num_columns, restrict num_row_groups to small values
+   * to avoid blowing up benchmark execution time.
+   */
+  for (int num_row_groups : {1, 100}) {
+    bench->Args({/*num_columns=*/1000, num_row_groups});
+  }
+}
+
+void ReadMetadataSetArgs(benchmark::internal::Benchmark* bench) {
+  WriteMetadataSetArgs(bench);
+}
+
+void WriteFileMetadataAndData(benchmark::State& state) {
+  MetadataBenchmark benchmark(&state);
+
+  for (auto _ : state) {
+    auto sink = benchmark.WriteFile(&state);
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+void ReadFileMetadata(benchmark::State& state) {
+  MetadataBenchmark benchmark(&state);
+  auto contents = benchmark.WriteFile(&state);
+
+  for (auto _ : state) {
+    benchmark.ReadFile(contents);
+  }
+  state.SetItemsProcessed(state.iterations());
+}
+
+BENCHMARK(WriteFileMetadataAndData)->Apply(WriteMetadataSetArgs);
+BENCHMARK(ReadFileMetadata)->Apply(ReadMetadataSetArgs);
+
+}  // namespace parquet

From 8967ddc6d98584c88702c65763f33ec5f02324f7 Mon Sep 17 00:00:00 2001
From: mwish
Date: Wed, 22 May 2024 23:08:15 +0800
Subject: [PATCH 146/261] GH-41726: [C++][Parquet] Minor: moving EncodedStats
 by default rather than copying (#41727)

### Rationale for this change

Moving EncodedStats because it holds two std::string. This could benefit
non-SSO-optimized data for FLBA/String statistics (it seems to be useless
for SSO-optimized data).

### What changes are included in this PR?

1. Construct `EncodedStats` by `std::move`.
2. Move the uncompressed size check ahead of compression.

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?
No * GitHub Issue: #41726 Authored-by: mwish Signed-off-by: mwish --- cpp/src/parquet/arrow/writer.cc | 4 ++-- cpp/src/parquet/column_page.h | 12 ++++++------ cpp/src/parquet/column_reader.cc | 12 ++++++------ cpp/src/parquet/column_writer.cc | 14 +++++++------- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 5238986c428d3..bd6f542d11c72 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -547,8 +547,8 @@ Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* poo // The serialized schema is not UTF-8, which is required for Thrift std::string schema_as_string = serialized->ToString(); std::string schema_base64 = ::arrow::util::base64_encode(schema_as_string); - result->Append(kArrowSchemaKey, schema_base64); - *out = result; + result->Append(kArrowSchemaKey, std::move(schema_base64)); + *out = std::move(result); return Status::OK(); } diff --git a/cpp/src/parquet/column_page.h b/cpp/src/parquet/column_page.h index 905f805b8c9cc..b389ffd98e6c7 100644 --- a/cpp/src/parquet/column_page.h +++ b/cpp/src/parquet/column_page.h @@ -75,13 +75,13 @@ class DataPage : public Page { protected: DataPage(PageType::type type, const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : Page(buffer, type), num_values_(num_values), encoding_(encoding), uncompressed_size_(uncompressed_size), - statistics_(statistics), + statistics_(std::move(statistics)), first_row_index_(std::move(first_row_index)) {} int32_t num_values_; @@ -97,10 +97,10 @@ class DataPageV1 : public DataPage { DataPageV1(const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, Encoding::type definition_level_encoding, Encoding::type repetition_level_encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), definition_level_encoding_(definition_level_encoding), repetition_level_encoding_(repetition_level_encoding) {} @@ -119,10 +119,10 @@ class DataPageV2 : public DataPage { int32_t num_rows, Encoding::type encoding, int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length, int64_t uncompressed_size, bool is_compressed = false, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), num_nulls_(num_nulls), num_rows_(num_rows), definition_levels_byte_length_(definition_levels_byte_length), diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index cfd2fea3746f4..407201a89ef08 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -538,11 +538,11 @@ std::shared_ptr SerializedPageReader::NextPage() { page_buffer = DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len); - return 
std::make_shared<DataPageV1>(page_buffer, header.num_values,
-                                        LoadEnumSafe(&header.encoding),
-                                        LoadEnumSafe(&header.definition_level_encoding),
-                                        LoadEnumSafe(&header.repetition_level_encoding),
-                                        uncompressed_len, data_page_statistics);
+    return std::make_shared<DataPageV1>(
+        page_buffer, header.num_values, LoadEnumSafe(&header.encoding),
+        LoadEnumSafe(&header.definition_level_encoding),
+        LoadEnumSafe(&header.repetition_level_encoding), uncompressed_len,
+        std::move(data_page_statistics));
   } else if (page_type == PageType::DATA_PAGE_V2) {
     ++page_ordinal_;
     const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2;
@@ -569,7 +569,7 @@ std::shared_ptr<Page> SerializedPageReader::NextPage() {
         page_buffer, header.num_values, header.num_nulls, header.num_rows,
         LoadEnumSafe(&header.encoding), header.definition_levels_byte_length,
         header.repetition_levels_byte_length, uncompressed_len, is_compressed,
-        data_page_statistics);
+        std::move(data_page_statistics));
   } else {
     throw ParquetException(
         "Internal error, we have already skipped non-data pages in ShouldSkipPage()");
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index eae8fc6125499..9059cd1641745 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -379,6 +379,11 @@ class SerializedPageWriter : public PageWriter {
   int64_t WriteDataPage(const DataPage& page) override {
     const int64_t uncompressed_size = page.uncompressed_size();
+    if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
+      throw ParquetException("Uncompressed data page size overflows INT32_MAX. Size:",
+                             uncompressed_size);
+    }
+
     std::shared_ptr<Buffer> compressed_data = page.buffer();
     const uint8_t* output_data_buffer = compressed_data->data();
     int64_t output_data_len = compressed_data->size();
@@ -399,11 +404,6 @@ class SerializedPageWriter : public PageWriter {
     }
     format::PageHeader page_header;
-
-    if (uncompressed_size > std::numeric_limits<int32_t>::max()) {
-      throw ParquetException("Uncompressed data page size overflows INT32_MAX. Size:",
-                             uncompressed_size);
-    }
     page_header.__set_uncompressed_page_size(static_cast<int32_t>(uncompressed_size));
     page_header.__set_compressed_page_size(static_cast<int32_t>(output_data_len));
@@ -1018,13 +1018,13 @@ void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size,
         compressed_data->CopySlice(0, compressed_data->size(), allocator_));
     std::unique_ptr<DataPage> page_ptr = std::make_unique<DataPageV1>(
         compressed_data_copy, num_values, encoding_, Encoding::RLE, Encoding::RLE,
-        uncompressed_size, page_stats, first_row_index);
+        uncompressed_size, std::move(page_stats), first_row_index);
     total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader);
     data_pages_.push_back(std::move(page_ptr));
   } else {  // Eagerly write pages
     DataPageV1 page(compressed_data, num_values, encoding_, Encoding::RLE, Encoding::RLE,
-                    uncompressed_size, page_stats, first_row_index);
+                    uncompressed_size, std::move(page_stats), first_row_index);
     WriteDataPage(page);
   }
 }

From 9ba9253e8527a7f3e2c6e47e631e278b8ca84e53 Mon Sep 17 00:00:00 2001
From: mwish
Date: Thu, 23 May 2024 01:23:01 +0800
Subject: [PATCH 147/261] GH-41702: [C++][Parquet] Thrift: generate template
 method to accelerate reading thrift (#41703)

### Rationale for this change

By default, the Thrift serializer and deserializer call many virtual
functions. However, the Thrift C++ compiler has an option to generate
template methods that do away with the cost of calling virtual functions.

It seems to make the metadata read/write benchmarks around 10% faster. A
minimal sketch of the difference follows.
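The sketch below is a self-contained toy model of the optimization, not the
actual Thrift or Arrow code: `Protocol`, `CompactProtocol`, `readI32` and
`readI32Fast` are illustrative stand-ins for thrift's `TProtocol`,
`TCompactProtocolT` and their read methods. It only shows why moving the
concrete protocol type into a template parameter lets the compiler resolve
per-field reads statically and inline them.

```cpp
#include <cstdint>
#include <iostream>

// Stand-in for thrift's TProtocol: every field read is a virtual call.
struct Protocol {
  virtual ~Protocol() = default;
  virtual uint32_t readI32(int32_t& v) = 0;
};

// Stand-in for a concrete protocol (e.g. TCompactProtocolT). It also exposes
// a non-virtual fast path, roughly what template-generated code calls.
struct CompactProtocol final : Protocol {
  uint32_t readI32(int32_t& v) override { return readI32Fast(v); }
  uint32_t readI32Fast(int32_t& v) {  // non-virtual, inlinable
    v = 42;
    return 4;  // bytes consumed
  }
};

// What the default generated code does: one indirect (virtual) call per field.
uint32_t ReadVirtual(Protocol* p, int32_t& v) { return p->readI32(v); }

// What the `templates` option enables: the concrete protocol type is a
// template parameter, so the call resolves at compile time and can inline.
template <typename ProtocolType>
uint32_t ReadTemplated(ProtocolType* p, int32_t& v) {
  return p->readI32Fast(v);
}

int main() {
  CompactProtocol proto;
  int32_t value = 0;
  ReadVirtual(&proto, value);    // dynamic dispatch
  ReadTemplated(&proto, value);  // static dispatch, inlinable
  std::cout << value << std::endl;
  return 0;
}
```

Multiplied over the thousands of primitive reads in a wide-schema
FileMetaData, removing that indirection is where the observed speedup
plausibly comes from.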
### What changes are included in this PR?

1. `cpp/build-support/update-thrift.sh`: pass the `templates` option to the Thrift C++ compiler
2. `cpp/src/parquet/thrift_internal.h`: use the generated code
3. `cpp/src/generated`: update generated files.

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?

No.

* GitHub Issue: #41702

Authored-by: mwish
Signed-off-by: Antoine Pitrou
---
 cpp/build-support/update-thrift.sh  |    2 +-
 cpp/src/generated/parquet_types.cpp | 4759 +-------------------------
 cpp/src/generated/parquet_types.h   |  426 ++-
 cpp/src/generated/parquet_types.tcc | 4867 +++++++++++++++++++++++++++
 cpp/src/parquet/thrift_internal.h   |   11 +-
 5 files changed, 5147 insertions(+), 4918 deletions(-)
 create mode 100644 cpp/src/generated/parquet_types.tcc

diff --git a/cpp/build-support/update-thrift.sh b/cpp/build-support/update-thrift.sh
index 9b8f2539cffe3..9e050a5e49d64 100755
--- a/cpp/build-support/update-thrift.sh
+++ b/cpp/build-support/update-thrift.sh
@@ -20,4 +20,4 @@
 # Run this from cpp/ directory. thrift is expected to be in your path
-thrift --gen cpp:moveable_types -out src/generated src/parquet/parquet.thrift
+thrift --gen cpp:moveable_types,templates -out src/generated src/parquet/parquet.thrift
diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp
index 8932c4a4f8d19..1ba0c4626233f 100644
--- a/cpp/src/generated/parquet_types.cpp
+++ b/cpp/src/generated/parquet_types.cpp
@@ -640,128 +640,6 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj)
 }
 
-uint32_t SizeStatistics::read(::apache::thrift::protocol::TProtocol* iprot) {
-
-  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
-  uint32_t xfer = 0;
-  std::string fname;
-  ::apache::thrift::protocol::TType ftype;
-  int16_t fid;
-
-  xfer += iprot->readStructBegin(fname);
-
-  using ::apache::thrift::protocol::TProtocolException;
-
-
-  while (true)
-  {
-    xfer += iprot->readFieldBegin(fname, ftype, fid);
-    if (ftype == ::apache::thrift::protocol::T_STOP) {
-      break;
-    }
-    switch (fid)
-    {
-      case 1:
-        if (ftype == ::apache::thrift::protocol::T_I64) {
-          xfer += iprot->readI64(this->unencoded_byte_array_data_bytes);
-          this->__isset.unencoded_byte_array_data_bytes = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 2:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->repetition_level_histogram.clear();
-            uint32_t _size0;
-            ::apache::thrift::protocol::TType _etype3;
-            xfer += iprot->readListBegin(_etype3, _size0);
-            this->repetition_level_histogram.resize(_size0);
-            uint32_t _i4;
-            for (_i4 = 0; _i4 < _size0; ++_i4)
-            {
-              xfer += iprot->readI64(this->repetition_level_histogram[_i4]);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.repetition_level_histogram = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      case 3:
-        if (ftype == ::apache::thrift::protocol::T_LIST) {
-          {
-            this->definition_level_histogram.clear();
-            uint32_t _size5;
-            ::apache::thrift::protocol::TType _etype8;
-            xfer += iprot->readListBegin(_etype8, _size5);
-            this->definition_level_histogram.resize(_size5);
-            uint32_t _i9;
-            for (_i9 = 0; _i9 < _size5; ++_i9)
-            {
-              xfer += iprot->readI64(this->definition_level_histogram[_i9]);
-            }
-            xfer += iprot->readListEnd();
-          }
-          this->__isset.definition_level_histogram = true;
-        } else {
-          xfer += iprot->skip(ftype);
-        }
-        break;
-      default:
-        xfer += iprot->skip(ftype);
-        break;
-    }
-    xfer += iprot->readFieldEnd();
-  }
-
-  xfer += iprot->readStructEnd();
-
-  return xfer;
-}
- -uint32_t SizeStatistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SizeStatistics"); - - if (this->__isset.unencoded_byte_array_data_bytes) { - xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_level_histogram) { - xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histogram.size())); - std::vector ::const_iterator _iter10; - for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10) - { - xfer += oprot->writeI64((*_iter10)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.definition_level_histogram) { - xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histogram.size())); - std::vector ::const_iterator _iter11; - for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11) - { - xfer += oprot->writeI64((*_iter11)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SizeStatistics &a, SizeStatistics &b) { using ::std::swap; swap(a.unencoded_byte_array_data_bytes, b.unencoded_byte_array_data_bytes); @@ -856,153 +734,6 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj) } -uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - 
this->__isset.min_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_max_value_exact); - this->__isset.is_max_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_min_value_exact); - this->__isset.is_min_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_max_value_exact) { - xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_max_value_exact); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_min_value_exact) { - xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8); - xfer += oprot->writeBool(this->is_min_value_exact); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Statistics &a, Statistics &b) { using ::std::swap; swap(a.max, b.max); @@ -1087,44 +818,6 @@ std::ostream& operator<<(std::ostream& out, const StringType& obj) } -uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - 
::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(StringType &a, StringType &b) { using ::std::swap; (void) a; @@ -1162,44 +855,6 @@ std::ostream& operator<<(std::ostream& out, const UUIDType& obj) } -uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(UUIDType &a, UUIDType &b) { using ::std::swap; (void) a; @@ -1237,44 +892,6 @@ std::ostream& operator<<(std::ostream& out, const MapType& obj) } -uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MapType &a, MapType &b) { using ::std::swap; (void) a; @@ -1312,44 +929,6 @@ std::ostream& operator<<(std::ostream& out, const ListType& obj) } -uint32_t ListType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ListType &a, ListType &b) { using ::std::swap; (void) a; @@ -1387,44 +966,6 @@ std::ostream& 
operator<<(std::ostream& out, const EnumType& obj) } -uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EnumType &a, EnumType &b) { using ::std::swap; (void) a; @@ -1462,44 +1003,6 @@ std::ostream& operator<<(std::ostream& out, const DateType& obj) } -uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DateType &a, DateType &b) { using ::std::swap; (void) a; @@ -1537,44 +1040,6 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj) } -uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Float16Type"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Float16Type &a, Float16Type &b) { using ::std::swap; (void) a; @@ -1612,44 +1077,6 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj) } -uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t 
fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NullType &a, NullType &b) { using ::std::swap; (void) a; @@ -1695,79 +1122,6 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj) } -uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DecimalType &a, DecimalType &b) { using ::std::swap; swap(a.scale, b.scale); @@ -1811,44 +1165,6 @@ std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) } -uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t 
MilliSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MilliSeconds &a, MilliSeconds &b) { using ::std::swap; (void) a; @@ -1886,44 +1202,6 @@ std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) } -uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MicroSeconds &a, MicroSeconds &b) { using ::std::swap; (void) a; @@ -1961,44 +1239,6 @@ std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) } -uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NanoSeconds &a, NanoSeconds &b) { using ::std::swap; (void) a; @@ -2051,88 +1291,6 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) } -uint32_t TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; 
- case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeUnit &a, TimeUnit &b) { using ::std::swap; swap(a.MILLIS, b.MILLIS); @@ -2195,79 +1353,6 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj) } -uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimestampType &a, TimestampType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2319,79 +1404,6 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj) } -uint32_t 
TimeType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeType &a, TimeType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2443,79 +1455,6 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj) } -uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += 
oprot->writeStructBegin("IntType"); - - xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IntType &a, IntType &b) { using ::std::swap; swap(a.bitWidth, b.bitWidth); @@ -2559,44 +1498,6 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj) } -uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(JsonType &a, JsonType &b) { using ::std::swap; (void) a; @@ -2634,44 +1535,6 @@ std::ostream& operator<<(std::ostream& out, const BsonType& obj) } -uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BsonType &a, BsonType &b) { using ::std::swap; (void) a; @@ -2779,231 +1642,6 @@ std::ostream& operator<<(std::ostream& out, const LogicalType& obj) } -uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) 
{ - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 15: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->FLOAT16.read(iprot); - this->__isset.FLOAT16 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", 
::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.FLOAT16) { - xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); - xfer += this->FLOAT16.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(LogicalType &a, LogicalType &b) { using ::std::swap; swap(a.STRING, b.STRING); @@ -3173,187 +1811,6 @@ std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) } -uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast96; - xfer += iprot->readI32(ecast96); - this->type = static_cast<Type::type>(ecast96); - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast97; - xfer += iprot->readI32(ecast97); - this->repetition_type = static_cast<FieldRepetitionType::type>(ecast97); - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == 
::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast98; - xfer += iprot->readI32(ecast98); - this->converted_type = static_cast<ConvertedType::type>(ecast98); - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast<int32_t>(this->type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_type)); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(static_cast<int32_t>(this->converted_type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += 
oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SchemaElement &a, SchemaElement &b) { using ::std::swap; swap(a.type, b.type); @@ -3471,128 +1928,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) } -uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast103; - xfer += iprot->readI32(ecast103); - this->encoding = static_cast(ecast103); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast104; - xfer += iprot->readI32(ecast104); - this->definition_level_encoding = static_cast(ecast104); - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast105; - xfer += iprot->readI32(ecast105); - this->repetition_level_encoding = static_cast(ecast105); - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->definition_level_encoding)); - 
xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeader &a, DataPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3659,44 +1994,6 @@ std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) } -uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IndexPageHeader &a, IndexPageHeader &b) { using ::std::swap; (void) a; @@ -3747,94 +2044,6 @@ std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) } -uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast114; - xfer += iprot->readI32(ecast114); - this->encoding = static_cast<Encoding::type>(ecast114); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker
tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast<int32_t>(this->encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3923,167 +2132,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) } -uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast119; - xfer += iprot->readI32(ecast119); - this->encoding = static_cast<Encoding::type>(ecast119); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - 
throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast<int32_t>(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -4168,44 +2216,6 @@ std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) } -uint32_t SplitBlockAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t SplitBlockAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { using ::std::swap; (void) a; @@ -4248,62 
+2258,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj) } -uint32_t BloomFilterAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BLOCK.read(iprot); - this->__isset.BLOCK = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); - - if (this->__isset.BLOCK) { - xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->BLOCK.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { using ::std::swap; swap(a.BLOCK, b.BLOCK); @@ -4346,44 +2300,6 @@ std::ostream& operator<<(std::ostream& out, const XxHash& obj) } -uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("XxHash"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(XxHash &a, XxHash &b) { using ::std::swap; (void) a; @@ -4426,62 +2342,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj) } -uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->XXHASH.read(iprot); - this->__isset.XXHASH = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += 
iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHash"); - - if (this->__isset.XXHASH) { - xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->XXHASH.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHash &a, BloomFilterHash &b) { using ::std::swap; swap(a.XXHASH, b.XXHASH); @@ -4511,57 +2371,19 @@ void BloomFilterHash::printTo(std::ostream& out) const { out << "BloomFilterHash("; out << "XXHASH="; (__isset.XXHASH ? (out << to_string(XXHASH)) : (out << "")); out << ")"; -} - - -Uncompressed::~Uncompressed() noexcept { -} - -std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +} - xfer += iprot->readStructEnd(); - return xfer; +Uncompressed::~Uncompressed() noexcept { } -uint32_t Uncompressed::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Uncompressed"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) +{ + obj.printTo(out); + return out; } + void swap(Uncompressed &a, Uncompressed &b) { using ::std::swap; (void) a; @@ -4604,62 +2426,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj) } -uint32_t BloomFilterCompression::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNCOMPRESSED.read(iprot); - this->__isset.UNCOMPRESSED = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterCompression::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterCompression"); - - if (this->__isset.UNCOMPRESSED) { - xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += 
this->UNCOMPRESSED.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterCompression &a, BloomFilterCompression &b) { using ::std::swap; swap(a.UNCOMPRESSED, b.UNCOMPRESSED); @@ -4718,109 +2484,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj) } -uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_numBytes = false; - bool isset_algorithm = false; - bool isset_hash = false; - bool isset_compression = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->numBytes); - isset_numBytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->algorithm.read(iprot); - isset_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->hash.read(iprot); - isset_hash = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->compression.read(iprot); - isset_compression = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_numBytes) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_hash) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compression) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t BloomFilterHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHeader"); - - xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->numBytes); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->hash.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->compression.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHeader &a, BloomFilterHeader &b) { using ::std::swap; swap(a.numBytes, b.numBytes); @@ -4913,161 +2576,6 @@ std::ostream& operator<<(std::ostream& out, const PageHeader& obj) } -uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker 
tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast152; - xfer += iprot->readI32(ecast152); - this->type = static_cast<PageType::type>(ecast152); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast<int32_t>(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if 
(this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageHeader &a, PageHeader &b) { using ::std::swap; swap(a.type, b.type); @@ -5161,77 +2669,6 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj) } -uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(KeyValue &a, KeyValue &b) { using ::std::swap; swap(a.key, b.key); @@ -5292,94 +2729,6 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) } -uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; 
- bool isset_nulls_first = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->column_idx);
- isset_column_idx = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->descending);
- isset_descending = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_BOOL) {
- xfer += iprot->readBool(this->nulls_first);
- isset_nulls_first = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_column_idx)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_descending)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_nulls_first)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("SortingColumn");
-
- xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(this->column_idx);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
- xfer += oprot->writeBool(this->descending);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
- xfer += oprot->writeBool(this->nulls_first);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(SortingColumn &a, SortingColumn &b) {
 using ::std::swap;
 swap(a.column_idx, b.column_idx);
@@ -5441,98 +2790,6 @@ std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj)
 }
 
-uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_page_type = false;
- bool isset_encoding = false;
- bool isset_count = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast165;
- xfer += iprot->readI32(ecast165);
- this->page_type = static_cast<PageType::type>(ecast165);
- isset_page_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast166;
- xfer += iprot->readI32(ecast166);
- this->encoding = static_cast<Encoding::type>(ecast166);
- isset_encoding = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->count);
- isset_count = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_page_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encoding)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_count)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("PageEncodingStats");
-
- xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(static_cast<int32_t>(this->page_type));
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
- xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3);
- xfer += oprot->writeI32(this->count);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(PageEncodingStats &a, PageEncodingStats &b) {
 using ::std::swap;
 swap(a.page_type, b.page_type);
@@ -5654,359 +2911,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj)
 }
 
-uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_type = false;
- bool isset_encodings = false;
- bool isset_path_in_schema = false;
- bool isset_codec = false;
- bool isset_num_values = false;
- bool isset_total_uncompressed_size = false;
- bool isset_total_compressed_size = false;
- bool isset_data_page_offset = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast171;
- xfer += iprot->readI32(ecast171);
- this->type = static_cast<Type::type>(ecast171);
- isset_type = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->encodings.clear();
- uint32_t _size172;
- ::apache::thrift::protocol::TType _etype175;
- xfer += iprot->readListBegin(_etype175, _size172);
- this->encodings.resize(_size172);
- uint32_t _i176;
- for (_i176 = 0; _i176 < _size172; ++_i176)
- {
- int32_t ecast177;
- xfer += iprot->readI32(ecast177);
- this->encodings[_i176] = static_cast<Encoding::type>(ecast177);
- }
- xfer += iprot->readListEnd();
- }
- isset_encodings = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->path_in_schema.clear();
- uint32_t _size178;
- ::apache::thrift::protocol::TType _etype181;
- xfer += iprot->readListBegin(_etype181, _size178);
- this->path_in_schema.resize(_size178);
- uint32_t _i182;
- for (_i182 = 0; _i182 < _size178; ++_i182)
- {
- xfer += iprot->readString(this->path_in_schema[_i182]);
- }
- xfer += iprot->readListEnd();
- }
- isset_path_in_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast183;
- xfer += iprot->readI32(ecast183);
- this->codec = static_cast<CompressionCodec::type>(ecast183);
- isset_codec = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_values);
- isset_num_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_uncompressed_size);
- isset_total_uncompressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_compressed_size);
- isset_total_compressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 8:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->key_value_metadata.clear();
- uint32_t _size184;
- ::apache::thrift::protocol::TType _etype187;
- xfer += iprot->readListBegin(_etype187, _size184);
- this->key_value_metadata.resize(_size184);
- uint32_t _i188;
- for (_i188 = 0; _i188 < _size184; ++_i188)
- {
- xfer += this->key_value_metadata[_i188].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.key_value_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 9:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->data_page_offset);
- isset_data_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 10:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->index_page_offset);
- this->__isset.index_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 11:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->dictionary_page_offset);
- this->__isset.dictionary_page_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 12:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->statistics.read(iprot);
- this->__isset.statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 13:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->encoding_stats.clear();
- uint32_t _size189;
- ::apache::thrift::protocol::TType _etype192;
- xfer += iprot->readListBegin(_etype192, _size189);
- this->encoding_stats.resize(_size189);
- uint32_t _i193;
- for (_i193 = 0; _i193 < _size189; ++_i193)
- {
- xfer += this->encoding_stats[_i193].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.encoding_stats = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 14:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->bloom_filter_offset);
- this->__isset.bloom_filter_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 15:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- xfer += iprot->readI32(this->bloom_filter_length);
- this->__isset.bloom_filter_length = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 16:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->size_statistics.read(iprot);
- this->__isset.size_statistics = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_type)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_encodings)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_path_in_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_codec)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_uncompressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_compressed_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_data_page_offset)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnMetaData");
-
- xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
- xfer += oprot->writeI32(static_cast<int32_t>(this->type));
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast<uint32_t>(this->encodings.size()));
- std::vector<Encoding::type> ::const_iterator _iter194;
- for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194)
- {
- xfer += oprot->writeI32(static_cast<int32_t>((*_iter194)));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
- std::vector<std::string> ::const_iterator _iter195;
- for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195)
- {
- xfer += oprot->writeString((*_iter195));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32(static_cast<int32_t>(this->codec));
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5);
- xfer += oprot->writeI64(this->num_values);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6);
- xfer += oprot->writeI64(this->total_uncompressed_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7);
- xfer += oprot->writeI64(this->total_compressed_size);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_value_metadata) {
- xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->key_value_metadata.size()));
- std::vector<KeyValue> ::const_iterator _iter196;
- for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196)
- {
- xfer += (*_iter196).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9);
- xfer += oprot->writeI64(this->data_page_offset);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.index_page_offset) {
- xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10);
- xfer += oprot->writeI64(this->index_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.dictionary_page_offset) {
- xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11);
- xfer += oprot->writeI64(this->dictionary_page_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.statistics) {
- xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12);
- xfer += this->statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.encoding_stats) {
- xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->encoding_stats.size()));
- std::vector<PageEncodingStats> ::const_iterator _iter197;
- for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197)
- {
- xfer += (*_iter197).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.bloom_filter_offset) {
- xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14);
- xfer += oprot->writeI64(this->bloom_filter_offset);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.bloom_filter_length) {
- xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15);
- xfer += oprot->writeI32(this->bloom_filter_length);
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.size_statistics) {
- xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16);
- xfer += this->size_statistics.write(oprot);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(ColumnMetaData &a, ColumnMetaData &b) {
 using ::std::swap;
 swap(a.type, b.type);
@@ -6139,44 +3043,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj)
 }
 
-uint32_t EncryptionWithFooterKey::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- xfer += iprot->skip(ftype);
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t EncryptionWithFooterKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EncryptionWithFooterKey");
-
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) {
 using ::std::swap;
 (void) a;
@@ -6223,97 +3089,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj)
 }
 
-uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_path_in_schema = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->path_in_schema.clear();
- uint32_t _size206;
- ::apache::thrift::protocol::TType _etype209;
- xfer += iprot->readListBegin(_etype209, _size206);
- this->path_in_schema.resize(_size206);
- uint32_t _i210;
- for (_i210 = 0; _i210 < _size206; ++_i210)
- {
- xfer += iprot->readString(this->path_in_schema[_i210]);
- }
- xfer += iprot->readListEnd();
- }
- isset_path_in_schema = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRING) {
- xfer += iprot->readBinary(this->key_metadata);
- this->__isset.key_metadata = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_path_in_schema)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("EncryptionWithColumnKey");
-
- xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->path_in_schema.size()));
- std::vector<std::string> ::const_iterator _iter211;
- for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211)
- {
- xfer += oprot->writeString((*_iter211));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.key_metadata) {
- xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2);
- xfer += oprot->writeBinary(this->key_metadata);
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) {
 using ::std::swap;
 swap(a.path_in_schema, b.path_in_schema);
@@ -6372,75 +3147,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj)
 }
 
-uint32_t ColumnCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot);
- this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_STRUCT) {
- xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot);
- this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- return xfer;
-}
-
-uint32_t ColumnCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
-
::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { using ::std::swap; swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); @@ -6533,168 +3239,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) } -uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_file_offset) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnChunk &a, ColumnChunk &b) { using ::std::swap; swap(a.file_path, b.file_path); @@ -6817,186 +3361,6 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj) } -uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size224; - ::apache::thrift::protocol::TType _etype227; - xfer += iprot->readListBegin(_etype227, _size224); - this->columns.resize(_size224); - uint32_t _i228; - for (_i228 = 0; _i228 < _size224; ++_i228) - { - xfer += this->columns[_i228].read(iprot); - } - xfer += iprot->readListEnd(); - } - 
isset_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_byte_size);
- isset_total_byte_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->num_rows);
- isset_num_rows = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->sorting_columns.clear();
- uint32_t _size229;
- ::apache::thrift::protocol::TType _etype232;
- xfer += iprot->readListBegin(_etype232, _size229);
- this->sorting_columns.resize(_size229);
- uint32_t _i233;
- for (_i233 = 0; _i233 < _size229; ++_i233)
- {
- xfer += this->sorting_columns[_i233].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.sorting_columns = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->file_offset);
- this->__isset.file_offset = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_I64) {
- xfer += iprot->readI64(this->total_compressed_size);
- this->__isset.total_compressed_size = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_I16) {
- xfer += iprot->readI16(this->ordinal);
- this->__isset.ordinal = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_columns)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_total_byte_size)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_num_rows)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("RowGroup");
-
- xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->columns.size()));
- std::vector<ColumnChunk> ::const_iterator _iter234;
- for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234)
- {
- xfer += (*_iter234).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2);
- xfer += oprot->writeI64(this->total_byte_size);
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3);
- xfer += oprot->writeI64(this->num_rows);
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.sorting_columns) {
- xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->sorting_columns.size()));
- std::vector<SortingColumn> ::const_iterator _iter235;
- for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235)
- {
- xfer += (*_iter235).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.file_offset) {
- xfer +=
oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(RowGroup &a, RowGroup &b) { using ::std::swap; swap(a.columns, b.columns); @@ -7075,44 +3439,6 @@ std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) } -uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { using ::std::swap; (void) a; @@ -7155,62 +3481,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) } -uint32_t ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnOrder &a, ColumnOrder &b) { using ::std::swap; swap(a.TYPE_ORDER, b.TYPE_ORDER); @@ -7265,94 +3535,6 @@ std::ostream& operator<<(std::ostream& out, const PageLocation& obj) } 
-uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageLocation &a, PageLocation &b) { using ::std::swap; swap(a.offset, b.offset); @@ -7411,117 +3593,6 @@ std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) } -uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->page_locations.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) - { - xfer += 
this->page_locations[_i256].read(iprot);
- }
- xfer += iprot->readListEnd();
- }
- isset_page_locations = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->unencoded_byte_array_data_bytes.clear();
- uint32_t _size257;
- ::apache::thrift::protocol::TType _etype260;
- xfer += iprot->readListBegin(_etype260, _size257);
- this->unencoded_byte_array_data_bytes.resize(_size257);
- uint32_t _i261;
- for (_i261 = 0; _i261 < _size257; ++_i261)
- {
- xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.unencoded_byte_array_data_bytes = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_page_locations)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("OffsetIndex");
-
- xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast<uint32_t>(this->page_locations.size()));
- std::vector<PageLocation> ::const_iterator _iter262;
- for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262)
- {
- xfer += (*_iter262).write(oprot);
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.unencoded_byte_array_data_bytes) {
- xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->unencoded_byte_array_data_bytes.size()));
- std::vector<int64_t> ::const_iterator _iter263;
- for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263)
- {
- xfer += oprot->writeI64((*_iter263));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(OffsetIndex &a, OffsetIndex &b) {
 using ::std::swap;
 swap(a.page_locations, b.page_locations);
@@ -7601,270 +3672,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj)
 }
 
-uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) {
-
- ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
- uint32_t xfer = 0;
- std::string fname;
- ::apache::thrift::protocol::TType ftype;
- int16_t fid;
-
- xfer += iprot->readStructBegin(fname);
-
- using ::apache::thrift::protocol::TProtocolException;
-
- bool isset_null_pages = false;
- bool isset_min_values = false;
- bool isset_max_values = false;
- bool isset_boundary_order = false;
-
- while (true)
- {
- xfer += iprot->readFieldBegin(fname, ftype, fid);
- if (ftype == ::apache::thrift::protocol::T_STOP) {
- break;
- }
- switch (fid)
- {
- case 1:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->null_pages.clear();
- uint32_t _size268;
- ::apache::thrift::protocol::TType _etype271;
- xfer += iprot->readListBegin(_etype271, _size268);
- this->null_pages.resize(_size268);
- uint32_t _i272;
- for (_i272 = 0; _i272 < _size268; ++_i272)
- {
- xfer += iprot->readBool(this->null_pages[_i272]);
- }
- xfer += iprot->readListEnd();
- }
- isset_null_pages = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 2:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->min_values.clear();
- uint32_t _size273;
- ::apache::thrift::protocol::TType _etype276;
- xfer += iprot->readListBegin(_etype276, _size273);
- this->min_values.resize(_size273);
- uint32_t _i277;
- for (_i277 = 0; _i277 < _size273; ++_i277)
- {
- xfer += iprot->readBinary(this->min_values[_i277]);
- }
- xfer += iprot->readListEnd();
- }
- isset_min_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 3:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->max_values.clear();
- uint32_t _size278;
- ::apache::thrift::protocol::TType _etype281;
- xfer += iprot->readListBegin(_etype281, _size278);
- this->max_values.resize(_size278);
- uint32_t _i282;
- for (_i282 = 0; _i282 < _size278; ++_i282)
- {
- xfer += iprot->readBinary(this->max_values[_i282]);
- }
- xfer += iprot->readListEnd();
- }
- isset_max_values = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 4:
- if (ftype == ::apache::thrift::protocol::T_I32) {
- int32_t ecast283;
- xfer += iprot->readI32(ecast283);
- this->boundary_order = static_cast<BoundaryOrder::type>(ecast283);
- isset_boundary_order = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 5:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->null_counts.clear();
- uint32_t _size284;
- ::apache::thrift::protocol::TType _etype287;
- xfer += iprot->readListBegin(_etype287, _size284);
- this->null_counts.resize(_size284);
- uint32_t _i288;
- for (_i288 = 0; _i288 < _size284; ++_i288)
- {
- xfer += iprot->readI64(this->null_counts[_i288]);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.null_counts = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 6:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->repetition_level_histograms.clear();
- uint32_t _size289;
- ::apache::thrift::protocol::TType _etype292;
- xfer += iprot->readListBegin(_etype292, _size289);
- this->repetition_level_histograms.resize(_size289);
- uint32_t _i293;
- for (_i293 = 0; _i293 < _size289; ++_i293)
- {
- xfer += iprot->readI64(this->repetition_level_histograms[_i293]);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.repetition_level_histograms = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- case 7:
- if (ftype == ::apache::thrift::protocol::T_LIST) {
- {
- this->definition_level_histograms.clear();
- uint32_t _size294;
- ::apache::thrift::protocol::TType _etype297;
- xfer += iprot->readListBegin(_etype297, _size294);
- this->definition_level_histograms.resize(_size294);
- uint32_t _i298;
- for (_i298 = 0; _i298 < _size294; ++_i298)
- {
- xfer += iprot->readI64(this->definition_level_histograms[_i298]);
- }
- xfer += iprot->readListEnd();
- }
- this->__isset.definition_level_histograms = true;
- } else {
- xfer += iprot->skip(ftype);
- }
- break;
- default:
- xfer += iprot->skip(ftype);
- break;
- }
- xfer += iprot->readFieldEnd();
- }
-
- xfer += iprot->readStructEnd();
-
- if (!isset_null_pages)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_min_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_max_values)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- if (!isset_boundary_order)
- throw TProtocolException(TProtocolException::INVALID_DATA);
- return xfer;
-}
-
-uint32_t ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const {
- uint32_t xfer = 0;
- ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
- xfer += oprot->writeStructBegin("ColumnIndex");
-
- xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast<uint32_t>(this->null_pages.size()));
- std::vector<bool> ::const_iterator _iter299;
- for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299)
- {
- xfer += oprot->writeBool((*_iter299));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->min_values.size()));
- std::vector<std::string> ::const_iterator _iter300;
- for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300)
- {
- xfer += oprot->writeBinary((*_iter300));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast<uint32_t>(this->max_values.size()));
- std::vector<std::string> ::const_iterator _iter301;
- for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301)
- {
- xfer += oprot->writeBinary((*_iter301));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
-
- xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4);
- xfer += oprot->writeI32(static_cast<int32_t>(this->boundary_order));
- xfer += oprot->writeFieldEnd();
-
- if (this->__isset.null_counts) {
- xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->null_counts.size()));
- std::vector<int64_t> ::const_iterator _iter302;
- for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302)
- {
- xfer += oprot->writeI64((*_iter302));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.repetition_level_histograms) {
- xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->repetition_level_histograms.size()));
- std::vector<int64_t> ::const_iterator _iter303;
- for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303)
- {
- xfer += oprot->writeI64((*_iter303));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- if (this->__isset.definition_level_histograms) {
- xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7);
- {
- xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->definition_level_histograms.size()));
- std::vector<int64_t> ::const_iterator _iter304;
- for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304)
- {
- xfer += oprot->writeI64((*_iter304));
- }
- xfer += oprot->writeListEnd();
- }
- xfer += oprot->writeFieldEnd();
- }
- xfer += oprot->writeFieldStop();
- xfer += oprot->writeStructEnd();
- return xfer;
-}
-
 void swap(ColumnIndex &a, ColumnIndex &b) {
 using
::std::swap; swap(a.null_pages, b.null_pages); @@ -7958,88 +3765,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) } -uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmV1 &a, AesGcmV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8109,88 +3834,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) } -uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - 
xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8255,75 +3898,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) } -uint32_t EncryptionAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { using ::std::swap; swap(a.AES_GCM_V1, b.AES_GCM_V1); @@ -8413,254 +3987,6 @@ std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) } -uint32_t 
FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->schema.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) - { - xfer += this->schema[_i325].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size326; - ::apache::thrift::protocol::TType _etype329; - xfer += iprot->readListBegin(_etype329, _size326); - this->row_groups.resize(_size326); - uint32_t _i330; - for (_i330 = 0; _i330 < _size326; ++_i330) - { - xfer += this->row_groups[_i330].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size331; - ::apache::thrift::protocol::TType _etype334; - xfer += iprot->readListBegin(_etype334, _size331); - this->key_value_metadata.resize(_size331); - uint32_t _i335; - for (_i335 = 0; _i335 < _size331; ++_i335) - { - xfer += this->key_value_metadata[_i335].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size336; - ::apache::thrift::protocol::TType _etype339; - xfer += iprot->readListBegin(_etype339, _size336); - this->column_orders.resize(_size336); - uint32_t _i340; - for (_i340 = 0; _i340 < _size336; ++_i340) - { - xfer += this->column_orders[_i340].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - 
this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); - - xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter341; - for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) - { - xfer += (*_iter341).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter342; - for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) - { - xfer += (*_iter342).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter343; - for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) - { - xfer += (*_iter343).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter344; - for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) - { - xfer += (*_iter344).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer 
+= oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileMetaData &a, FileMetaData &b) { using ::std::swap; swap(a.version, b.version); @@ -8760,77 +4086,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) } -uint32_t FileCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { using ::std::swap; swap(a.encryption_algorithm, b.encryption_algorithm); diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 9dc6794c4030b..6cf85fe5e73cc 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -466,7 +466,7 @@ typedef struct _SizeStatistics__isset { * in this structure can help determine the number of nulls at a particular * nesting level and maximum length of lists). 
 */
-class SizeStatistics : public virtual ::apache::thrift::TBase {
+class SizeStatistics {
  public:
 
   SizeStatistics(const SizeStatistics&);
@@ -546,8 +546,10 @@ class SizeStatistics : public virtual ::apache::thrift::TBase {
 
   bool operator < (const SizeStatistics & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -572,7 +574,7 @@ typedef struct _Statistics__isset {
  * Statistics per row group and per page
  * All fields are optional.
  */
-class Statistics : public virtual ::apache::thrift::TBase {
+class Statistics {
  public:
 
   Statistics(const Statistics&);
@@ -697,8 +699,10 @@ class Statistics : public virtual ::apache::thrift::TBase {
 
   bool operator < (const Statistics & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -711,7 +715,7 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj);
 /**
  * Empty structs to use as logical type annotations
  */
-class StringType : public virtual ::apache::thrift::TBase {
+class StringType {
  public:
 
   StringType(const StringType&) noexcept;
@@ -733,8 +737,10 @@ class StringType : public virtual ::apache::thrift::TBase {
 
   bool operator < (const StringType & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -744,7 +750,7 @@ void swap(StringType &a, StringType &b);
 
 std::ostream& operator<<(std::ostream& out, const StringType& obj);
 
-class UUIDType : public virtual ::apache::thrift::TBase {
+class UUIDType {
  public:
 
   UUIDType(const UUIDType&) noexcept;
@@ -766,8 +772,10 @@ class UUIDType : public virtual ::apache::thrift::TBase {
 
   bool operator < (const UUIDType & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -777,7 +785,7 @@ void swap(UUIDType &a, UUIDType &b);
 
 std::ostream& operator<<(std::ostream& out, const UUIDType& obj);
 
-class MapType : public virtual ::apache::thrift::TBase {
+class MapType {
  public:
 
   MapType(const MapType&) noexcept;
@@ -799,8 +807,10 @@ class MapType : public virtual ::apache::thrift::TBase {
 
   bool operator < (const MapType & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -810,7 +820,7 @@ void swap(MapType &a, MapType &b);
 
 std::ostream& operator<<(std::ostream& out, const MapType& obj);
 
-class ListType : public virtual ::apache::thrift::TBase {
+class ListType {
  public:
 
   ListType(const ListType&) noexcept;
@@ -832,8 +842,10 @@ class ListType 
: public virtual ::apache::thrift::TBase { bool operator < (const ListType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -843,7 +855,7 @@ void swap(ListType &a, ListType &b); std::ostream& operator<<(std::ostream& out, const ListType& obj); -class EnumType : public virtual ::apache::thrift::TBase { +class EnumType { public: EnumType(const EnumType&) noexcept; @@ -865,8 +877,10 @@ class EnumType : public virtual ::apache::thrift::TBase { bool operator < (const EnumType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -876,7 +890,7 @@ void swap(EnumType &a, EnumType &b); std::ostream& operator<<(std::ostream& out, const EnumType& obj); -class DateType : public virtual ::apache::thrift::TBase { +class DateType { public: DateType(const DateType&) noexcept; @@ -898,8 +912,10 @@ class DateType : public virtual ::apache::thrift::TBase { bool operator < (const DateType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -909,7 +925,7 @@ void swap(DateType &a, DateType &b); std::ostream& operator<<(std::ostream& out, const DateType& obj); -class Float16Type : public virtual ::apache::thrift::TBase { +class Float16Type { public: Float16Type(const Float16Type&) noexcept; @@ -931,8 +947,10 @@ class Float16Type : public virtual ::apache::thrift::TBase { bool operator < (const Float16Type & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -949,7 +967,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj); * null and the physical type can't be determined. This annotation signals * the case where the physical type was guessed from all null values. 
*/ -class NullType : public virtual ::apache::thrift::TBase { +class NullType { public: NullType(const NullType&) noexcept; @@ -971,8 +989,10 @@ class NullType : public virtual ::apache::thrift::TBase { bool operator < (const NullType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -993,7 +1013,7 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); * * Allowed for physical types: INT32, INT64, FIXED, and BINARY */ -class DecimalType : public virtual ::apache::thrift::TBase { +class DecimalType { public: DecimalType(const DecimalType&) noexcept; @@ -1027,8 +1047,10 @@ class DecimalType : public virtual ::apache::thrift::TBase { bool operator < (const DecimalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1041,7 +1063,7 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj); /** * Time units for logical types */ -class MilliSeconds : public virtual ::apache::thrift::TBase { +class MilliSeconds { public: MilliSeconds(const MilliSeconds&) noexcept; @@ -1063,8 +1085,10 @@ class MilliSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MilliSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1074,7 +1098,7 @@ void swap(MilliSeconds &a, MilliSeconds &b); std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); -class MicroSeconds : public virtual ::apache::thrift::TBase { +class MicroSeconds { public: MicroSeconds(const MicroSeconds&) noexcept; @@ -1096,8 +1120,10 @@ class MicroSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MicroSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1107,7 +1133,7 @@ void swap(MicroSeconds &a, MicroSeconds &b); std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); -class NanoSeconds : public virtual ::apache::thrift::TBase { +class NanoSeconds { public: NanoSeconds(const NanoSeconds&) noexcept; @@ -1129,8 +1155,10 @@ class NanoSeconds : public virtual ::apache::thrift::TBase { bool operator < (const NanoSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1146,7 +1174,7 @@ typedef struct _TimeUnit__isset { bool NANOS :1; } _TimeUnit__isset; -class TimeUnit : public virtual ::apache::thrift::TBase { +class TimeUnit { public: 
TimeUnit(const TimeUnit&) noexcept; @@ -1191,8 +1219,10 @@ class TimeUnit : public virtual ::apache::thrift::TBase { bool operator < (const TimeUnit & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1207,7 +1237,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); * * Allowed for physical types: INT64 */ -class TimestampType : public virtual ::apache::thrift::TBase { +class TimestampType { public: TimestampType(const TimestampType&) noexcept; @@ -1240,8 +1270,10 @@ class TimestampType : public virtual ::apache::thrift::TBase { bool operator < (const TimestampType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1256,7 +1288,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj); * * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ -class TimeType : public virtual ::apache::thrift::TBase { +class TimeType { public: TimeType(const TimeType&) noexcept; @@ -1289,8 +1321,10 @@ class TimeType : public virtual ::apache::thrift::TBase { bool operator < (const TimeType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1307,7 +1341,7 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj); * * Allowed for physical types: INT32, INT64 */ -class IntType : public virtual ::apache::thrift::TBase { +class IntType { public: IntType(const IntType&) noexcept; @@ -1341,8 +1375,10 @@ class IntType : public virtual ::apache::thrift::TBase { bool operator < (const IntType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1357,7 +1393,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); * * Allowed for physical types: BINARY */ -class JsonType : public virtual ::apache::thrift::TBase { +class JsonType { public: JsonType(const JsonType&) noexcept; @@ -1379,8 +1415,10 @@ class JsonType : public virtual ::apache::thrift::TBase { bool operator < (const JsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1395,7 +1433,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); * * Allowed for physical types: BINARY */ -class BsonType : public virtual ::apache::thrift::TBase { +class BsonType { public: BsonType(const BsonType&) noexcept; @@ -1417,8 +1455,10 @@ class BsonType : public virtual ::apache::thrift::TBase 
{ bool operator < (const BsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1452,7 +1492,7 @@ typedef struct _LogicalType__isset { * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. */ -class LogicalType : public virtual ::apache::thrift::TBase { +class LogicalType { public: LogicalType(const LogicalType&) noexcept; @@ -1574,8 +1614,10 @@ class LogicalType : public virtual ::apache::thrift::TBase { bool operator < (const LogicalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1603,7 +1645,7 @@ typedef struct _SchemaElement__isset { * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. */ -class SchemaElement : public virtual ::apache::thrift::TBase { +class SchemaElement { public: SchemaElement(const SchemaElement&); @@ -1754,8 +1796,10 @@ class SchemaElement : public virtual ::apache::thrift::TBase { bool operator < (const SchemaElement & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1772,7 +1816,7 @@ typedef struct _DataPageHeader__isset { /** * Data page header */ -class DataPageHeader : public virtual ::apache::thrift::TBase { +class DataPageHeader { public: DataPageHeader(const DataPageHeader&); @@ -1848,8 +1892,10 @@ class DataPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1859,7 +1905,7 @@ void swap(DataPageHeader &a, DataPageHeader &b); std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); -class IndexPageHeader : public virtual ::apache::thrift::TBase { +class IndexPageHeader { public: IndexPageHeader(const IndexPageHeader&) noexcept; @@ -1881,8 +1927,10 @@ class IndexPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const IndexPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1902,7 +1950,7 @@ typedef struct _DictionaryPageHeader__isset { * can be placed in a column chunk. 
* */ -class DictionaryPageHeader : public virtual ::apache::thrift::TBase { +class DictionaryPageHeader { public: DictionaryPageHeader(const DictionaryPageHeader&) noexcept; @@ -1957,8 +2005,10 @@ class DictionaryPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DictionaryPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1979,7 +2029,7 @@ typedef struct _DataPageHeaderV2__isset { * The remaining section containing the data is compressed if is_compressed is true * */ -class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { +class DataPageHeaderV2 { public: DataPageHeaderV2(const DataPageHeaderV2&); @@ -2085,8 +2135,10 @@ class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeaderV2 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2099,7 +2151,7 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); /** * Block-based algorithm type annotation. * */ -class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { +class SplitBlockAlgorithm { public: SplitBlockAlgorithm(const SplitBlockAlgorithm&) noexcept; @@ -2121,8 +2173,10 @@ class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const SplitBlockAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2139,7 +2193,7 @@ typedef struct _BloomFilterAlgorithm__isset { /** * The algorithm used in Bloom filter. * */ -class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { +class BloomFilterAlgorithm { public: BloomFilterAlgorithm(const BloomFilterAlgorithm&) noexcept; @@ -2173,8 +2227,10 @@ class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2189,7 +2245,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); * algorithm. It uses 64 bits version of xxHash. 
* */ -class XxHash : public virtual ::apache::thrift::TBase { +class XxHash { public: XxHash(const XxHash&) noexcept; @@ -2211,8 +2267,10 @@ class XxHash : public virtual ::apache::thrift::TBase { bool operator < (const XxHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2231,7 +2289,7 @@ typedef struct _BloomFilterHash__isset { * using plain encoding. * */ -class BloomFilterHash : public virtual ::apache::thrift::TBase { +class BloomFilterHash { public: BloomFilterHash(const BloomFilterHash&) noexcept; @@ -2265,8 +2323,10 @@ class BloomFilterHash : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2280,7 +2340,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); * The compression used in the Bloom filter. * */ -class Uncompressed : public virtual ::apache::thrift::TBase { +class Uncompressed { public: Uncompressed(const Uncompressed&) noexcept; @@ -2302,8 +2362,10 @@ class Uncompressed : public virtual ::apache::thrift::TBase { bool operator < (const Uncompressed & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2317,7 +2379,7 @@ typedef struct _BloomFilterCompression__isset { bool UNCOMPRESSED :1; } _BloomFilterCompression__isset; -class BloomFilterCompression : public virtual ::apache::thrift::TBase { +class BloomFilterCompression { public: BloomFilterCompression(const BloomFilterCompression&) noexcept; @@ -2348,8 +2410,10 @@ class BloomFilterCompression : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterCompression & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2364,7 +2428,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); * and followed by its bitset. 
* */ -class BloomFilterHeader : public virtual ::apache::thrift::TBase { +class BloomFilterHeader { public: BloomFilterHeader(const BloomFilterHeader&) noexcept; @@ -2419,8 +2483,10 @@ class BloomFilterHeader : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2438,7 +2504,7 @@ typedef struct _PageHeader__isset { bool data_page_header_v2 :1; } _PageHeader__isset; -class PageHeader : public virtual ::apache::thrift::TBase { +class PageHeader { public: PageHeader(const PageHeader&); @@ -2545,8 +2611,10 @@ class PageHeader : public virtual ::apache::thrift::TBase { bool operator < (const PageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2563,7 +2631,7 @@ typedef struct _KeyValue__isset { /** * Wrapper struct to store key values */ -class KeyValue : public virtual ::apache::thrift::TBase { +class KeyValue { public: KeyValue(const KeyValue&); @@ -2601,8 +2669,10 @@ class KeyValue : public virtual ::apache::thrift::TBase { bool operator < (const KeyValue & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2615,7 +2685,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj); /** * Wrapper struct to specify sort order */ -class SortingColumn : public virtual ::apache::thrift::TBase { +class SortingColumn { public: SortingColumn(const SortingColumn&) noexcept; @@ -2665,8 +2735,10 @@ class SortingColumn : public virtual ::apache::thrift::TBase { bool operator < (const SortingColumn & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2679,7 +2751,7 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); /** * statistics of a given page type and encoding */ -class PageEncodingStats : public virtual ::apache::thrift::TBase { +class PageEncodingStats { public: PageEncodingStats(const PageEncodingStats&) noexcept; @@ -2732,8 +2804,10 @@ class PageEncodingStats : public virtual ::apache::thrift::TBase { bool operator < (const PageEncodingStats & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2757,7 +2831,7 @@ typedef struct _ColumnMetaData__isset { /** * Description for column metadata */ -class ColumnMetaData : public virtual ::apache::thrift::TBase { +class ColumnMetaData { public: 
ColumnMetaData(const ColumnMetaData&); @@ -2950,8 +3024,10 @@ class ColumnMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2961,7 +3037,7 @@ void swap(ColumnMetaData &a, ColumnMetaData &b); std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); -class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { +class EncryptionWithFooterKey { public: EncryptionWithFooterKey(const EncryptionWithFooterKey&) noexcept; @@ -2983,8 +3059,10 @@ class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithFooterKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2998,7 +3076,7 @@ typedef struct _EncryptionWithColumnKey__isset { bool key_metadata :1; } _EncryptionWithColumnKey__isset; -class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { +class EncryptionWithColumnKey { public: EncryptionWithColumnKey(const EncryptionWithColumnKey&); @@ -3041,8 +3119,10 @@ class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithColumnKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3057,7 +3137,7 @@ typedef struct _ColumnCryptoMetaData__isset { bool ENCRYPTION_WITH_COLUMN_KEY :1; } _ColumnCryptoMetaData__isset; -class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { +class ColumnCryptoMetaData { public: ColumnCryptoMetaData(const ColumnCryptoMetaData&); @@ -3095,8 +3175,10 @@ class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnCryptoMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3117,7 +3199,7 @@ typedef struct _ColumnChunk__isset { bool encrypted_column_metadata :1; } _ColumnChunk__isset; -class ColumnChunk : public virtual ::apache::thrift::TBase { +class ColumnChunk { public: ColumnChunk(const ColumnChunk&); @@ -3241,8 +3323,10 @@ class ColumnChunk : public virtual ::apache::thrift::TBase { bool operator < (const ColumnChunk & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3259,7 +3343,7 @@ typedef struct _RowGroup__isset { bool ordinal :1; } _RowGroup__isset; -class RowGroup : public virtual 
::apache::thrift::TBase { +class RowGroup { public: RowGroup(const RowGroup&); @@ -3357,8 +3441,10 @@ class RowGroup : public virtual ::apache::thrift::TBase { bool operator < (const RowGroup & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3371,7 +3457,7 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj); /** * Empty struct to signal the order defined by the physical or logical type */ -class TypeDefinedOrder : public virtual ::apache::thrift::TBase { +class TypeDefinedOrder { public: TypeDefinedOrder(const TypeDefinedOrder&) noexcept; @@ -3393,8 +3479,10 @@ class TypeDefinedOrder : public virtual ::apache::thrift::TBase { bool operator < (const TypeDefinedOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3420,7 +3508,7 @@ typedef struct _ColumnOrder__isset { * If the reader does not support the value of this union, min and max stats * for this column should be ignored. */ -class ColumnOrder : public virtual ::apache::thrift::TBase { +class ColumnOrder { public: ColumnOrder(const ColumnOrder&) noexcept; @@ -3501,8 +3589,10 @@ class ColumnOrder : public virtual ::apache::thrift::TBase { bool operator < (const ColumnOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3512,7 +3602,7 @@ void swap(ColumnOrder &a, ColumnOrder &b); std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); -class PageLocation : public virtual ::apache::thrift::TBase { +class PageLocation { public: PageLocation(const PageLocation&) noexcept; @@ -3563,8 +3653,10 @@ class PageLocation : public virtual ::apache::thrift::TBase { bool operator < (const PageLocation & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3578,7 +3670,7 @@ typedef struct _OffsetIndex__isset { bool unencoded_byte_array_data_bytes :1; } _OffsetIndex__isset; -class OffsetIndex : public virtual ::apache::thrift::TBase { +class OffsetIndex { public: OffsetIndex(const OffsetIndex&); @@ -3624,8 +3716,10 @@ class OffsetIndex : public virtual ::apache::thrift::TBase { bool operator < (const OffsetIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3645,7 +3739,7 @@ typedef struct _ColumnIndex__isset { * Description for ColumnIndex. 
* Each [i] refers to the page at OffsetIndex.page_locations[i] */ -class ColumnIndex : public virtual ::apache::thrift::TBase { +class ColumnIndex { public: ColumnIndex(const ColumnIndex&); @@ -3756,8 +3850,10 @@ class ColumnIndex : public virtual ::apache::thrift::TBase { bool operator < (const ColumnIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3773,7 +3869,7 @@ typedef struct _AesGcmV1__isset { bool supply_aad_prefix :1; } _AesGcmV1__isset; -class AesGcmV1 : public virtual ::apache::thrift::TBase { +class AesGcmV1 { public: AesGcmV1(const AesGcmV1&); @@ -3831,8 +3927,10 @@ class AesGcmV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3848,7 +3946,7 @@ typedef struct _AesGcmCtrV1__isset { bool supply_aad_prefix :1; } _AesGcmCtrV1__isset; -class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { +class AesGcmCtrV1 { public: AesGcmCtrV1(const AesGcmCtrV1&); @@ -3906,8 +4004,10 @@ class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmCtrV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3922,7 +4022,7 @@ typedef struct _EncryptionAlgorithm__isset { bool AES_GCM_CTR_V1 :1; } _EncryptionAlgorithm__isset; -class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { +class EncryptionAlgorithm { public: EncryptionAlgorithm(const EncryptionAlgorithm&); @@ -3960,8 +4060,10 @@ class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3982,7 +4084,7 @@ typedef struct _FileMetaData__isset { /** * Description for file metadata */ -class FileMetaData : public virtual ::apache::thrift::TBase { +class FileMetaData { public: FileMetaData(const FileMetaData&); @@ -4116,8 +4218,10 @@ class FileMetaData : public virtual ::apache::thrift::TBase { bool operator < (const FileMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -4134,7 +4238,7 @@ typedef struct _FileCryptoMetaData__isset { /** * Crypto metadata for files with encrypted footer * */ -class FileCryptoMetaData : public virtual ::apache::thrift::TBase { +class FileCryptoMetaData { public: 
   FileCryptoMetaData(const FileCryptoMetaData&);
@@ -4180,8 +4284,10 @@ class FileCryptoMetaData : public virtual ::apache::thrift::TBase {
 
   bool operator < (const FileCryptoMetaData & ) const;
 
-  uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override;
-  uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override;
+  template <class Protocol_>
+  uint32_t read(Protocol_* iprot);
+  template <class Protocol_>
+  uint32_t write(Protocol_* oprot) const;
 
   virtual void printTo(std::ostream& out) const;
 };
@@ -4192,4 +4298,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj);
 
 }} // namespace
 
+#include "parquet_types.tcc"
+
 #endif
diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc
new file mode 100644
index 0000000000000..ee02d7f0139fc
--- /dev/null
+++ b/cpp/src/generated/parquet_types.tcc
@@ -0,0 +1,4867 @@
+/**
+ * Autogenerated by Thrift Compiler (0.19.0)
+ *
+ * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+ * @generated
+ */
+#ifndef parquet_TYPES_TCC
+#define parquet_TYPES_TCC
+
+#include "parquet_types.h"
+
+namespace parquet { namespace format {
+
+template <class Protocol_>
+uint32_t SizeStatistics::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->unencoded_byte_array_data_bytes);
+          this->__isset.unencoded_byte_array_data_bytes = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->repetition_level_histogram.clear();
+            uint32_t _size0;
+            ::apache::thrift::protocol::TType _etype3;
+            xfer += iprot->readListBegin(_etype3, _size0);
+            this->repetition_level_histogram.resize(_size0);
+            uint32_t _i4;
+            for (_i4 = 0; _i4 < _size0; ++_i4)
+            {
+              xfer += iprot->readI64(this->repetition_level_histogram[_i4]);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.repetition_level_histogram = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_LIST) {
+          {
+            this->definition_level_histogram.clear();
+            uint32_t _size5;
+            ::apache::thrift::protocol::TType _etype8;
+            xfer += iprot->readListBegin(_etype8, _size5);
+            this->definition_level_histogram.resize(_size5);
+            uint32_t _i9;
+            for (_i9 = 0; _i9 < _size5; ++_i9)
+            {
+              xfer += iprot->readI64(this->definition_level_histogram[_i9]);
+            }
+            xfer += iprot->readListEnd();
+          }
+          this->__isset.definition_level_histogram = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SizeStatistics::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("SizeStatistics");
+
+  if (this->__isset.unencoded_byte_array_data_bytes) {
+    xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1);
+    xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.repetition_level_histogram) {
+    xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->repetition_level_histogram.size()));
+      std::vector<int64_t> ::const_iterator _iter10;
+      for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10)
+      {
+        xfer += oprot->writeI64((*_iter10));
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.definition_level_histogram) {
+    xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3);
+    {
+      xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast<uint32_t>(this->definition_level_histogram.size()));
+      std::vector<int64_t> ::const_iterator _iter11;
+      for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11)
+      {
+        xfer += oprot->writeI64((*_iter11));
+      }
+      xfer += oprot->writeListEnd();
+    }
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Statistics::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->max);
+          this->__isset.max = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->min);
+          this->__isset.min = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->null_count);
+          this->__isset.null_count = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I64) {
+          xfer += iprot->readI64(this->distinct_count);
+          this->__isset.distinct_count = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->max_value);
+          this->__isset.max_value = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readBinary(this->min_value);
+          this->__isset.min_value = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_max_value_exact);
+          this->__isset.is_max_value_exact = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_min_value_exact);
+          this->__isset.is_min_value_exact = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Statistics::write(Protocol_* oprot) const {
+  uint32_t 
xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("Statistics");
+
+  if (this->__isset.max) {
+    xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1);
+    xfer += oprot->writeBinary(this->max);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.min) {
+    xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2);
+    xfer += oprot->writeBinary(this->min);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.null_count) {
+    xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3);
+    xfer += oprot->writeI64(this->null_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.distinct_count) {
+    xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4);
+    xfer += oprot->writeI64(this->distinct_count);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.max_value) {
+    xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5);
+    xfer += oprot->writeBinary(this->max_value);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.min_value) {
+    xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6);
+    xfer += oprot->writeBinary(this->min_value);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.is_max_value_exact) {
+    xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7);
+    xfer += oprot->writeBool(this->is_max_value_exact);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.is_min_value_exact) {
+    xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8);
+    xfer += oprot->writeBool(this->is_min_value_exact);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t StringType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t StringType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("StringType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t UUIDType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t UUIDType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("UUIDType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MapType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MapType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("MapType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t ListType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t ListType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("ListType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t EnumType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t EnumType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("EnumType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DateType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DateType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("DateType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Float16Type::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Float16Type::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("Float16Type");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t NullType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t NullType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("NullType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DecimalType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_scale = false;
+  bool isset_precision = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->scale);
+          isset_scale = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->precision);
+          isset_precision = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_scale)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_precision)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DecimalType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("DecimalType");
+
+  xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->scale);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(this->precision);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MilliSeconds::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MilliSeconds::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("MilliSeconds");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MicroSeconds::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t MicroSeconds::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("MicroSeconds");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t NanoSeconds::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t NanoSeconds::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("NanoSeconds");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimeUnit::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->MILLIS.read(iprot);
+          this->__isset.MILLIS = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->MICROS.read(iprot);
+          this->__isset.MICROS = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->NANOS.read(iprot);
+          this->__isset.NANOS = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimeUnit::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("TimeUnit");
+
+  if (this->__isset.MILLIS) {
+    xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1);
+    xfer += this->MILLIS.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.MICROS) {
+    xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2);
+    xfer += this->MICROS.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.NANOS) {
+    xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3);
+    xfer += this->NANOS.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimestampType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_isAdjustedToUTC = false;
+  bool isset_unit = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->isAdjustedToUTC);
+          isset_isAdjustedToUTC = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->unit.read(iprot);
+          isset_unit = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_isAdjustedToUTC)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_unit)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimestampType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("TimestampType");
+
+  xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
+  xfer += oprot->writeBool(this->isAdjustedToUTC);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
+  xfer += this->unit.write(oprot);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimeType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_isAdjustedToUTC = false;
+  bool isset_unit = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->isAdjustedToUTC);
+          isset_isAdjustedToUTC = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->unit.read(iprot);
+          isset_unit = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_isAdjustedToUTC)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_unit)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t TimeType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("TimeType");
+
+  xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1);
+  xfer += oprot->writeBool(this->isAdjustedToUTC);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2);
+  xfer += this->unit.write(oprot);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t IntType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_bitWidth = false;
+  bool isset_isSigned = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_BYTE) {
+          xfer += iprot->readByte(this->bitWidth);
+          isset_bitWidth = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->isSigned);
+          isset_isSigned = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_bitWidth)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_isSigned)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t IntType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("IntType");
+
+  xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1);
+  xfer += oprot->writeByte(this->bitWidth);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2);
+  xfer += oprot->writeBool(this->isSigned);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t JsonType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t JsonType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("JsonType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BsonType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BsonType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("BsonType");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t LogicalType::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->STRING.read(iprot);
+          this->__isset.STRING = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->MAP.read(iprot);
+          this->__isset.MAP = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->LIST.read(iprot);
+          this->__isset.LIST = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->ENUM.read(iprot);
+          this->__isset.ENUM = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->DECIMAL.read(iprot);
+          this->__isset.DECIMAL = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->DATE.read(iprot);
+          this->__isset.DATE = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->TIME.read(iprot);
+          this->__isset.TIME = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->TIMESTAMP.read(iprot);
+          this->__isset.TIMESTAMP = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 10:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->INTEGER.read(iprot);
+          this->__isset.INTEGER = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 11:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->UNKNOWN.read(iprot);
+          this->__isset.UNKNOWN = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 12:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->JSON.read(iprot);
+          this->__isset.JSON = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 13:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->BSON.read(iprot);
+          this->__isset.BSON = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 14:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->UUID.read(iprot);
+          this->__isset.UUID = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 15:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->FLOAT16.read(iprot);
+          this->__isset.FLOAT16 = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t LogicalType::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("LogicalType");
+
+  if (this->__isset.STRING) {
+    xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1);
+    xfer += this->STRING.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.MAP) {
+    xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2);
+    xfer += this->MAP.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.LIST) {
+    xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3);
+    xfer += this->LIST.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.ENUM) {
+    xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4);
+    xfer += this->ENUM.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.DECIMAL) {
+    xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5);
+    xfer += this->DECIMAL.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.DATE) {
+    xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6);
+    xfer += this->DATE.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.TIME) {
+    xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7);
+    xfer += this->TIME.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.TIMESTAMP) {
+    xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8);
+    xfer += this->TIMESTAMP.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.INTEGER) {
+    xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10);
+    xfer += this->INTEGER.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.UNKNOWN) {
+    xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11);
+    xfer += this->UNKNOWN.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.JSON) {
+    xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12);
+    xfer += this->JSON.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.BSON) {
+    xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13);
+    xfer += this->BSON.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.UUID) {
+    xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14);
+    xfer += this->UUID.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.FLOAT16) {
+    xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15);
+    xfer += this->FLOAT16.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SchemaElement::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_name = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast96;
+          xfer += iprot->readI32(ecast96);
+          this->type = static_cast<Type::type>(ecast96);
+          this->__isset.type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->type_length);
+          this->__isset.type_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast97;
+          xfer += iprot->readI32(ecast97);
+          this->repetition_type = static_cast<FieldRepetitionType::type>(ecast97);
+          this->__isset.repetition_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->name);
+          isset_name = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_children);
+          this->__isset.num_children = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast98;
+          xfer += iprot->readI32(ecast98);
+          this->converted_type = static_cast<ConvertedType::type>(ecast98);
+          this->__isset.converted_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->scale);
+          this->__isset.scale = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->precision);
+          this->__isset.precision = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 9:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->field_id);
+          this->__isset.field_id = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 10:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->logicalType.read(iprot);
+          this->__isset.logicalType = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_name)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SchemaElement::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("SchemaElement");
+
+  if (this->__isset.type) {
+    xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.type_length) {
+    xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2);
+    xfer += oprot->writeI32(this->type_length);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.repetition_type) {
+    xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_type));
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4);
+  xfer += oprot->writeString(this->name);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.num_children) {
+    xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5);
+    xfer += oprot->writeI32(this->num_children);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.converted_type) {
+    xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6);
+    xfer += oprot->writeI32(static_cast<int32_t>(this->converted_type));
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.scale) {
+    xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7);
+    xfer += oprot->writeI32(this->scale);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.precision) {
+    xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8);
+    xfer += oprot->writeI32(this->precision);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.field_id) {
+    xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9);
+    xfer += oprot->writeI32(this->field_id);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.logicalType) {
+    xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10);
+    xfer += this->logicalType.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DataPageHeader::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;
+  bool isset_encoding = false;
+  bool isset_definition_level_encoding = false;
+  bool isset_repetition_level_encoding = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast103;
+          xfer += iprot->readI32(ecast103);
+          this->encoding = static_cast<Encoding::type>(ecast103);
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast104;
+          xfer += iprot->readI32(ecast104);
+          this->definition_level_encoding = static_cast<Encoding::type>(ecast104);
+          isset_definition_level_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast105;
+          xfer += iprot->readI32(ecast105);
+          this->repetition_level_encoding = static_cast<Encoding::type>(ecast105);
+          isset_repetition_level_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_definition_level_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_repetition_level_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DataPageHeader::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("DataPageHeader");
+
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->definition_level_encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->repetition_level_encoding));
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t IndexPageHeader::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t IndexPageHeader::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("IndexPageHeader");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DictionaryPageHeader::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;
+  bool isset_encoding = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast114;
+          xfer += iprot->readI32(ecast114);
+          this->encoding = static_cast<Encoding::type>(ecast114);
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_sorted);
+          this->__isset.is_sorted = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DictionaryPageHeader::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("DictionaryPageHeader");
+
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_sorted) {
+    xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3);
+    xfer += oprot->writeBool(this->is_sorted);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DataPageHeaderV2::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_num_values = false;
+  bool isset_num_nulls = false;
+  bool isset_num_rows = false;
+  bool isset_encoding = false;
+  bool isset_definition_levels_byte_length = false;
+  bool isset_repetition_levels_byte_length = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_values);
+          isset_num_values = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_nulls);
+          isset_num_nulls = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->num_rows);
+          isset_num_rows = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast119;
+          xfer += iprot->readI32(ecast119);
+          this->encoding = static_cast<Encoding::type>(ecast119);
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->definition_levels_byte_length);
+          isset_definition_levels_byte_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->repetition_levels_byte_length);
+          isset_repetition_levels_byte_length = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->is_compressed);
+          this->__isset.is_compressed = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->statistics.read(iprot);
+          this->__isset.statistics = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_num_values)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_nulls)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_num_rows)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_definition_levels_byte_length)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_repetition_levels_byte_length)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t DataPageHeaderV2::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("DataPageHeaderV2");
+
+  xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->num_values);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(this->num_nulls);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(this->num_rows);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5);
+  xfer += oprot->writeI32(this->definition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6);
+  xfer += oprot->writeI32(this->repetition_levels_byte_length);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.is_compressed) {
+    xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7);
+    xfer += oprot->writeBool(this->is_compressed);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.statistics) {
+    xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8);
+    xfer += this->statistics.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SplitBlockAlgorithm::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SplitBlockAlgorithm::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("SplitBlockAlgorithm");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterAlgorithm::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->BLOCK.read(iprot);
+          this->__isset.BLOCK = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterAlgorithm::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("BloomFilterAlgorithm");
+
+  if (this->__isset.BLOCK) {
+    xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1);
+    xfer += this->BLOCK.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t XxHash::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t XxHash::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("XxHash");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterHash::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->XXHASH.read(iprot);
+          this->__isset.XXHASH = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterHash::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("BloomFilterHash");
+
+  if (this->__isset.XXHASH) {
+    xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1);
+    xfer += this->XXHASH.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Uncompressed::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    xfer += iprot->skip(ftype);
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t Uncompressed::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("Uncompressed");
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterCompression::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->UNCOMPRESSED.read(iprot);
+          this->__isset.UNCOMPRESSED = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterCompression::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("BloomFilterCompression");
+
+  if (this->__isset.UNCOMPRESSED) {
+    xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1);
+    xfer += this->UNCOMPRESSED.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterHeader::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_numBytes = false;
+  bool isset_algorithm = false;
+  bool isset_hash = false;
+  bool isset_compression = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->numBytes);
+          isset_numBytes = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->algorithm.read(iprot);
+          isset_algorithm = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->hash.read(iprot);
+          isset_hash = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->compression.read(iprot);
+          isset_compression = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_numBytes)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_algorithm)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_hash)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_compression)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t BloomFilterHeader::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("BloomFilterHeader");
+
+  xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->numBytes);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2);
+  xfer += this->algorithm.write(oprot);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3);
+  xfer += this->hash.write(oprot);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4);
+  xfer += this->compression.write(oprot);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t PageHeader::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_type = false;
+  bool isset_uncompressed_page_size = false;
+  bool isset_compressed_page_size = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast152;
+          xfer += iprot->readI32(ecast152);
+          this->type = static_cast<PageType::type>(ecast152);
+          isset_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->uncompressed_page_size);
+          isset_uncompressed_page_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->compressed_page_size);
+          isset_compressed_page_size = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 4:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->crc);
+          this->__isset.crc = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 5:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->data_page_header.read(iprot);
+          this->__isset.data_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 6:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->index_page_header.read(iprot);
+          this->__isset.index_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 7:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->dictionary_page_header.read(iprot);
+          this->__isset.dictionary_page_header = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 8:
+        if (ftype == ::apache::thrift::protocol::T_STRUCT) {
+          xfer += this->data_page_header_v2.read(iprot);
+          this->__isset.data_page_header_v2 = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_type)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_uncompressed_page_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_compressed_page_size)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t PageHeader::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("PageHeader");
+
+  xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->type));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(this->uncompressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(this->compressed_page_size);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.crc) {
+    xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4);
+    xfer += oprot->writeI32(this->crc);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header) {
+    xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5);
+    xfer += this->data_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.index_page_header) {
+    xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6);
+    xfer += this->index_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.dictionary_page_header) {
+    xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7);
+    xfer += this->dictionary_page_header.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  if (this->__isset.data_page_header_v2) {
+    xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8);
+    xfer += this->data_page_header_v2.write(oprot);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t KeyValue::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_key = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->key);
+          isset_key = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_STRING) {
+          xfer += iprot->readString(this->value);
+          this->__isset.value = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_key)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t KeyValue::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("KeyValue");
+
+  xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1);
+  xfer += oprot->writeString(this->key);
+  xfer += oprot->writeFieldEnd();
+
+  if (this->__isset.value) {
+    xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2);
+    xfer += oprot->writeString(this->value);
+    xfer += oprot->writeFieldEnd();
+  }
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SortingColumn::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_column_idx = false;
+  bool isset_descending = false;
+  bool isset_nulls_first = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->column_idx);
+          isset_column_idx = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->descending);
+          isset_descending = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_BOOL) {
+          xfer += iprot->readBool(this->nulls_first);
+          isset_nulls_first = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_column_idx)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_descending)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_nulls_first)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t SortingColumn::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("SortingColumn");
+
+  xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(this->column_idx);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2);
+  xfer += oprot->writeBool(this->descending);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3);
+  xfer += oprot->writeBool(this->nulls_first);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t PageEncodingStats::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_page_type = false;
+  bool isset_encoding = false;
+  bool isset_count = false;
+
+  while (true)
+  {
+    xfer += iprot->readFieldBegin(fname, ftype, fid);
+    if (ftype == ::apache::thrift::protocol::T_STOP) {
+      break;
+    }
+    switch (fid)
+    {
+      case 1:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast165;
+          xfer += iprot->readI32(ecast165);
+          this->page_type = static_cast<PageType::type>(ecast165);
+          isset_page_type = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 2:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          int32_t ecast166;
+          xfer += iprot->readI32(ecast166);
+          this->encoding = static_cast<Encoding::type>(ecast166);
+          isset_encoding = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      case 3:
+        if (ftype == ::apache::thrift::protocol::T_I32) {
+          xfer += iprot->readI32(this->count);
+          isset_count = true;
+        } else {
+          xfer += iprot->skip(ftype);
+        }
+        break;
+      default:
+        xfer += iprot->skip(ftype);
+        break;
+    }
+    xfer += iprot->readFieldEnd();
+  }
+
+  xfer += iprot->readStructEnd();
+
+  if (!isset_page_type)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_encoding)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  if (!isset_count)
+    throw TProtocolException(TProtocolException::INVALID_DATA);
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t PageEncodingStats::write(Protocol_* oprot) const {
+  uint32_t xfer = 0;
+  ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot);
+  xfer += oprot->writeStructBegin("PageEncodingStats");
+
+  xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->page_type));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2);
+  xfer += oprot->writeI32(static_cast<int32_t>(this->encoding));
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3);
+  xfer += oprot->writeI32(this->count);
+  xfer += oprot->writeFieldEnd();
+
+  xfer += oprot->writeFieldStop();
+  xfer += oprot->writeStructEnd();
+  return xfer;
+}
+
+template <class Protocol_>
+uint32_t ColumnMetaData::read(Protocol_* iprot) {
+
+  ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot);
+  uint32_t xfer = 0;
+  std::string fname;
+  ::apache::thrift::protocol::TType ftype;
+  int16_t fid;
+
+  xfer += iprot->readStructBegin(fname);
+
+  using ::apache::thrift::protocol::TProtocolException;
+
+  bool isset_type = false;
+  bool isset_encodings = false;
+  bool isset_path_in_schema = false;
+  bool isset_codec = false;
+  bool isset_num_values = false;
bool isset_total_uncompressed_size = false; + bool isset_total_compressed_size = false; + bool isset_data_page_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast171; + xfer += iprot->readI32(ecast171); + this->type = static_cast(ecast171); + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->encodings.clear(); + uint32_t _size172; + ::apache::thrift::protocol::TType _etype175; + xfer += iprot->readListBegin(_etype175, _size172); + this->encodings.resize(_size172); + uint32_t _i176; + for (_i176 = 0; _i176 < _size172; ++_i176) + { + int32_t ecast177; + xfer += iprot->readI32(ecast177); + this->encodings[_i176] = static_cast(ecast177); + } + xfer += iprot->readListEnd(); + } + isset_encodings = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size178; + ::apache::thrift::protocol::TType _etype181; + xfer += iprot->readListBegin(_etype181, _size178); + this->path_in_schema.resize(_size178); + uint32_t _i182; + for (_i182 = 0; _i182 < _size178; ++_i182) + { + xfer += iprot->readString(this->path_in_schema[_i182]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast183; + xfer += iprot->readI32(ecast183); + this->codec = static_cast(ecast183); + isset_codec = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_uncompressed_size); + isset_total_uncompressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + isset_total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size184; + ::apache::thrift::protocol::TType _etype187; + xfer += iprot->readListBegin(_etype187, _size184); + this->key_value_metadata.resize(_size184); + uint32_t _i188; + for (_i188 = 0; _i188 < _size184; ++_i188) + { + xfer += this->key_value_metadata[_i188].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->data_page_offset); + isset_data_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->index_page_offset); + this->__isset.index_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->dictionary_page_offset); + this->__isset.dictionary_page_offset = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->encoding_stats.clear(); + uint32_t _size189; + ::apache::thrift::protocol::TType _etype192; + xfer += iprot->readListBegin(_etype192, _size189); + this->encoding_stats.resize(_size189); + uint32_t _i193; + for (_i193 = 0; _i193 < _size189; ++_i193) + { + xfer += this->encoding_stats[_i193].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.encoding_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->bloom_filter_offset); + this->__isset.bloom_filter_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 15: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->bloom_filter_length); + this->__isset.bloom_filter_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 16: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->size_statistics.read(iprot); + this->__isset.size_statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encodings) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_codec) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_uncompressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_compressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_data_page_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnMetaData"); + + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); + std::vector ::const_iterator _iter194; + for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) + { + xfer += oprot->writeI32(static_cast((*_iter194))); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter195; + for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) + { + xfer += oprot->writeString((*_iter195)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); 
+ + xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->codec)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_uncompressed_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter196; + for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) + { + xfer += (*_iter196).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9); + xfer += oprot->writeI64(this->data_page_offset); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.index_page_offset) { + xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10); + xfer += oprot->writeI64(this->index_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_offset) { + xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); + xfer += oprot->writeI64(this->dictionary_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encoding_stats) { + xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); + std::vector ::const_iterator _iter197; + for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) + { + xfer += (*_iter197).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.bloom_filter_offset) { + xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14); + xfer += oprot->writeI64(this->bloom_filter_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.bloom_filter_length) { + xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15); + xfer += oprot->writeI32(this->bloom_filter_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.size_statistics) { + xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16); + xfer += this->size_statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionWithFooterKey::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; 
+ ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionWithFooterKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_path_in_schema = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size206; + ::apache::thrift::protocol::TType _etype209; + xfer += iprot->readListBegin(_etype209, _size206); + this->path_in_schema.resize(_size206); + uint32_t _i210; + for (_i210 = 0; _i210 < _size206; ++_i210) + { + xfer += iprot->readString(this->path_in_schema[_i210]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); + + xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter211; + for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) + { + xfer += oprot->writeString((*_iter211)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using 
::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); + + if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnChunk::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_file_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->file_path); + this->__isset.file_path = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + isset_file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->meta_data.read(iprot); + this->__isset.meta_data = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset_index_offset); + this->__isset.offset_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->offset_index_length); + this->__isset.offset_index_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->column_index_offset); + this->__isset.column_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_index_length); + this->__isset.column_index_length = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->crypto_metadata.read(iprot); + this->__isset.crypto_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->encrypted_column_metadata); + this->__isset.encrypted_column_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_file_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnChunk::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnChunk"); + + if (this->__isset.file_path) { + xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->file_path); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.meta_data) { + xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->meta_data.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_offset) { + xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->offset_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_length) { + xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->offset_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_offset) { + xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->column_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_length) { + xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->column_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.crypto_metadata) { + xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->crypto_metadata.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encrypted_column_metadata) { + xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->encrypted_column_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t RowGroup::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_columns = false; + bool isset_total_byte_size = false; + bool isset_num_rows = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + 
{ + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->columns.clear(); + uint32_t _size224; + ::apache::thrift::protocol::TType _etype227; + xfer += iprot->readListBegin(_etype227, _size224); + this->columns.resize(_size224); + uint32_t _i228; + for (_i228 = 0; _i228 < _size224; ++_i228) + { + xfer += this->columns[_i228].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_byte_size); + isset_total_byte_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->sorting_columns.clear(); + uint32_t _size229; + ::apache::thrift::protocol::TType _etype232; + xfer += iprot->readListBegin(_etype232, _size229); + this->sorting_columns.resize(_size229); + uint32_t _i233; + for (_i233 = 0; _i233 < _size229; ++_i233) + { + xfer += this->sorting_columns[_i233].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.sorting_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + this->__isset.file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + this->__isset.total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I16) { + xfer += iprot->readI16(this->ordinal); + this->__isset.ordinal = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_columns) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_byte_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t RowGroup::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("RowGroup"); + + xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); + std::vector ::const_iterator _iter234; + for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) + { + xfer += (*_iter234).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->total_byte_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.sorting_columns) { + xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); + std::vector ::const_iterator _iter235; + for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) + { + xfer += (*_iter235).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.file_offset) { + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.total_compressed_size) { + xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ordinal) { + xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); + xfer += oprot->writeI16(this->ordinal); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TypeDefinedOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t TypeDefinedOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TypeDefinedOrder"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->TYPE_ORDER.read(iprot); + this->__isset.TYPE_ORDER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnOrder"); + + if (this->__isset.TYPE_ORDER) { + xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->TYPE_ORDER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageLocation::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; 
+ int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_offset = false; + bool isset_compressed_page_size = false; + bool isset_first_row_index = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset); + isset_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->first_row_index); + isset_first_row_index = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_first_row_index) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageLocation::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageLocation"); + + xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->offset); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->first_row_index); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t OffsetIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_locations = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->page_locations.clear(); + uint32_t _size252; + ::apache::thrift::protocol::TType _etype255; + xfer += iprot->readListBegin(_etype255, _size252); + this->page_locations.resize(_size252); + uint32_t _i256; + for (_i256 = 0; _i256 < _size252; ++_i256) + { + xfer += this->page_locations[_i256].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_page_locations = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->unencoded_byte_array_data_bytes.clear(); + uint32_t _size257; + ::apache::thrift::protocol::TType _etype260; + xfer += iprot->readListBegin(_etype260, _size257); + this->unencoded_byte_array_data_bytes.resize(_size257); + uint32_t 
_i261; + for (_i261 = 0; _i261 < _size257; ++_i261) + { + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); + } + xfer += iprot->readListEnd(); + } + this->__isset.unencoded_byte_array_data_bytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_locations) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t OffsetIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("OffsetIndex"); + + xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); + std::vector ::const_iterator _iter262; + for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) + { + xfer += (*_iter262).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.unencoded_byte_array_data_bytes) { + xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); + std::vector ::const_iterator _iter263; + for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) + { + xfer += oprot->writeI64((*_iter263)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_null_pages = false; + bool isset_min_values = false; + bool isset_max_values = false; + bool isset_boundary_order = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_pages.clear(); + uint32_t _size268; + ::apache::thrift::protocol::TType _etype271; + xfer += iprot->readListBegin(_etype271, _size268); + this->null_pages.resize(_size268); + uint32_t _i272; + for (_i272 = 0; _i272 < _size268; ++_i272) + { + xfer += iprot->readBool(this->null_pages[_i272]); + } + xfer += iprot->readListEnd(); + } + isset_null_pages = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->min_values.clear(); + uint32_t _size273; + ::apache::thrift::protocol::TType _etype276; + xfer += iprot->readListBegin(_etype276, _size273); + this->min_values.resize(_size273); + uint32_t _i277; + for (_i277 = 0; _i277 < _size273; ++_i277) + { + xfer += iprot->readBinary(this->min_values[_i277]); + } + xfer += iprot->readListEnd(); + } + isset_min_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + 
{ + this->max_values.clear(); + uint32_t _size278; + ::apache::thrift::protocol::TType _etype281; + xfer += iprot->readListBegin(_etype281, _size278); + this->max_values.resize(_size278); + uint32_t _i282; + for (_i282 = 0; _i282 < _size278; ++_i282) + { + xfer += iprot->readBinary(this->max_values[_i282]); + } + xfer += iprot->readListEnd(); + } + isset_max_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast283; + xfer += iprot->readI32(ecast283); + this->boundary_order = static_cast(ecast283); + isset_boundary_order = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_counts.clear(); + uint32_t _size284; + ::apache::thrift::protocol::TType _etype287; + xfer += iprot->readListBegin(_etype287, _size284); + this->null_counts.resize(_size284); + uint32_t _i288; + for (_i288 = 0; _i288 < _size284; ++_i288) + { + xfer += iprot->readI64(this->null_counts[_i288]); + } + xfer += iprot->readListEnd(); + } + this->__isset.null_counts = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->repetition_level_histograms.clear(); + uint32_t _size289; + ::apache::thrift::protocol::TType _etype292; + xfer += iprot->readListBegin(_etype292, _size289); + this->repetition_level_histograms.resize(_size289); + uint32_t _i293; + for (_i293 = 0; _i293 < _size289; ++_i293) + { + xfer += iprot->readI64(this->repetition_level_histograms[_i293]); + } + xfer += iprot->readListEnd(); + } + this->__isset.repetition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->definition_level_histograms.clear(); + uint32_t _size294; + ::apache::thrift::protocol::TType _etype297; + xfer += iprot->readListBegin(_etype297, _size294); + this->definition_level_histograms.resize(_size294); + uint32_t _i298; + for (_i298 = 0; _i298 < _size294; ++_i298) + { + xfer += iprot->readI64(this->definition_level_histograms[_i298]); + } + xfer += iprot->readListEnd(); + } + this->__isset.definition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_null_pages) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_min_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_max_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_boundary_order) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnIndex"); + + xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); + std::vector ::const_iterator _iter299; + for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) + { + xfer += oprot->writeBool((*_iter299)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("min_values", 
::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); + std::vector ::const_iterator _iter300; + for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) + { + xfer += oprot->writeBinary((*_iter300)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); + std::vector ::const_iterator _iter301; + for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) + { + xfer += oprot->writeBinary((*_iter301)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->boundary_order)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.null_counts) { + xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); + std::vector ::const_iterator _iter302; + for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) + { + xfer += oprot->writeI64((*_iter302)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_level_histograms) { + xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); + std::vector ::const_iterator _iter303; + for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) + { + xfer += oprot->writeI64((*_iter303)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.definition_level_histograms) { + xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); + std::vector ::const_iterator _iter304; + for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) + { + xfer += oprot->writeI64((*_iter304)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += 
iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmCtrV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmCtrV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmCtrV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += 
oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_V1.read(iprot); + this->__isset.AES_GCM_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_CTR_V1.read(iprot); + this->__isset.AES_GCM_CTR_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionAlgorithm"); + + if (this->__isset.AES_GCM_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->AES_GCM_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.AES_GCM_CTR_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->AES_GCM_CTR_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_version = false; + bool isset_schema = false; + bool isset_num_rows = false; + bool isset_row_groups = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->version); + isset_version = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->schema.clear(); + uint32_t _size321; + ::apache::thrift::protocol::TType _etype324; + xfer += iprot->readListBegin(_etype324, _size321); + this->schema.resize(_size321); + uint32_t _i325; + for (_i325 = 0; _i325 < _size321; ++_i325) + { + xfer += this->schema[_i325].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->row_groups.clear(); + uint32_t _size326; + ::apache::thrift::protocol::TType _etype329; + xfer += iprot->readListBegin(_etype329, 
_size326); + this->row_groups.resize(_size326); + uint32_t _i330; + for (_i330 = 0; _i330 < _size326; ++_i330) + { + xfer += this->row_groups[_i330].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_row_groups = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size331; + ::apache::thrift::protocol::TType _etype334; + xfer += iprot->readListBegin(_etype334, _size331); + this->key_value_metadata.resize(_size331); + uint32_t _i335; + for (_i335 = 0; _i335 < _size331; ++_i335) + { + xfer += this->key_value_metadata[_i335].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->created_by); + this->__isset.created_by = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->column_orders.clear(); + uint32_t _size336; + ::apache::thrift::protocol::TType _etype339; + xfer += iprot->readListBegin(_etype339, _size336); + this->column_orders.resize(_size336); + uint32_t _i340; + for (_i340 = 0; _i340 < _size336; ++_i340) + { + xfer += this->column_orders[_i340].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.column_orders = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + this->__isset.encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->footer_signing_key_metadata); + this->__isset.footer_signing_key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_version) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_row_groups) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("FileMetaData"); + + xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->version); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); + std::vector ::const_iterator _iter341; + for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) + { + xfer += (*_iter341).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); + std::vector ::const_iterator _iter342; + for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) + { + xfer += (*_iter342).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter343; + for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) + { + xfer += (*_iter343).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.created_by) { + xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeString(this->created_by); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_orders) { + xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); + std::vector ::const_iterator _iter344; + for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) + { + xfer += (*_iter344).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encryption_algorithm) { + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.footer_signing_key_metadata) { + xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->footer_signing_key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_encryption_algorithm = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + isset_encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_encryption_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("FileCryptoMetaData"); + + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +}} // namespace + +#endif diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 7491f118d32a0..9e02331b44ba0 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -446,13 +446,12 @@ class ThriftDeserializer { T* deserialized_msg) { // Deserialize msg bytes into c++ thrift msg using memory transport. auto tmem_transport = CreateReadOnlyMemoryBuffer(const_cast(buf), *len); - apache::thrift::protocol::TCompactProtocolFactoryT tproto_factory; - // Protect against CPU and memory bombs - tproto_factory.setStringSizeLimit(string_size_limit_); - tproto_factory.setContainerSizeLimit(container_size_limit_); - auto tproto = tproto_factory.getProtocol(tmem_transport); + auto tproto = apache::thrift::protocol::TCompactProtocolT( + tmem_transport, string_size_limit_, container_size_limit_); try { - deserialized_msg->read(tproto.get()); + deserialized_msg + ->template read>( + &tproto); } catch (std::exception& e) { std::stringstream ss; ss << "Couldn't deserialize thrift: " << e.what() << "\n"; From 9bd0ddb4bf10a70101fa0c59dd881179dfd76994 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 22 May 2024 23:03:38 +0200 Subject: [PATCH 148/261] GH-41725: [Python] CMake: ignore Parquet encryption option if Parquet itself is not enabled (fix Java integration build) (#41776) ### Rationale for this change Because of refactoring in https://github.com/apache/arrow/issues/41480, explicitly enabling `PYARROW_WITH_PARQUET_ENCRYPTION` without enabling `PYARROW_WITH_PARQUET` (and without Arrow C++ being built with Parquet support) now raises an error, while before we checked in `setup.py` that both were enabled for enabling encryption support. This patch mimics that logic in CMakeLists.txt with a warning added. ### What changes are included in this PR? When PyArrow with Parquet Encryption is enabled but PyArrow with Parquet itself is not, ignore the encryption setting, but warn about it. ### Are these changes tested? Yes * GitHub Issue: #41725 Authored-by: Joris Van den Bossche Signed-off-by: Sutou Kouhei --- python/CMakeLists.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 07acb9e31a731..a8bbed117163d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -370,12 +370,18 @@ if(PYARROW_BUILD_ACERO) endif() endif() -if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) +if(PYARROW_BUILD_PARQUET) message(STATUS "Building PyArrow with Parquet") if(NOT ARROW_PARQUET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON") endif() find_package(Parquet REQUIRED) +else() + if(PYARROW_BUILD_PARQUET_ENCRYPTION) + message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting." 
+ ) + set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF) + endif() endif() # Check for only Arrow C++ options From 9185d7dad773ed8768f90fb63ad3ef7e7a92f108 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 23 May 2024 01:56:52 +0200 Subject: [PATCH 149/261] GH-41783: [C++] Make git-dependent definitions internal (#41781) ### Rationale for this change Exposing the ARROW_GIT_ID and ARROW_GIT_DESCRIPTION preprocessor variables in our public headers tends to make incremental builds less efficient, since those values change very often during development. Also, these values don't need to be preprocessor variables since they're just strings: you can't write useful `#if` directives with them. Instead, they can be inspected using `GetBuildInfo()`. ### Are these changes tested? By existing builds and tests. ### Are there any user-facing changes? Use cases depending on these preprocessor variables, which is unlikely, may break. They can be fixed by calling `arrow::GetBuildInfo()` instead. * GitHub Issue: #41783 Authored-by: Antoine Pitrou Signed-off-by: Sutou Kouhei --- .pre-commit-config.yaml | 2 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 6 ------ cpp/src/arrow/CMakeLists.txt | 6 ++++++ cpp/src/arrow/config.cc | 1 + cpp/src/arrow/util/config.h.cmake | 3 --- cpp/src/arrow/util/config_internal.h.cmake | 22 +++++++++++++++++++++ dev/archery/archery/utils/lint.py | 2 +- 7 files changed, 31 insertions(+), 11 deletions(-) create mode 100644 cpp/src/arrow/util/config_internal.h.cmake diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 863fd918e5911..e0b8009b03184 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -127,7 +127,7 @@ repos: ?^cpp/cmake_modules/FindNumPy\.cmake$| ?^cpp/cmake_modules/FindPythonLibsNew\.cmake$| ?^cpp/cmake_modules/UseCython\.cmake$| - ?^cpp/src/arrow/util/config\.h\.cmake$| + ?^cpp/src/arrow/util/.*\.h\.cmake$| ) - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v0.9.1 diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index c24442dcb8749..f102c7bb81683 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -5348,9 +5348,3 @@ if(ARROW_WITH_UCX) endif() message(STATUS "All bundled static libraries: ${ARROW_BUNDLED_STATIC_LIBS}") - -# Write out the package configurations. 
- -configure_file("src/arrow/util/config.h.cmake" "src/arrow/util/config.h" ESCAPE_QUOTES) -install(FILES "${ARROW_BINARY_DIR}/src/arrow/util/config.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 57a0b383a677a..150a304975cad 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -351,6 +351,12 @@ macro(append_runtime_avx512_src SRCS SRC) endif() endmacro() +# Write out compile-time configuration constants +configure_file("util/config.h.cmake" "util/config.h" ESCAPE_QUOTES) +configure_file("util/config_internal.h.cmake" "util/config_internal.h" ESCAPE_QUOTES) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/util/config.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") + set(ARROW_SRCS builder.cc buffer.cc diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 9e32e5437325f..a0e3a079b3157 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -20,6 +20,7 @@ #include #include "arrow/util/config.h" +#include "arrow/util/config_internal.h" #include "arrow/util/cpu_info.h" #include "arrow/vendored/datetime.h" diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index 9fbd685084fd5..08c2ae173601b 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -31,9 +31,6 @@ #define ARROW_BUILD_TYPE "@UPPERCASE_BUILD_TYPE@" -#define ARROW_GIT_ID "@ARROW_GIT_ID@" -#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" - #define ARROW_PACKAGE_KIND "@ARROW_PACKAGE_KIND@" #cmakedefine ARROW_COMPUTE diff --git a/cpp/src/arrow/util/config_internal.h.cmake b/cpp/src/arrow/util/config_internal.h.cmake new file mode 100644 index 0000000000000..e90f7ee12da4d --- /dev/null +++ b/cpp/src/arrow/util/config_internal.h.cmake @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// These variables are not exposed as they can make compilation caching +// and increment builds less efficient. 
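// For callers, a minimal sketch of the runtime replacement (a hedged
// illustration, not part of this header; field names are those declared
// in arrow/config.h):
//
//   #include "arrow/config.h"
//
//   const arrow::BuildInfo& info = arrow::GetBuildInfo();
//   std::string id = info.git_id;              // was ARROW_GIT_ID
//   std::string desc = info.git_description;   // was ARROW_GIT_DESCRIPTION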
+ +#define ARROW_GIT_ID "@ARROW_GIT_ID@" +#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index 92b7f79fc1017..c9d05fffd9168 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -163,7 +163,7 @@ def cmake_linter(src, fix=False): 'cpp/cmake_modules/FindNumPy.cmake', 'cpp/cmake_modules/FindPythonLibsNew.cmake', 'cpp/cmake_modules/UseCython.cmake', - 'cpp/src/arrow/util/config.h.cmake', + 'cpp/src/arrow/util/*.h.cmake', ] ) method = cmake_format.fix if fix else cmake_format.check From 420c01ab98295c7ee2f3ef640da911e4e8276a1c Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Wed, 22 May 2024 18:55:38 -0700 Subject: [PATCH 150/261] GH-41787: Update fmpp-maven-plugin output directory (#41788) ### Rationale for this change Per convention fmpp-maven-plugin should not directly generate files under target/generated-sources but into a subdirectory. ### What changes are included in this PR? Changing config to output files under `target/generated-sources/fmpp` ### Are these changes tested? CI ### Are there any user-facing changes? No * GitHub Issue: #41787 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/vector/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/vector/pom.xml b/java/vector/pom.xml index 07af93a499907..a315bbc03afb6 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -199,7 +199,7 @@ generate-sources src/main/codegen/config.fmpp - ${project.build.directory}/generated-sources + ${project.build.directory}/generated-sources/fmpp ${project.build.directory}/codegen/templates From e690ed14a1590388e32790c1dfd0bf4010d980fa Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Fri, 24 May 2024 02:11:18 +1100 Subject: [PATCH 151/261] MINOR: [Python] Increase timeout in TestThreadedCSVTableRead::test_cancellation (#41768) We hit this in conda-forge on some runners: ``` =================================== FAILURES =================================== __________________ TestThreadedCSVTableRead.test_cancellation __________________ [...] # Interruption should have arrived timely > assert last_duration <= 1.0 E assert 1.2137842178344727 <= 1.0 pyarrow/tests/test_csv.py:1473: AssertionError ----------------------------- Captured stdout call ----------------------------- workload size: 100000 workload size: 300000 workload size: 900000 workload size: 2700000 workload size: 8100000 ``` Give a little bit more time for the expected duration so conda-forge's test suite doesn't fail spuriously (and then have to be restarted manually). Authored-by: H. 
Vetinari Signed-off-by: Antoine Pitrou --- python/pyarrow/tests/test_csv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index bc1dd8a09a768..9ddb5197e9120 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1470,7 +1470,7 @@ def signal_from_thread(): pytest.fail("Failed to get an interruption during CSV reading") # Interruption should have arrived timely - assert last_duration <= 1.0 + assert last_duration <= 2.0 e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum From 84b9a1926ef3a21486e4517b603f35c8a6d302c4 Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Fri, 24 May 2024 02:18:29 +1100 Subject: [PATCH 152/261] MINOR: [Python] try harder to set up s3_server fixture (#41754) In conda-forge, when running the aarch tests in emulation, we regularly run into the [issue](https://github.com/conda-forge/pyarrow-feedstock/issues/117) that the fixture setup fails. Extending the timeouts fixes this. Since it only happens once per session, it doesn't hurt to take a little bit more time. Authored-by: H. Vetinari Signed-off-by: Antoine Pitrou --- python/pyarrow/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 57bc3c8fc6616..343b602995db6 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -192,7 +192,7 @@ def wrapper(*args, **kwargs): @pytest.fixture(scope='session') def s3_server(s3_connection, tmpdir_factory): - @retry(attempts=5, delay=0.1, backoff=2) + @retry(attempts=5, delay=1, backoff=2) def minio_server_health_check(address): resp = urllib.request.urlopen(f"http://{address}/minio/health/cluster") assert resp.getcode() == 200 From ecd769c3e9dade8dcb381fba7c2ac059d46a3a17 Mon Sep 17 00:00:00 2001 From: Alan Stoate Date: Fri, 24 May 2024 01:32:02 +1000 Subject: [PATCH 153/261] GH-39858: [C++][Device] Add Copy/View slice functions to a CPU pointer (#41477) ### Rationale for this change Currently ```MemoryManager``` objects define functionality to Copy or View entire buffers. Occasionally there is the need to only copy a single value or slice from a buffer to a piece of CPU memory (see https://github.com/apache/arrow/pull/39770#discussion_r1470135438). It's overkill to do a bunch of whole Buffer operations and manually slicing just to copy 4 or 8 bytes. ### What changes are included in this PR? Add the ```MemoryManager::CopyBufferSliceToCPU``` function, which initially attempts to use memcpy for the specified slice. If this is not possible, it defaults to copying the entire buffer and then viewing/copying the slice. Update ```ArrayImporter::ImportStringValuesBuffer``` to use this function. ### Are these changes tested? 
```ArrayImporter::ImportStringValuesBuffer``` is tested as a part of ```arrow-c-bridge-test``` * GitHub Issue: #39858 Lead-authored-by: Alan Stoate Co-authored-by: Mac Lilly Co-authored-by: Felipe Oliveira Carvalho Signed-off-by: Antoine Pitrou --- cpp/src/arrow/c/bridge.cc | 27 ++++++++++----------------- cpp/src/arrow/device.cc | 26 ++++++++++++++++++++++++++ cpp/src/arrow/device.h | 4 ++++ 3 files changed, 40 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 8c5e3637b6e86..3e2e04ba0b6ec 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1871,24 +1871,17 @@ struct ArrayImporter { template Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id, int64_t byte_width = 1) { - if (device_type_ == DeviceAllocationType::kCPU) { - auto offsets = data_->GetValues(offsets_buffer_id); + int64_t buffer_size = 0; + if (c_struct_->length > 0) { + int64_t last_offset_value_offset = + (c_struct_->length + c_struct_->offset) * sizeof(OffsetType); + OffsetType last_offset_value; + RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU( + data_->buffers[offsets_buffer_id], last_offset_value_offset, sizeof(OffsetType), + reinterpret_cast(&last_offset_value))); // Compute visible size of buffer - int64_t buffer_size = - (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; - return ImportBuffer(buffer_id, buffer_size); - } - - // we only need the value of the last offset so let's just copy that - // one value from device to host. - auto single_value_buf = - SliceBuffer(data_->buffers[offsets_buffer_id], - c_struct_->length * sizeof(OffsetType), sizeof(OffsetType)); - ARROW_ASSIGN_OR_RAISE( - auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager())); - auto offsets = cpubuf->data_as(); - // Compute visible size of buffer - int64_t buffer_size = (c_struct_->length > 0) ? 
byte_width * offsets[0] : 0;
+      buffer_size = byte_width * last_offset_value;
+    }
     return ImportBuffer(buffer_id, buffer_size);
   }
 
diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc
index 98b8f7b30397e..01a2b8df5398d 100644
--- a/cpp/src/arrow/device.cc
+++ b/cpp/src/arrow/device.cc
@@ -116,6 +116,32 @@ Result<std::shared_ptr<Buffer>> MemoryManager::ViewBuffer(
                          " on ", to->device()->ToString(), " not supported");
 }
 
+Status MemoryManager::CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf,
+                                           int64_t offset, int64_t length,
+                                           uint8_t* out_data) {
+  if (ARROW_PREDICT_TRUE(buf->is_cpu())) {
+    memcpy(out_data, buf->data() + offset, static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  auto& from = buf->memory_manager();
+  auto cpu_mm = default_cpu_memory_manager();
+  // Try a view first
+  auto maybe_buffer_result = from->ViewBufferTo(buf, cpu_mm);
+  if (!COPY_BUFFER_SUCCESS(maybe_buffer_result)) {
+    // View failed, try a copy instead
+    maybe_buffer_result = from->CopyBufferTo(buf, cpu_mm);
+  }
+  ARROW_ASSIGN_OR_RAISE(auto maybe_buffer, std::move(maybe_buffer_result));
+  if (maybe_buffer != nullptr) {
+    memcpy(out_data, maybe_buffer->data() + offset, static_cast<size_t>(length));
+    return Status::OK();
+  }
+
+  return Status::NotImplemented("Copying buffer slice from ", from->device()->ToString(),
+                                " to CPU not supported");
+}
+
 #undef COPY_BUFFER_RETURN
 #undef COPY_BUFFER_SUCCESS
 
diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h
index a591167ef9a45..f5cca0d27d7b2 100644
--- a/cpp/src/arrow/device.h
+++ b/cpp/src/arrow/device.h
@@ -249,6 +249,10 @@ class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this<MemoryManager>
   static Result<std::shared_ptr<Buffer>> ViewBuffer(
       const std::shared_ptr<Buffer>& source, const std::shared_ptr<MemoryManager>& to);
 
+  /// \brief Copy a slice of a buffer into a CPU pointer
+  static Status CopyBufferSliceToCPU(const std::shared_ptr<Buffer>& buf, int64_t offset,
+                                     int64_t length, uint8_t* out_data);
+
   /// \brief Create a new SyncEvent.
   ///
   /// This version should construct the appropriate event for the device and

From c8f89d06bb79909dbd52d7d1504bdcadb220fadd Mon Sep 17 00:00:00 2001
From: ZhangHuiGui <106943008+ZhangHuiGui@users.noreply.github.com>
Date: Thu, 23 May 2024 11:37:31 -0400
Subject: [PATCH 154/261] GH-41720: [C++][Acero] Remove a useless parameter
 for QueryContext::Init called in hash_join_benchmark (#41716)

### Rationale for this change

My local compilation options include building some basic benchmarks, and I
discovered this compilation problem today. It seems that the
`QueryContext::Init` change from #41334 was not synchronized to
`hash_join_benchmark.cc`, and CI did not catch the problem.

### What changes are included in this PR?

Remove the first argument.

### Are these changes tested?

Not needed.

### Are there any user-facing changes?
No * GitHub Issue: #41720 Lead-authored-by: ZhangHuiGui <2689496754@qq.com> Co-authored-by: ZhangHuiGui Signed-off-by: Antoine Pitrou --- ci/scripts/cpp_build.sh | 1 + cpp/src/arrow/acero/hash_join_benchmark.cc | 2 +- docker-compose.yml | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index a1f40fc360e2f..6a3a53f2533cd 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -120,6 +120,7 @@ else -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_OPENMP_BENCHMARKS=${ARROW_BUILD_OPENMP_BENCHMARKS:-OFF} \ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ diff --git a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index ad1bd67cc8ec7..1f8e02e9f0fcf 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -148,7 +148,7 @@ class JoinBenchmark { }; scheduler_ = TaskScheduler::Make(); - DCHECK_OK(ctx_.Init(settings.num_threads, nullptr)); + DCHECK_OK(ctx_.Init(nullptr)); auto register_task_group_callback = [&](std::function task, std::function cont) { diff --git a/docker-compose.yml b/docker-compose.yml index a1d8f60a268d8..9f575e2030179 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -286,6 +286,7 @@ services: <<: [*common, *ccache, *sccache, *cpp] ARROW_BUILD_BENCHMARKS: "ON" ARROW_BUILD_EXAMPLES: "ON" + ARROW_BUILD_OPENMP_BENCHMARKS: "ON" ARROW_ENABLE_TIMING_TESTS: # inherit ARROW_EXTRA_ERROR_CONTEXT: "ON" ARROW_MIMALLOC: "ON" From 8bcdc0f3849e616dc09b8c19bbc1387c1773639b Mon Sep 17 00:00:00 2001 From: mwish Date: Fri, 24 May 2024 00:34:37 +0800 Subject: [PATCH 155/261] GH-41186: [C++][Parquet][Doc] Denote PARQUET:field_id in parquet.rst (#41187) ### Rationale for this change Denote PARQUET:field_id in parquet.rst ### What changes are included in this PR? Just a doc improvement ### Are these changes tested? No ### Are there any user-facing changes? No * GitHub Issue: #41186 Lead-authored-by: mwish Co-authored-by: Antoine Pitrou Co-authored-by: mwish <1506118561@qq.com> Co-authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- docs/source/cpp/parquet.rst | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst index 96897d139b351..9d2a5d791fed0 100644 --- a/docs/source/cpp/parquet.rst +++ b/docs/source/cpp/parquet.rst @@ -522,8 +522,8 @@ An Arrow Dictionary type is written out as its value type. It can still be recreated at read time using Parquet metadata (see "Roundtripping Arrow types" below). -Roundtripping Arrow types -~~~~~~~~~~~~~~~~~~~~~~~~~ +Roundtripping Arrow types and schema +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ While there is no bijection between Arrow types and Parquet types, it is possible to serialize the Arrow schema as part of the Parquet file metadata. @@ -531,8 +531,7 @@ This is enabled using :func:`ArrowWriterProperties::store_schema`. On the read path, the serialized schema will be automatically recognized and will recreate the original Arrow data, converting the Parquet data as -required (for example, a LargeList will be recreated from the Parquet LIST -type). +required. 
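For instance, a minimal C++ sketch of the write side of this feature (a sketch only, assuming the public ``parquet::arrow`` writer API; the helper name and chunk size are illustrative):

```
#include "arrow/io/file.h"
#include "arrow/result.h"
#include "arrow/table.h"
#include "parquet/arrow/writer.h"
#include "parquet/properties.h"

// Write `table` with the original Arrow schema stored in the Parquet file
// metadata (under the ARROW:schema key), so readers can recreate the Arrow
// types.
arrow::Status WriteWithStoredSchema(const arrow::Table& table,
                                    const std::string& path) {
  ARROW_ASSIGN_OR_RAISE(auto sink, arrow::io::FileOutputStream::Open(path));
  auto arrow_properties =
      parquet::ArrowWriterProperties::Builder().store_schema()->build();
  return parquet::arrow::WriteTable(table, arrow::default_memory_pool(), sink,
                                    /*chunk_size=*/64 * 1024,
                                    parquet::default_writer_properties(),
                                    arrow_properties);
}
```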
 As an example, when serializing an Arrow LargeList to Parquet:
 
@@ -542,6 +541,20 @@ As an example, when serializing an Arrow LargeList to Parquet:
   :func:`ArrowWriterProperties::store_schema` was enabled when writing the
   file; otherwise, it is decoded as an Arrow List.
 
+Parquet field id
+""""""""""""""""
+
+The Parquet format supports an optional integer *field id* which can be assigned
+to a given field. This is used for example in the
+`Apache Iceberg specification `__.
+
+On the writer side, if ``PARQUET:field_id`` is present as a metadata key on an
+Arrow field, then its value is parsed as a non-negative integer and is used as
+the field id for the corresponding Parquet field.
+
+On the reader side, Arrow will convert such a field id to a metadata key named
+``PARQUET:field_id`` on the corresponding Arrow field.
+
 Serialization details
 """""""""""""""""""""
 
@@ -549,6 +562,7 @@ The Arrow schema is serialized as a :ref:`Arrow IPC ` schema message
 then base64-encoded and stored under the ``ARROW:schema`` metadata key in
 the Parquet file metadata.
 
+
 Limitations
 ~~~~~~~~~~~
 
From e086bbf48cf625e6fa2a7990efba8b9c5a99d09f Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Fri, 24 May 2024 08:11:16 +0900
Subject: [PATCH 156/261] GH-41780: [C++][Flight][Benchmark] Ensure waiting
 server ready (#41793)

### Rationale for this change

We should read from the result stream to get an error from this RPC. If we
don't read from the result stream, we can't detect an error from this RPC.

### What changes are included in this PR?

Call `Drain()` to detect an error.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #41780

Authored-by: Sutou Kouhei
Signed-off-by: Sutou Kouhei
---
 cpp/src/arrow/flight/flight_benchmark.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc
index f53b1c6dcea30..057ef15c3c7ae 100644
--- a/cpp/src/arrow/flight/flight_benchmark.cc
+++ b/cpp/src/arrow/flight/flight_benchmark.cc
@@ -131,7 +131,8 @@ struct PerformanceStats {
 Status WaitForReady(FlightClient* client, const FlightCallOptions& call_options) {
   Action action{"ping", nullptr};
   for (int attempt = 0; attempt < 10; attempt++) {
-    if (client->DoAction(call_options, action).ok()) {
+    auto result_stream_result = client->DoAction(call_options, action);
+    if (result_stream_result.ok() && (*result_stream_result)->Drain().ok()) {
       return Status::OK();
     }
     std::this_thread::sleep_for(std::chrono::milliseconds(1000));

From 522b097f94fd2d4664e43e25dd0abeb442629d9c Mon Sep 17 00:00:00 2001
From: mwish
Date: Fri, 24 May 2024 08:04:38 +0800
Subject: [PATCH 157/261] GH-41547: [C++] Thirdparty: Upgrade xsimd to 13.0.0
 (#41548)

### Rationale for this change

Arrow currently uses xsimd 9.0.1. Some batch conversions are now supported
on NEON (see https://github.com/apache/arrow/pull/40335#issuecomment-1983609146),
so we can upgrade it. xsimd has now released 13.0.0; see
https://github.com/xtensor-stack/xsimd/issues/1015

For Conan, it seems the community is updating it:
https://github.com/conan-io/conan-center-index/pull/23859 . We may need to
wait for a while there.

### What changes are included in this PR?

Update xsimd to 13.0.0.

### Are these changes tested?

Tested by existing test code.

### Are there any user-facing changes?
no * GitHub Issue: #41547 Authored-by: mwish Signed-off-by: Sutou Kouhei --- cpp/thirdparty/versions.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 4983f3cee2c2d..06506d32bef7c 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -113,8 +113,8 @@ ARROW_UCX_BUILD_VERSION=1.12.1 ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1 -ARROW_XSIMD_BUILD_VERSION=9.0.1 -ARROW_XSIMD_BUILD_SHA256_CHECKSUM=b1bb5f92167fd3a4f25749db0be7e61ed37e0a5d943490f3accdcd2cd2918cc0 +ARROW_XSIMD_BUILD_VERSION=13.0.0 +ARROW_XSIMD_BUILD_SHA256_CHECKSUM=8bdbbad0c3e7afa38d88d0d484d70a1671a1d8aefff03f4223ab2eb6a41110a3 ARROW_ZLIB_BUILD_VERSION=1.3.1 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 ARROW_ZSTD_BUILD_VERSION=1.5.6 From 799021ae6f7755dee7b92562bbd0f97428fd8925 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 24 May 2024 12:41:50 +1200 Subject: [PATCH 158/261] GH-41134: [GLib] Support building arrow-glib with MSVC (#41599) ### Rationale for this change Allow Windows users to more easily build the GLib libraries. ### What changes are included in this PR? * Minor fixes to allow building with MSVC: * Changes some uses of variable length arrays to `std::vector`, because MSVC doesn't support variable length arrays * Moves some function definitions that use C++ types outside of `G_BEGIN_DECLS`/`G_END_DECLS` blocks (which expand to `extern C { }`), because this caused MSVC to error * Fix libraries not having any exported symbols with MSVC, which defaults to hiding symbols * Add `visibility.h` which defines a new `GARROW_EXTERN` macro that adds `dllimport` or `dllexport` attributes when using MSVC. * Include the `GARROW_EXTERN` macro in the definitions of the `GARROW_AVAILABLE_IN_*` macros. * Add a new CI job that builds the GLib libraries with MSVC on Windows, using vcpkg to install pkgconfig and glib. * For now only `arrow-glib` is built, I can follow up with the other libraries after this PR. That will require introducing new per-library version macros. ### Are these changes tested? The build will be tested in CI but I've only done some quick manual tests that the built library works correctly, I haven't got the ruby tests running against the build yet. ### Are there any user-facing changes? No? 
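As a rough sketch, the `GARROW_EXTERN` visibility macro described above typically expands along these lines (the actual definition lives in the new `visibility.h`; the exact guard names, e.g. `GARROW_COMPILATION`, are assumptions here):

```
/* Hedged sketch of a dllexport/dllimport visibility macro for MSVC;
   guard names are illustrative, see the real definition in
   c_glib's visibility.h. */
#ifdef _WIN32
#  ifdef GARROW_COMPILATION
/* building arrow-glib itself: export the symbol */
#    define GARROW_EXTERN __declspec(dllexport) extern
#  else
/* consuming the DLL: import the symbol */
#    define GARROW_EXTERN __declspec(dllimport) extern
#  endif
#else
#  define GARROW_EXTERN extern
#endif
```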
Eventually some documentation should be updated when all the GLib libraries can be built with MSVC though * GitHub Issue: #41134 Lead-authored-by: Adam Reeve Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/ruby.yml | 96 +++++++++- c_glib/arrow-cuda-glib/meson.build | 1 + c_glib/arrow-cuda-glib/version.h.in | 2 + c_glib/arrow-dataset-glib/meson.build | 1 + c_glib/arrow-dataset-glib/version.h.in | 2 + c_glib/arrow-flight-glib/client.hpp | 6 + c_glib/arrow-flight-glib/common.hpp | 20 ++ c_glib/arrow-flight-glib/meson.build | 1 + c_glib/arrow-flight-glib/server.hpp | 16 ++ c_glib/arrow-flight-glib/version.h.in | 2 + c_glib/arrow-flight-sql-glib/meson.build | 1 + c_glib/arrow-flight-sql-glib/version.h.in | 2 + c_glib/arrow-glib/array-builder.cpp | 10 +- c_glib/arrow-glib/array-builder.h | 184 +++++++++++++++++++ c_glib/arrow-glib/basic-array-definition.h | 4 + c_glib/arrow-glib/basic-array.h | 142 ++++++++++++++ c_glib/arrow-glib/basic-array.hpp | 10 + c_glib/arrow-glib/basic-data-type.cpp | 4 +- c_glib/arrow-glib/basic-data-type.h | 73 ++++++++ c_glib/arrow-glib/buffer.h | 39 ++++ c_glib/arrow-glib/buffer.hpp | 12 ++ c_glib/arrow-glib/chunked-array-definition.h | 1 + c_glib/arrow-glib/chunked-array.h | 19 ++ c_glib/arrow-glib/chunked-array.hpp | 5 + c_glib/arrow-glib/codec.h | 5 + c_glib/arrow-glib/codec.hpp | 6 + c_glib/arrow-glib/composite-array.h | 47 +++++ c_glib/arrow-glib/composite-data-type.h | 41 +++++ c_glib/arrow-glib/compute-definition.h | 5 + c_glib/arrow-glib/compute.h | 42 +++++ c_glib/arrow-glib/datum.h | 6 + c_glib/arrow-glib/decimal.h | 13 ++ c_glib/arrow-glib/error.h | 3 + c_glib/arrow-glib/error.hpp | 8 + c_glib/arrow-glib/expression.h | 3 + c_glib/arrow-glib/expression.hpp | 3 + c_glib/arrow-glib/field.h | 11 ++ c_glib/arrow-glib/file-system.h | 10 + c_glib/arrow-glib/file-system.hpp | 7 + c_glib/arrow-glib/file.h | 7 + c_glib/arrow-glib/input-stream.cpp | 32 ++-- c_glib/arrow-glib/input-stream.h | 30 +++ c_glib/arrow-glib/input-stream.hpp | 14 ++ c_glib/arrow-glib/interval.h | 2 + c_glib/arrow-glib/ipc-options.h | 2 + c_glib/arrow-glib/ipc-options.hpp | 4 + c_glib/arrow-glib/local-file-system.h | 2 + c_glib/arrow-glib/memory-pool.h | 10 + c_glib/arrow-glib/meson.build | 1 + c_glib/arrow-glib/orc-file-reader.h | 14 ++ c_glib/arrow-glib/output-stream.cpp | 32 ++-- c_glib/arrow-glib/output-stream.h | 14 ++ c_glib/arrow-glib/output-stream.hpp | 9 + c_glib/arrow-glib/readable.h | 3 + c_glib/arrow-glib/reader.h | 46 +++++ c_glib/arrow-glib/reader.hpp | 19 ++ c_glib/arrow-glib/record-batch.h | 19 ++ c_glib/arrow-glib/record-batch.hpp | 5 + c_glib/arrow-glib/scalar.h | 39 ++++ c_glib/arrow-glib/schema.h | 16 ++ c_glib/arrow-glib/schema.hpp | 3 + c_glib/arrow-glib/table-builder.h | 12 ++ c_glib/arrow-glib/table.h | 21 +++ c_glib/arrow-glib/table.hpp | 4 + c_glib/arrow-glib/tensor.h | 27 +++ c_glib/arrow-glib/timestamp-parser.h | 3 + c_glib/arrow-glib/version.h.in | 2 + c_glib/arrow-glib/writable-file.h | 2 + c_glib/arrow-glib/writable.h | 4 + c_glib/arrow-glib/writer.h | 8 + c_glib/gandiva-glib/meson.build | 1 + c_glib/gandiva-glib/node.h | 104 +++++------ c_glib/gandiva-glib/version.h.in | 2 + c_glib/parquet-glib/meson.build | 1 + c_glib/parquet-glib/version.h.in | 2 + c_glib/tool/generate-version-header.py | 27 ++- c_glib/vcpkg.json | 8 + ci/scripts/c_glib_build.sh | 23 ++- ci/scripts/install_vcpkg.sh | 2 +- dev/release/01-prepare-test.rb | 7 + dev/release/post-11-bump-versions-test.rb | 7 + dev/release/utils-prepare.sh | 6 + 82 files changed, 1361 
insertions(+), 98 deletions(-) create mode 100644 c_glib/vcpkg.json diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 11e3c93ed0806..04f944f56c665 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -186,7 +186,7 @@ jobs: shell: bash run: ci/scripts/ruby_test.sh $(pwd) $(pwd)/build - windows: + windows-mingw: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby runs-on: windows-2019 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} @@ -267,7 +267,6 @@ jobs: ridk exec bash ci\scripts\cpp_build.sh "${source_dir}" "${build_dir}" - name: Build GLib run: | - $Env:CMAKE_BUILD_PARALLEL_LEVEL = $Env:NUMBER_OF_PROCESSORS $source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")" $build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")" $ErrorActionPreference = "Continue" @@ -305,3 +304,96 @@ jobs: $Env:MAKE = "ridk exec make" $ErrorActionPreference = "Continue" rake -f ruby\Rakefile + + windows-msvc: + name: AMD64 Windows MSVC GLib + runs-on: windows-2019 + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 90 + strategy: + fail-fast: false + env: + ARROW_BOOST_USE_SHARED: OFF + ARROW_BUILD_BENCHMARKS: OFF + ARROW_BUILD_SHARED: ON + ARROW_BUILD_STATIC: OFF + ARROW_BUILD_TESTS: OFF + ARROW_ACERO: ON + ARROW_DATASET: ON + ARROW_FLIGHT: OFF + ARROW_FLIGHT_SQL: OFF + ARROW_GANDIVA: OFF + ARROW_HDFS: OFF + ARROW_HOME: "${{ github.workspace }}/dist" + ARROW_JEMALLOC: OFF + ARROW_MIMALLOC: ON + ARROW_ORC: OFF + ARROW_PARQUET: ON + ARROW_SUBSTRAIT: OFF + ARROW_USE_GLOG: OFF + ARROW_VERBOSE_THIRDPARTY_BUILD: OFF + ARROW_WITH_BROTLI: OFF + ARROW_WITH_BZ2: OFF + ARROW_WITH_LZ4: OFF + ARROW_WITH_OPENTELEMETRY: OFF + ARROW_WITH_SNAPPY: ON + ARROW_WITH_ZLIB: OFF + ARROW_WITH_ZSTD: ON + BOOST_SOURCE: BUNDLED + CMAKE_CXX_STANDARD: "17" + CMAKE_GENERATOR: Ninja + CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist" + CMAKE_UNITY_BUILD: ON + steps: + - name: Disable Crash Dialogs + run: | + reg add ` + "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" ` + /v DontShowUI ` + /t REG_DWORD ` + /d 1 ` + /f + - name: Checkout Arrow + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + - name: Install vcpkg + shell: bash + run: | + ci/scripts/install_vcpkg.sh ./vcpkg + - name: Install meson + run: | + python -m pip install meson + - name: Install ccache + shell: bash + run: | + ci/scripts/install_ccache.sh 4.6.3 /usr + - name: Setup ccache + shell: bash + run: | + ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: | + echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} + restore-keys: glib-ccache-msvc-${{ env.CACHE_VERSION }}- + env: + # We can invalidate the current cache by updating this. 
+ CACHE_VERSION: "2024-05-09" + - name: Build C++ + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" + - name: Build GLib + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "VCPKG_ROOT=\"$(pwd)/vcpkg\" ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 86d536dcd2494..47bed70f03b60 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -52,6 +52,7 @@ libarrow_cuda_glib = library('arrow-cuda-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGARROW_CUDA_COMPILATION'], soversion: so_version, version: library_version) arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, diff --git a/c_glib/arrow-cuda-glib/version.h.in b/c_glib/arrow-cuda-glib/version.h.in index 27d070d19dc9c..0ab5bfd562b41 100644 --- a/c_glib/arrow-cuda-glib/version.h.in +++ b/c_glib/arrow-cuda-glib/version.h.in @@ -154,4 +154,6 @@ # define GARROW_CUDA_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED #endif +@VISIBILITY_MACROS@ + @AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 686129dbe2fc0..2d54efadfa230 100644 --- a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -81,6 +81,7 @@ libarrow_dataset_glib = library('arrow-dataset-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGADATASET_COMPILATION'], soversion: so_version, version: library_version) arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib, diff --git a/c_glib/arrow-dataset-glib/version.h.in b/c_glib/arrow-dataset-glib/version.h.in index 47d726c5b7896..7e678bda3a875 100644 --- a/c_glib/arrow-dataset-glib/version.h.in +++ b/c_glib/arrow-dataset-glib/version.h.in @@ -154,4 +154,6 @@ # define GADATASET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED #endif +@VISIBILITY_MACROS@ + @AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp index 6d7bdcecf3006..185a28e6dc4bd 100644 --- a/c_glib/arrow-flight-glib/client.hpp +++ b/c_glib/arrow-flight-glib/client.hpp @@ -23,17 +23,23 @@ #include +GAFLIGHT_EXTERN GAFlightStreamReader * gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, gboolean is_owner); +GAFLIGHT_EXTERN arrow::flight::FlightCallOptions * gaflight_call_options_get_raw(GAFlightCallOptions *options); +GAFLIGHT_EXTERN arrow::flight::FlightClientOptions * gaflight_client_options_get_raw(GAFlightClientOptions *options); +GAFLIGHT_EXTERN std::shared_ptr gaflight_client_get_raw(GAFlightClient *client); + +GAFLIGHT_EXTERN GAFlightClient * gaflight_client_new_raw(std::shared_ptr *flight_client); diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp index b748d6f382184..db56fff579baf 100644 --- a/c_glib/arrow-flight-glib/common.hpp +++ b/c_glib/arrow-flight-glib/common.hpp @@ -23,39 +23,59 @@ #include +GAFLIGHT_EXTERN GAFlightCriteria * gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria); + +GAFLIGHT_EXTERN arrow::flight::Criteria * gaflight_criteria_get_raw(GAFlightCriteria *criteria); 
+GAFLIGHT_EXTERN arrow::flight::Location * gaflight_location_get_raw(GAFlightLocation *location); +GAFLIGHT_EXTERN GAFlightDescriptor * gaflight_descriptor_new_raw(const arrow::flight::FlightDescriptor *flight_descriptor); + +GAFLIGHT_EXTERN arrow::flight::FlightDescriptor * gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor); +GAFLIGHT_EXTERN GAFlightTicket * gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket); + +GAFLIGHT_EXTERN arrow::flight::Ticket * gaflight_ticket_get_raw(GAFlightTicket *ticket); +GAFLIGHT_EXTERN GAFlightEndpoint * gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, GAFlightTicket *ticket); + +GAFLIGHT_EXTERN arrow::flight::FlightEndpoint * gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint); +GAFLIGHT_EXTERN GAFlightInfo * gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info); + +GAFLIGHT_EXTERN arrow::flight::FlightInfo * gaflight_info_get_raw(GAFlightInfo *info); +GAFLIGHT_EXTERN GAFlightStreamChunk * gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk); + +GAFLIGHT_EXTERN arrow::flight::FlightStreamChunk * gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); +GAFLIGHT_EXTERN arrow::flight::MetadataRecordBatchReader * gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build index 2d684a4ee361e..c1422e0d10a7d 100644 --- a/c_glib/arrow-flight-glib/meson.build +++ b/c_glib/arrow-flight-glib/meson.build @@ -58,6 +58,7 @@ libarrow_flight_glib = library('arrow-flight-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHT_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp index 70da867d5b0e9..ec4815751c8d8 100644 --- a/c_glib/arrow-flight-glib/server.hpp +++ b/c_glib/arrow-flight-glib/server.hpp @@ -23,34 +23,49 @@ #include +GAFLIGHT_EXTERN arrow::flight::FlightDataStream * gaflight_data_stream_get_raw(GAFlightDataStream *stream); +GAFLIGHT_EXTERN GAFlightMessageReader * gaflight_message_reader_new_raw(arrow::flight::FlightMessageReader *flight_reader, gboolean is_owner); + +GAFLIGHT_EXTERN arrow::flight::FlightMessageReader * gaflight_message_reader_get_raw(GAFlightMessageReader *reader); +GAFLIGHT_EXTERN GAFlightServerCallContext * gaflight_server_call_context_new_raw( const arrow::flight::ServerCallContext *flight_call_context); + +GAFLIGHT_EXTERN const arrow::flight::ServerCallContext * gaflight_server_call_context_get_raw(GAFlightServerCallContext *call_context); +GAFLIGHT_EXTERN GAFlightServerAuthSender * gaflight_server_auth_sender_new_raw(arrow::flight::ServerAuthSender *flight_sender); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthSender * gaflight_server_auth_sender_get_raw(GAFlightServerAuthSender *sender); +GAFLIGHT_EXTERN GAFlightServerAuthReader * gaflight_server_auth_reader_new_raw(arrow::flight::ServerAuthReader *flight_reader); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthReader * gaflight_server_auth_reader_get_raw(GAFlightServerAuthReader *reader); +GAFLIGHT_EXTERN std::shared_ptr gaflight_server_auth_handler_get_raw(GAFlightServerAuthHandler *handler); +GAFLIGHT_EXTERN arrow::flight::FlightServerOptions * gaflight_server_options_get_raw(GAFlightServerOptions *options); @@ -61,6 +76,7 @@ struct 
_GAFlightServableInterface arrow::flight::FlightServerBase *(*get_raw)(GAFlightServable *servable); }; +GAFLIGHT_EXTERN arrow::flight::FlightServerBase * gaflight_servable_get_raw(GAFlightServable *servable); diff --git a/c_glib/arrow-flight-glib/version.h.in b/c_glib/arrow-flight-glib/version.h.in index 45e0437ab1e71..4a42c7f5aa91e 100644 --- a/c_glib/arrow-flight-glib/version.h.in +++ b/c_glib/arrow-flight-glib/version.h.in @@ -154,4 +154,6 @@ # define GAFLIGHT_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED #endif +@VISIBILITY_MACROS@ + @AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-sql-glib/meson.build b/c_glib/arrow-flight-sql-glib/meson.build index 6a7c89224b303..d588ba4917c76 100644 --- a/c_glib/arrow-flight-sql-glib/meson.build +++ b/c_glib/arrow-flight-sql-glib/meson.build @@ -55,6 +55,7 @@ libarrow_flight_sql_glib = library('arrow-flight-sql-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHTSQL_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_sql_glib = \ diff --git a/c_glib/arrow-flight-sql-glib/version.h.in b/c_glib/arrow-flight-sql-glib/version.h.in index 3ff707983b307..e4373109b9008 100644 --- a/c_glib/arrow-flight-sql-glib/version.h.in +++ b/c_glib/arrow-flight-sql-glib/version.h.in @@ -154,4 +154,6 @@ # define GAFLIGHTSQL_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED #endif +@VISIBILITY_MACROS@ + @AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index b498ecb51cedb..9b7c608ca8a5b 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -231,8 +231,8 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, if (n_remains > 0) { ++n_loops; } + std::vector data(value_size * chunk_size); for (gint64 i = 0; i < n_loops; ++i) { - uint8_t data[value_size * chunk_size]; uint8_t *valid_bytes = nullptr; uint8_t valid_bytes_buffer[chunk_size]; if (is_valids_length > 0) { @@ -255,7 +255,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, value = values[offset + j]; } if (value) { - get_value_function(data + (value_size * j), value, value_size); + get_value_function(data.data() + (value_size * j), value, value_size); } else { is_valid = false; if (!valid_bytes) { @@ -267,7 +267,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, valid_bytes_buffer[j] = is_valid; } } - auto status = arrow_builder->AppendValues(data, n_values, valid_bytes); + auto status = arrow_builder->AppendValues(data.data(), n_values, valid_bytes); if (!garrow_error_check(error, status, context)) { return FALSE; } @@ -1035,13 +1035,13 @@ garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, gint64 is_valids_length, GError **error) { - guint8 arrow_values[values_length]; + std::vector arrow_values(values_length); for (gint64 i = 0; i < values_length; ++i) { arrow_values[i] = values[i]; } return garrow_array_builder_append_values( GARROW_ARRAY_BUILDER(builder), - arrow_values, + arrow_values.data(), values_length, is_valids, is_valids_length, diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index 8a1385b9b8c1b..6a0d0154833a7 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_ARRAY_BUILDER (garrow_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowArrayBuilder, 
garrow_array_builder, GARROW, ARRAY_BUILDER, GObject) struct _GArrowArrayBuilderClass @@ -33,11 +34,15 @@ struct _GArrowArrayBuilderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_array_builder_get_value_data_type(GArrowArrayBuilder *builder); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_array_builder_get_value_type(GArrowArrayBuilder *builder); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_builder_finish(GArrowArrayBuilder *builder, GError **error); @@ -86,6 +91,7 @@ garrow_array_builder_append_empty_values(GArrowArrayBuilder *builder, GError **error); #define GARROW_TYPE_NULL_ARRAY_BUILDER (garrow_null_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_13 G_DECLARE_DERIVABLE_TYPE(GArrowNullArrayBuilder, garrow_null_array_builder, GARROW, @@ -114,6 +120,7 @@ garrow_null_array_builder_append_nulls(GArrowNullArrayBuilder *builder, #endif #define GARROW_TYPE_BOOLEAN_ARRAY_BUILDER (garrow_boolean_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBooleanArrayBuilder, garrow_boolean_array_builder, GARROW, @@ -124,10 +131,12 @@ struct _GArrowBooleanArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanArrayBuilder * garrow_boolean_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, @@ -139,6 +148,8 @@ gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, const gboolean *values, @@ -159,6 +170,7 @@ garrow_boolean_array_builder_append_nulls(GArrowBooleanArrayBuilder *builder, #endif #define GARROW_TYPE_INT_ARRAY_BUILDER (garrow_int_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowIntArrayBuilder, garrow_int_array_builder, GARROW, @@ -169,10 +181,12 @@ struct _GArrowIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowIntArrayBuilder * garrow_int_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, @@ -184,6 +198,7 @@ gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, gint64 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, const gint64 *values, @@ -192,9 +207,12 @@ garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int_array_builder_append_null(GArrowIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, @@ -203,6 +221,7 @@ garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, #endif #define GARROW_TYPE_UINT_ARRAY_BUILDER (garrow_uint_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUIntArrayBuilder, garrow_uint_array_builder, GARROW, @@ -213,10 +232,12 @@ struct 
_GArrowUIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUIntArrayBuilder * garrow_uint_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, @@ -228,6 +249,8 @@ gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, guint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, const guint64 *values, @@ -236,9 +259,12 @@ garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint_array_builder_append_null(GArrowUIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, @@ -247,6 +273,7 @@ garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, #endif #define GARROW_TYPE_INT8_ARRAY_BUILDER (garrow_int8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt8ArrayBuilder, garrow_int8_array_builder, GARROW, @@ -257,10 +284,12 @@ struct _GArrowInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8ArrayBuilder * garrow_int8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, @@ -272,6 +301,8 @@ gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, const gint8 *values, @@ -280,9 +311,12 @@ garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int8_array_builder_append_null(GArrowInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, @@ -291,6 +325,7 @@ garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT8_ARRAY_BUILDER (garrow_uint8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt8ArrayBuilder, garrow_uint8_array_builder, GARROW, @@ -301,10 +336,12 @@ struct _GArrowUInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8ArrayBuilder * garrow_uint8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, @@ -316,6 +353,8 @@ gboolean garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, const 
guint8 *values, @@ -324,9 +363,12 @@ garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint8_array_builder_append_null(GArrowUInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, @@ -335,6 +377,7 @@ garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_INT16_ARRAY_BUILDER (garrow_int16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt16ArrayBuilder, garrow_int16_array_builder, GARROW, @@ -345,6 +388,7 @@ struct _GArrowInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16ArrayBuilder * garrow_int16_array_builder_new(void); @@ -360,6 +404,7 @@ gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, const gint16 *values, @@ -368,9 +413,12 @@ garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int16_array_builder_append_null(GArrowInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, @@ -379,6 +427,7 @@ garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT16_ARRAY_BUILDER (garrow_uint16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt16ArrayBuilder, garrow_uint16_array_builder, GARROW, @@ -389,10 +438,12 @@ struct _GArrowUInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16ArrayBuilder * garrow_uint16_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, @@ -404,6 +455,8 @@ gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, const guint16 *values, @@ -412,10 +465,13 @@ garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint16_array_builder_append_null(GArrowUInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, @@ -424,6 +480,7 @@ garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_INT32_ARRAY_BUILDER (garrow_int32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL 
 G_DECLARE_DERIVABLE_TYPE(GArrowInt32ArrayBuilder,
                          garrow_int32_array_builder,
                          GARROW,
@@ -434,10 +491,12 @@ struct _GArrowInt32ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt32ArrayBuilder *
 garrow_int32_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value)
 gboolean
 garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder,
@@ -449,6 +508,8 @@ gboolean
 garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder,
                                         gint32 value,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder,
                                          const gint32 *values,
@@ -457,9 +518,12 @@ garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder,
                                          gint64 is_valids_length,
                                          GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_int32_array_builder_append_null(GArrowInt32ArrayBuilder *builder, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder,
@@ -468,6 +532,7 @@ garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_UINT32_ARRAY_BUILDER (garrow_uint32_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt32ArrayBuilder,
                          garrow_uint32_array_builder,
                          GARROW,
@@ -478,10 +543,12 @@ struct _GArrowUInt32ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt32ArrayBuilder *
 garrow_uint32_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value)
 gboolean
 garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder,
@@ -493,6 +560,8 @@ gboolean
 garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder,
                                          guint32 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder,
                                           const guint32 *values,
@@ -501,10 +570,13 @@ garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_uint32_array_builder_append_null(GArrowUInt32ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder,
@@ -513,6 +585,7 @@ garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_INT64_ARRAY_BUILDER (garrow_int64_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowInt64ArrayBuilder,
                          garrow_int64_array_builder,
                          GARROW,
@@ -523,10 +596,12 @@ struct _GArrowInt64ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt64ArrayBuilder *
 garrow_int64_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value)
 gboolean
 garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder,
@@ -538,6 +613,8 @@ gboolean
 garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder,
                                         gint64 value,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder,
                                          const gint64 *values,
@@ -546,9 +623,12 @@ garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder,
                                          gint64 is_valids_length,
                                          GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_int64_array_builder_append_null(GArrowInt64ArrayBuilder *builder, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder,
@@ -557,6 +637,7 @@ garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_UINT64_ARRAY_BUILDER (garrow_uint64_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt64ArrayBuilder,
                          garrow_uint64_array_builder,
                          GARROW,
@@ -567,10 +648,12 @@ struct _GArrowUInt64ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt64ArrayBuilder *
 garrow_uint64_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value)
 gboolean
 garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder,
@@ -582,6 +665,8 @@ gboolean
 garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder,
                                          guint64 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder,
                                           const guint64 *values,
@@ -590,10 +675,13 @@ garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_uint64_array_builder_append_null(GArrowUInt64ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder,
@@ -602,6 +690,7 @@ garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_HALF_FLOAT_ARRAY_BUILDER (garrow_half_float_array_builder_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArrayBuilder,
                          garrow_half_float_array_builder,
                          GARROW,
@@ -631,6 +720,7 @@ garrow_half_float_array_builder_append_values(GArrowHalfFloatArrayBuilder *build
                                               GError **error);
 
 #define GARROW_TYPE_FLOAT_ARRAY_BUILDER (garrow_float_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFloatArrayBuilder,
                          garrow_float_array_builder,
                          GARROW,
@@ -641,10 +731,12 @@ struct _GArrowFloatArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowFloatArrayBuilder *
 garrow_float_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value)
 gboolean
 garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder,
@@ -656,6 +748,8 @@ gboolean
 garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder,
                                         gfloat value,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder,
                                          const gfloat *values,
@@ -664,9 +758,12 @@ garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder,
                                          gint64 is_valids_length,
                                          GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_float_array_builder_append_null(GArrowFloatArrayBuilder *builder, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder,
@@ -675,6 +772,7 @@ garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_DOUBLE_ARRAY_BUILDER (garrow_double_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDoubleArrayBuilder,
                          garrow_double_array_builder,
                          GARROW,
@@ -685,10 +783,12 @@ struct _GArrowDoubleArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDoubleArrayBuilder *
 garrow_double_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value)
 gboolean
 garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder,
@@ -700,6 +800,8 @@ gboolean
 garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder,
                                          gdouble value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder,
                                           const gdouble *values,
@@ -708,10 +810,13 @@ garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_double_array_builder_append_null(GArrowDoubleArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder,
@@ -720,6 +825,7 @@ garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_BINARY_ARRAY_BUILDER (garrow_binary_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBinaryArrayBuilder,
                          garrow_binary_array_builder,
                          GARROW,
@@ -730,10 +836,12 @@ struct _GArrowBinaryArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBinaryArrayBuilder *
 garrow_binary_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value)
 gboolean
 garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder,
@@ -761,10 +869,12 @@ garrow_binary_array_builder_append_values(GArrowBinaryArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder,
                                         GError **error);
+
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 GARROW_AVAILABLE_IN_0_16
 gboolean
@@ -775,6 +885,7 @@ garrow_binary_array_builder_append_nulls(GArrowBinaryArrayBuilder *builder,
 
 #define GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER \
   (garrow_large_binary_array_builder_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArrayBuilder,
                          garrow_large_binary_array_builder,
                          GARROW,
@@ -821,6 +932,7 @@ garrow_large_binary_array_builder_append_nulls(GArrowLargeBinaryArrayBuilder *bu
 #endif
 
 #define GARROW_TYPE_STRING_ARRAY_BUILDER (garrow_string_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowStringArrayBuilder,
                          garrow_string_array_builder,
                          GARROW,
@@ -831,10 +943,12 @@ struct _GArrowStringArrayBuilderClass
   GArrowBinaryArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowStringArrayBuilder *
 garrow_string_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value)
 gboolean
 garrow_string_array_builder_append(GArrowStringArrayBuilder *builder,
@@ -863,6 +977,7 @@ garrow_string_array_builder_append_string_len(GArrowStringArrayBuilder *builder,
                                               GError **error);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_16_FOR(garrow_string_array_builder_append_strings)
 gboolean
 garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder,
@@ -883,6 +998,7 @@ garrow_string_array_builder_append_strings(GArrowStringArrayBuilder *builder,
 
 #define GARROW_TYPE_LARGE_STRING_ARRAY_BUILDER \
   (garrow_large_string_array_builder_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArrayBuilder,
                          garrow_large_string_array_builder,
                          GARROW,
@@ -919,6 +1035,7 @@ garrow_large_string_array_builder_append_strings(GArrowLargeStringArrayBuilder *
 
 #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER \
   (garrow_fixed_size_binary_array_builder_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArrayBuilder,
                          garrow_fixed_size_binary_array_builder,
                          GARROW,
@@ -963,6 +1080,7 @@ garrow_fixed_size_binary_array_builder_append_values_packed(
   GError **error);
 
 #define GARROW_TYPE_DATE32_ARRAY_BUILDER (garrow_date32_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate32ArrayBuilder,
                          garrow_date32_array_builder,
                          GARROW,
@@ -973,10 +1091,12 @@ struct _GArrowDate32ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate32ArrayBuilder *
 garrow_date32_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value)
 gboolean
 garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder,
@@ -988,6 +1108,8 @@ gboolean
 garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder,
                                          gint32 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder,
                                           const gint32 *values,
@@ -996,10 +1118,13 @@ garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_date32_array_builder_append_null(GArrowDate32ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder,
@@ -1008,6 +1133,7 @@ garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_DATE64_ARRAY_BUILDER (garrow_date64_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate64ArrayBuilder,
                          garrow_date64_array_builder,
                          GARROW,
@@ -1018,10 +1144,12 @@ struct _GArrowDate64ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate64ArrayBuilder *
 garrow_date64_array_builder_new(void);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value)
 gboolean
 garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder,
@@ -1033,6 +1161,8 @@ gboolean
 garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder,
                                          gint64 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder,
                                           const gint64 *values,
@@ -1041,10 +1171,13 @@ garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_date64_array_builder_append_null(GArrowDate64ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder,
@@ -1053,6 +1186,7 @@ garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_TIMESTAMP_ARRAY_BUILDER (garrow_timestamp_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArrayBuilder,
                          garrow_timestamp_array_builder,
                          GARROW,
@@ -1063,10 +1197,12 @@ struct _GArrowTimestampArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTimestampArrayBuilder *
 garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value)
 gboolean
 garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder,
@@ -1078,6 +1214,8 @@ gboolean
 garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder,
                                             gint64 value,
                                             GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder,
                                              const gint64 *values,
@@ -1086,10 +1224,13 @@ garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilde
                                              gint64 is_valids_length,
                                              GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_timestamp_array_builder_append_null(GArrowTimestampArrayBuilder *builder,
                                            GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder,
@@ -1098,6 +1239,7 @@ garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder
 #endif
 
 #define GARROW_TYPE_TIME32_ARRAY_BUILDER (garrow_time32_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime32ArrayBuilder,
                          garrow_time32_array_builder,
                          GARROW,
@@ -1108,10 +1250,12 @@ struct _GArrowTime32ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime32ArrayBuilder *
 garrow_time32_array_builder_new(GArrowTime32DataType *data_type);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value)
 gboolean
 garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder,
@@ -1123,6 +1267,8 @@ gboolean
 garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder,
                                          gint32 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder,
                                           const gint32 *values,
@@ -1131,10 +1277,13 @@ garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_time32_array_builder_append_null(GArrowTime32ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder,
@@ -1143,6 +1292,7 @@ garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder,
 #endif
 
 #define GARROW_TYPE_TIME64_ARRAY_BUILDER (garrow_time64_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime64ArrayBuilder,
                          garrow_time64_array_builder,
                          GARROW,
@@ -1153,10 +1303,12 @@ struct _GArrowTime64ArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime64ArrayBuilder *
 garrow_time64_array_builder_new(GArrowTime64DataType *data_type);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value)
 gboolean
 garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder,
@@ -1168,6 +1320,8 @@ gboolean
 garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder,
                                          gint64 value,
                                          GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder,
                                           const gint64 *values,
@@ -1176,10 +1330,13 @@ garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder,
                                           gint64 is_valids_length,
                                           GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_time64_array_builder_append_null(GArrowTime64ArrayBuilder *builder,
                                         GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls)
 gboolean
 garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder,
@@ -1189,6 +1346,7 @@ garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder,
 
 #define GARROW_TYPE_MONTH_INTERVAL_ARRAY_BUILDER \
   (garrow_month_interval_array_builder_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArrayBuilder,
                          garrow_month_interval_array_builder,
                          GARROW,
@@ -1220,6 +1378,7 @@ garrow_month_interval_array_builder_append_values(
 
 #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY_BUILDER \
   (garrow_day_time_interval_array_builder_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArrayBuilder,
                          garrow_day_time_interval_array_builder,
                          GARROW,
@@ -1252,6 +1411,7 @@ garrow_day_time_interval_array_builder_append_values(
 
 #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY_BUILDER \
   (garrow_month_day_nano_interval_array_builder_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArrayBuilder,
                          garrow_month_day_nano_interval_array_builder,
                          GARROW,
@@ -1284,6 +1444,7 @@ garrow_month_day_nano_interval_array_builder_append_values(
 
 #define GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER \
   (garrow_binary_dictionary_array_builder_get_type())
+GARROW_AVAILABLE_IN_2_0
 G_DECLARE_DERIVABLE_TYPE(GArrowBinaryDictionaryArrayBuilder,
                          garrow_binary_dictionary_array_builder,
                          GARROW,
@@ -1350,6 +1511,7 @@ garrow_binary_dictionary_array_builder_reset_full(
 
 #define GARROW_TYPE_STRING_DICTIONARY_ARRAY_BUILDER \
   (garrow_string_dictionary_array_builder_get_type())
+GARROW_AVAILABLE_IN_2_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStringDictionaryArrayBuilder,
                          garrow_string_dictionary_array_builder,
                          GARROW,
@@ -1408,6 +1570,7 @@ garrow_string_dictionary_array_builder_reset_full(
   GArrowStringDictionaryArrayBuilder *builder);
 
 #define GARROW_TYPE_LIST_ARRAY_BUILDER (garrow_list_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowListArrayBuilder,
                          garrow_list_array_builder,
                          GARROW,
@@ -1418,10 +1581,12 @@ struct _GArrowListArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowListArrayBuilder *
 garrow_list_array_builder_new(GArrowListDataType *data_type, GError **error);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value)
 gboolean
 garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error);
@@ -1430,15 +1595,18 @@ GARROW_AVAILABLE_IN_0_12
 gboolean
 garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, GError **error);
 #endif
 
+GARROW_AVAILABLE_IN_ALL
 GArrowArrayBuilder *
 garrow_list_array_builder_get_value_builder(GArrowListArrayBuilder *builder);
 
 #define GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER (garrow_large_list_array_builder_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListArrayBuilder,
                          garrow_large_list_array_builder,
                          GARROW,
@@ -1468,6 +1636,7 @@ GArrowArrayBuilder *
 garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder);
 
 #define GARROW_TYPE_STRUCT_ARRAY_BUILDER (garrow_struct_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowStructArrayBuilder,
                          garrow_struct_array_builder,
                          GARROW,
@@ -1478,10 +1647,12 @@ struct _GArrowStructArrayBuilderClass
   GArrowArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowStructArrayBuilder *
 garrow_struct_array_builder_new(GArrowStructDataType *data_type, GError **error);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value)
 gboolean
 garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error);
@@ -1491,6 +1662,7 @@ gboolean
 garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder,
                                          GError **error);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null)
 gboolean
 garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder,
@@ -1498,15 +1670,19 @@ garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder,
 #endif
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_child)
 GArrowArrayBuilder *
 garrow_struct_array_builder_get_field_builder(GArrowStructArrayBuilder *builder, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_children)
 GList *
 garrow_struct_array_builder_get_field_builders(GArrowStructArrayBuilder *builder);
 #endif
 
 #define GARROW_TYPE_MAP_ARRAY_BUILDER (garrow_map_array_builder_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowMapArrayBuilder,
                          garrow_map_array_builder,
                          GARROW,
@@ -1554,6 +1730,7 @@ GArrowArrayBuilder *
 garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder);
 
 #define GARROW_TYPE_DECIMAL128_ARRAY_BUILDER (garrow_decimal128_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128ArrayBuilder,
                          garrow_decimal128_array_builder,
                          GARROW,
@@ -1564,10 +1741,12 @@ struct _GArrowDecimal128ArrayBuilderClass
   GArrowFixedSizeBinaryArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128ArrayBuilder *
 garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type);
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value)
 gboolean
 garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder,
@@ -1596,6 +1775,7 @@ garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilde
 #endif
 
 #define GARROW_TYPE_DECIMAL256_ARRAY_BUILDER (garrow_decimal256_array_builder_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256ArrayBuilder,
                          garrow_decimal256_array_builder,
                          GARROW,
@@ -1606,6 +1786,7 @@ struct _GArrowDecimal256ArrayBuilderClass
   GArrowFixedSizeBinaryArrayBuilderClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal256ArrayBuilder *
 garrow_decimal256_array_builder_new(GArrowDecimal256DataType *data_type);
 
@@ -1624,6 +1805,7 @@ garrow_decimal256_array_builder_append_values(GArrowDecimal256ArrayBuil
                                               GError **error);
 
 #define GARROW_TYPE_UNION_ARRAY_BUILDER (garrow_union_array_builder_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowUnionArrayBuilder,
                          garrow_union_array_builder,
                          GARROW,
@@ -1648,6 +1830,7 @@ garrow_union_array_builder_append_value(GArrowUnionArrayBuilder *builder,
 
 #define GARROW_TYPE_DENSE_UNION_ARRAY_BUILDER \
   (garrow_dense_union_array_builder_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArrayBuilder,
                          garrow_dense_union_array_builder,
                          GARROW,
@@ -1664,6 +1847,7 @@ garrow_dense_union_array_builder_new(GArrowDenseUnionDataType *data_type, GError
 
 #define GARROW_TYPE_SPARSE_UNION_ARRAY_BUILDER \
   (garrow_sparse_union_array_builder_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArrayBuilder,
                          garrow_sparse_union_array_builder,
                          GARROW,
diff --git a/c_glib/arrow-glib/basic-array-definition.h b/c_glib/arrow-glib/basic-array-definition.h
index 54642dae018ec..2fa67c09c1cc4 100644
--- a/c_glib/arrow-glib/basic-array-definition.h
+++ b/c_glib/arrow-glib/basic-array-definition.h
@@ -21,9 +21,12 @@
 
 #include <arrow-glib/gobject-type.h>
 
+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS
 
 #define GARROW_TYPE_ARRAY (garrow_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowArray, garrow_array, GARROW, ARRAY, GObject)
 struct _GArrowArrayClass
 {
@@ -31,6 +34,7 @@ struct _GArrowArrayClass
 };
 
 #define GARROW_TYPE_EXTENSION_ARRAY (garrow_extension_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExtensionArray, garrow_extension_array, GARROW, EXTENSION_ARRAY, GArrowArray)
 struct _GArrowExtensionArrayClass
diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h
index ee6f40b1ddc24..95679aa37c57a 100644
--- a/c_glib/arrow-glib/basic-array.h
+++ b/c_glib/arrow-glib/basic-array.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowEqualOptions, garrow_equal_options, GARROW, EQUAL_OPTIONS, GObject)
 struct _GArrowEqualOptionsClass
@@ -52,6 +53,7 @@ garrow_array_export(GArrowArray *array,
                     gpointer *c_abi_schema,
                     GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_array_equal(GArrowArray *array, GArrowArray *other_array);
 GARROW_AVAILABLE_IN_5_0
@@ -59,8 +61,11 @@ gboolean
 garrow_array_equal_options(GArrowArray *array,
                            GArrowArray *other_array,
                            GArrowEqualOptions *options);
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_array_equal_range(GArrowArray *array,
                          gint64 start_index,
@@ -69,37 +74,60 @@ garrow_array_equal_range(GArrowArray *array,
                          gint64 end_index,
                          GArrowEqualOptions *options);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_array_is_null(GArrowArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_array_is_valid(GArrowArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_array_get_length(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_array_get_offset(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_array_get_n_nulls(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_array_get_null_bitmap(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_array_get_value_data_type(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_array_get_value_type(GArrowArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_slice(GArrowArray *array, gint64 offset, gint64 length);
+
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_array_to_string(GArrowArray *array, GError **error);
+
 GARROW_AVAILABLE_IN_0_15
 GArrowArray *
 garrow_array_view(GArrowArray *array, GArrowDataType *return_type, GError **error);
+
 GARROW_AVAILABLE_IN_0_15
 gchar *
 garrow_array_diff_unified(GArrowArray *array, GArrowArray *other_array);
+
 GARROW_AVAILABLE_IN_4_0
 GArrowArray *
 garrow_array_concatenate(GArrowArray *array, GList *other_arrays, GError **error);
 
 #define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowNullArray, garrow_null_array, GARROW, NULL_ARRAY, GArrowArray)
 struct _GArrowNullArrayClass
@@ -107,10 +135,12 @@ struct _GArrowNullArrayClass
   GArrowArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowNullArray *
 garrow_null_array_new(gint64 length);
 
 #define GARROW_TYPE_PRIMITIVE_ARRAY (garrow_primitive_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowPrimitiveArray, garrow_primitive_array, GARROW, PRIMITIVE_ARRAY, GArrowArray)
 struct _GArrowPrimitiveArrayClass
@@ -119,6 +149,7 @@ struct _GArrowPrimitiveArrayClass
 };
 
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_1_0_FOR(garrow_primitive_array_get_data_buffer)
 GArrowBuffer *
 garrow_primitive_array_get_buffer(GArrowPrimitiveArray *array);
@@ -128,6 +159,7 @@ GArrowBuffer *
 garrow_primitive_array_get_data_buffer(GArrowPrimitiveArray *array);
 
 #define GARROW_TYPE_BOOLEAN_ARRAY (garrow_boolean_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBooleanArray, garrow_boolean_array, GARROW, BOOLEAN_ARRAY, GArrowPrimitiveArray)
 struct _GArrowBooleanArrayClass
@@ -135,18 +167,23 @@ struct _GArrowBooleanArrayClass
   GArrowPrimitiveArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBooleanArray *
 garrow_boolean_array_new(gint64 length,
                          GArrowBuffer *data,
                          GArrowBuffer *null_bitmap,
                          gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_boolean_array_get_value(GArrowBooleanArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean *
 garrow_boolean_array_get_values(GArrowBooleanArray *array, gint64 *length);
 
 #define GARROW_TYPE_NUMERIC_ARRAY (garrow_numeric_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowNumericArray, garrow_numeric_array, GARROW, NUMERIC_ARRAY, GArrowPrimitiveArray)
 struct _GArrowNumericArrayClass
@@ -155,6 +192,7 @@ struct _GArrowNumericArrayClass
 };
 
 #define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt8Array, garrow_int8_array, GARROW, INT8_ARRAY, GArrowNumericArray)
 struct _GArrowInt8ArrayClass
@@ -162,18 +200,23 @@ struct _GArrowInt8ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt8Array *
 garrow_int8_array_new(gint64 length,
                       GArrowBuffer *data,
                       GArrowBuffer *null_bitmap,
                       gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint8
 garrow_int8_array_get_value(GArrowInt8Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint8 *
 garrow_int8_array_get_values(GArrowInt8Array *array, gint64 *length);
 
 #define GARROW_TYPE_UINT8_ARRAY (garrow_uint8_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt8Array, garrow_uint8_array, GARROW, UINT8_ARRAY, GArrowNumericArray)
 struct _GArrowUInt8ArrayClass
@@ -181,18 +224,23 @@ struct _GArrowUInt8ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt8Array *
 garrow_uint8_array_new(gint64 length,
                        GArrowBuffer *data,
                        GArrowBuffer *null_bitmap,
                        gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 guint8
 garrow_uint8_array_get_value(GArrowUInt8Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const guint8 *
 garrow_uint8_array_get_values(GArrowUInt8Array *array, gint64 *length);
 
 #define GARROW_TYPE_INT16_ARRAY (garrow_int16_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt16Array, garrow_int16_array, GARROW, INT16_ARRAY, GArrowNumericArray)
 struct _GArrowInt16ArrayClass
@@ -200,18 +248,23 @@ struct _GArrowInt16ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt16Array *
 garrow_int16_array_new(gint64 length,
                        GArrowBuffer *data,
                        GArrowBuffer *null_bitmap,
                        gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint16
 garrow_int16_array_get_value(GArrowInt16Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint16 *
 garrow_int16_array_get_values(GArrowInt16Array *array, gint64 *length);
 
 #define GARROW_TYPE_UINT16_ARRAY (garrow_uint16_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt16Array, garrow_uint16_array, GARROW, UINT16_ARRAY, GArrowNumericArray)
 struct _GArrowUInt16ArrayClass
@@ -219,18 +272,23 @@ struct _GArrowUInt16ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt16Array *
 garrow_uint16_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 guint16
 garrow_uint16_array_get_value(GArrowUInt16Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const guint16 *
 garrow_uint16_array_get_values(GArrowUInt16Array *array, gint64 *length);
 
 #define GARROW_TYPE_INT32_ARRAY (garrow_int32_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt32Array, garrow_int32_array, GARROW, INT32_ARRAY, GArrowNumericArray)
 struct _GArrowInt32ArrayClass
@@ -238,18 +296,23 @@ struct _GArrowInt32ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt32Array *
 garrow_int32_array_new(gint64 length,
                        GArrowBuffer *data,
                        GArrowBuffer *null_bitmap,
                        gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_int32_array_get_value(GArrowInt32Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint32 *
 garrow_int32_array_get_values(GArrowInt32Array *array, gint64 *length);
 
 #define GARROW_TYPE_UINT32_ARRAY (garrow_uint32_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt32Array, garrow_uint32_array, GARROW, UINT32_ARRAY, GArrowNumericArray)
 struct _GArrowUInt32ArrayClass
@@ -257,18 +320,23 @@ struct _GArrowUInt32ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt32Array *
 garrow_uint32_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 guint32
 garrow_uint32_array_get_value(GArrowUInt32Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const guint32 *
 garrow_uint32_array_get_values(GArrowUInt32Array *array, gint64 *length);
 
 #define GARROW_TYPE_INT64_ARRAY (garrow_int64_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInt64Array, garrow_int64_array, GARROW, INT64_ARRAY, GArrowNumericArray)
 struct _GArrowInt64ArrayClass
@@ -276,18 +344,23 @@ struct _GArrowInt64ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt64Array *
 garrow_int64_array_new(gint64 length,
                        GArrowBuffer *data,
                        GArrowBuffer *null_bitmap,
                        gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_int64_array_get_value(GArrowInt64Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint64 *
 garrow_int64_array_get_values(GArrowInt64Array *array, gint64 *length);
 
 #define GARROW_TYPE_UINT64_ARRAY (garrow_uint64_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUInt64Array, garrow_uint64_array, GARROW, UINT64_ARRAY, GArrowNumericArray)
 struct _GArrowUInt64ArrayClass
@@ -295,18 +368,23 @@ struct _GArrowUInt64ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt64Array *
 garrow_uint64_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_uint64_array_get_value(GArrowUInt64Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const guint64 *
 garrow_uint64_array_get_values(GArrowUInt64Array *array, gint64 *length);
 
 #define GARROW_TYPE_HALF_FLOAT_ARRAY (garrow_half_float_array_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArray,
                          garrow_half_float_array,
                          GARROW,
@@ -332,6 +410,7 @@ const guint16 *
 garrow_half_float_array_get_values(GArrowHalfFloatArray *array, gint64 *length);
 
 #define GARROW_TYPE_FLOAT_ARRAY (garrow_float_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFloatArray, garrow_float_array, GARROW, FLOAT_ARRAY, GArrowNumericArray)
 struct _GArrowFloatArrayClass
@@ -339,18 +418,23 @@ struct _GArrowFloatArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowFloatArray *
 garrow_float_array_new(gint64 length,
                        GArrowBuffer *data,
                        GArrowBuffer *null_bitmap,
                        gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gfloat
 garrow_float_array_get_value(GArrowFloatArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gfloat *
 garrow_float_array_get_values(GArrowFloatArray *array, gint64 *length);
 
 #define GARROW_TYPE_DOUBLE_ARRAY (garrow_double_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDoubleArray, garrow_double_array, GARROW, DOUBLE_ARRAY, GArrowNumericArray)
 struct _GArrowDoubleArrayClass
@@ -358,18 +442,23 @@ struct _GArrowDoubleArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDoubleArray *
 garrow_double_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gdouble
 garrow_double_array_get_value(GArrowDoubleArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gdouble *
 garrow_double_array_get_values(GArrowDoubleArray *array, gint64 *length);
 
 #define GARROW_TYPE_BINARY_ARRAY (garrow_binary_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBinaryArray, garrow_binary_array, GARROW, BINARY_ARRAY, GArrowArray)
 struct _GArrowBinaryArrayClass
@@ -377,6 +466,7 @@ struct _GArrowBinaryArrayClass
   GArrowArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBinaryArray *
 garrow_binary_array_new(gint64 length,
                         GArrowBuffer *value_offsets,
@@ -384,9 +474,12 @@ garrow_binary_array_new(gint64 length,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 GBytes *
 garrow_binary_array_get_value(GArrowBinaryArray *array, gint64 i);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_1_0_FOR(garrow_binary_array_get_data_buffer)
 GArrowBuffer *
 garrow_binary_array_get_buffer(GArrowBinaryArray *array);
@@ -394,10 +487,13 @@ garrow_binary_array_get_buffer(GArrowBinaryArray *array);
 GARROW_AVAILABLE_IN_1_0
 GArrowBuffer *
 garrow_binary_array_get_data_buffer(GArrowBinaryArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_binary_array_get_offsets_buffer(GArrowBinaryArray *array);
 
 #define GARROW_TYPE_LARGE_BINARY_ARRAY (garrow_large_binary_array_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArray,
                          garrow_large_binary_array,
                          GARROW,
@@ -428,11 +524,13 @@ garrow_large_binary_array_get_buffer(GArrowLargeBinaryArray *array);
 GARROW_AVAILABLE_IN_1_0
 GArrowBuffer *
 garrow_large_binary_array_get_data_buffer(GArrowLargeBinaryArray *array);
+
 GARROW_AVAILABLE_IN_0_16
 GArrowBuffer *
 garrow_large_binary_array_get_offsets_buffer(GArrowLargeBinaryArray *array);
 
 #define GARROW_TYPE_STRING_ARRAY (garrow_string_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStringArray, garrow_string_array, GARROW, STRING_ARRAY, GArrowBinaryArray)
 struct _GArrowStringArrayClass
@@ -440,6 +538,7 @@ struct _GArrowStringArrayClass
   GArrowBinaryArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowStringArray *
 garrow_string_array_new(gint64 length,
                         GArrowBuffer *value_offsets,
@@ -447,10 +546,12 @@ garrow_string_array_new(gint64 length,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_string_array_get_string(GArrowStringArray *array, gint64 i);
 
 #define GARROW_TYPE_LARGE_STRING_ARRAY (garrow_large_string_array_get_type())
+GARROW_AVAILABLE_IN_0_16
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArray,
                          garrow_large_string_array,
                          GARROW,
@@ -474,6 +575,7 @@ gchar *
 garrow_large_string_array_get_string(GArrowLargeStringArray *array, gint64 i);
 
 #define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDate32Array, garrow_date32_array, GARROW, DATE32_ARRAY, GArrowNumericArray)
 struct _GArrowDate32ArrayClass
@@ -481,18 +583,23 @@ struct _GArrowDate32ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate32Array *
 garrow_date32_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_date32_array_get_value(GArrowDate32Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint32 *
 garrow_date32_array_get_values(GArrowDate32Array *array, gint64 *length);
 
 #define GARROW_TYPE_DATE64_ARRAY (garrow_date64_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDate64Array, garrow_date64_array, GARROW, DATE64_ARRAY, GArrowNumericArray)
 struct _GArrowDate64ArrayClass
@@ -500,18 +607,23 @@ struct _GArrowDate64ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate64Array *
 garrow_date64_array_new(gint64 length,
                         GArrowBuffer *data,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_date64_array_get_value(GArrowDate64Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint64 *
 garrow_date64_array_get_values(GArrowDate64Array *array, gint64 *length);
 
 #define GARROW_TYPE_TIMESTAMP_ARRAY (garrow_timestamp_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArray,
                          garrow_timestamp_array,
                          GARROW,
@@ -522,6 +634,7 @@ struct _GArrowTimestampArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTimestampArray *
 garrow_timestamp_array_new(GArrowTimestampDataType *data_type,
                            gint64 length,
@@ -529,12 +642,16 @@ garrow_timestamp_array_new(GArrowTimestampDataType *data_type,
                            GArrowBuffer *null_bitmap,
                            gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_timestamp_array_get_value(GArrowTimestampArray *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint64 *
 garrow_timestamp_array_get_values(GArrowTimestampArray *array, gint64 *length);
 
 #define GARROW_TYPE_TIME32_ARRAY (garrow_time32_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTime32Array, garrow_time32_array, GARROW, TIME32_ARRAY, GArrowNumericArray)
 struct _GArrowTime32ArrayClass
@@ -542,6 +659,7 @@ struct _GArrowTime32ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime32Array *
 garrow_time32_array_new(GArrowTime32DataType *data_type,
                         gint64 length,
@@ -549,12 +667,16 @@ garrow_time32_array_new(GArrowTime32DataType *data_type,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_time32_array_get_value(GArrowTime32Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint32 *
 garrow_time32_array_get_values(GArrowTime32Array *array, gint64 *length);
 
 #define GARROW_TYPE_TIME64_ARRAY (garrow_time64_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTime64Array, garrow_time64_array, GARROW, TIME64_ARRAY, GArrowNumericArray)
 struct _GArrowTime64ArrayClass
@@ -562,6 +684,7 @@ struct _GArrowTime64ArrayClass
   GArrowNumericArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime64Array *
 garrow_time64_array_new(GArrowTime64DataType *data_type,
                         gint64 length,
@@ -569,12 +692,16 @@ garrow_time64_array_new(GArrowTime64DataType *data_type,
                         GArrowBuffer *null_bitmap,
                         gint64 n_nulls);
 
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_time64_array_get_value(GArrowTime64Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 const gint64 *
 garrow_time64_array_get_values(GArrowTime64Array *array, gint64 *length);
 
 #define GARROW_TYPE_MONTH_INTERVAL_ARRAY (garrow_month_interval_array_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArray,
                          garrow_month_interval_array,
                          GARROW,
@@ -594,11 +721,13 @@ garrow_month_interval_array_new(gint64 length,
 GARROW_AVAILABLE_IN_8_0
 gint32
 garrow_month_interval_array_get_value(GArrowMonthIntervalArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_8_0
 const gint32 *
 garrow_month_interval_array_get_values(GArrowMonthIntervalArray *array, gint64 *length);
 
 #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY (garrow_day_time_interval_array_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArray,
                          garrow_day_time_interval_array,
                          GARROW,
@@ -618,12 +747,14 @@ garrow_day_time_interval_array_new(gint64 length,
 GARROW_AVAILABLE_IN_8_0
 GArrowDayMillisecond *
 garrow_day_time_interval_array_get_value(GArrowDayTimeIntervalArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_8_0
 GList *
 garrow_day_time_interval_array_get_values(GArrowDayTimeIntervalArray *array);
 
 #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY \
   (garrow_month_day_nano_interval_array_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArray,
                          garrow_month_day_nano_interval_array,
                          GARROW,
@@ -649,6 +780,7 @@ GList *
 garrow_month_day_nano_interval_array_get_values(GArrowMonthDayNanoIntervalArray *array);
 
 #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY (garrow_fixed_size_binary_array_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArray,
                          garrow_fixed_size_binary_array,
                          GARROW,
@@ -669,14 +801,17 @@ garrow_fixed_size_binary_array_new(GArrowFixedSizeBinaryDataType *data_type,
 GARROW_AVAILABLE_IN_3_0
 gint32
 garrow_fixed_size_binary_array_get_byte_width(GArrowFixedSizeBinaryArray *array);
+
 GARROW_AVAILABLE_IN_3_0
 GBytes *
 garrow_fixed_size_binary_array_get_value(GArrowFixedSizeBinaryArray *array, gint64 i);
+
 GARROW_AVAILABLE_IN_3_0
 GBytes *
 garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array);
 
 #define GARROW_TYPE_DECIMAL128_ARRAY (garrow_decimal128_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Array,
                          garrow_decimal128_array,
                          GARROW,
@@ -687,12 +822,16 @@ struct _GArrowDecimal128ArrayClass
   GArrowFixedSizeBinaryArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal128_array_format_value(GArrowDecimal128Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_array_get_value(GArrowDecimal128Array *array, gint64 i);
 
 #define GARROW_TYPE_DECIMAL256_ARRAY (garrow_decimal256_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Array,
                          garrow_decimal256_array,
                          GARROW,
@@ -703,8 +842,11 @@ struct _GArrowDecimal256ArrayClass
   GArrowFixedSizeBinaryArrayClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal256_array_format_value(GArrowDecimal256Array *array, gint64 i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal256 *
 garrow_decimal256_array_get_value(GArrowDecimal256Array *array, gint64 i);
diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp
index f010cf3db4bc3..b2a7ed6ae075f 100644
--- a/c_glib/arrow-glib/basic-array.hpp
+++ b/c_glib/arrow-glib/basic-array.hpp
@@ -23,22 +23,32 @@
 
 #include <arrow-glib/basic-array.h>
 
+GARROW_EXTERN
 arrow::EqualOptions *
 garrow_equal_options_get_raw(GArrowEqualOptions *equal_options);
 
+GARROW_EXTERN
 GArrowArray *
 garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array);
+
+GARROW_EXTERN
 GArrowArray *
 garrow_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array,
                      const gchar *first_property_name,
                      ...);
+
+GARROW_EXTERN
 GArrowArray *
 garrow_array_new_raw_valist(std::shared_ptr<arrow::Array> *arrow_array,
                             const gchar *first_property_name,
                             va_list args);
+
+GARROW_EXTERN
 GArrowExtensionArray *
 garrow_extension_array_new_raw(std::shared_ptr<arrow::Array> *arrow_array,
                                GArrowArray *storage);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::Array>
 garrow_array_get_raw(GArrowArray *array);
diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp
index 36646a9733cd3..d1c06000065dc 100644
--- a/c_glib/arrow-glib/basic-data-type.cpp
+++ b/c_glib/arrow-glib/basic-data-type.cpp
@@ -1801,6 +1801,8 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type
   return garrow_chunked_array_new_raw(&arrow_extension_chunked_array);
 }
 
+G_END_DECLS
+
 static std::shared_ptr<arrow::DataType>
 garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *data_type)
 {
@@ -1808,8 +1810,6 @@ garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *da
   return garrow_data_type_get_raw(priv->storage_data_type);
 }
 
-G_END_DECLS
-
 namespace garrow {
   GExtensionType::GExtensionType(GArrowExtensionDataType *garrow_data_type)
     : arrow::ExtensionType(
diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h
index 01c9e5ef6e40a..77180018c9be8 100644
--- a/c_glib/arrow-glib/basic-data-type.h
+++ b/c_glib/arrow-glib/basic-data-type.h
@@ -28,6 +28,7 @@ G_BEGIN_DECLS
 
 #define GARROW_TYPE_DATA_TYPE (garrow_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDataType, garrow_data_type, GARROW, DATA_TYPE, GObject)
 struct _GArrowDataTypeClass
 {
@@ -42,17 +43,24 @@ GARROW_AVAILABLE_IN_6_0
 gpointer
 garrow_data_type_export(GArrowDataType *data_type, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_data_type_equal(GArrowDataType *data_type, GArrowDataType *other_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_data_type_to_string(GArrowDataType *data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_data_type_get_id(GArrowDataType *data_type);
+
 GARROW_AVAILABLE_IN_3_0
 gchar *
 garrow_data_type_get_name(GArrowDataType *data_type);
 
 #define GARROW_TYPE_FIXED_WIDTH_DATA_TYPE (garrow_fixed_width_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedWidthDataType,
                          garrow_fixed_width_data_type,
                          GARROW,
@@ -63,6 +71,7 @@ struct _GArrowFixedWidthDataTypeClass
   GArrowDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_fixed_width_data_type_get_bit_width(GArrowFixedWidthDataType *data_type);
 /* TODO:
@@ -71,6 +80,7 @@ GList *garrow_fixed_width_data_type_get_buffer_layout(GArrowFixedWidthDataType
 */
 
 #define GARROW_TYPE_NULL_DATA_TYPE (garrow_null_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowNullDataType, garrow_null_data_type, GARROW, NULL_DATA_TYPE, GArrowDataType)
 struct _GArrowNullDataTypeClass
@@ -78,10 +88,12 @@ struct _GArrowNullDataTypeClass
   GArrowDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowNullDataType *
 garrow_null_data_type_new(void);
 
 #define GARROW_TYPE_BOOLEAN_DATA_TYPE (garrow_boolean_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBooleanDataType,
                          garrow_boolean_data_type,
                          GARROW,
@@ -92,10 +104,12 @@ struct _GArrowBooleanDataTypeClass
   GArrowFixedWidthDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBooleanDataType *
 garrow_boolean_data_type_new(void);
 
 #define GARROW_TYPE_NUMERIC_DATA_TYPE (garrow_numeric_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowNumericDataType,
                          garrow_numeric_data_type,
                          GARROW,
@@ -107,6 +121,7 @@ struct _GArrowNumericDataTypeClass
 };
 
 #define GARROW_TYPE_INTEGER_DATA_TYPE (garrow_integer_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowIntegerDataType,
                          garrow_integer_data_type,
                          GARROW,
@@ -122,6 +137,7 @@ gboolean
 garrow_integer_data_type_is_signed(GArrowIntegerDataType *data_type);
 
 #define GARROW_TYPE_INT8_DATA_TYPE (garrow_int8_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowInt8DataType,
                          garrow_int8_data_type,
                          GARROW,
@@ -132,10 +148,12 @@ struct _GArrowInt8DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt8DataType *
 garrow_int8_data_type_new(void);
 
 #define GARROW_TYPE_UINT8_DATA_TYPE (garrow_uint8_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt8DataType,
                          garrow_uint8_data_type,
                          GARROW,
@@ -146,10 +164,12 @@ struct _GArrowUInt8DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt8DataType *
 garrow_uint8_data_type_new(void);
 
 #define GARROW_TYPE_INT16_DATA_TYPE (garrow_int16_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowInt16DataType,
                          garrow_int16_data_type,
                          GARROW,
@@ -160,10 +180,12 @@ struct _GArrowInt16DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt16DataType *
 garrow_int16_data_type_new(void);
 
 #define GARROW_TYPE_UINT16_DATA_TYPE (garrow_uint16_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt16DataType,
                          garrow_uint16_data_type,
                          GARROW,
@@ -174,10 +196,12 @@ struct _GArrowUInt16DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt16DataType *
 garrow_uint16_data_type_new(void);
 
 #define GARROW_TYPE_INT32_DATA_TYPE (garrow_int32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowInt32DataType,
                          garrow_int32_data_type,
                          GARROW,
@@ -188,10 +212,12 @@ struct _GArrowInt32DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt32DataType *
 garrow_int32_data_type_new(void);
 
 #define GARROW_TYPE_UINT32_DATA_TYPE (garrow_uint32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt32DataType,
                          garrow_uint32_data_type,
                          GARROW,
@@ -202,10 +228,12 @@ struct _GArrowUInt32DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt32DataType *
 garrow_uint32_data_type_new(void);
 
 #define GARROW_TYPE_INT64_DATA_TYPE (garrow_int64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowInt64DataType,
                          garrow_int64_data_type,
                          GARROW,
@@ -216,10 +244,12 @@ struct _GArrowInt64DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowInt64DataType *
 garrow_int64_data_type_new(void);
 
 #define GARROW_TYPE_UINT64_DATA_TYPE (garrow_uint64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowUInt64DataType,
                          garrow_uint64_data_type,
                          GARROW,
@@ -230,10 +260,12 @@ struct _GArrowUInt64DataTypeClass
   GArrowIntegerDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowUInt64DataType *
 garrow_uint64_data_type_new(void);
 
 #define GARROW_TYPE_FLOATING_POINT_DATA_TYPE (garrow_floating_point_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFloatingPointDataType,
                          garrow_floating_point_data_type,
                          GARROW,
@@ -245,6 +277,7 @@ struct _GArrowFloatingPointDataTypeClass
 };
 
 #define GARROW_TYPE_HALF_FLOAT_DATA_TYPE (garrow_half_float_data_type_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatDataType,
                          garrow_half_float_data_type,
                          GARROW,
@@ -260,6 +293,7 @@ GArrowHalfFloatDataType *
 garrow_half_float_data_type_new(void);
 
 #define GARROW_TYPE_FLOAT_DATA_TYPE (garrow_float_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFloatDataType,
                          garrow_float_data_type,
                          GARROW,
@@ -270,10 +304,12 @@ struct _GArrowFloatDataTypeClass
   GArrowFloatingPointDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowFloatDataType *
 garrow_float_data_type_new(void);
 
 #define GARROW_TYPE_DOUBLE_DATA_TYPE (garrow_double_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDoubleDataType,
                          garrow_double_data_type,
                          GARROW,
@@ -284,10 +320,12 @@ struct _GArrowDoubleDataTypeClass
   GArrowFloatingPointDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDoubleDataType *
 garrow_double_data_type_new(void);
 
 #define GARROW_TYPE_BINARY_DATA_TYPE (garrow_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowBinaryDataType, garrow_binary_data_type, GARROW, BINARY_DATA_TYPE, GArrowDataType)
 struct _GArrowBinaryDataTypeClass
@@ -295,11 +333,13 @@ struct _GArrowBinaryDataTypeClass
   GArrowDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBinaryDataType *
 garrow_binary_data_type_new(void);
 
 #define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE \
   (garrow_fixed_size_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_0_12
 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType,
                          garrow_fixed_size_binary_data_type,
                          GARROW,
@@ -319,6 +359,7 @@ garrow_fixed_size_binary_data_type_get_byte_width(
   GArrowFixedSizeBinaryDataType *data_type);
 
 #define GARROW_TYPE_LARGE_BINARY_DATA_TYPE (garrow_large_binary_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryDataType,
                          garrow_large_binary_data_type,
                          GARROW,
@@ -334,6 +375,7 @@ GArrowLargeBinaryDataType *
 garrow_large_binary_data_type_new(void);
 
 #define GARROW_TYPE_STRING_DATA_TYPE (garrow_string_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowStringDataType,
                          garrow_string_data_type,
                          GARROW,
@@ -344,10 +386,12 @@ struct _GArrowStringDataTypeClass
   GArrowBinaryDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowStringDataType *
 garrow_string_data_type_new(void);
 
 #define GARROW_TYPE_LARGE_STRING_DATA_TYPE (garrow_large_string_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringDataType,
                          garrow_large_string_data_type,
                          GARROW,
@@ -363,6 +407,7 @@ GArrowLargeStringDataType *
 garrow_large_string_data_type_new(void);
 
 #define GARROW_TYPE_TEMPORAL_DATA_TYPE (garrow_temporal_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTemporalDataType,
                          garrow_temporal_data_type,
                          GARROW,
@@ -374,6 +419,7 @@ struct _GArrowTemporalDataTypeClass
 };
 
 #define GARROW_TYPE_DATE32_DATA_TYPE (garrow_date32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate32DataType,
                          garrow_date32_data_type,
                          GARROW,
@@ -384,10 +430,12 @@ struct _GArrowDate32DataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate32DataType *
 garrow_date32_data_type_new(void);
 
 #define GARROW_TYPE_DATE64_DATA_TYPE (garrow_date64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDate64DataType,
                          garrow_date64_data_type,
                          GARROW,
@@ -398,10 +446,12 @@ struct _GArrowDate64DataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDate64DataType *
 garrow_date64_data_type_new(void);
 
 #define GARROW_TYPE_TIMESTAMP_DATA_TYPE (garrow_timestamp_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimestampDataType,
                          garrow_timestamp_data_type,
                          GARROW,
@@ -412,12 +462,16 @@ struct _GArrowTimestampDataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTimestampDataType *
 garrow_timestamp_data_type_new(GArrowTimeUnit unit, GTimeZone *time_zone);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTimeUnit
 garrow_timestamp_data_type_get_unit(GArrowTimestampDataType *data_type);
 
 #define GARROW_TYPE_TIME_DATA_TYPE (garrow_time_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTimeDataType,
                          garrow_time_data_type,
                          GARROW,
@@ -428,10 +482,12 @@ struct _GArrowTimeDataTypeClass
   GArrowTemporalDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTimeUnit
 garrow_time_data_type_get_unit(GArrowTimeDataType *time_data_type);
 
 #define GARROW_TYPE_TIME32_DATA_TYPE (garrow_time32_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime32DataType,
                          garrow_time32_data_type,
                          GARROW,
@@ -442,10 +498,12 @@ struct _GArrowTime32DataTypeClass
   GArrowTimeDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime32DataType *
 garrow_time32_data_type_new(GArrowTimeUnit unit, GError **error);
 
 #define GARROW_TYPE_TIME64_DATA_TYPE (garrow_time64_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowTime64DataType,
                          garrow_time64_data_type,
                          GARROW,
@@ -456,10 +514,12 @@ struct _GArrowTime64DataTypeClass
   GArrowTimeDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowTime64DataType *
 garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error);
 
 #define GARROW_TYPE_INTERVAL_DATA_TYPE (garrow_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowIntervalDataType,
                          garrow_interval_data_type,
                          GARROW,
@@ -475,6 +535,7 @@ GArrowIntervalType
 garrow_interval_data_type_get_interval_type(GArrowIntervalDataType *type);
 
 #define GARROW_TYPE_MONTH_INTERVAL_DATA_TYPE (garrow_month_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalDataType,
                          garrow_month_interval_data_type,
                          GARROW,
@@ -491,6 +552,7 @@ garrow_month_interval_data_type_new(void);
 
 #define GARROW_TYPE_DAY_TIME_INTERVAL_DATA_TYPE \
   (garrow_day_time_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalDataType,
                          garrow_day_time_interval_data_type,
                          GARROW,
@@ -507,6 +569,7 @@ garrow_day_time_interval_data_type_new(void);
 
 #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_DATA_TYPE \
   (garrow_month_day_nano_interval_data_type_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalDataType,
                          garrow_month_day_nano_interval_data_type,
                          GARROW,
@@ -522,6 +585,7 @@ GArrowMonthDayNanoIntervalDataType *
 garrow_month_day_nano_interval_data_type_new(void);
 
 #define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType,
                          garrow_decimal_data_type,
                          GARROW,
@@ -532,14 +596,20 @@ struct _GArrowDecimalDataTypeClass
   GArrowFixedSizeBinaryDataTypeClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimalDataType *
 garrow_decimal_data_type_new(gint32 precision, gint32 scale, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 gint32
 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type);
 
 #define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType,
                          garrow_decimal128_data_type,
                          GARROW,
@@ -559,6 +629,7 @@ GArrowDecimal128DataType *
 garrow_decimal128_data_type_new(gint32 precision, gint32 scale, GError **error);
 
 #define GARROW_TYPE_DECIMAL256_DATA_TYPE (garrow_decimal256_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256DataType,
                          garrow_decimal256_data_type,
                          GARROW,
@@ -578,6 +649,7 @@ GArrowDecimal256DataType *
 garrow_decimal256_data_type_new(gint32 precision, gint32 scale, GError **error);
 
 #define GARROW_TYPE_EXTENSION_DATA_TYPE (garrow_extension_data_type_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataType,
                          garrow_extension_data_type,
                          GARROW,
@@ -628,6 +700,7 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type
 
 #define GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY \
   (garrow_extension_data_type_registry_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataTypeRegistry,
                          garrow_extension_data_type_registry,
                          GARROW,
diff --git a/c_glib/arrow-glib/buffer.h b/c_glib/arrow-glib/buffer.h
index 8f93a5ef0ddb2..29308e935aba2 100644
--- a/c_glib/arrow-glib/buffer.h
+++ b/c_glib/arrow-glib/buffer.h
@@ -21,44 +21,70 @@
 
 #include <arrow-glib/gobject-type.h>
 
+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS
 
 #define GARROW_TYPE_BUFFER (garrow_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBuffer, garrow_buffer, GARROW, BUFFER, GObject)
 struct _GArrowBufferClass
 {
   GObjectClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_new(const guint8 *data, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_new_bytes(GBytes *data);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_equal(GArrowBuffer *buffer, GArrowBuffer *other_buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_equal_n_bytes(GArrowBuffer *buffer,
                             GArrowBuffer *other_buffer,
                             gint64 n_bytes);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_buffer_is_mutable(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_buffer_get_capacity(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GBytes *
 garrow_buffer_get_data(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GBytes *
 garrow_buffer_get_mutable_data(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_buffer_get_size(GArrowBuffer *buffer);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_get_parent(GArrowBuffer *buffer);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_copy(GArrowBuffer *buffer, gint64 start, gint64 size, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, gint64 size);
 
 #define GARROW_TYPE_MUTABLE_BUFFER (garrow_mutable_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMutableBuffer, garrow_mutable_buffer, GARROW, MUTABLE_BUFFER, GArrowBuffer)
 struct _GArrowMutableBufferClass
@@ -66,12 +92,19 @@ struct _GArrowMutableBufferClass
   GArrowBufferClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_new(guint8 *data, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_bytes(GBytes *data);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowMutableBuffer *
 garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, gint64 offset, gint64 size);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer,
                                gint64 offset,
@@ -80,6 +113,7 @@ garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer,
                                GError **error);
 
 #define GARROW_TYPE_RESIZABLE_BUFFER (garrow_resizable_buffer_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowResizableBuffer,
                          garrow_resizable_buffer,
                          GARROW,
@@ -90,12 +124,17 @@ struct _GArrowResizableBufferClass
   GArrowMutableBufferClass parent_class;
 };
 
+GARROW_AVAILABLE_IN_ALL
 GArrowResizableBuffer *
 garrow_resizable_buffer_new(gint64 initial_size, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_resizable_buffer_resize(GArrowResizableBuffer *buffer,
                                gint64 new_size,
                                GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_resizable_buffer_reserve(GArrowResizableBuffer *buffer,
                                 gint64 new_capacity,
diff --git a/c_glib/arrow-glib/buffer.hpp b/c_glib/arrow-glib/buffer.hpp
index 5d922371c3b6b..7e4d7ecee1c1c 100644
--- a/c_glib/arrow-glib/buffer.hpp
+++ b/c_glib/arrow-glib/buffer.hpp
@@ -23,20 +23,32 @@
 
 #include <arrow-glib/buffer.h>
 
+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw(std::shared_ptr<arrow::Buffer> *arrow_buffer);
+
+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw_bytes(std::shared_ptr<arrow::Buffer> *arrow_buffer, GBytes *data);
+
+GARROW_EXTERN
 GArrowBuffer *
 garrow_buffer_new_raw_parent(std::shared_ptr<arrow::Buffer> *arrow_buffer,
                              GArrowBuffer *parent);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::Buffer>
 garrow_buffer_get_raw(GArrowBuffer *buffer);
 
+GARROW_EXTERN
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_raw(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer);
+
+GARROW_EXTERN
 GArrowMutableBuffer *
 garrow_mutable_buffer_new_raw_bytes(std::shared_ptr<arrow::MutableBuffer> *arrow_buffer,
                                     GBytes *data);
+
+GARROW_EXTERN
 GArrowResizableBuffer *
 garrow_resizable_buffer_new_raw(std::shared_ptr<arrow::ResizableBuffer> *arrow_buffer);
diff --git a/c_glib/arrow-glib/chunked-array-definition.h b/c_glib/arrow-glib/chunked-array-definition.h
index b687735419eeb..744f1077ea754 100644
--- a/c_glib/arrow-glib/chunked-array-definition.h
+++ b/c_glib/arrow-glib/chunked-array-definition.h
@@ -24,6 +24,7 @@
 G_BEGIN_DECLS
 
 #define GARROW_TYPE_CHUNKED_ARRAY (garrow_chunked_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowChunkedArray, garrow_chunked_array, GARROW, CHUNKED_ARRAY, GObject)
 struct _GArrowChunkedArrayClass
diff --git a/c_glib/arrow-glib/chunked-array.h b/c_glib/arrow-glib/chunked-array.h
index 6ca497942ff2e..712d16504f624 100644
--- a/c_glib/arrow-glib/chunked-array.h
+++ b/c_glib/arrow-glib/chunked-array.h
@@ -24,42 +24,61 @@
 G_BEGIN_DECLS
 
+GARROW_AVAILABLE_IN_ALL
 GArrowChunkedArray *
 garrow_chunked_array_new(GList *chunks, GError **error);
+
 GARROW_AVAILABLE_IN_11_0
 GArrowChunkedArray *
 garrow_chunked_array_new_empty(GArrowDataType *data_type, GError **error);
 
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_chunked_array_equal(GArrowChunkedArray *chunked_array,
                            GArrowChunkedArray *other_chunked_array);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowType
 garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array);
 
 GARROW_DEPRECATED_IN_0_15_FOR(garrow_chunked_array_get_n_rows)
 guint64
 garrow_chunked_array_get_length(GArrowChunkedArray *chunked_array);
+
 GARROW_AVAILABLE_IN_0_15
 guint64
 garrow_chunked_array_get_n_rows(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array);
+
+GARROW_AVAILABLE_IN_ALL
 guint
 garrow_chunked_array_get_n_chunks(GArrowChunkedArray *chunked_array);
 
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_chunked_array_get_chunk(GArrowChunkedArray *chunked_array, guint i);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_chunked_array_get_chunks(GArrowChunkedArray
*chunked_array); + +GARROW_AVAILABLE_IN_ALL GArrowChunkedArray * garrow_chunked_array_slice(GArrowChunkedArray *chunked_array, guint64 offset, guint64 length); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_chunked_array_to_string(GArrowChunkedArray *chunked_array, GError **error); + GARROW_AVAILABLE_IN_4_0 GArrowArray * garrow_chunked_array_combine(GArrowChunkedArray *chunked_array, GError **error); diff --git a/c_glib/arrow-glib/chunked-array.hpp b/c_glib/arrow-glib/chunked-array.hpp index 9ce6cc76adfbf..674ef9606b96e 100644 --- a/c_glib/arrow-glib/chunked-array.hpp +++ b/c_glib/arrow-glib/chunked-array.hpp @@ -23,10 +23,15 @@ #include +GARROW_EXTERN GArrowChunkedArray * garrow_chunked_array_new_raw(std::shared_ptr *arrow_chunked_array); + +GARROW_EXTERN GArrowChunkedArray * garrow_chunked_array_new_raw(std::shared_ptr *arrow_chunked_array, GArrowDataType *data_type); + +GARROW_EXTERN std::shared_ptr garrow_chunked_array_get_raw(GArrowChunkedArray *chunked_array); diff --git a/c_glib/arrow-glib/codec.h b/c_glib/arrow-glib/codec.h index 9b8611bb0a7ee..5865634a7d8e4 100644 --- a/c_glib/arrow-glib/codec.h +++ b/c_glib/arrow-glib/codec.h @@ -50,20 +50,25 @@ typedef enum { } GArrowCompressionType; #define GARROW_TYPE_CODEC (garrow_codec_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCodec, garrow_codec, GARROW, CODEC, GObject) struct _GArrowCodecClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCodec * garrow_codec_new(GArrowCompressionType type, GError **error); +GARROW_AVAILABLE_IN_ALL const gchar * garrow_codec_get_name(GArrowCodec *codec); + GARROW_AVAILABLE_IN_2_0 GArrowCompressionType garrow_codec_get_compression_type(GArrowCodec *codec); + GARROW_AVAILABLE_IN_2_0 gint garrow_codec_get_compression_level(GArrowCodec *codec); diff --git a/c_glib/arrow-glib/codec.hpp b/c_glib/arrow-glib/codec.hpp index f4cfaba18a00e..baea842ddf6b5 100644 --- a/c_glib/arrow-glib/codec.hpp +++ b/c_glib/arrow-glib/codec.hpp @@ -23,12 +23,18 @@ #include +GARROW_EXTERN GArrowCompressionType garrow_compression_type_from_raw(arrow::Compression::type arrow_type); + +GARROW_EXTERN arrow::Compression::type garrow_compression_type_to_raw(GArrowCompressionType type); +GARROW_EXTERN GArrowCodec * garrow_codec_new_raw(std::shared_ptr *arrow_codec); + +GARROW_EXTERN std::shared_ptr garrow_codec_get_raw(GArrowCodec *codec); diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index c6e19f1c74e22..b8ba901363d0a 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_LIST_ARRAY (garrow_list_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowListArray, garrow_list_array, GARROW, LIST_ARRAY, GArrowArray) struct _GArrowListArrayClass @@ -34,6 +35,7 @@ struct _GArrowListArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowListArray * garrow_list_array_new(GArrowDataType *data_type, gint64 length, @@ -42,24 +44,32 @@ garrow_list_array_new(GArrowDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_list_array_get_value_type(GArrowListArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_list_array_get_value(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 GArrowArray * garrow_list_array_get_values(GArrowListArray *array); + GARROW_AVAILABLE_IN_2_0 gint32 garrow_list_array_get_value_offset(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 
gint32 garrow_list_array_get_value_length(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 const gint32 * garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets); #define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE( GArrowLargeListArray, garrow_large_list_array, GARROW, LARGE_LIST_ARRAY, GArrowArray) struct _GArrowLargeListArrayClass @@ -79,23 +89,29 @@ garrow_large_list_array_new(GArrowDataType *data_type, GARROW_AVAILABLE_IN_0_16 GArrowDataType * garrow_large_list_array_get_value_type(GArrowLargeListArray *array); + GARROW_AVAILABLE_IN_0_16 GArrowArray * garrow_large_list_array_get_value(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 GArrowArray * garrow_large_list_array_get_values(GArrowLargeListArray *array); + GARROW_AVAILABLE_IN_2_0 gint64 garrow_large_list_array_get_value_offset(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 gint64 garrow_large_list_array_get_value_length(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 const gint64 * garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n_offsets); #define GARROW_TYPE_STRUCT_ARRAY (garrow_struct_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowStructArray, garrow_struct_array, GARROW, STRUCT_ARRAY, GArrowArray) struct _GArrowStructArrayClass @@ -103,6 +119,7 @@ struct _GArrowStructArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStructArray * garrow_struct_array_new(GArrowDataType *data_type, gint64 length, @@ -110,9 +127,11 @@ garrow_struct_array_new(GArrowDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_struct_array_get_field(GArrowStructArray *array, gint i); +GARROW_AVAILABLE_IN_ALL GList * garrow_struct_array_get_fields(GArrowStructArray *array); @@ -121,6 +140,7 @@ GList * garrow_struct_array_flatten(GArrowStructArray *array, GError **error); #define GARROW_TYPE_MAP_ARRAY (garrow_map_array_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GArrowMapArray, garrow_map_array, GARROW, MAP_ARRAY, GArrowListArray) struct _GArrowMapArrayClass @@ -134,14 +154,17 @@ garrow_map_array_new(GArrowArray *offsets, GArrowArray *keys, GArrowArray *items, GError **error); + GARROW_AVAILABLE_IN_0_17 GArrowArray * garrow_map_array_get_keys(GArrowMapArray *array); + GARROW_AVAILABLE_IN_0_17 GArrowArray * garrow_map_array_get_items(GArrowMapArray *array); #define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUnionArray, garrow_union_array, GARROW, UNION_ARRAY, GArrowArray) struct _GArrowUnionArrayClass @@ -152,13 +175,17 @@ struct _GArrowUnionArrayClass GARROW_AVAILABLE_IN_12_0 gint8 garrow_union_array_get_type_code(GArrowUnionArray *array, gint64 i); + GARROW_AVAILABLE_IN_12_0 gint garrow_union_array_get_child_id(GArrowUnionArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_union_array_get_field(GArrowUnionArray *array, gint i); #define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray, garrow_sparse_union_array, GARROW, @@ -169,8 +196,11 @@ struct _GArrowSparseUnionArrayClass GArrowUnionArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowSparseUnionArray * garrow_sparse_union_array_new(GArrowInt8Array *type_ids, GList *fields, GError **error); + 
+GARROW_AVAILABLE_IN_ALL
 GArrowSparseUnionArray *
 garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                         GArrowInt8Array *type_ids,
@@ -178,6 +208,7 @@ garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type,
                                         GError **error);

 #define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray,
                          garrow_dense_union_array,
                          GARROW,
@@ -188,22 +219,27 @@ struct _GArrowDenseUnionArrayClass
   GArrowUnionArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionArray *
 garrow_dense_union_array_new(GArrowInt8Array *type_ids,
                              GArrowInt32Array *value_offsets,
                              GList *fields,
                              GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionArray *
 garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type,
                                        GArrowInt8Array *type_ids,
                                        GArrowInt32Array *value_offsets,
                                        GList *fields,
                                        GError **error);
+
 GARROW_AVAILABLE_IN_12_0
 gint32
 garrow_dense_union_array_get_value_offset(GArrowDenseUnionArray *array, gint64 i);

 #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDictionaryArray, garrow_dictionary_array, GARROW, DICTIONARY_ARRAY, GArrowArray)
 struct _GArrowDictionaryArrayClass
@@ -211,22 +247,29 @@ struct _GArrowDictionaryArrayClass
   GArrowArrayClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryArray *
 garrow_dictionary_array_new(GArrowDataType *data_type,
                             GArrowArray *indices,
                             GArrowArray *dictionary,
                             GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_dictionary_array_get_indices(GArrowDictionaryArray *array);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_1_0_FOR(garrow_array_get_value_data_type)
 GArrowDictionaryDataType *
 garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array);
 #endif

 #define GARROW_TYPE_RUN_END_ENCODED_ARRAY (garrow_run_end_encoded_array_get_type())
+GARROW_AVAILABLE_IN_13_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedArray,
                          garrow_run_end_encoded_array,
                          GARROW,
@@ -248,9 +291,11 @@ garrow_run_end_encoded_array_new(GArrowDataType *data_type,
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_run_ends(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_values(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *array,
@@ -258,9 +303,11 @@ garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *arra
 GARROW_AVAILABLE_IN_13_0
 GArrowArray *
 garrow_run_end_encoded_array_get_logical_values(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 gint64
 garrow_run_end_encoded_array_find_physical_offset(GArrowRunEndEncodedArray *array);
+
 GARROW_AVAILABLE_IN_13_0
 gint64
 garrow_run_end_encoded_array_find_physical_length(GArrowRunEndEncodedArray *array);
diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h
index e71d277a305c6..7a0a462af00f9 100644
--- a/c_glib/arrow-glib/composite-data-type.h
+++ b/c_glib/arrow-glib/composite-data-type.h
@@ -27,6 +27,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_LIST_DATA_TYPE (garrow_list_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowListDataType, garrow_list_data_type, GARROW, LIST_DATA_TYPE, GArrowDataType)
 struct _GArrowListDataTypeClass
@@ -34,18 +35,23 @@ struct _GArrowListDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowListDataType *
 garrow_list_data_type_new(GArrowField *field);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_13_FOR(garrow_list_data_type_get_field)
 GArrowField *
 garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type);
 #endif
+
 GARROW_AVAILABLE_IN_0_13
 GArrowField *
 garrow_list_data_type_get_field(GArrowListDataType *list_data_type);

 #define GARROW_TYPE_LARGE_LIST_DATA_TYPE (garrow_large_list_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListDataType,
                          garrow_large_list_data_type,
                          GARROW,
@@ -59,11 +65,13 @@ struct _GArrowLargeListDataTypeClass
 GARROW_AVAILABLE_IN_0_16
 GArrowLargeListDataType *
 garrow_large_list_data_type_new(GArrowField *field);
+
 GARROW_AVAILABLE_IN_0_16
 GArrowField *
 garrow_large_list_data_type_get_field(GArrowLargeListDataType *large_list_data_type);

 #define GARROW_TYPE_STRUCT_DATA_TYPE (garrow_struct_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowStructDataType, garrow_struct_data_type, GARROW, STRUCT_DATA_TYPE, GArrowDataType)
 struct _GArrowStructDataTypeClass
@@ -71,22 +79,34 @@ struct _GArrowStructDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowStructDataType *
 garrow_struct_data_type_new(GList *fields);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type,
                                           const gchar *name);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type,
                                         const gchar *name);

 #define GARROW_TYPE_MAP_DATA_TYPE (garrow_map_data_type_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMapDataType, garrow_map_data_type, GARROW, MAP_DATA_TYPE, GArrowListDataType)
 struct _GArrowMapDataTypeClass
@@ -105,6 +125,7 @@ GArrowDataType *
 garrow_map_data_type_get_item_type(GArrowMapDataType *map_data_type);

 #define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowUnionDataType, garrow_union_data_type, GARROW, UNION_DATA_TYPE, GArrowDataType)
 struct _GArrowUnionDataTypeClass
@@ -112,17 +133,25 @@ struct _GArrowUnionDataTypeClass
   GArrowDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GList *
 garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, gint i);
+
+GARROW_AVAILABLE_IN_ALL
 gint8 *
 garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type,
                                       gsize *n_type_codes);

 #define GARROW_TYPE_SPARSE_UNION_DATA_TYPE (garrow_sparse_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType,
                          garrow_sparse_union_data_type,
                          GARROW,
@@ -133,10 +162,12 @@ struct _GArrowSparseUnionDataTypeClass
   GArrowUnionDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowSparseUnionDataType *
 garrow_sparse_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes);

 #define GARROW_TYPE_DENSE_UNION_DATA_TYPE (garrow_dense_union_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType,
                          garrow_dense_union_data_type,
                          GARROW,
@@ -147,10 +178,12 @@ struct _GArrowDenseUnionDataTypeClass
   GArrowUnionDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDenseUnionDataType *
 garrow_dense_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes);

 #define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType,
                          garrow_dictionary_data_type,
                          GARROW,
@@ -161,22 +194,29 @@ struct _GArrowDictionaryDataTypeClass
   GArrowFixedWidthDataTypeClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryDataType *
 garrow_dictionary_data_type_new(GArrowDataType *index_data_type,
                                 GArrowDataType *value_data_type,
                                 gboolean ordered);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_dictionary_data_type_get_index_data_type(
   GArrowDictionaryDataType *dictionary_data_type);
+
 GARROW_AVAILABLE_IN_0_14
 GArrowDataType *
 garrow_dictionary_data_type_get_value_data_type(
   GArrowDictionaryDataType *dictionary_data_type);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type);

 #define GARROW_TYPE_RUN_END_ENCODED_DATA_TYPE \
   (garrow_run_end_encoded_data_type_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedDataType,
                          garrow_run_end_encoded_data_type,
                          GARROW,
@@ -195,6 +235,7 @@ GARROW_AVAILABLE_IN_13_0
 GArrowDataType *
 garrow_run_end_encoded_data_type_get_run_end_data_type(
   GArrowRunEndEncodedDataType *data_type);
+
 GARROW_AVAILABLE_IN_13_0
 GArrowDataType *
 garrow_run_end_encoded_data_type_get_value_data_type(
diff --git a/c_glib/arrow-glib/compute-definition.h b/c_glib/arrow-glib/compute-definition.h
index b699e9e99a9fc..a060f16f62cf6 100644
--- a/c_glib/arrow-glib/compute-definition.h
+++ b/c_glib/arrow-glib/compute-definition.h
@@ -21,9 +21,12 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 #define GARROW_TYPE_FUNCTION_OPTIONS (garrow_function_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFunctionOptions, garrow_function_options, GARROW, FUNCTION_OPTIONS, GObject)
 struct _GArrowFunctionOptionsClass
@@ -32,6 +35,7 @@ struct _GArrowFunctionOptionsClass
 };

 #define GARROW_TYPE_CAST_OPTIONS (garrow_cast_options_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCastOptions, garrow_cast_options, GARROW, CAST_OPTIONS, GArrowFunctionOptions)
 struct _GArrowCastOptionsClass
@@ -40,6 +44,7 @@ struct _GArrowCastOptionsClass
 };

 #define GARROW_TYPE_EXPRESSION (garrow_expression_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowExpression, garrow_expression, GARROW, EXPRESSION, GObject)
 struct _GArrowExpressionClass
 {
diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h
index 029cab136ad8f..54b0ddb014fbb 100644
--- a/c_glib/arrow-glib/compute.h
+++ b/c_glib/arrow-glib/compute.h
@@ -26,6 +26,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecuteContext, garrow_execute_context, GARROW, EXECUTE_CONTEXT, GObject)
 struct _GArrowExecuteContextClass
@@ -46,6 +47,7 @@ gchar *
 garrow_function_options_to_string(GArrowFunctionOptions *options);

 #define GARROW_TYPE_FUNCTION_DOC (garrow_function_doc_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFunctionDoc, garrow_function_doc, GARROW, FUNCTION_DOC, GObject)
 struct _GArrowFunctionDocClass
@@ -67,6 +69,7 @@ gchar *
 garrow_function_doc_get_options_class_name(GArrowFunctionDoc *doc);

 #define GARROW_TYPE_FUNCTION (garrow_function_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFunction, garrow_function, GARROW, FUNCTION, GObject)
 struct _GArrowFunctionClass
 {
@@ -110,6 +113,7 @@ gchar *
 garrow_function_to_string(GArrowFunction *function);

 #define GARROW_TYPE_EXECUTE_NODE_OPTIONS (garrow_execute_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowExecuteNodeOptions,
                          garrow_execute_node_options,
                          GARROW,
@@ -121,6 +125,7 @@ struct _GArrowExecuteNodeOptionsClass
 };

 #define GARROW_TYPE_SOURCE_NODE_OPTIONS (garrow_source_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSourceNodeOptions,
                          garrow_source_node_options,
                          GARROW,
@@ -142,6 +147,7 @@ GArrowSourceNodeOptions *
 garrow_source_node_options_new_table(GArrowTable *table);

 #define GARROW_TYPE_FILTER_NODE_OPTIONS (garrow_filter_node_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFilterNodeOptions,
                          garrow_filter_node_options,
                          GARROW,
@@ -157,6 +163,7 @@ GArrowFilterNodeOptions *
 garrow_filter_node_options_new(GArrowExpression *expression);

 #define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type())
+GARROW_AVAILABLE_IN_11_0
 G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions,
                          garrow_project_node_options,
                          GARROW,
@@ -172,6 +179,7 @@ GArrowProjectNodeOptions *
 garrow_project_node_options_new(GList *expressions, gchar **names, gsize n_names);

 #define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowAggregation, garrow_aggregation, GARROW, AGGREGATION, GObject)
 struct _GArrowAggregationClass
@@ -187,6 +195,7 @@ garrow_aggregation_new(const gchar *function,
                        const gchar *output);

 #define GARROW_TYPE_AGGREGATE_NODE_OPTIONS (garrow_aggregate_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowAggregateNodeOptions,
                          garrow_aggregate_node_options,
                          GARROW,
@@ -205,6 +214,7 @@ garrow_aggregate_node_options_new(GList *aggregations,
                                   GError **error);

 #define GARROW_TYPE_SINK_NODE_OPTIONS (garrow_sink_node_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSinkNodeOptions,
                          garrow_sink_node_options,
                          GARROW,
@@ -249,6 +259,7 @@ typedef enum {
 } GArrowJoinType;

 #define GARROW_TYPE_HASH_JOIN_NODE_OPTIONS (garrow_hash_join_node_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowHashJoinNodeOptions,
                          garrow_hash_join_node_options,
                          GARROW,
@@ -281,6 +292,7 @@ garrow_hash_join_node_options_set_right_outputs(GArrowHashJoinNodeOptions *optio
                                                 GError **error);

 #define GARROW_TYPE_EXECUTE_NODE (garrow_execute_node_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecuteNode, garrow_execute_node, GARROW, EXECUTE_NODE, GObject)
 struct _GArrowExecuteNodeClass
@@ -296,6 +308,7 @@ GArrowSchema *
 garrow_execute_node_get_output_schema(GArrowExecuteNode *node);

 #define GARROW_TYPE_EXECUTE_PLAN (garrow_execute_plan_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowExecutePlan, garrow_execute_plan, GARROW, EXECUTE_PLAN, GObject)
 struct _GArrowExecutePlanClass
@@ -365,10 +378,12 @@ GARROW_AVAILABLE_IN_6_0
 gboolean
 garrow_execute_plan_wait(GArrowExecutePlan *plan, GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowCastOptions *
 garrow_cast_options_new(void);

 #define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS (garrow_scalar_aggregate_options_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions,
                          garrow_scalar_aggregate_options,
                          GARROW,
@@ -401,6 +416,7 @@ typedef enum {
 } GArrowCountMode;

 #define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCountOptions, garrow_count_options, GARROW, COUNT_OPTIONS, GArrowFunctionOptions)
 struct _GArrowCountOptionsClass
@@ -428,6 +444,7 @@ typedef enum {
 } GArrowFilterNullSelectionBehavior;

 #define GARROW_TYPE_FILTER_OPTIONS (garrow_filter_options_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowFilterOptions,
                          garrow_filter_options,
                          GARROW,
@@ -443,6 +460,7 @@ GArrowFilterOptions *
 garrow_filter_options_new(void);

 #define GARROW_TYPE_TAKE_OPTIONS (garrow_take_options_get_type())
+GARROW_AVAILABLE_IN_0_14
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTakeOptions, garrow_take_options, GARROW, TAKE_OPTIONS, GArrowFunctionOptions)
 struct _GArrowTakeOptionsClass
@@ -487,6 +505,7 @@ typedef enum /**/ {
 } GArrowNullPlacement;

 #define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions,
                          garrow_array_sort_options,
                          GARROW,
@@ -506,6 +525,7 @@ garrow_array_sort_options_equal(GArrowArraySortOptions *options,
                                 GArrowArraySortOptions *other_options);

 #define GARROW_TYPE_SORT_KEY (garrow_sort_key_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSortKey, garrow_sort_key, GARROW, SORT_KEY, GObject)
 struct _GArrowSortKeyClass
 {
@@ -521,6 +541,7 @@ gboolean
 garrow_sort_key_equal(GArrowSortKey *sort_key, GArrowSortKey *other_sort_key);

 #define GARROW_TYPE_SORT_OPTIONS (garrow_sort_options_get_type())
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowSortOptions, garrow_sort_options, GARROW, SORT_OPTIONS, GArrowFunctionOptions)
 struct _GArrowSortOptionsClass
@@ -545,6 +566,7 @@ void
 garrow_sort_options_add_sort_key(GArrowSortOptions *options, GArrowSortKey *sort_key);

 #define GARROW_TYPE_SET_LOOKUP_OPTIONS (garrow_set_lookup_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSetLookupOptions,
                          garrow_set_lookup_options,
                          GARROW,
@@ -560,6 +582,7 @@ GArrowSetLookupOptions *
 garrow_set_lookup_options_new(GArrowDatum *value_set);

 #define GARROW_TYPE_VARIANCE_OPTIONS (garrow_variance_options_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowVarianceOptions,
                          garrow_variance_options,
                          GARROW,
@@ -620,6 +643,7 @@ typedef enum {
 } GArrowRoundMode;

 #define GARROW_TYPE_ROUND_OPTIONS (garrow_round_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowRoundOptions, garrow_round_options, GARROW, ROUND_OPTIONS, GArrowFunctionOptions)
 struct _GArrowRoundOptionsClass
@@ -633,6 +657,7 @@ garrow_round_options_new(void);

 #define GARROW_TYPE_ROUND_TO_MULTIPLE_OPTIONS \
   (garrow_round_to_multiple_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRoundToMultipleOptions,
                          garrow_round_to_multiple_options,
                          GARROW,
@@ -648,6 +673,7 @@ GArrowRoundToMultipleOptions *
 garrow_round_to_multiple_options_new(void);

 #define GARROW_TYPE_MATCH_SUBSTRING_OPTIONS (garrow_match_substring_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(GArrowMatchSubstringOptions,
                          garrow_match_substring_options,
                          GARROW,
@@ -683,6 +709,7 @@ typedef enum /*< underscore_name=garrow_utf8_normalize_form >*/ {
 } GArrowUTF8NormalizeForm;

 #define GARROW_TYPE_UTF8_NORMALIZE_OPTIONS (garrow_utf8_normalize_options_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(GArrowUTF8NormalizeOptions,
                          garrow_utf8_normalize_options,
                          GARROW,
@@ -719,6 +746,7 @@ typedef enum {
 } GArrowQuantileInterpolation;

 #define GARROW_TYPE_QUANTILE_OPTIONS (garrow_quantile_options_get_type())
+GARROW_AVAILABLE_IN_9_0
 G_DECLARE_DERIVABLE_TYPE(GArrowQuantileOptions,
                          garrow_quantile_options,
                          GARROW,
@@ -745,6 +773,7 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options,
                                gsize n);

 #define GARROW_TYPE_INDEX_OPTIONS (garrow_index_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowIndexOptions, garrow_index_options, GARROW, INDEX_OPTIONS, GArrowFunctionOptions)
 struct _GArrowIndexOptionsClass
@@ -782,6 +811,7 @@ typedef enum {
 } GArrowRankTiebreaker;

 #define GARROW_TYPE_RANK_OPTIONS (garrow_rank_options_get_type())
+GARROW_AVAILABLE_IN_12_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowRankOptions, garrow_rank_options, GARROW, RANK_OPTIONS, GArrowFunctionOptions)
 struct _GArrowRankOptionsClass
@@ -805,18 +835,25 @@ GARROW_AVAILABLE_IN_12_0
 void
 garrow_rank_options_add_sort_key(GArrowRankOptions *options, GArrowSortKey *sort_key);

+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_cast(GArrowArray *array,
                   GArrowDataType *target_data_type,
                   GArrowCastOptions *options,
                   GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowArray *
 garrow_array_unique(GArrowArray *array, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDictionaryArray *
 garrow_array_dictionary_encode(GArrowArray *array, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 gint64
 garrow_array_count(GArrowArray *array, GArrowCountOptions *options, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 GArrowStructArray *
 garrow_array_count_values(GArrowArray *array, GError **error);
@@ -987,6 +1024,7 @@ garrow_record_batch_filter(GArrowRecordBatch *record_batch,
                            GError **error);

 #define GARROW_TYPE_RUN_END_ENCODE_OPTIONS (garrow_run_end_encode_options_get_type())
+GARROW_AVAILABLE_IN_13_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodeOptions,
                          garrow_run_end_encode_options,
                          GARROW,
@@ -1011,6 +1049,7 @@ GArrowArray *
 garrow_run_end_encoded_array_decode(GArrowRunEndEncodedArray *array, GError **error);

 #define GARROW_TYPE_STRPTIME_OPTIONS (garrow_strptime_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeOptions,
                          garrow_strptime_options,
                          GARROW,
@@ -1026,6 +1065,7 @@ GArrowStrptimeOptions *
 garrow_strptime_options_new(void);

 #define GARROW_TYPE_STRFTIME_OPTIONS (garrow_strftime_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStrftimeOptions,
                          garrow_strftime_options,
                          GARROW,
@@ -1041,6 +1081,7 @@ GArrowStrftimeOptions *
 garrow_strftime_options_new(void);

 #define GARROW_TYPE_SPLIT_PATTERN_OPTIONS (garrow_split_pattern_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowSplitPatternOptions,
                          garrow_split_pattern_options,
                          GARROW,
@@ -1056,6 +1097,7 @@ GArrowSplitPatternOptions *
 garrow_split_pattern_options_new(void);

 #define GARROW_TYPE_STRUCT_FIELD_OPTIONS (garrow_struct_field_options_get_type())
+GARROW_AVAILABLE_IN_16_0
 G_DECLARE_DERIVABLE_TYPE(GArrowStructFieldOptions,
                          garrow_struct_field_options,
                          GARROW,
diff --git a/c_glib/arrow-glib/datum.h b/c_glib/arrow-glib/datum.h
index df5e9a1c2cf4f..fc9a2fe7ab907 100644
--- a/c_glib/arrow-glib/datum.h
+++ b/c_glib/arrow-glib/datum.h
@@ -28,6 +28,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_DATUM (garrow_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDatum, garrow_datum, GARROW, DATUM, GObject)
 struct _GArrowDatumClass
 {
@@ -60,6 +61,7 @@ garrow_datum_to_string(GArrowDatum *datum);
 /* GARROW_TYPE_NONE_DATUM */

 #define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type())
+GARROW_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowScalarDatum, garrow_scalar_datum, GARROW, SCALAR_DATUM, GArrowDatum)
 struct _GArrowScalarDatumClass
@@ -72,6 +74,7 @@ GArrowScalarDatum *
 garrow_scalar_datum_new(GArrowScalar *value);

 #define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowArrayDatum, garrow_array_datum, GARROW, ARRAY_DATUM, GArrowDatum)
 struct _GArrowArrayDatumClass
@@ -84,6 +87,7 @@ GArrowArrayDatum *
 garrow_array_datum_new(GArrowArray *value);

 #define GARROW_TYPE_CHUNKED_ARRAY_DATUM (garrow_chunked_array_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowChunkedArrayDatum,
                          garrow_chunked_array_datum,
                          GARROW,
@@ -99,6 +103,7 @@ GArrowChunkedArrayDatum *
 garrow_chunked_array_datum_new(GArrowChunkedArray *value);

 #define GARROW_TYPE_RECORD_BATCH_DATUM (garrow_record_batch_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchDatum,
                          garrow_record_batch_datum,
                          GARROW,
@@ -114,6 +119,7 @@ GArrowRecordBatchDatum *
 garrow_record_batch_datum_new(GArrowRecordBatch *value);

 #define GARROW_TYPE_TABLE_DATUM (garrow_table_datum_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowTableDatum, garrow_table_datum, GARROW, TABLE_DATUM, GArrowDatum)
 struct _GArrowTableDatumClass
diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h
index b967fa36d5611..f64afa800a19b 100644
--- a/c_glib/arrow-glib/decimal.h
+++ b/c_glib/arrow-glib/decimal.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS

 /* Disabled because it conflicts with GARROW_TYPE_DECIMAL128 in GArrowType. */
 /* #define GARROW_TYPE_DECIMAL128 (garrow_decimal128_get_type()) */
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128, garrow_decimal128, GARROW, DECIMAL128, GObject)
 struct _GArrowDecimal128Class
@@ -34,8 +35,10 @@ struct _GArrowDecimal128Class
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_new_string(const gchar *data, GError **error);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_new_integer(const gint64 data);
 GARROW_AVAILABLE_IN_3_0
@@ -62,25 +65,34 @@ GARROW_AVAILABLE_IN_0_12
 gboolean
 garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal,
                                         GArrowDecimal128 *other_decimal);
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale);
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_decimal128_to_string(GArrowDecimal128 *decimal);
 GARROW_AVAILABLE_IN_3_0
 GBytes *
 garrow_decimal128_to_bytes(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_decimal128_abs(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 void
 garrow_decimal128_negate(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_decimal128_to_integer(GArrowDecimal128 *decimal);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_plus(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_minus(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_multiply(GArrowDecimal128 *left, GArrowDecimal128 *right);
+GARROW_AVAILABLE_IN_ALL
 GArrowDecimal128 *
 garrow_decimal128_divide(GArrowDecimal128 *left,
                          GArrowDecimal128 *right,
@@ -95,6 +107,7 @@ garrow_decimal128_rescale(GArrowDecimal128 *decimal,

 /* Disabled because it conflicts with GARROW_TYPE_DECIMAL256 in GArrowType. */
 /* #define GARROW_TYPE_DECIMAL256 (garrow_decimal256_get_type()) */
+GARROW_AVAILABLE_IN_3_0
 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256, garrow_decimal256, GARROW, DECIMAL256, GObject)
 struct _GArrowDecimal256Class
diff --git a/c_glib/arrow-glib/error.h b/c_glib/arrow-glib/error.h
index 4414417a1a25b..e0c6a591a021b 100644
--- a/c_glib/arrow-glib/error.h
+++ b/c_glib/arrow-glib/error.h
@@ -21,6 +21,8 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 /**
@@ -66,6 +68,7 @@ typedef enum {

 #define GARROW_ERROR garrow_error_quark()

+GARROW_AVAILABLE_IN_ALL
 GQuark
 garrow_error_quark(void);
diff --git a/c_glib/arrow-glib/error.hpp b/c_glib/arrow-glib/error.hpp
index 90a0f3161878e..c2c9b3c63028a 100644
--- a/c_glib/arrow-glib/error.hpp
+++ b/c_glib/arrow-glib/error.hpp
@@ -23,18 +23,26 @@

 #include

+GARROW_EXTERN
 gboolean
 garrow_error_check(GError **error, const arrow::Status &status, const char *context);
+
+GARROW_EXTERN
 GArrowError
 garrow_error_from_status(const arrow::Status &status);
+
+GARROW_EXTERN
 arrow::StatusCode
 garrow_error_to_status_code(GError *error, arrow::StatusCode default_code);
+
+GARROW_EXTERN
 arrow::Status
 garrow_error_to_status(GError *error,
                        arrow::StatusCode default_code,
                        const char *context);

 namespace garrow {
+  GARROW_EXTERN
   gboolean
   check(GError **error, const arrow::Status &status, const char *context);
diff --git a/c_glib/arrow-glib/expression.h b/c_glib/arrow-glib/expression.h
index 3141ed4df18b7..5a6bfb456fc64 100644
--- a/c_glib/arrow-glib/expression.h
+++ b/c_glib/arrow-glib/expression.h
@@ -31,6 +31,7 @@ gboolean
 garrow_expression_equal(GArrowExpression *expression,
                         GArrowExpression *other_expression);

 #define GARROW_TYPE_LITERAL_EXPRESSION (garrow_literal_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowLiteralExpression,
                          garrow_literal_expression,
                          GARROW,
@@ -46,6 +47,7 @@ GArrowLiteralExpression *
 garrow_literal_expression_new(GArrowDatum *datum);

 #define GARROW_TYPE_FIELD_EXPRESSION (garrow_field_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(GArrowFieldExpression,
                          garrow_field_expression,
                          GARROW,
@@ -61,6 +63,7 @@ GArrowFieldExpression *
 garrow_field_expression_new(const gchar *reference, GError **error);

 #define GARROW_TYPE_CALL_EXPRESSION (garrow_call_expression_get_type())
+GARROW_AVAILABLE_IN_6_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowCallExpression, garrow_call_expression, GARROW, CALL_EXPRESSION, GArrowExpression)
 struct _GArrowCallExpressionClass
diff --git a/c_glib/arrow-glib/expression.hpp b/c_glib/arrow-glib/expression.hpp
index 60d5c9fe2f1bd..cc96badbe67aa 100644
--- a/c_glib/arrow-glib/expression.hpp
+++ b/c_glib/arrow-glib/expression.hpp
@@ -23,7 +23,10 @@

 #include

+GARROW_EXTERN
 GArrowExpression *
 garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression);
+
+GARROW_EXTERN
 arrow::compute::Expression *
 garrow_expression_get_raw(GArrowExpression *expression);
diff --git a/c_glib/arrow-glib/field.h b/c_glib/arrow-glib/field.h
index 8de63757878c9..4be13f6135975 100644
--- a/c_glib/arrow-glib/field.h
+++ b/c_glib/arrow-glib/field.h
@@ -24,6 +24,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_FIELD (garrow_field_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowField, garrow_field, GARROW, FIELD, GObject)
 struct _GArrowFieldClass
 {
@@ -34,8 +35,10 @@ GARROW_AVAILABLE_IN_6_0
 GArrowField *
 garrow_field_import(gpointer c_abi_schema, GError **error);

+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_field_new(const gchar *name, GArrowDataType *data_type);
+GARROW_AVAILABLE_IN_ALL
 GArrowField *
 garrow_field_new_full(const gchar *name, GArrowDataType *data_type, gboolean nullable);
@@ -43,18 +46,26 @@ GARROW_AVAILABLE_IN_6_0
 gpointer
 garrow_field_export(GArrowField *field, GError **error);

+GARROW_AVAILABLE_IN_ALL
 const gchar *
 garrow_field_get_name(GArrowField *field);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowDataType *
 garrow_field_get_data_type(GArrowField *field);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_field_is_nullable(GArrowField *field);

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_field_equal(GArrowField *field, GArrowField *other_field);

+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_field_to_string(GArrowField *field);
+
 GARROW_AVAILABLE_IN_3_0
 gchar *
 garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata);
diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h
index d3d5fde73fe23..2e500672e145c 100644
--- a/c_glib/arrow-glib/file-system.h
+++ b/c_glib/arrow-glib/file-system.h
@@ -53,6 +53,7 @@ typedef enum {
 /* arrow::fs::FileInfo */

 #define GARROW_TYPE_FILE_INFO (garrow_file_info_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowFileInfo, garrow_file_info, GARROW, FILE_INFO, GObject)
 struct _GArrowFileInfoClass
 {
@@ -80,6 +81,7 @@ garrow_file_info_to_string(GArrowFileInfo *file_info);
 /* arrow::fs::FileSelector */

 #define GARROW_TYPE_FILE_SELECTOR (garrow_file_selector_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFileSelector, garrow_file_selector, GARROW, FILE_SELECTOR, GObject)
 struct _GArrowFileSelectorClass
@@ -90,6 +92,7 @@ struct _GArrowFileSelectorClass
 /* arrow::fs::FileSystem */

 #define GARROW_TYPE_FILE_SYSTEM (garrow_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(
   GArrowFileSystem, garrow_file_system, GARROW, FILE_SYSTEM, GObject)
 struct _GArrowFileSystemClass
@@ -197,6 +200,7 @@ garrow_file_system_open_append_stream(GArrowFileSystem *file_system,
 /* arrow::fs::SubTreeFileSystem */

 #define GARROW_TYPE_SUB_TREE_FILE_SYSTEM (garrow_sub_tree_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowSubTreeFileSystem,
                          garrow_sub_tree_file_system,
                          GARROW,
@@ -215,6 +219,7 @@ garrow_sub_tree_file_system_new(const gchar *base_path,
 /* arrow::fs::SlowFileSystem */

 #define GARROW_TYPE_SLOW_FILE_SYSTEM (garrow_slow_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowSlowFileSystem,
                          garrow_slow_file_system,
                          GARROW,
@@ -244,6 +249,7 @@ garrow_slow_file_system_new_average_latency_and_seed(GArrowFileSystem *base_file
                                                      gint32 seed);

 #define GARROW_TYPE_MOCK_FILE_SYSTEM (garrow_mock_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowMockFileSystem,
                          garrow_mock_file_system,
                          GARROW,
@@ -255,6 +261,7 @@ struct _GArrowMockFileSystemClass
 };

 #define GARROW_TYPE_HDFS_FILE_SYSTEM (garrow_hdfs_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowHDFSFileSystem,
                          garrow_hdfs_file_system,
                          GARROW,
@@ -290,6 +297,7 @@ typedef enum {
 } GArrowS3LogLevel;

 #define GARROW_TYPE_S3_GLOBAL_OPTIONS (garrow_s3_global_options_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowS3GlobalOptions, garrow_s3_global_options, GARROW, S3_GLOBAL_OPTIONS, GObject)
 struct _GArrowS3GlobalOptionsClass
@@ -312,6 +320,7 @@ gboolean
 garrow_s3_finalize(GError **error);

 #define GARROW_TYPE_S3_FILE_SYSTEM (garrow_s3_file_system_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowS3FileSystem, garrow_s3_file_system, GARROW, S3_FILE_SYSTEM, GArrowFileSystem)
 struct _GArrowS3FileSystemClass
@@ -320,6 +329,7 @@ struct _GArrowS3FileSystemClass
 };

 #define GARROW_TYPE_GCS_FILE_SYSTEM (garrow_gcs_file_system_get_type())
+GARROW_AVAILABLE_IN_7_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowGCSFileSystem, garrow_gcs_file_system, GARROW, GCS_FILE_SYSTEM, GArrowFileSystem)
 struct _GArrowGCSFileSystemClass
diff --git a/c_glib/arrow-glib/file-system.hpp b/c_glib/arrow-glib/file-system.hpp
index f41fc6e9c75b0..c535958301c5c 100644
--- a/c_glib/arrow-glib/file-system.hpp
+++ b/c_glib/arrow-glib/file-system.hpp
@@ -23,28 +23,35 @@

 #include

+GARROW_EXTERN
 GArrowFileInfo *
 garrow_file_info_new_raw(const arrow::fs::FileInfo &arrow_file_info);

+GARROW_EXTERN
 arrow::fs::FileInfo *
 garrow_file_info_get_raw(GArrowFileInfo *file_info);

+GARROW_EXTERN
 GArrowFileSystem *
 garrow_file_system_new_raw(std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system);

+GARROW_EXTERN
 std::shared_ptr<arrow::fs::FileSystem>
 garrow_file_system_get_raw(GArrowFileSystem *file_system);

+GARROW_EXTERN
 GArrowSubTreeFileSystem *
 garrow_sub_tree_file_system_new_raw(
   std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system,
   GArrowFileSystem *base_file_system);

+GARROW_EXTERN
 GArrowSlowFileSystem *
 garrow_slow_file_system_new_raw(std::shared_ptr<arrow::fs::FileSystem> *arrow_file_system,
                                 GArrowFileSystem *base_file_system);

 #ifdef ARROW_S3
+GARROW_EXTERN
 arrow::fs::S3GlobalOptions *
 garrow_s3_global_options_get_raw(GArrowS3GlobalOptions *options);
 #endif
diff --git a/c_glib/arrow-glib/file.h b/c_glib/arrow-glib/file.h
index 42afed139463c..799dd83b9c243 100644
--- a/c_glib/arrow-glib/file.h
+++ b/c_glib/arrow-glib/file.h
@@ -27,15 +27,22 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_FILE (garrow_file_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_INTERFACE(GArrowFile, garrow_file, GARROW, FILE, GObject)

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_file_close(GArrowFile *file, GError **error);
+
 GARROW_AVAILABLE_IN_0_13
 gboolean
 garrow_file_is_closed(GArrowFile *file);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_file_tell(GArrowFile *file, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowFileMode
 garrow_file_get_mode(GArrowFile *file);
diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp
index 03a3f03fff7ce..52c79993e4ca8 100644
--- a/c_glib/arrow-glib/input-stream.cpp
+++ b/c_glib/arrow-glib/input-stream.cpp
@@ -35,6 +35,22 @@
 #include
 #include

+static std::shared_ptr<arrow::io::FileInterface>
+garrow_input_stream_get_raw_file_interface(GArrowFile *file)
+{
+  auto input_stream = GARROW_INPUT_STREAM(file);
+  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
+  return arrow_input_stream;
+}
+
+static std::shared_ptr<arrow::io::Readable>
+garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable)
+{
+  auto input_stream = GARROW_INPUT_STREAM(readable);
+  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
+  return arrow_input_stream;
+}
+
 G_BEGIN_DECLS

 /**
@@ -71,28 +87,12 @@ enum {
   PROP_INPUT_STREAM = 1
 };

-static std::shared_ptr<arrow::io::FileInterface>
-garrow_input_stream_get_raw_file_interface(GArrowFile *file)
-{
-  auto input_stream = GARROW_INPUT_STREAM(file);
-  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
-  return arrow_input_stream;
-}
-
 static void
 garrow_input_stream_file_interface_init(GArrowFileInterface *iface)
 {
   iface->get_raw = garrow_input_stream_get_raw_file_interface;
 }

-static std::shared_ptr<arrow::io::Readable>
-garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable)
-{
-  auto input_stream = GARROW_INPUT_STREAM(readable);
-  auto arrow_input_stream = garrow_input_stream_get_raw(input_stream);
-  return arrow_input_stream;
-}
-
 static void
 garrow_input_stream_readable_interface_init(GArrowReadableInterface *iface)
 {
diff --git a/c_glib/arrow-glib/input-stream.h b/c_glib/arrow-glib/input-stream.h
index 3e2a2ecdbd4fa..676f2f44b0041 100644
--- a/c_glib/arrow-glib/input-stream.h
+++ b/c_glib/arrow-glib/input-stream.h
@@ -30,6 +30,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_INPUT_STREAM (garrow_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowInputStream, garrow_input_stream, GARROW, INPUT_STREAM, GInputStream)
 struct _GArrowInputStreamClass
@@ -37,16 +38,22 @@ struct _GArrowInputStreamClass
   GInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_input_stream_advance(GArrowInputStream *input_stream,
                             gint64 n_bytes,
                             GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_input_stream_align(GArrowInputStream *input_stream,
                           gint32 alignment,
                           GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTensor *
 garrow_input_stream_read_tensor(GArrowInputStream *input_stream, GError **error);
+
 GARROW_AVAILABLE_IN_1_0
 GArrowRecordBatch *
 garrow_input_stream_read_record_batch(GArrowInputStream *input_stream,
@@ -55,6 +62,7 @@ garrow_input_stream_read_record_batch(GArrowInputStream *input_stream,
                                       GError **error);

 #define GARROW_TYPE_SEEKABLE_INPUT_STREAM (garrow_seekable_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowSeekableInputStream,
                          garrow_seekable_input_stream,
                          GARROW,
@@ -65,12 +73,17 @@ struct _GArrowSeekableInputStreamClass
   GArrowInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 guint64
 garrow_seekable_input_stream_get_size(GArrowSeekableInputStream *input_stream,
                                       GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_seekable_input_stream_get_support_zero_copy(
   GArrowSeekableInputStream *input_stream);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream,
                                      gint64 position,
@@ -89,6 +102,7 @@ garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream,
                                   GError **error);

 #define GARROW_TYPE_BUFFER_INPUT_STREAM (garrow_buffer_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBufferInputStream,
                          garrow_buffer_input_stream,
                          GARROW,
@@ -99,13 +113,16 @@ struct _GArrowBufferInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowBufferInputStream *
 garrow_buffer_input_stream_new(GArrowBuffer *buffer);

+GARROW_AVAILABLE_IN_ALL
 GArrowBuffer *
 garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream);

 #define GARROW_TYPE_FILE_INPUT_STREAM (garrow_file_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFileInputStream,
                          garrow_file_input_stream,
                          GARROW,
@@ -116,15 +133,21 @@ struct _GArrowFileInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowFileInputStream *
 garrow_file_input_stream_new(const gchar *path, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowFileInputStream *
 garrow_file_input_stream_new_file_descriptor(gint file_descriptor, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint
 garrow_file_input_stream_get_file_descriptor(GArrowFileInputStream *stream);

 #define GARROW_TYPE_MEMORY_MAPPED_INPUT_STREAM \
   (garrow_memory_mapped_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowMemoryMappedInputStream,
                          garrow_memory_mapped_input_stream,
                          GARROW,
@@ -135,10 +158,12 @@ struct _GArrowMemoryMappedInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowMemoryMappedInputStream *
 garrow_memory_mapped_input_stream_new(const gchar *path, GError **error);

 #define GARROW_TYPE_GIO_INPUT_STREAM (garrow_gio_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowGIOInputStream,
                          garrow_gio_input_stream,
                          GARROW,
@@ -149,15 +174,19 @@ struct _GArrowGIOInputStreamClass
   GArrowSeekableInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowGIOInputStream *
 garrow_gio_input_stream_new(GInputStream *gio_input_stream);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED
 GInputStream *
 garrow_gio_input_stream_get_raw(GArrowGIOInputStream *input_stream);
 #endif

 #define GARROW_TYPE_COMPRESSED_INPUT_STREAM (garrow_compressed_input_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCompressedInputStream,
                          garrow_compressed_input_stream,
                          GARROW,
@@ -168,6 +197,7 @@ struct _GArrowCompressedInputStreamClass
   GArrowInputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowCompressedInputStream *
 garrow_compressed_input_stream_new(GArrowCodec *codec,
                                    GArrowInputStream *raw,
diff --git a/c_glib/arrow-glib/input-stream.hpp b/c_glib/arrow-glib/input-stream.hpp
index 7ae759370ddbd..0400398c4777f 100644
--- a/c_glib/arrow-glib/input-stream.hpp
+++ b/c_glib/arrow-glib/input-stream.hpp
@@ -26,34 +26,48 @@

 #include

+GARROW_EXTERN
 GArrowInputStream *
 garrow_input_stream_new_raw(std::shared_ptr<arrow::io::InputStream> *arrow_input_stream);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::InputStream>
 garrow_input_stream_get_raw(GArrowInputStream *input_stream);

+GARROW_EXTERN
 GArrowSeekableInputStream *
 garrow_seekable_input_stream_new_raw(
   std::shared_ptr<arrow::io::RandomAccessFile> *arrow_random_access_file);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::RandomAccessFile>
 garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *input_stream);

+GARROW_EXTERN
 GArrowBufferInputStream *
 garrow_buffer_input_stream_new_raw(
   std::shared_ptr<arrow::io::BufferReader> *arrow_buffer_reader, GArrowBuffer *buffer);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::BufferReader>
 garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *input_stream);

+GARROW_EXTERN
 GArrowFileInputStream *
 garrow_file_input_stream_new_raw(std::shared_ptr<arrow::io::ReadableFile> *arrow_stream);

+GARROW_EXTERN
 GArrowMemoryMappedInputStream *
 garrow_memory_mapped_input_stream_new_raw(
   std::shared_ptr<arrow::io::MemoryMappedFile> *arrow_stream);

+GARROW_EXTERN
 GArrowCompressedInputStream *
 garrow_compressed_input_stream_new_raw(
   std::shared_ptr<arrow::io::CompressedInputStream> *arrow_raw,
   GArrowCodec *codec,
   GArrowInputStream *raw);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::CompressedInputStream>
 garrow_compressed_input_stream_get_raw(GArrowCompressedInputStream *stream);
diff --git a/c_glib/arrow-glib/interval.h b/c_glib/arrow-glib/interval.h
index a6c9e1ff1e1ef..8c23b9a509bb4 100644
--- a/c_glib/arrow-glib/interval.h
+++ b/c_glib/arrow-glib/interval.h
@@ -26,6 +26,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_DAY_MILLISECOND (garrow_day_millisecond_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowDayMillisecond, garrow_day_millisecond, GARROW, DAY_MILLISECOND, GObject)

@@ -47,6 +48,7 @@ garrow_day_millisecond_less_than(GArrowDayMillisecond *day_millisecond,
                                  GArrowDayMillisecond *other_day_millisecond);

 #define GARROW_TYPE_MONTH_DAY_NANO (garrow_month_day_nano_get_type())
+GARROW_AVAILABLE_IN_8_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMonthDayNano, garrow_month_day_nano, GARROW, MONTH_DAY_NANO, GObject)

diff --git a/c_glib/arrow-glib/ipc-options.h b/c_glib/arrow-glib/ipc-options.h
index 418b08f080152..1ddff059d2faf 100644
--- a/c_glib/arrow-glib/ipc-options.h
+++ b/c_glib/arrow-glib/ipc-options.h
@@ -26,6 +26,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_READ_OPTIONS (garrow_read_options_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowReadOptions, garrow_read_options, GARROW, READ_OPTIONS, GObject)
 struct _GArrowReadOptionsClass
@@ -46,6 +47,7 @@ garrow_read_options_set_included_fields(GArrowReadOptions *options,
                                         gsize n_fields);

 #define GARROW_TYPE_WRITE_OPTIONS (garrow_write_options_get_type())
+GARROW_AVAILABLE_IN_1_0
 G_DECLARE_DERIVABLE_TYPE(
   GArrowWriteOptions, garrow_write_options, GARROW, WRITE_OPTIONS, GObject)
 struct _GArrowWriteOptionsClass
diff --git a/c_glib/arrow-glib/ipc-options.hpp b/c_glib/arrow-glib/ipc-options.hpp
index f57fbd3c11e5a..838d05d41dbac 100644
--- a/c_glib/arrow-glib/ipc-options.hpp
+++ b/c_glib/arrow-glib/ipc-options.hpp
@@ -23,10 +23,14 @@

 #include

+GARROW_EXTERN
 arrow::ipc::IpcReadOptions *
 garrow_read_options_get_raw(GArrowReadOptions *options);
+
+GARROW_EXTERN
 arrow::ipc::DictionaryMemo *
 garrow_read_options_get_dictionary_memo_raw(GArrowReadOptions *options);

+GARROW_EXTERN
 arrow::ipc::IpcWriteOptions *
 garrow_write_options_get_raw(GArrowWriteOptions *options);
diff --git a/c_glib/arrow-glib/local-file-system.h b/c_glib/arrow-glib/local-file-system.h
index 9af4f8e8b168d..6ad2ee9f231ab 100644
--- a/c_glib/arrow-glib/local-file-system.h
+++ b/c_glib/arrow-glib/local-file-system.h
@@ -27,6 +27,7 @@ G_BEGIN_DECLS

 #define GARROW_TYPE_LOCAL_FILE_SYSTEM_OPTIONS \
   (garrow_local_file_system_options_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystemOptions,
                          garrow_local_file_system_options,
                          GARROW,
@@ -44,6 +45,7 @@ garrow_local_file_system_options_new(void);
 /* arrow::fs::LocalFileSystem */

 #define GARROW_TYPE_LOCAL_FILE_SYSTEM (garrow_local_file_system_get_type())
+GARROW_AVAILABLE_IN_0_17
 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystem,
                          garrow_local_file_system,
                          GARROW,
diff --git a/c_glib/arrow-glib/memory-pool.h b/c_glib/arrow-glib/memory-pool.h
index de2a5d717a183..7da15a9eb1b47 100644
--- a/c_glib/arrow-glib/memory-pool.h
+++ b/c_glib/arrow-glib/memory-pool.h
@@ -21,9 +21,12 @@

 #include

+#include <arrow-glib/version.h>
+
 G_BEGIN_DECLS

 #define GARROW_TYPE_MEMORY_POOL (garrow_memory_pool_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowMemoryPool, garrow_memory_pool, GARROW, MEMORY_POOL, GObject)
 struct _GArrowMemoryPoolClass
@@ -31,12 +34,19 @@ struct _GArrowMemoryPoolClass
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowMemoryPool *
 garrow_memory_pool_default();
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_memory_pool_get_bytes_allocated(GArrowMemoryPool *memory_pool);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_memory_pool_get_max_memory(GArrowMemoryPool *memory_pool);
+
+GARROW_AVAILABLE_IN_ALL
 gchar *
 garrow_memory_pool_get_backend_name(GArrowMemoryPool *memory_pool);
diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build
index 11fe8f61a3c2d..fd32b35badcb1 100644
--- a/c_glib/arrow-glib/meson.build
+++ b/c_glib/arrow-glib/meson.build
@@ -249,6 +249,7 @@ libarrow_glib = library('arrow-glib',
                         dependencies: dependencies,
                         implicit_include_directories: false,
                         include_directories: base_include_directories,
+                        cpp_args: ['-DGARROW_COMPILATION'],
                         soversion: so_version,
                         version: library_version)
 arrow_glib = declare_dependency(link_with: libarrow_glib,
diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h
index 20089eb2866c6..4eb3df5242e48 100644
--- a/c_glib/arrow-glib/orc-file-reader.h
+++ b/c_glib/arrow-glib/orc-file-reader.h
@@ -24,6 +24,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_ORC_FILE_READER (garrow_orc_file_reader_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowORCFileReader, garrow_orc_file_reader, GARROW, ORC_FILE_READER, GObject)
 struct _GArrowORCFileReaderClass
@@ -31,10 +32,12 @@ struct _GArrowORCFileReaderClass
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowORCFileReader *
 garrow_orc_file_reader_new(GArrowSeekableInputStream *file, GError **error);

 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices)
 void
 garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader,
@@ -47,6 +50,7 @@ garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader,
                                          const gint *field_indices,
                                          guint n_field_indices);
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices)
 const gint *
 garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader,
@@ -56,14 +60,24 @@ GARROW_AVAILABLE_IN_0_12
 const gint *
 garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader,
                                          guint *n_field_indices);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowSchema *
 garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowTable *
 garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 GArrowRecordBatch *
 garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader,
                                    gint64 i,
                                    GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader);
diff --git a/c_glib/arrow-glib/output-stream.cpp b/c_glib/arrow-glib/output-stream.cpp
index 83de2eb38a72a..d9bdf7ad8b786 100644
--- a/c_glib/arrow-glib/output-stream.cpp
+++ b/c_glib/arrow-glib/output-stream.cpp
@@ -33,6 +33,22 @@
 #include
 #include

+static std::shared_ptr<arrow::io::FileInterface>
+garrow_output_stream_get_raw_file_interface(GArrowFile *file)
+{
+  auto output_stream = GARROW_OUTPUT_STREAM(file);
+  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
+  return arrow_output_stream;
+}
+
+static std::shared_ptr<arrow::io::Writable>
+garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable)
+{
+  auto output_stream = GARROW_OUTPUT_STREAM(writable);
+  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
+  return arrow_output_stream;
+}
+
 G_BEGIN_DECLS

 /**
@@ -65,28 +81,12 @@ enum {
   PROP_OUTPUT_STREAM
 };

-static std::shared_ptr<arrow::io::FileInterface>
-garrow_output_stream_get_raw_file_interface(GArrowFile *file)
-{
-  auto output_stream = GARROW_OUTPUT_STREAM(file);
-  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
-  return arrow_output_stream;
-}
-
 static void
 garrow_output_stream_file_interface_init(GArrowFileInterface *iface)
 {
   iface->get_raw = garrow_output_stream_get_raw_file_interface;
 }

-static std::shared_ptr<arrow::io::Writable>
-garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable)
-{
-  auto output_stream = GARROW_OUTPUT_STREAM(writable);
-  auto arrow_output_stream = garrow_output_stream_get_raw(output_stream);
-  return arrow_output_stream;
-}
-
 static void
 garrow_output_stream_writable_interface_init(GArrowWritableInterface *iface)
 {
diff --git a/c_glib/arrow-glib/output-stream.h b/c_glib/arrow-glib/output-stream.h
index 1b18c08c14a5f..5c8b4b9374fc6 100644
--- a/c_glib/arrow-glib/output-stream.h
+++ b/c_glib/arrow-glib/output-stream.h
@@ -30,6 +30,7 @@
 G_BEGIN_DECLS

 #define GARROW_TYPE_OUTPUT_STREAM (garrow_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(
   GArrowOutputStream, garrow_output_stream, GARROW, OUTPUT_STREAM, GObject)
 struct _GArrowOutputStreamClass
@@ -37,8 +38,11 @@ struct _GArrowOutputStreamClass
   GObjectClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 gboolean
 garrow_output_stream_align(GArrowOutputStream *stream, gint32 alignment, GError **error);
+
+GARROW_AVAILABLE_IN_ALL
 gint64
 garrow_output_stream_write_tensor(GArrowOutputStream *stream,
                                   GArrowTensor *tensor,
@@ -51,6 +55,7 @@ garrow_output_stream_write_record_batch(GArrowOutputStream *stream,
                                         GError **error);

 #define GARROW_TYPE_FILE_OUTPUT_STREAM (garrow_file_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowFileOutputStream,
                          garrow_file_output_stream,
                          GARROW,
@@ -61,10 +66,12 @@ struct _GArrowFileOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowFileOutputStream *
 garrow_file_output_stream_new(const gchar *path, gboolean append, GError **error);

 #define GARROW_TYPE_BUFFER_OUTPUT_STREAM (garrow_buffer_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowBufferOutputStream,
                          garrow_buffer_output_stream,
                          GARROW,
@@ -75,10 +82,12 @@ struct _GArrowBufferOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowBufferOutputStream *
 garrow_buffer_output_stream_new(GArrowResizableBuffer *buffer);

 #define GARROW_TYPE_GIO_OUTPUT_STREAM (garrow_gio_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowGIOOutputStream,
                          garrow_gio_output_stream,
                          GARROW,
@@ -89,15 +98,19 @@ struct _GArrowGIOOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowGIOOutputStream *
 garrow_gio_output_stream_new(GOutputStream *gio_output_stream);
+
 #ifndef GARROW_DISABLE_DEPRECATED
+GARROW_AVAILABLE_IN_ALL
 G_GNUC_DEPRECATED
 GOutputStream *
 garrow_gio_output_stream_get_raw(GArrowGIOOutputStream *output_stream);
 #endif

 #define GARROW_TYPE_COMPRESSED_OUTPUT_STREAM (garrow_compressed_output_stream_get_type())
+GARROW_AVAILABLE_IN_ALL
 G_DECLARE_DERIVABLE_TYPE(GArrowCompressedOutputStream,
                          garrow_compressed_output_stream,
                          GARROW,
@@ -108,6 +121,7 @@ struct _GArrowCompressedOutputStreamClass
   GArrowOutputStreamClass parent_class;
 };

+GARROW_AVAILABLE_IN_ALL
 GArrowCompressedOutputStream *
 garrow_compressed_output_stream_new(GArrowCodec *codec,
                                     GArrowOutputStream *raw,
diff --git a/c_glib/arrow-glib/output-stream.hpp b/c_glib/arrow-glib/output-stream.hpp
index e41c65da88d82..515d969efc245 100644
--- a/c_glib/arrow-glib/output-stream.hpp
+++ b/c_glib/arrow-glib/output-stream.hpp
@@ -25,23 +25,32 @@

 #include

+GARROW_EXTERN
 GArrowOutputStream *
 garrow_output_stream_new_raw(
   std::shared_ptr<arrow::io::OutputStream> *arrow_output_stream);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::OutputStream>
 garrow_output_stream_get_raw(GArrowOutputStream *output_stream);

+GARROW_EXTERN
 GArrowFileOutputStream *
 garrow_file_output_stream_new_raw(
   std::shared_ptr<arrow::io::FileOutputStream> *arrow_file_output_stream);
+
+GARROW_EXTERN
 GArrowBufferOutputStream *
 garrow_buffer_output_stream_new_raw(
   std::shared_ptr<arrow::io::BufferOutputStream> *arrow_buffer_output_stream);

+GARROW_EXTERN
 GArrowCompressedOutputStream *
 garrow_compressed_output_stream_new_raw(
   std::shared_ptr<arrow::io::CompressedOutputStream> *arrow_raw,
   GArrowCodec *codec,
   GArrowOutputStream *raw);
+
+GARROW_EXTERN
 std::shared_ptr<arrow::io::CompressedOutputStream>
 garrow_compressed_output_stream_get_raw(GArrowCompressedOutputStream *stream);
diff --git a/c_glib/arrow-glib/readable.h b/c_glib/arrow-glib/readable.h
index
d0b1f5b6a99ee..266b45849057e 100644 --- a/c_glib/arrow-glib/readable.h +++ b/c_glib/arrow-glib/readable.h @@ -25,10 +25,13 @@ G_BEGIN_DECLS #define GARROW_TYPE_READABLE (garrow_readable_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowReadable, garrow_readable, GARROW, READABLE, GObject) +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_readable_read(GArrowReadable *readable, gint64 n_bytes, GError **error); + GARROW_AVAILABLE_IN_0_17 GBytes * garrow_readable_read_bytes(GArrowReadable *readable, gint64 n_bytes, GError **error); diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h index 96e4c5bbb5890..5401aa3bb1fc5 100644 --- a/c_glib/arrow-glib/reader.h +++ b/c_glib/arrow-glib/reader.h @@ -29,6 +29,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_READER (garrow_record_batch_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchReader, garrow_record_batch_reader, GARROW, @@ -53,22 +54,29 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_record_batch_reader_export(GArrowRecordBatchReader *reader, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_reader_get_schema(GArrowRecordBatchReader *reader); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) GArrowRecordBatch * garrow_record_batch_reader_get_next_record_batch(GArrowRecordBatchReader *reader, GError **error); #endif #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) GArrowRecordBatch * garrow_record_batch_reader_read_next_record_batch(GArrowRecordBatchReader *reader, GError **error); #endif + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader, GError **error); + GARROW_AVAILABLE_IN_6_0 GArrowTable * garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, GError **error); @@ -78,6 +86,7 @@ GList * garrow_record_batch_reader_get_sources(GArrowRecordBatchReader *reader); #define GARROW_TYPE_TABLE_BATCH_READER (garrow_table_batch_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader, garrow_table_batch_reader, GARROW, @@ -88,6 +97,7 @@ struct _GArrowTableBatchReaderClass GArrowRecordBatchReaderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTableBatchReader * garrow_table_batch_reader_new(GArrowTable *table); @@ -98,6 +108,7 @@ garrow_table_batch_reader_set_max_chunk_size(GArrowTableBatchReader *reader, #define GARROW_TYPE_RECORD_BATCH_STREAM_READER \ (garrow_record_batch_stream_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamReader, garrow_record_batch_stream_reader, GARROW, @@ -108,10 +119,12 @@ struct _GArrowRecordBatchStreamReaderClass GArrowRecordBatchReaderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamReader * garrow_record_batch_stream_reader_new(GArrowInputStream *stream, GError **error); #define GARROW_TYPE_RECORD_BATCH_FILE_READER (garrow_record_batch_file_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileReader, garrow_record_batch_file_reader, GARROW, @@ -122,28 +135,39 @@ struct _GArrowRecordBatchFileReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileReader * garrow_record_batch_file_reader_new(GArrowSeekableInputStream *file, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_file_reader_get_schema(GArrowRecordBatchFileReader 
*reader); + +GARROW_AVAILABLE_IN_ALL guint garrow_record_batch_file_reader_get_n_record_batches(GArrowRecordBatchFileReader *reader); + +GARROW_AVAILABLE_IN_ALL GArrowMetadataVersion garrow_record_batch_file_reader_get_version(GArrowRecordBatchFileReader *reader); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_file_reader_read_record_batch) GArrowRecordBatch * garrow_record_batch_file_reader_get_record_batch(GArrowRecordBatchFileReader *reader, guint i, GError **error); #endif + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *reader, guint i, GError **error); #define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader, garrow_feather_file_reader, GARROW, @@ -154,18 +178,26 @@ struct _GArrowFeatherFileReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFeatherFileReader * garrow_feather_file_reader_new(GArrowSeekableInputStream *file, GError **error); +GARROW_AVAILABLE_IN_ALL gint garrow_feather_file_reader_get_version(GArrowFeatherFileReader *reader); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read_indices(GArrowFeatherFileReader *reader, const gint *indices, guint n_indices, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, const gchar **names, @@ -173,6 +205,7 @@ garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, GError **error); #define GARROW_TYPE_CSV_READ_OPTIONS (garrow_csv_read_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowCSVReadOptions, garrow_csv_read_options, GARROW, CSV_READ_OPTIONS, GObject) struct _GArrowCSVReadOptionsClass @@ -180,16 +213,23 @@ struct _GArrowCSVReadOptionsClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCSVReadOptions * garrow_csv_read_options_new(void); + +GARROW_AVAILABLE_IN_ALL void garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options, const gchar *name, GArrowDataType *data_type); +GARROW_AVAILABLE_IN_ALL void garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options, GArrowSchema *schema); + +GARROW_AVAILABLE_IN_ALL GHashTable * garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options); + GARROW_AVAILABLE_IN_0_14 void garrow_csv_read_options_set_null_values(GArrowCSVReadOptions *options, @@ -251,16 +291,20 @@ garrow_csv_read_options_add_timestamp_parser(GArrowCSVReadOptions *options, GArrowTimestampParser *parser); #define GARROW_TYPE_CSV_READER (garrow_csv_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCSVReader, garrow_csv_reader, GARROW, CSV_READER, GObject) struct _GArrowCSVReaderClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCSVReader * garrow_csv_reader_new(GArrowInputStream *input, GArrowCSVReadOptions *options, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_csv_reader_read(GArrowCSVReader *reader, GError **error); @@ -279,6 +323,7 @@ typedef enum { } GArrowJSONReadUnexpectedFieldBehavior; #define GARROW_TYPE_JSON_READ_OPTIONS (garrow_json_read_options_get_type()) +GARROW_AVAILABLE_IN_0_14 G_DECLARE_DERIVABLE_TYPE( GArrowJSONReadOptions, garrow_json_read_options, GARROW, JSON_READ_OPTIONS, GObject) struct 
_GArrowJSONReadOptionsClass @@ -291,6 +336,7 @@ GArrowJSONReadOptions * garrow_json_read_options_new(void); #define GARROW_TYPE_JSON_READER (garrow_json_reader_get_type()) +GARROW_AVAILABLE_IN_0_14 G_DECLARE_DERIVABLE_TYPE( GArrowJSONReader, garrow_json_reader, GARROW, JSON_READER, GObject) struct _GArrowJSONReaderClass diff --git a/c_glib/arrow-glib/reader.hpp b/c_glib/arrow-glib/reader.hpp index 192497ef52e31..beec6766af2e6 100644 --- a/c_glib/arrow-glib/reader.hpp +++ b/c_glib/arrow-glib/reader.hpp @@ -27,42 +27,61 @@ #include +GARROW_EXTERN GArrowRecordBatchReader * garrow_record_batch_reader_new_raw( std::shared_ptr *arrow_reader, GList *sources); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader); +GARROW_EXTERN GArrowTableBatchReader * garrow_table_batch_reader_new_raw(std::shared_ptr *arrow_reader, GArrowTable *table); + +GARROW_EXTERN std::shared_ptr garrow_table_batch_reader_get_raw(GArrowTableBatchReader *reader); +GARROW_EXTERN GArrowRecordBatchStreamReader * garrow_record_batch_stream_reader_new_raw( std::shared_ptr *arrow_reader); +GARROW_EXTERN GArrowRecordBatchFileReader * garrow_record_batch_file_reader_new_raw( std::shared_ptr *arrow_reader); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader); +GARROW_EXTERN GArrowFeatherFileReader * garrow_feather_file_reader_new_raw( std::shared_ptr *arrow_reader); + +GARROW_EXTERN std::shared_ptr garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader); +GARROW_EXTERN GArrowCSVReader * garrow_csv_reader_new_raw(std::shared_ptr *arrow_reader, GArrowInputStream *input); + +GARROW_EXTERN std::shared_ptr garrow_csv_reader_get_raw(GArrowCSVReader *reader); +GARROW_EXTERN GArrowJSONReader * garrow_json_reader_new_raw(std::shared_ptr *arrow_reader, GArrowInputStream *input); + +GARROW_EXTERN std::shared_ptr garrow_json_reader_get_raw(GArrowJSONReader *reader); diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h index 3c995658224cb..e7ffd83795ed4 100644 --- a/c_glib/arrow-glib/record-batch.h +++ b/c_glib/arrow-glib/record-batch.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH (garrow_record_batch_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowRecordBatch, garrow_record_batch, GARROW, RECORD_BATCH, GObject) struct _GArrowRecordBatchClass @@ -37,6 +38,7 @@ GARROW_AVAILABLE_IN_6_0 GArrowRecordBatch * garrow_record_batch_import(gpointer c_abi_array, GArrowSchema *schema, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_new(GArrowSchema *schema, guint32 n_rows, @@ -50,6 +52,7 @@ garrow_record_batch_export(GArrowRecordBatch *record_batch, gpointer *c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch, GArrowRecordBatch *other_record_batch); @@ -59,28 +62,43 @@ garrow_record_batch_equal_metadata(GArrowRecordBatch *record_batch, GArrowRecordBatch *other_record_batch, gboolean check_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_get_schema(GArrowRecordBatch *record_batch); + GARROW_AVAILABLE_IN_0_15 GArrowArray * garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, gint i); + +GARROW_AVAILABLE_IN_ALL const gchar * garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, gint i); + +GARROW_AVAILABLE_IN_ALL guint garrow_record_batch_get_n_columns(GArrowRecordBatch *record_batch); + +GARROW_AVAILABLE_IN_ALL gint64 
garrow_record_batch_get_n_rows(GArrowRecordBatch *record_batch); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_slice(GArrowRecordBatch *record_batch, gint64 offset, gint64 length); +GARROW_AVAILABLE_IN_ALL gchar * garrow_record_batch_to_string(GArrowRecordBatch *record_batch, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_add_column(GArrowRecordBatch *record_batch, guint i, GArrowField *field, GArrowArray *column, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_remove_column(GArrowRecordBatch *record_batch, guint i, @@ -92,6 +110,7 @@ garrow_record_batch_serialize(GArrowRecordBatch *record_batch, GError **error); #define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator, garrow_record_batch_iterator, GARROW, diff --git a/c_glib/arrow-glib/record-batch.hpp b/c_glib/arrow-glib/record-batch.hpp index 4c3e5e8a78231..75c0432b390ba 100644 --- a/c_glib/arrow-glib/record-batch.hpp +++ b/c_glib/arrow-glib/record-batch.hpp @@ -23,13 +23,18 @@ #include +GARROW_EXTERN GArrowRecordBatch * garrow_record_batch_new_raw(std::shared_ptr *arrow_record_batch); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_get_raw(GArrowRecordBatch *record_batch); +GARROW_EXTERN GArrowRecordBatchIterator * garrow_record_batch_iterator_new_raw(arrow::RecordBatchIterator *arrow_iterator); +GARROW_EXTERN arrow::RecordBatchIterator * garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator); diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h index b4a6229c62fd1..5f9015d29c61c 100644 --- a/c_glib/arrow-glib/scalar.h +++ b/c_glib/arrow-glib/scalar.h @@ -25,6 +25,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_SCALAR (garrow_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowScalar, garrow_scalar, GARROW, SCALAR, GObject) struct _GArrowScalarClass { @@ -64,6 +65,7 @@ garrow_scalar_cast(GArrowScalar *scalar, GError **error); #define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowNullScalar, garrow_null_scalar, GARROW, NULL_SCALAR, GArrowScalar) struct _GArrowNullScalarClass @@ -76,6 +78,7 @@ GArrowNullScalar * garrow_null_scalar_new(void); #define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowBooleanScalar, garrow_boolean_scalar, GARROW, BOOLEAN_SCALAR, GArrowScalar) struct _GArrowBooleanScalarClass @@ -91,6 +94,7 @@ gboolean garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar); #define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt8Scalar, garrow_int8_scalar, GARROW, INT8_SCALAR, GArrowScalar) struct _GArrowInt8ScalarClass @@ -106,6 +110,7 @@ gint8 garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar); #define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt16Scalar, garrow_int16_scalar, GARROW, INT16_SCALAR, GArrowScalar) struct _GArrowInt16ScalarClass @@ -121,6 +126,7 @@ gint16 garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar); #define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt32Scalar, garrow_int32_scalar, GARROW, INT32_SCALAR, GArrowScalar) struct _GArrowInt32ScalarClass @@ -136,6 +142,7 @@ gint32 
garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar); #define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt64Scalar, garrow_int64_scalar, GARROW, INT64_SCALAR, GArrowScalar) struct _GArrowInt64ScalarClass @@ -151,6 +158,7 @@ gint64 garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar); #define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt8Scalar, garrow_uint8_scalar, GARROW, UINT8_SCALAR, GArrowScalar) struct _GArrowUInt8ScalarClass @@ -166,6 +174,7 @@ guint8 garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar); #define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt16Scalar, garrow_uint16_scalar, GARROW, UINT16_SCALAR, GArrowScalar) struct _GArrowUInt16ScalarClass @@ -181,6 +190,7 @@ guint16 garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar); #define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt32Scalar, garrow_uint32_scalar, GARROW, UINT32_SCALAR, GArrowScalar) struct _GArrowUInt32ScalarClass @@ -196,6 +206,7 @@ guint32 garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar); #define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt64Scalar, garrow_uint64_scalar, GARROW, UINT64_SCALAR, GArrowScalar) struct _GArrowUInt64ScalarClass @@ -211,6 +222,7 @@ guint64 garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar); #define GARROW_TYPE_HALF_FLOAT_SCALAR (garrow_half_float_scalar_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatScalar, garrow_half_float_scalar, GARROW, @@ -229,6 +241,7 @@ guint16 garrow_half_float_scalar_get_value(GArrowHalfFloatScalar *scalar); #define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowFloatScalar, garrow_float_scalar, GARROW, FLOAT_SCALAR, GArrowScalar) struct _GArrowFloatScalarClass @@ -244,6 +257,7 @@ gfloat garrow_float_scalar_get_value(GArrowFloatScalar *scalar); #define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDoubleScalar, garrow_double_scalar, GARROW, DOUBLE_SCALAR, GArrowScalar) struct _GArrowDoubleScalarClass @@ -259,6 +273,7 @@ gdouble garrow_double_scalar_get_value(GArrowDoubleScalar *scalar); #define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar, garrow_base_binary_scalar, GARROW, @@ -274,6 +289,7 @@ GArrowBuffer * garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar); #define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowBinaryScalar, garrow_binary_scalar, GARROW, BINARY_SCALAR, GArrowBaseBinaryScalar) struct _GArrowBinaryScalarClass @@ -286,6 +302,7 @@ GArrowBinaryScalar * garrow_binary_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowStringScalar, garrow_string_scalar, GARROW, STRING_SCALAR, GArrowBaseBinaryScalar) struct _GArrowStringScalarClass @@ -298,6 +315,7 @@ GArrowStringScalar * garrow_string_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_LARGE_BINARY_SCALAR 
(garrow_large_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar, garrow_large_binary_scalar, GARROW, @@ -313,6 +331,7 @@ GArrowLargeBinaryScalar * garrow_large_binary_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar, garrow_large_string_scalar, GARROW, @@ -328,6 +347,7 @@ GArrowLargeStringScalar * garrow_large_string_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR (garrow_fixed_size_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar, garrow_fixed_size_binary_scalar, GARROW, @@ -344,6 +364,7 @@ garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, GArrowBuffer *value); #define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDate32Scalar, garrow_date32_scalar, GARROW, DATE32_SCALAR, GArrowScalar) struct _GArrowDate32ScalarClass @@ -359,6 +380,7 @@ gint32 garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar); #define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDate64Scalar, garrow_date64_scalar, GARROW, DATE64_SCALAR, GArrowScalar) struct _GArrowDate64ScalarClass @@ -374,6 +396,7 @@ gint64 garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar); #define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTime32Scalar, garrow_time32_scalar, GARROW, TIME32_SCALAR, GArrowScalar) struct _GArrowTime32ScalarClass @@ -389,6 +412,7 @@ gint32 garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar); #define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTime64Scalar, garrow_time64_scalar, GARROW, TIME64_SCALAR, GArrowScalar) struct _GArrowTime64ScalarClass @@ -404,6 +428,7 @@ gint64 garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar); #define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTimestampScalar, garrow_timestamp_scalar, GARROW, TIMESTAMP_SCALAR, GArrowScalar) struct _GArrowTimestampScalarClass @@ -419,6 +444,7 @@ gint64 garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar); #define GARROW_TYPE_MONTH_INTERVAL_SCALAR (garrow_month_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalScalar, garrow_month_interval_scalar, GARROW, @@ -437,6 +463,7 @@ gint32 garrow_month_interval_scalar_get_value(GArrowMonthIntervalScalar *scalar); #define GARROW_TYPE_DAY_TIME_INTERVAL_SCALAR (garrow_day_time_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalScalar, garrow_day_time_interval_scalar, GARROW, @@ -456,6 +483,7 @@ garrow_day_time_interval_scalar_get_value(GArrowDayTimeIntervalScalar *scalar); #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_SCALAR \ (garrow_month_day_nano_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalScalar, garrow_month_day_nano_interval_scalar, GARROW, @@ -474,6 +502,7 @@ GArrowMonthDayNano * garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar *scalar); #define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type()) 
+GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar, garrow_decimal128_scalar, GARROW, @@ -493,6 +522,7 @@ GArrowDecimal128 * garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar); #define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar, garrow_decimal256_scalar, GARROW, @@ -512,6 +542,7 @@ GArrowDecimal256 * garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar); #define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowBaseListScalar, garrow_base_list_scalar, GARROW, BASE_LIST_SCALAR, GArrowScalar) struct _GArrowBaseListScalarClass @@ -524,6 +555,7 @@ GArrowArray * garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar); #define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowListScalar, garrow_list_scalar, GARROW, LIST_SCALAR, GArrowBaseListScalar) struct _GArrowListScalarClass @@ -536,6 +568,7 @@ GArrowListScalar * garrow_list_scalar_new(GArrowListArray *value); #define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar, garrow_large_list_scalar, GARROW, @@ -551,6 +584,7 @@ GArrowLargeListScalar * garrow_large_list_scalar_new(GArrowLargeListArray *value); #define GARROW_TYPE_MAP_SCALAR (garrow_map_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowMapScalar, garrow_map_scalar, GARROW, MAP_SCALAR, GArrowBaseListScalar) struct _GArrowMapScalarClass @@ -563,6 +597,7 @@ GArrowMapScalar * garrow_map_scalar_new(GArrowStructArray *value); #define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowStructScalar, garrow_struct_scalar, GARROW, STRUCT_SCALAR, GArrowScalar) struct _GArrowStructScalarClass @@ -578,6 +613,7 @@ GList * garrow_struct_scalar_get_value(GArrowStructScalar *scalar); #define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUnionScalar, garrow_union_scalar, GARROW, UNION_SCALAR, GArrowScalar) struct _GArrowUnionScalarClass @@ -593,6 +629,7 @@ GArrowScalar * garrow_union_scalar_get_value(GArrowUnionScalar *scalar); #define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar, garrow_sparse_union_scalar, GARROW, @@ -610,6 +647,7 @@ garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, GArrowScalar *value); #define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar, garrow_dense_union_scalar, GARROW, @@ -627,6 +665,7 @@ garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, GArrowScalar *value); #define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowExtensionScalar, garrow_extension_scalar, GARROW, EXTENSION_SCALAR, GArrowScalar) struct _GArrowExtensionScalarClass diff --git a/c_glib/arrow-glib/schema.h b/c_glib/arrow-glib/schema.h index 93cd5bd542cf8..aab740397b7d6 100644 --- a/c_glib/arrow-glib/schema.h +++ b/c_glib/arrow-glib/schema.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_SCHEMA (garrow_schema_get_type()) +GARROW_AVAILABLE_IN_ALL 
G_DECLARE_DERIVABLE_TYPE(GArrowSchema, garrow_schema, GARROW, SCHEMA, GObject) struct _GArrowSchemaClass { @@ -34,6 +35,7 @@ GARROW_AVAILABLE_IN_6_0 GArrowSchema * garrow_schema_import(gpointer c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_new(GList *fields); @@ -41,34 +43,48 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_schema_export(GArrowSchema *schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_schema_equal(GArrowSchema *schema, GArrowSchema *other_schema); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_schema_get_field(GArrowSchema *schema, guint i); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_schema_get_field_by_name(GArrowSchema *schema, const gchar *name); GARROW_AVAILABLE_IN_0_15 gint garrow_schema_get_field_index(GArrowSchema *schema, const gchar *name); +GARROW_AVAILABLE_IN_ALL guint garrow_schema_n_fields(GArrowSchema *schema); + +GARROW_AVAILABLE_IN_ALL GList * garrow_schema_get_fields(GArrowSchema *schema); +GARROW_AVAILABLE_IN_ALL gchar * garrow_schema_to_string(GArrowSchema *schema); + GARROW_AVAILABLE_IN_0_17 gchar * garrow_schema_to_string_metadata(GArrowSchema *schema, gboolean show_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_add_field(GArrowSchema *schema, guint i, GArrowField *field, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_remove_field(GArrowSchema *schema, guint i, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_replace_field(GArrowSchema *schema, guint i, diff --git a/c_glib/arrow-glib/schema.hpp b/c_glib/arrow-glib/schema.hpp index 333f73391c900..ba6c459495461 100644 --- a/c_glib/arrow-glib/schema.hpp +++ b/c_glib/arrow-glib/schema.hpp @@ -23,7 +23,10 @@ #include +GARROW_EXTERN GArrowSchema * garrow_schema_new_raw(std::shared_ptr *arrow_schema); + +GARROW_EXTERN std::shared_ptr garrow_schema_get_raw(GArrowSchema *schema); diff --git a/c_glib/arrow-glib/table-builder.h b/c_glib/arrow-glib/table-builder.h index 0e13352bbdde3..6fad1ae79a40f 100644 --- a/c_glib/arrow-glib/table-builder.h +++ b/c_glib/arrow-glib/table-builder.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_BUILDER (garrow_record_batch_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchBuilder, garrow_record_batch_builder, GARROW, @@ -36,34 +37,45 @@ struct _GArrowRecordBatchBuilderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchBuilder * garrow_record_batch_builder_new(GArrowSchema *schema, GError **error); +GARROW_AVAILABLE_IN_ALL gint64 garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder); + +GARROW_AVAILABLE_IN_ALL void garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder, gint64 capacity); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_n_columns) gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder); #endif + GARROW_AVAILABLE_IN_0_13 gint garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_column_builder) GArrowArrayBuilder * garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, gint i); #endif + GARROW_AVAILABLE_IN_0_13 GArrowArrayBuilder * 
garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, gint i); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, GError **error); diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index 1bf64d25a4f3f..d790e413df5fc 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -29,6 +29,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_TABLE_CONCATENATE_OPTIONS \ (garrow_table_concatenate_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTableConcatenateOptions, garrow_table_concatenate_options, GARROW, @@ -44,6 +45,7 @@ GArrowTableConcatenateOptions * garrow_table_concatenate_options_new(void); #define GARROW_TYPE_TABLE (garrow_table_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTable, garrow_table, GARROW, TABLE, GObject) struct _GArrowTableClass { @@ -53,18 +55,21 @@ struct _GArrowTableClass GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_values(GArrowSchema *schema, GList *values, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowTable * garrow_table_new_chunked_arrays(GArrowSchema *schema, GArrowChunkedArray **chunked_arrays, gsize n_chunked_arrays, GError **error); + GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_arrays(GArrowSchema *schema, GArrowArray **arrays, gsize n_arrays, GError **error); + GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_record_batches(GArrowSchema *schema, @@ -72,22 +77,29 @@ garrow_table_new_record_batches(GArrowSchema *schema, gsize n_record_batches, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_table_equal(GArrowTable *table, GArrowTable *other_table); + GARROW_AVAILABLE_IN_0_17 gboolean garrow_table_equal_metadata(GArrowTable *table, GArrowTable *other_table, gboolean check_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_table_get_schema(GArrowTable *table); + GARROW_AVAILABLE_IN_0_15 GArrowChunkedArray * garrow_table_get_column_data(GArrowTable *table, gint i); +GARROW_AVAILABLE_IN_ALL guint garrow_table_get_n_columns(GArrowTable *table); + +GARROW_AVAILABLE_IN_ALL guint64 garrow_table_get_n_rows(GArrowTable *table); @@ -98,8 +110,11 @@ garrow_table_add_column(GArrowTable *table, GArrowField *field, GArrowChunkedArray *chunked_array, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_table_remove_column(GArrowTable *table, guint i, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowTable * garrow_table_replace_column(GArrowTable *table, @@ -107,22 +122,28 @@ garrow_table_replace_column(GArrowTable *table, GArrowField *field, GArrowChunkedArray *chunked_array, GError **error); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_table_to_string(GArrowTable *table, GError **error); + GARROW_AVAILABLE_IN_0_14 GArrowTable * garrow_table_concatenate(GArrowTable *table, GList *other_tables, GArrowTableConcatenateOptions *options, GError **error); + GARROW_AVAILABLE_IN_0_14 GArrowTable * garrow_table_slice(GArrowTable *table, gint64 offset, gint64 length); + GARROW_AVAILABLE_IN_0_16 GArrowTable * garrow_table_combine_chunks(GArrowTable *table, GError **error); #define GARROW_TYPE_FEATHER_WRITE_PROPERTIES (garrow_feather_write_properties_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherWriteProperties, garrow_feather_write_properties, GARROW, diff --git a/c_glib/arrow-glib/table.hpp b/c_glib/arrow-glib/table.hpp index 3077c2ece9b37..79fc97471a42c 100644 --- a/c_glib/arrow-glib/table.hpp +++ b/c_glib/arrow-glib/table.hpp @@ -24,10 
+24,14 @@ #include +GARROW_EXTERN GArrowTable * garrow_table_new_raw(std::shared_ptr *arrow_table); + +GARROW_EXTERN std::shared_ptr garrow_table_get_raw(GArrowTable *table); +GARROW_EXTERN arrow::ipc::feather::WriteProperties * garrow_feather_write_properties_get_raw(GArrowFeatherWriteProperties *properties); diff --git a/c_glib/arrow-glib/tensor.h b/c_glib/arrow-glib/tensor.h index a6d11b248110e..5971c3af12600 100644 --- a/c_glib/arrow-glib/tensor.h +++ b/c_glib/arrow-glib/tensor.h @@ -25,12 +25,14 @@ G_BEGIN_DECLS #define GARROW_TYPE_TENSOR (garrow_tensor_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTensor, garrow_tensor, GARROW, TENSOR, GObject) struct _GArrowTensorClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTensor * garrow_tensor_new(GArrowDataType *data_type, GArrowBuffer *data, @@ -40,30 +42,55 @@ garrow_tensor_new(GArrowDataType *data_type, gsize n_strides, gchar **dimension_names, gsize n_dimension_names); +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_equal(GArrowTensor *tensor, GArrowTensor *other_tensor); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_tensor_get_value_data_type(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_tensor_get_value_type(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_tensor_get_buffer(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gint64 * garrow_tensor_get_shape(GArrowTensor *tensor, gint *n_dimensions); + +GARROW_AVAILABLE_IN_ALL gint64 * garrow_tensor_get_strides(GArrowTensor *tensor, gint *n_strides); + +GARROW_AVAILABLE_IN_ALL gint garrow_tensor_get_n_dimensions(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL const gchar * garrow_tensor_get_dimension_name(GArrowTensor *tensor, gint i); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_tensor_get_size(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_mutable(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_contiguous(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_row_major(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_column_major(GArrowTensor *tensor); diff --git a/c_glib/arrow-glib/timestamp-parser.h b/c_glib/arrow-glib/timestamp-parser.h index 05cad54746eeb..a7265d6ef46fb 100644 --- a/c_glib/arrow-glib/timestamp-parser.h +++ b/c_glib/arrow-glib/timestamp-parser.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_TIMESTAMP_PARSER (garrow_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE( GArrowTimestampParser, garrow_timestamp_parser, GARROW, TIMESTAMP_PARSER, GObject) struct _GArrowTimestampParserClass @@ -39,6 +40,7 @@ garrow_timestamp_parser_get_kind(GArrowTimestampParser *parser); #define GARROW_TYPE_STRPTIME_TIMESTAMP_PARSER \ (garrow_strptime_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeTimestampParser, garrow_strptime_timestamp_parser, GARROW, @@ -58,6 +60,7 @@ const gchar * garrow_strptime_timestamp_parser_get_format(GArrowStrptimeTimestampParser *parser); #define GARROW_TYPE_ISO8601_TIMESTAMP_PARSER (garrow_iso8601_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowISO8601TimestampParser, garrow_iso8601_timestamp_parser, GARROW, diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index fe90afb223903..b530a088c8e38 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -156,4 +156,6 @@ 
G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR) #endif +@VISIBILITY_MACROS@ + @AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/writable-file.h b/c_glib/arrow-glib/writable-file.h index 555705767e4aa..e9aa9122e92fa 100644 --- a/c_glib/arrow-glib/writable-file.h +++ b/c_glib/arrow-glib/writable-file.h @@ -24,9 +24,11 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE_FILE (garrow_writable_file_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE( GArrowWritableFile, garrow_writable_file, GARROW, WRITABLE_FILE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_file_write_at(GArrowWritableFile *writable_file, gint64 position, diff --git a/c_glib/arrow-glib/writable.h b/c_glib/arrow-glib/writable.h index a556443967b5a..dcc1e67668e78 100644 --- a/c_glib/arrow-glib/writable.h +++ b/c_glib/arrow-glib/writable.h @@ -24,13 +24,17 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE (garrow_writable_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowWritable, garrow_writable, GARROW, WRITABLE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_write(GArrowWritable *writable, const guint8 *data, gint64 n_bytes, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_flush(GArrowWritable *writable, GError **error); diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 30b0ea987da39..46bbdddec8c9d 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_WRITER (garrow_record_batch_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchWriter, garrow_record_batch_writer, GARROW, @@ -38,19 +39,23 @@ struct _GArrowRecordBatchWriterClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_record_batch(GArrowRecordBatchWriter *writer, GArrowRecordBatch *record_batch, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_table(GArrowRecordBatchWriter *writer, GArrowTable *table, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error); #define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ (garrow_record_batch_stream_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamWriter, garrow_record_batch_stream_writer, GARROW, @@ -61,12 +66,14 @@ struct _GArrowRecordBatchStreamWriterClass GArrowRecordBatchWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, GError **error); #define GARROW_TYPE_RECORD_BATCH_FILE_WRITER (garrow_record_batch_file_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileWriter, garrow_record_batch_file_writer, GARROW, @@ -77,6 +84,7 @@ struct _GArrowRecordBatchFileWriterClass GArrowRecordBatchStreamWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, diff --git a/c_glib/gandiva-glib/meson.build b/c_glib/gandiva-glib/meson.build index 23f950ddb96dc..8cd00b3805b91 100644 --- a/c_glib/gandiva-glib/meson.build +++ b/c_glib/gandiva-glib/meson.build @@ -85,6 +85,7 @@ libgandiva_glib = library('gandiva-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGGANDIVA_COMPILATION'], soversion: 
so_version, version: library_version) gandiva_glib = declare_dependency(link_with: libgandiva_glib, diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h index 49d5a8c43124e..1733cac918c51 100644 --- a/c_glib/gandiva-glib/node.h +++ b/c_glib/gandiva-glib/node.h @@ -26,7 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_NODE (ggandiva_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNode, ggandiva_node, GGANDIVA, NODE, GObject) struct _GGandivaNodeClass @@ -34,12 +34,12 @@ struct _GGandivaNodeClass GObjectClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_16; +GGANDIVA_AVAILABLE_IN_0_16 gchar * ggandiva_node_to_string(GGandivaNode *node); #define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFieldNode, ggandiva_field_node, GGANDIVA, FIELD_NODE, GGandivaNode) struct _GGandivaFieldNodeClass @@ -47,12 +47,12 @@ struct _GGandivaFieldNodeClass GGandivaNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFieldNode * ggandiva_field_node_new(GArrowField *field); #define GGANDIVA_TYPE_FUNCTION_NODE (ggandiva_function_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFunctionNode, ggandiva_function_node, GGANDIVA, FUNCTION_NODE, GGandivaNode) struct _GGandivaFunctionNodeClass @@ -60,18 +60,18 @@ struct _GGandivaFunctionNodeClass GGandivaNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFunctionNode * ggandiva_function_node_new(const gchar *name, GList *parameters, GArrowDataType *return_type); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_function_node_get_parameters(GGandivaFunctionNode *node); #define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaLiteralNode, ggandiva_literal_node, GGANDIVA, LITERAL_NODE, GGandivaNode) struct _GGandivaLiteralNodeClass @@ -80,7 +80,7 @@ struct _GGandivaLiteralNodeClass }; #define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode, ggandiva_null_literal_node, GGANDIVA, @@ -91,12 +91,12 @@ struct _GGandivaNullLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaNullLiteralNode * ggandiva_null_literal_node_new(GArrowDataType *return_type, GError **error); #define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode, ggandiva_boolean_literal_node, GGANDIVA, @@ -107,16 +107,16 @@ struct _GGandivaBooleanLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBooleanLiteralNode * ggandiva_boolean_literal_node_new(gboolean value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gboolean ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); #define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, ggandiva_int8_literal_node, GGANDIVA, @@ -127,16 +127,16 @@ struct _GGandivaInt8LiteralNodeClass 
GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt8LiteralNode * ggandiva_int8_literal_node_new(gint8 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gint8 ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); #define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, ggandiva_uint8_literal_node, GGANDIVA, @@ -147,16 +147,16 @@ struct _GGandivaUInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt8LiteralNode * ggandiva_uint8_literal_node_new(guint8 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 guint8 ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); #define GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, ggandiva_int16_literal_node, GGANDIVA, @@ -167,16 +167,16 @@ struct _GGandivaInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt16LiteralNode * ggandiva_int16_literal_node_new(gint16 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gint16 ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); #define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, ggandiva_uint16_literal_node, GGANDIVA, @@ -187,16 +187,16 @@ struct _GGandivaUInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt16LiteralNode * ggandiva_uint16_literal_node_new(guint16 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 guint16 ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); #define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, ggandiva_int32_literal_node, GGANDIVA, @@ -207,16 +207,16 @@ struct _GGandivaInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt32LiteralNode * ggandiva_int32_literal_node_new(gint32 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gint32 ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); #define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, ggandiva_uint32_literal_node, GGANDIVA, @@ -227,16 +227,16 @@ struct _GGandivaUInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt32LiteralNode * ggandiva_uint32_literal_node_new(guint32 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 guint32 ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); #define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, ggandiva_int64_literal_node, GGANDIVA, @@ -247,16 
+247,16 @@ struct _GGandivaInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt64LiteralNode * ggandiva_int64_literal_node_new(gint64 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gint64 ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); #define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, ggandiva_uint64_literal_node, GGANDIVA, @@ -267,16 +267,16 @@ struct _GGandivaUInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt64LiteralNode * ggandiva_uint64_literal_node_new(guint64 value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 guint64 ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); #define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, ggandiva_float_literal_node, GGANDIVA, @@ -287,16 +287,16 @@ struct _GGandivaFloatLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFloatLiteralNode * ggandiva_float_literal_node_new(gfloat value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gfloat ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); #define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, ggandiva_double_literal_node, GGANDIVA, @@ -307,16 +307,16 @@ struct _GGandivaDoubleLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaDoubleLiteralNode * ggandiva_double_literal_node_new(gdouble value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 gdouble ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); #define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, ggandiva_binary_literal_node, GGANDIVA, @@ -327,20 +327,20 @@ struct _GGandivaBinaryLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new(const guint8 *value, gsize size); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new_bytes(GBytes *value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GBytes * ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); #define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, ggandiva_string_literal_node, GGANDIVA, @@ -351,16 +351,16 @@ struct _GGandivaStringLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaStringLiteralNode * ggandiva_string_literal_node_new(const gchar *value); -GGANDIVA_AVAILABLE_IN_0_12; +GGANDIVA_AVAILABLE_IN_0_12 const gchar * ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode 
 *node);
 
 #define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type())
-GGANDIVA_AVAILABLE_IN_0_12;
+GGANDIVA_AVAILABLE_IN_0_12
 G_DECLARE_DERIVABLE_TYPE(
   GGandivaIfNode, ggandiva_if_node, GGANDIVA, IF_NODE, GGandivaNode)
 struct _GGandivaIfNodeClass
@@ -368,7 +368,7 @@ struct _GGandivaIfNodeClass
   GGandivaNodeClass parent_class;
 };
 
-GGANDIVA_AVAILABLE_IN_0_12;
+GGANDIVA_AVAILABLE_IN_0_12
 GGandivaIfNode *
 ggandiva_if_node_new(GGandivaNode *condition_node,
                      GGandivaNode *then_node,
diff --git a/c_glib/gandiva-glib/version.h.in b/c_glib/gandiva-glib/version.h.in
index 8c7ebe0ba6c2e..857c7367bd7e2 100644
--- a/c_glib/gandiva-glib/version.h.in
+++ b/c_glib/gandiva-glib/version.h.in
@@ -156,4 +156,6 @@
   G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR)
 #endif
 
+@VISIBILITY_MACROS@
+
 @AVAILABILITY_MACROS@
diff --git a/c_glib/parquet-glib/meson.build b/c_glib/parquet-glib/meson.build
index 22fbbbbae64ff..a3de1d0933f7f 100644
--- a/c_glib/parquet-glib/meson.build
+++ b/c_glib/parquet-glib/meson.build
@@ -64,6 +64,7 @@ libparquet_glib = library('parquet-glib',
                           dependencies: dependencies,
                           implicit_include_directories: false,
                           include_directories: base_include_directories,
+                          cpp_args: ['-DGPARQUET_COMPILATION'],
                           soversion: so_version,
                           version: library_version)
 parquet_glib = declare_dependency(link_with: libparquet_glib,
diff --git a/c_glib/parquet-glib/version.h.in b/c_glib/parquet-glib/version.h.in
index 4baef99c0eec9..142b3b83e0f3d 100644
--- a/c_glib/parquet-glib/version.h.in
+++ b/c_glib/parquet-glib/version.h.in
@@ -154,4 +154,6 @@
 # define GPARQUET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED
 #endif
 
+@VISIBILITY_MACROS@
+
 @AVAILABILITY_MACROS@
diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py
index bfc9979251416..f2fc26132c143 100755
--- a/c_glib/tool/generate-version-header.py
+++ b/c_glib/tool/generate-version-header.py
@@ -66,6 +67,7 @@ def write_header(
     version_major, version_minor, version_micro = [int(v) for v in version.split(".")]
 
     encoded_versions = generate_encoded_versions(library_name)
+    visibility_macros = generate_visibility_macros(library_name)
     availability_macros = generate_availability_macros(library_name)
 
     replacements = {
@@ -74,6 +75,7 @@ def write_header(
         "VERSION_MICRO": str(version_micro),
         "VERSION_TAG": version_tag,
         "ENCODED_VERSIONS": encoded_versions,
+        "VISIBILITY_MACROS": visibility_macros,
         "AVAILABILITY_MACROS": availability_macros,
     }
 
@@ -81,6 +83,25 @@ def write_header(
             r"@([A-Z_]+)@", lambda match: replacements[match[1]], input_file.read()))
 
 
+def generate_visibility_macros(library: str) -> str:
+    return f"""#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_MSVC_LANG) && \
+    !defined({library}_STATIC_COMPILATION)
+# define {library}_EXPORT __declspec(dllexport)
+# define {library}_IMPORT __declspec(dllimport)
+#else
+# define {library}_EXPORT
+# define {library}_IMPORT
+#endif
+
+#ifdef {library}_COMPILATION
+# define {library}_API {library}_EXPORT
+#else
+# define {library}_API {library}_IMPORT
+#endif
+
+#define {library}_EXTERN {library}_API extern"""
+
+
 def generate_encoded_versions(library: str) -> str:
     macros = []
 
@@ -98,7 +119,7 @@ def generate_encoded_versions(library: str) -> str:
 
 
 def generate_availability_macros(library: str) -> str:
-    macros = [f"""#define {library}_AVAILABLE_IN_ALL"""]
+    macros = [f"""#define {library}_AVAILABLE_IN_ALL {library}_EXTERN"""]
 
     for major_version, minor_version in ALL_VERSIONS:
         macros.append(f"""#if {library}_VERSION_MIN_REQUIRED >= {library}_VERSION_{major_version}_{minor_version}
{library}_VERSION_{major_version}_{minor_version} @@ -110,9 +131,9 @@ def generate_availability_macros(library: str) -> str: #endif #if {library}_VERSION_MAX_ALLOWED < {library}_VERSION_{major_version}_{minor_version} -# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_UNAVAILABLE({major_version}, {minor_version}) +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN {library}_UNAVAILABLE({major_version}, {minor_version}) #else -# define {library}_AVAILABLE_IN_{major_version}_{minor_version} +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN #endif""") # noqa: E501 return "\n\n".join(macros) diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json new file mode 100644 index 0000000000000..4a14a1e437ff6 --- /dev/null +++ b/c_glib/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "arrow-glib", + "version-string": "17.0.0-SNAPSHOT", + "dependencies": [ + "glib", + "pkgconf" + ] +} diff --git a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh index 6a6295e4ff0bd..ee01bb220710e 100755 --- a/ci/scripts/c_glib_build.sh +++ b/ci/scripts/c_glib_build.sh @@ -28,14 +28,35 @@ build_root=${2} : ${BUILD_DOCS_C_GLIB:=OFF} with_doc=$([ "${BUILD_DOCS_C_GLIB}" == "ON" ] && echo "true" || echo "false") -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig + +if [ -n "${MSYSTEM:-}" ]; then + # Fix ARROW_HOME when running under MSYS2 + export ARROW_HOME="$(cygpath --unix "${ARROW_HOME}")" +fi + +meson_pkg_config_path="${ARROW_HOME}/lib/pkgconfig" mkdir -p ${build_dir} +if [ -n "${VCPKG_ROOT:-}" ]; then + vcpkg_install_root="${build_root}/vcpkg_installed" + $VCPKG_ROOT/vcpkg install --x-manifest-root=${source_dir} --x-install-root=${vcpkg_install_root} + export PKG_CONFIG="${vcpkg_install_root}/x64-windows/tools/pkgconf/pkgconf.exe" + meson_pkg_config_path="${vcpkg_install_root}/x64-windows/lib/pkgconfig:${meson_pkg_config_path}" +fi + +if [ -n "${VCToolsInstallDir:-}" -a -n "${MSYSTEM:-}" ]; then + # Meson finds the gnu link.exe instead of MSVC link.exe when running in MSYS2/git bash, + # so we need to make sure the MSCV link.exe is first in $PATH + export PATH="$(cygpath --unix "${VCToolsInstallDir}")/bin/HostX64/x64:${PATH}" +fi + # Build with Meson meson setup \ + --backend=ninja \ --prefix=$ARROW_HOME \ --libdir=lib \ + --pkg-config-path="${meson_pkg_config_path}" \ -Ddoc=${with_doc} \ -Dvapi=${ARROW_GLIB_VAPI} \ -Dwerror=${ARROW_GLIB_WERROR} \ diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh index cc80582326ec5..861aeeea8bd0f 100755 --- a/ci/scripts/install_vcpkg.sh +++ b/ci/scripts/install_vcpkg.sh @@ -25,7 +25,7 @@ if [ "$#" -lt 1 ]; then fi arrow_dir=$(cd -- "$(dirname -- "$0")/../.." 
&& pwd -P) -default_vcpkg_version=$(cat "${arrow_dir}/.env" | grep "VCPKG" | cut -d "=" -f2 | tr -d '"') +default_vcpkg_version=$(source "${arrow_dir}/.env" && echo "$VCPKG" || echo "") default_vcpkg_ports_patch="${arrow_dir}/ci/vcpkg/ports.patch" vcpkg_destination=$1 diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 8fb23f45f0f3a..11e75612818ac 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -108,6 +108,13 @@ def test_version_pre_tag "+version = '#{@release_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@release_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index df8ea408f8ea7..5706b1303667a 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -86,6 +86,13 @@ def test_version_post_tag "+version = '#{@next_snapshot_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index c3a0b33e5a2a0..015f7109cd251 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -50,6 +50,12 @@ update_versions() { rm -f tool/generate-version-header.py.bak git add tool/generate-version-header.py fi + + sed -i.bak -E -e \ + "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \ + vcpkg.json + rm -f vcpkg.json.bak + git add vcpkg.json popd pushd "${ARROW_DIR}/ci/scripts" From fb61e9f7a821dcb3c753fa6d6c36eec3714c257b Mon Sep 17 00:00:00 2001 From: Wenbo Li <424026452@qq.com> Date: Fri, 24 May 2024 08:44:41 +0800 Subject: [PATCH 159/261] MINOR: [Docs][Python] Fixed a typo in dataset join docs (#41802) ### Rationale for this change Found a typo in a code example of Python docs. ### What changes are included in this PR? `ds1.join(ds2, key="id")` should be `ds1.join(ds2, keys="id")` ### Are these changes tested? No, just docs. ### Are there any user-facing changes? Yes, but just docs. Authored-by: Wenbo Li <424026452@qq.com> Signed-off-by: Sutou Kouhei --- docs/source/python/compute.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index ce3dfabb0e689..c2b46c8f3f673 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -275,7 +275,7 @@ take two datasets and join them: ds1 = ds.dataset(table1) ds2 = ds.dataset(table2) - joined_ds = ds1.join(ds2, key="id") + joined_ds = ds1.join(ds2, keys="id") The resulting dataset will be an :class:`.InMemoryDataset` containing the joined data:: From 0a79448ba39496ec4876cf2f1f0d083bf12152c5 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 24 May 2024 10:36:22 +0900 Subject: [PATCH 160/261] MINOR: [Java] Bump com.google.api.grpc:proto-google-common-protos from 2.37.1 to 2.39.1 in /java (#41740) Bumps [com.google.api.grpc:proto-google-common-protos](https://github.com/googleapis/sdk-platform-java) from 2.37.1 to 2.39.1.
Release notes

Sourced from com.google.api.grpc:proto-google-common-protos's releases.

v2.39.0

2.39.0 (2024-04-18)

Features

  • add libraries_bom_version to generation configuration (#2639) (56c7ca5)
  • Add ChannelPoolSettings Getter for gRPC's ChannelProvider (#2612) (d0c5191)
  • add config change (#2604) (8312706)
  • add entry point (#2616) (b19fa33)
  • add generation config comparator (#2587) (a94c2f0)
  • Add JavadocJar Task to build.gradle for self service libraries (#2593) (993f5ac)
  • Client/StubSettings' getEndpoint() returns the resolved endpoint (#2440) (4942bc1)
  • generate selected libraries (#2598) (739ddbb)
  • Validate the Universe Domain inside Java-Core (#2592) (35d789f)

Bug Fixes

  • add main to generate_repo.py (#2607) (fedeb32)
  • correct deep-remove and deep-preserve regexes (#2572) (4c7fd88)
  • first attempt should use the min of RPC timeout and total timeout (#2641) (0349232)
  • remove duplicated calls to AutoValue builders (#2636) (53a3727)
  • remove unnecessary slf4j and AbstractGoogleClientRequest native image configs (0cb7d0e)
  • remove unnecessary slf4j and AbstractGoogleClientRequest native image configs (#2628) (0cb7d0e)

Dependencies

  • update arrow.version to v15.0.2 (#2589) (777acf3)
  • update dependency com.google.cloud.opentelemetry:detector-resources-support to v0.28.0 (#2649) (e4ed176)
  • update dependency gitpython to v3.1.41 [security] (#2625) (e41bd8f)
  • update dependency net.bytebuddy:byte-buddy to v1.14.13 (#2646) (73ac5a4)
  • update dependency org.threeten:threeten-extra to v1.8.0 (#2650) (226325a)
  • update dependency org.threeten:threetenbp to v1.6.9 (#2602) (371753e)
  • update dependency org.threeten:threetenbp to v1.6.9 (#2665) (8935bc8)
  • update google api dependencies (#2584) (cd20604)
  • update googleapis/java-cloud-bom digest to 7071341 (#2608) (8d74140)
  • update netty dependencies to v4.1.109.final (#2597) (8990693)
  • update opentelemetry-java monorepo to v1.37.0 (#2652) (f8fa2e9)
  • update protobuf dependencies to v3.25.3 (#2491) (b0e5041)
  • update slf4j monorepo to v2.0.13 (#2647) (f030e29)

v2.38.1

2.38.1 (2024-03-15)

Bug Fixes

  • deps: add detector-resource-support dependencies (#2559) (53f2c85)

... (truncated)

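For context on what this artifact provides: `proto-google-common-protos` ships pre-generated Java classes for Google's shared protobuf definitions (for example `com.google.rpc.Status` from `google/rpc/status.proto`), which gRPC-based tests such as Flight's typically consume. A minimal, hypothetical usage sketch, not code from this PR:

```java
// Hypothetical sketch: proto-google-common-protos supplies pre-generated
// protobuf messages such as com.google.rpc.Status (google/rpc/status.proto).
import com.google.rpc.Status;

public class CommonProtosSketch {
  public static void main(String[] args) {
    Status status = Status.newBuilder()
        .setCode(13) // numeric value of gRPC's INTERNAL status code
        .setMessage("example error detail")
        .build();
    System.out.println(status.getCode() + ": " + status.getMessage());
  }
}
```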
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/flight/flight-core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 9832850108c50..a7ea80d68fffe 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -125,7 +125,7 @@ com.google.api.grpc proto-google-common-protos - 2.37.1 + 2.39.1 test From 417a86b9ff11493dc35b95f4d8599ea269b1439c Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 23 May 2024 18:36:57 -0700 Subject: [PATCH 161/261] GH-41789: [Java] Clean up immutables and checkerframework dependencies (#41790) ### Rationale for this change As annotation processing is directly configured in `maven-compiler-plugin`, project dependencies should not include annotation processors in their dependencies, and annotations should be limited to `provided` scope as much as possible. ### What changes are included in this PR? Clean up immutables and checkerframework dependencies to address the issue above: * switch from `org.immutables:value` to `org.immutables:value-annotations` * update `org.immutables` dependencies from 2.10.0 to 2.10.1 * change `org.checkerframework:checker-qual` default scope from `compile` to `provided` * add `org.immutables:value` and `org.checkerframework:checker` to the list of banned dependencies ### Are these changes tested? CI only ### Are there any user-facing changes? No * GitHub Issue: #41789 Authored-by: Laurent Goujon Signed-off-by: David Li --- java/adapter/avro/pom.xml | 2 +- java/adapter/jdbc/pom.xml | 2 +- java/adapter/orc/pom.xml | 2 +- java/algorithm/pom.xml | 2 +- java/c/pom.xml | 2 +- java/compression/pom.xml | 2 +- java/dataset/pom.xml | 2 +- java/flight/flight-core/pom.xml | 2 +- java/flight/flight-sql/pom.xml | 2 +- java/gandiva/pom.xml | 2 +- java/memory/memory-core/pom.xml | 2 +- .../memory/memory-core/src/main/java/module-info.java | 2 -- java/memory/memory-netty/pom.xml | 2 +- java/memory/memory-unsafe/pom.xml | 2 +- java/pom.xml | 11 +++++++---- java/tools/pom.xml | 2 +- java/vector/pom.xml | 2 +- 17 files changed, 22 insertions(+), 21 deletions(-) diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 9ddc150253874..0046fcac62a22 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -47,7 +47,7 @@ org.immutables - value + value-annotations diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 5f72729bb76e7..17681538ac97e 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -48,7 +48,7 @@ org.immutables - value + value-annotations diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index f6aadca6de4d3..ca817510bf3e3 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -46,7 +46,7 @@ org.immutables - value + value-annotations org.apache.orc diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 92fa5c8553505..0854da48b718a 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -44,7 +44,7 @@ org.immutables - value + value-annotations diff --git a/java/c/pom.xml b/java/c/pom.xml index 1095e99bbdd3f..bfb233315a839 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -50,7 +50,7 @@ org.immutables - value + value-annotations org.apache.arrow diff --git a/java/compression/pom.xml b/java/compression/pom.xml index ff1c7e1690440..26467dbaf2db3 100644 --- a/java/compression/pom.xml +++ b/java/compression/pom.xml @@ -37,7 +37,7 @@ 
org.immutables - value + value-annotations org.apache.commons diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index dd0c76523d0f8..3dea16204a4db 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -46,7 +46,7 @@ org.immutables - value + value-annotations org.apache.arrow diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index a7ea80d68fffe..b565572b383ab 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -119,7 +119,7 @@ org.immutables - value + value-annotations diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index d5366ae988d57..e6d703c673ad5 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -52,7 +52,7 @@ org.immutables - value + value-annotations org.apache.arrow diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 00acb89f1d7cf..26a28d55d238e 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -36,7 +36,7 @@ org.immutables - value + value-annotations org.apache.arrow diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index ca5bc603bd4dc..1e29ccf8ab9db 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -33,7 +33,7 @@ org.immutables - value + value-annotations org.checkerframework diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 5024b7f45769c..d8c22dd993dd9 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -23,7 +23,5 @@ exports org.apache.arrow.util; requires transitive jdk.unsupported; requires jsr305; - requires org.immutables.value; requires org.slf4j; - requires org.checkerframework.checker.qual; } diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index bb4ccd6c26f2a..d815276b09e50 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -53,7 +53,7 @@ org.immutables - value + value-annotations diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 502ac2cc8051a..f1aa8fde1faa1 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -28,7 +28,7 @@ org.immutables - value + value-annotations diff --git a/java/pom.xml b/java/pom.xml index 16564ae828b0f..9956c3b2e9f5f 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -115,6 +115,7 @@ org.checkerframework checker-qual ${checker.framework.version} + provided com.google.flatbuffers @@ -149,8 +150,8 @@ org.immutables - value - 2.10.0 + value-annotations + 2.10.1 provided @@ -309,7 +310,7 @@ org.immutables value - 2.10.0 + 2.10.1 @@ -669,6 +670,9 @@ org.mortbay.jetty:servlet-api org.mortbay.jetty:servlet-api-2.5 log4j:log4j + + org.immutables:value + org.checkerframework:checker @@ -777,7 +781,6 @@ javax.annotation:javax.annotation-api:* org.apache.hadoop:hadoop-client-api - org.checkerframework:checker-qual diff --git a/java/tools/pom.xml b/java/tools/pom.xml index b1507cd301f31..5d9db75e525bd 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -36,7 +36,7 @@ org.immutables - value + value-annotations com.google.guava diff --git a/java/vector/pom.xml b/java/vector/pom.xml index a315bbc03afb6..c39504df2b207 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -32,7 +32,7 @@ org.immutables - value + value-annotations com.fasterxml.jackson.core From 8a76082e3a4b31ba74063093a3f279726625e245 Mon Sep 17 00:00:00 
2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 24 May 2024 09:28:19 +0530 Subject: [PATCH 162/261] GH-40930: [Java] Implement a function to retrieve reference buffers in StringView (#41796) ### Rationale for this change This PR includes a minor changes to the `getBuffers` method in `BaseVariableWidthViewVector`. ### What changes are included in this PR? Previously the fixed buffers were the only ones returned from this method because of lack of clarity in the initial implementation stage. In this PR, it includes the variadic buffers to the result. A test case has also being added. ### Are these changes tested? Yes ### Are there any user-facing changes? No * GitHub Issue: #40930 Authored-by: Vibhatha Lakmal Abeykoon Signed-off-by: David Li --- .../arrow/vector/BaseVariableWidthViewVector.java | 15 +++++++-------- .../org/apache/arrow/vector/TestVectorReset.java | 12 ++++++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index ec700a0dc2592..620b9989943d5 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -703,13 +703,6 @@ public Field getField() { * impact the reference counts for this buffer, so it only should be used for in-context * access. Also note that this buffer changes regularly, thus * external classes shouldn't hold a reference to it (unless they change it). - *

<p> - * Note: This method only returns validityBuffer and valueBuffer. - * But it doesn't return the data buffers. - * <p>

- * TODO: Implement a strategy to retrieve the data buffers. - * data buffer retrieval. - * * @param clear Whether to clear vector before returning, the buffers will still be refcounted * but the returned array will be the only reference to them * @return The underlying {@link ArrowBuf buffers} that is used by this @@ -722,9 +715,15 @@ public ArrowBuf[] getBuffers(boolean clear) { if (getBufferSize() == 0) { buffers = new ArrowBuf[0]; } else { - buffers = new ArrowBuf[2]; + final int dataBufferSize = dataBuffers.size(); + // validity and view buffers + final int fixedBufferSize = 2; + buffers = new ArrowBuf[fixedBufferSize + dataBufferSize]; buffers[0] = validityBuffer; buffers[1] = viewBuffer; + for (int i = fixedBufferSize; i < fixedBufferSize + dataBufferSize; i++) { + buffers[i] = dataBuffers.get(i - fixedBufferSize); + } } if (clear) { for (final ArrowBuf buffer : buffers) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 71009a3337510..19700e02161c7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -93,6 +93,18 @@ public void testVariableTypeReset() { } } + @Test + public void testVariableViewTypeReset() { + try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetVectorAndVerify(vector, vector.getBuffers(false)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testLargeVariableTypeReset() { try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { From 4ace0a18cd0a6c3a826fe20017b193e55126fefe Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 23 May 2024 20:59:29 -0700 Subject: [PATCH 163/261] GH-41799: [Java] Migrate to com.gradle:develocity-maven-extension (#41800) ### Rationale for this change `com.gradle:gradle-enterprise-maven-extension` has been relocated under a new name. Updating the dependency to pick up latest changes and bug fixes ### What changes are included in this PR? Migrate `com.gradle:gradle-enterprise-maven-extension` artifact to `com.gradle:develociy-maven-extension`. Apply steps described at https://docs.gradle.com/develocity/maven-extension/legacy/#develocity_migration to migrate associated files and configuration ### Are these changes tested? No (not sure if someone can validate configuration changes directly with ge.apache.org) ### Are there any user-facing changes? 
No * GitHub Issue: #41799 Authored-by: Laurent Goujon Signed-off-by: David Li --- .gitignore | 2 +- .mvn/{gradle-enterprise.xml => develocity.xml} | 10 +++++----- .mvn/extensions.xml | 6 +++--- java/maven/module-info-compiler-maven-plugin/pom.xml | 6 +++--- java/pom.xml | 6 +++--- 5 files changed, 15 insertions(+), 15 deletions(-) rename .mvn/{gradle-enterprise.xml => develocity.xml} (78%) diff --git a/.gitignore b/.gitignore index c7f5aa90e18e6..e2e84fee57e3c 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,4 @@ __debug_bin .envrc # Develocity -.mvn/.gradle-enterprise/ +.mvn/.develocity.xml diff --git a/.mvn/gradle-enterprise.xml b/.mvn/develocity.xml similarity index 78% rename from .mvn/gradle-enterprise.xml rename to .mvn/develocity.xml index bae5a3f147e68..df3cbccd2b6cb 100644 --- a/.mvn/gradle-enterprise.xml +++ b/.mvn/develocity.xml @@ -1,4 +1,4 @@ - + - + https://ge.apache.org false - true + true true true #{isFalse(env['CI'])} - ALWAYS + true true #{{'0.0.0.0'}} @@ -42,4 +42,4 @@ false - + diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml index b446c647e47e6..b56ab0fd7772a 100644 --- a/.mvn/extensions.xml +++ b/.mvn/extensions.xml @@ -22,12 +22,12 @@ com.gradle - gradle-enterprise-maven-extension - 1.20 + develocity-maven-extension + 1.21.4 com.gradle common-custom-user-data-maven-extension - 1.12.5 + 2.0 diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 6589020d6ecb5..9c1e8fe058110 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -99,9 +99,9 @@ com.gradle - gradle-enterprise-maven-extension + develocity-maven-extension - + @@ -109,7 +109,7 @@ - + diff --git a/java/pom.xml b/java/pom.xml index 9956c3b2e9f5f..925ec585152bc 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -477,9 +477,9 @@ com.gradle - gradle-enterprise-maven-extension + develocity-maven-extension - + @@ -504,7 +504,7 @@ - + From b40d368019bb93928371c9f806599dabfaa0242d Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 24 May 2024 09:31:51 +0530 Subject: [PATCH 164/261] GH-41686: [Java] Nullability of struct child vectors not preserved in TransferPair (#41785) ### Rationale for this change Nullability of the struct child vectors are not preserved in the StructWriter templates. ### What changes are included in this PR? - [X] Adding nullability and test cases. - [X] Updating to JUNIT5 ### Are these changes tested? New test case added and the change is being validated by existing tests. ### Are there any user-facing changes? 
No * GitHub Issue: #41686 Authored-by: Vibhatha Lakmal Abeykoon Signed-off-by: David Li --- .../main/codegen/templates/StructWriters.java | 1 + .../apache/arrow/vector/TestMapVector.java | 58 +++++++++++++------ 2 files changed, 40 insertions(+), 19 deletions(-) diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index b6dd2b75c526a..b676173ac39d9 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -61,6 +61,7 @@ public class ${mode}StructWriter extends AbstractFieldWriter { this.initialCapacity = 0; for (Field child : container.getField().getChildren()) { MinorType minorType = Types.getMinorTypeForArrowType(child.getType()); + addVectorAsNullable = child.isNullable(); switch (minorType) { case STRUCT: struct(child.getName()); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 43f4c3b536fdc..3ffbcc29c9e59 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -17,13 +17,15 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.arrow.memory.ArrowBuf; @@ -37,23 +39,24 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestMapVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -90,7 +93,7 @@ public void testBasicOperation() { mapReader.setPosition(i); for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); assertEquals(j, mapReader.value().readInteger().intValue()); } } @@ -136,7 +139,7 @@ public void testBasicOperationNulls() { } else { for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); if (i == 5) { assertFalse(mapReader.value().isSet()); } else { @@ -194,11 +197,11 @@ public void 
testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -460,15 +463,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) - toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH); offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; @@ -1178,4 +1181,21 @@ public void testGetTransferPairWithFieldAndCallBack() { toVector.clear(); } } + + @Test + public void testMakeTransferPairPreserveNullability() { + Field intField = new Field("int", FieldType.notNullable(MinorType.INT.getType()), null); + List fields = Collections.singletonList(intField); + Field structField = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + Field structField2 = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + FieldVector vec = structField.createVector(allocator); + + TransferPair tp = vec.getTransferPair(structField2, allocator); + tp.transfer(); + + FieldVector res = (FieldVector) tp.getTo(); + + assertEquals(intField, vec.getField().getChildren().get(0)); + assertEquals(intField, res.getField().getChildren().get(0)); + } } From 0c96be359084e727b82482d69f58aa79199d703d Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 24 May 2024 09:32:32 +0530 Subject: [PATCH 165/261] GH-41730: [Java] Adding variadicBufferCounts to RecordBatch (#41732) ### Rationale for this change This PR adds the `variadicBufferCounts` attribute to `ArrowRecordBatch` in Java module. Furthermore, it also updates the `TypeLayout` functions `getTypeBufferCount` and `getTypeLayout` functions along with the corresponding test cases. Previously these changes were listed as issues https://github.com/apache/arrow/issues/40934, https://github.com/apache/arrow/issues/40935 and https://github.com/apache/arrow/issues/40931. These two tickets will also be closed by this PR. ### What changes are included in this PR? The introduced two functions to `TypeLayout` is deprecating the old API and adds a new API. In this PR we are updating a few modules to use the new API. Corresponding tests for the changed functions have also been added. This also updates the usage of `ArrowRecordBatch` across other modules and `TypeLayout` usage across a few modules. Some modules were excluded as mentioned in the issues non-goals section to be completed in a follow up effort as the scope and required tasks remain at large. 
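(For orientation, the counting rule at the core of this change, condensed into a standalone sketch. The class below is illustrative rather than part of the patch; it only reuses `TypeLayout.getTypeBufferCount` and the `getDataBuffers` API touched here.)

```java
// Illustrative sketch, not shipped code: how fixed and variadic buffer
// counts combine for a field vector under this change.
import org.apache.arrow.vector.BaseVariableWidthViewVector;
import org.apache.arrow.vector.FieldVector;
import org.apache.arrow.vector.TypeLayout;

final class BufferCounting {
  // View vectors report one variadic entry per data buffer; all other
  // vectors report zero, matching the unloader logic in this patch.
  static long variadicBufferCount(FieldVector vector) {
    if (vector instanceof BaseVariableWidthViewVector) {
      return ((BaseVariableWidthViewVector) vector).getDataBuffers().size();
    }
    return 0L;
  }

  // Total buffers to serialize: the fixed count from the type layout
  // (e.g. validity + views for Utf8View) plus the variadic portion.
  static int totalBufferCount(FieldVector vector) {
    long variadic = variadicBufferCount(vector);
    return (int) (TypeLayout.getTypeBufferCount(vector.getField().getType())
        + variadic);
  }
}
```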
These modules will still use the deprecated API for TypeLayouts, but documented in the code for updating to the new API in a follow up effort. ### Closing Subtasks - [X] https://github.com/apache/arrow/issues/40934 - [X] https://github.com/apache/arrow/issues/40935 - [X] https://github.com/apache/arrow/issues/40931 ### Are these changes tested? The changes are tested using existing tests and new tests ### Are there any user-facing changes? Yes **This PR includes breaking changes to public APIs.** * GitHub Issue: #41730 Lead-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../apache/arrow/c/StructVectorLoader.java | 17 ++- .../apache/arrow/c/StructVectorUnloader.java | 22 +++- .../vector/BaseVariableWidthViewVector.java | 19 ++- .../org/apache/arrow/vector/TypeLayout.java | 22 ++-- .../org/apache/arrow/vector/VectorLoader.java | 19 ++- .../apache/arrow/vector/VectorUnloader.java | 23 +++- .../arrow/vector/ViewVarCharVector.java | 2 +- .../arrow/vector/ipc/JsonFileReader.java | 1 + .../arrow/vector/ipc/JsonFileWriter.java | 1 + .../vector/ipc/message/ArrowRecordBatch.java | 89 ++++++++++++- .../validate/ValidateVectorBufferVisitor.java | 1 + .../apache/arrow/vector/TestTypeLayout.java | 122 ++++++++++++++---- .../arrow/vector/TestVarCharViewVector.java | 66 ++++++++++ 13 files changed, 343 insertions(+), 61 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index d9afd0189d807..27acf84d30157 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -90,8 +90,12 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? 
factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } for (FieldVector fieldVector : result.getChildrenFromFields()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); if (nodes.hasNext() || buffers.hasNext()) { @@ -102,10 +106,15 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -138,7 +147,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index aa6d9b4d0f6a7..8d015157ebf38 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -87,17 +88,28 @@ public StructVectorUnloader(StructVector root, boolean includeNullCount, Compres public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getChildrenFromFields()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), - alignBuffers); + variadicBufferCounts, alignBuffers); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void 
appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); @@ -106,7 +118,7 @@ private void appendNodes(FieldVector vector, List nodes, List getDataBuffers() { return dataBuffers; @@ -368,8 +368,21 @@ public List getChildrenFromFields() { */ @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - // TODO: https://github.com/apache/arrow/issues/40931 - throw new UnsupportedOperationException("loadFieldBuffers is not supported for BaseVariableWidthViewVector"); + ArrowBuf bitBuf = ownBuffers.get(0); + ArrowBuf viewBuf = ownBuffers.get(1); + List dataBufs = ownBuffers.subList(2, ownBuffers.size()); + + this.clear(); + + this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator); + this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator); + + for (ArrowBuf dataBuf : dataBufs) { + this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator)); + } + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index ea92efdc55f61..0d01d77632bde 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -28,6 +28,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; @@ -186,8 +187,7 @@ public TypeLayout visit(Binary type) { @Override public TypeLayout visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40934 - throw new UnsupportedOperationException("BinaryView not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -197,8 +197,7 @@ public TypeLayout visit(Utf8 type) { @Override public TypeLayout visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40934 - throw new UnsupportedOperationException("Utf8View not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -216,7 +215,12 @@ private TypeLayout newVariableWidthTypeLayout() { BufferLayout.byteVector()); } + private TypeLayout newVariableWidthViewTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.byteVector()); + } + private TypeLayout newLargeVariableWidthTypeLayout() { + // NOTE: only considers the non variadic buffers return newPrimitiveTypeLayout(BufferLayout.validityVector(), 
BufferLayout.largeOffsetBuffer(), BufferLayout.byteVector()); } @@ -377,9 +381,9 @@ public Integer visit(Binary type) { } @Override - public Integer visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40935 - return VARIABLE_WIDTH_BUFFER_COUNT; + public Integer visit(BinaryView type) { + // NOTE: only consider the validity and view buffers + return 2; } @Override @@ -389,8 +393,8 @@ public Integer visit(Utf8 type) { @Override public Integer visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40935 - return VARIABLE_WIDTH_BUFFER_COUNT; + // NOTE: only consider the validity and view buffers + return 2; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 510cef24c7e16..9590e70f46770 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -80,8 +80,13 @@ public void load(ArrowRecordBatch recordBatch) { CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } + for (FieldVector fieldVector : root.getFieldVectors()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } root.setRowCount(recordBatch.getLength()); if (nodes.hasNext() || buffers.hasNext()) { @@ -95,10 +100,16 @@ private void loadBuffers( Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, + Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -130,7 +141,7 @@ private void loadBuffers( for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 1d44e37ac71af..8528099b6d619 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -80,19 +80,30 @@ public VectorUnloader( public ArrowRecordBatch getRecordBatch() { List 
nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getFieldVectors()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } // Do NOT retain buffers in ArrowRecordBatch constructor since we have already retained them. return new ArrowRecordBatch( - root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers, - /*retainBuffers*/ false); + root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), + variadicBufferCounts, alignBuffers, /*retainBuffers*/ false); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", @@ -107,7 +118,7 @@ private void appendNodes(FieldVector vector, List nodes, List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index f5e267e81256c..670881b238ecb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -208,6 +208,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { + // TODO: https://github.com/apache/arrow/issues/41733 List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index f81d049a9257f..b910cfc6ecc25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -56,17 +56,19 @@ public class ArrowRecordBatch implements ArrowMessage { private final List buffersLayout; + private final List variadicBufferCounts; + private boolean closed = false; public ArrowRecordBatch( int length, List nodes, List buffers) { - this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true); + this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, null, true); } public ArrowRecordBatch( int length, List nodes, List buffers, 
ArrowBodyCompression bodyCompression) { - this(length, nodes, buffers, bodyCompression, true); + this(length, nodes, buffers, bodyCompression, null, true); } /** @@ -81,7 +83,7 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + this(length, nodes, buffers, bodyCompression, null, alignBuffers, /*retainBuffers*/ true); } /** @@ -98,12 +100,48 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { + this(length, nodes, buffers, bodyCompression, null, alignBuffers, retainBuffers); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the caller is + * responsible for retaining the buffers beforehand. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers, + boolean retainBuffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; List arrowBuffers = new ArrayList<>(buffers.size()); long offset = 0; for (ArrowBuf arrowBuf : buffers) { @@ -129,12 +167,14 @@ public ArrowRecordBatch( // to distinguish this from the public constructor. private ArrowRecordBatch( boolean dummy, int length, List nodes, - List buffers, ArrowBodyCompression bodyCompression) { + List buffers, ArrowBodyCompression bodyCompression, + List variadicBufferCounts) { this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; this.closed = false; List arrowBuffers = new ArrayList<>(); long offset = 0; @@ -179,6 +219,14 @@ public List getBuffers() { return buffers; } + /** + * Get the record batch variadic buffer counts. 
+ * @return the variadic buffer counts + */ + public List getVariadicBufferCounts() { + return variadicBufferCounts; + } + /** * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers * are owned by that Allocator. @@ -195,7 +243,7 @@ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) { .writerIndex(buf.writerIndex())) .collect(Collectors.toList()); close(); - return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression); + return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression, variadicBufferCounts); } /** @@ -217,6 +265,24 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { compressOffset = bodyCompression.writeTo(builder); } + + // Start the variadicBufferCounts vector. + int variadicBufferCountsOffset = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufferCountsOffset = variadicBufferCounts.size(); + int elementSizeInBytes = 8; // Size of long in bytes + builder.startVector(elementSizeInBytes, variadicBufferCountsOffset, elementSizeInBytes); + + // Add each long to the builder. Note that elements should be added in reverse order. + for (int i = variadicBufferCounts.size() - 1; i >= 0; i--) { + long value = variadicBufferCounts.get(i); + builder.addLong(value); + } + + // End the vector. This returns an offset that you can use to refer to the vector. + variadicBufferCountsOffset = builder.endVector(); + } + RecordBatch.startRecordBatch(builder); RecordBatch.addLength(builder, length); RecordBatch.addNodes(builder, nodesOffset); @@ -224,6 +290,12 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { RecordBatch.addCompression(builder, compressOffset); } + + // Add the variadicBufferCounts to the RecordBatch + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); + } + return RecordBatch.endRecordBatch(builder); } @@ -247,8 +319,13 @@ public void close() { @Override public String toString() { + int variadicBufCount = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufCount = variadicBufferCounts.size(); + } return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() + - ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]"; + ", #variadicBufferCounts=" + variadicBufCount + ", buffersLayout=" + buffersLayout + + ", closed=" + closed + "]"; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java index 0a67db0455b41..af5a67049f722 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java @@ -51,6 +51,7 @@ private void validateVectorCommon(ValueVector vector) { if (vector instanceof FieldVector) { FieldVector fieldVector = (FieldVector) vector; + // TODO: https://github.com/apache/arrow/issues/41734 int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType); validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount, "Expected %s buffers in vector of type %s, got %s.", diff --git 
a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index 97930f433d301..5a58133f2e2bd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,82 +17,158 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Random; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeLayout { + private BufferAllocator allocator; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @AfterEach + public void shutdown() { + allocator.close(); + } + + @Test public void testTypeBufferCount() { ArrowType type = new ArrowType.Int(8, true); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Struct(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.List(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeList(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Map(false); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new 
ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 128); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 256); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeBinary(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Bool(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Binary(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Utf8(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Null(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Date(DateUnit.DAY); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + + private String generateRandomString(int length) { + Random random 
= new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } + + @Test + public void testTypeBufferCountInVectorsWithVariadicBuffers() { + // empty vector + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + // vector with long strings + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(32, 6); + + viewVarCharVector.setSafe(0, generateRandomString(8).getBytes()); + viewVarCharVector.setSafe(1, generateRandomString(12).getBytes()); + viewVarCharVector.setSafe(2, generateRandomString(14).getBytes()); + viewVarCharVector.setSafe(3, generateRandomString(18).getBytes()); + viewVarCharVector.setSafe(4, generateRandomString(22).getBytes()); + viewVarCharVector.setSafe(5, generateRandomString(24).getBytes()); + + viewVarCharVector.setValueCount(6); + + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index efb5afac91b13..2d37b0b4eb9ad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -31,6 +31,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -41,8 +42,11 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -1451,6 +1455,68 @@ public void testSafeOverwriteLongFromALongerLongString() { } } + @Test + public void testVectorLoadUnload() { + + try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { + + setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); + + assertEquals(5, vector1.getLastSet()); + vector1.setValueCount(15); + assertEquals(14, vector1.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector1.get(0)); + assertArrayEquals(STR2, vector1.get(1)); + assertArrayEquals(STR3, vector1.get(2)); + assertArrayEquals(STR4, vector1.get(3)); + assertArrayEquals(STR5, vector1.get(4)); + assertArrayEquals(STR6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader 
vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(14, vector2.getLastSet()); + vector2.setValueCount(25); + assertEquals(24, vector2.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector2.get(0)); + assertArrayEquals(STR2, vector2.get(1)); + assertArrayEquals(STR3, vector2.get(2)); + assertArrayEquals(STR4, vector2.get(3)); + assertArrayEquals(STR5, vector2.get(4)); + assertArrayEquals(STR6, vector2.get(5)); + } + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); From 19044ee609d8618aeea086c3a199ae5a002e92d7 Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Fri, 24 May 2024 17:53:27 +1200 Subject: [PATCH 166/261] MINOR: [CI] Only try to read vcpkg version from .env if it isn't specified (#41805) ### Rationale for this change See https://github.com/apache/arrow/pull/41599#discussion_r1612517256 This is a small tidy-up of `install_vcpkg.sh` based on code review in #41599 after it was merged. Some uses of `install_vcpkg.sh` are in Docker containers where the `.env` file hasn't been copied. Rather than try to read it and ignore any errors, only read the `.env` file if the vcpkg version wasn't specified as an argument to the script. This way, if there is an error reading the `.env` file and we do need the default version, the error should be more helpful. ### What changes are included in this PR? Update `install_vcpkg.sh` to only try to read the vcpkg version from `.env` if it isn't specified as an argument, and don't ignore any errors. ### Are these changes tested? Yes, this script already runs as part of CI. ### Are there any user-facing changes? No Authored-by: Adam Reeve Signed-off-by: Sutou Kouhei --- ci/scripts/install_vcpkg.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh index 861aeeea8bd0f..08989d6444827 100755 --- a/ci/scripts/install_vcpkg.sh +++ b/ci/scripts/install_vcpkg.sh @@ -25,13 +25,16 @@ if [ "$#" -lt 1 ]; then fi arrow_dir=$(cd -- "$(dirname -- "$0")/../.." && pwd -P)
-default_vcpkg_version=$(source "${arrow_dir}/.env" && echo "$VCPKG" || echo "") default_vcpkg_ports_patch="${arrow_dir}/ci/vcpkg/ports.patch" vcpkg_destination=$1 -vcpkg_version=${2:-$default_vcpkg_version} +vcpkg_version=${2:-} vcpkg_ports_patch=${3:-$default_vcpkg_ports_patch} +if [ -z "${vcpkg_version}" ]; then + vcpkg_version=$(source "${arrow_dir}/.env" && echo "$VCPKG") +fi + # reduce the fetched data using a shallow clone git clone --shallow-since=2021-04-01 https://github.com/microsoft/vcpkg ${vcpkg_destination} From b2754832741d1566900e102e0add330a1d620f27 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 24 May 2024 09:11:09 -0300 Subject: [PATCH 167/261] GH-41741: [C++] Check that extension metadata key is present before attempting to delete it (#41763) ### Rationale for this change Neither Schema.fbs nor the Arrow C Data interface nor the columnar specification indicates that the ARROW:extension:metadata key must be present; however, the `ImportType()` implementation assumes that `ARROW:extension:name` and `ARROW:extension:metadata` are both present and throws an exception if `ARROW:extension:metadata` is missing. This causes pyarrow to crash (see issue for reproducer). ### What changes are included in this PR? This PR checks that the extension metadata is present before attempting to delete it. ### Are these changes tested? Yes (test added). ### Are there any user-facing changes? No. * GitHub Issue: #41741 Authored-by: Dewey Dunnington Signed-off-by: Dewey Dunnington --- cpp/src/arrow/c/bridge.cc | 10 ++++++++-- cpp/src/arrow/c/bridge_test.cc | 17 +++++++++++++++++ cpp/src/arrow/testing/extension_type.h | 19 +++++++++++++++++++ 3 files changed, 44 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 3e2e04ba0b6ec..afb664c3bc258 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1059,8 +1059,14 @@ struct SchemaImporter { ARROW_ASSIGN_OR_RAISE( type_, registered_ext_type->Deserialize(std::move(type_), metadata_.extension_serialized)); - RETURN_NOT_OK(metadata_.metadata->DeleteMany( - {metadata_.extension_name_index, metadata_.extension_serialized_index})); + // If metadata is present, delete both metadata keys (otherwise, just remove + // the extension name key) + if (metadata_.extension_serialized_index >= 0) { + RETURN_NOT_OK(metadata_.metadata->DeleteMany( + {metadata_.extension_name_index, metadata_.extension_serialized_index})); + } else { + RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index)); + } } } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index 0ecfb5a957760..e3ec262422ba6 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -4102,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) { TestWithArrayFactory(NestedFactory(ExampleDictExtension)); } +TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) { + auto ext_type = std::make_shared<MetadataOptionalExtensionType>(); + ExtensionTypeGuard guard(ext_type); + + auto ext_metadata = + KeyValueMetadata::Make({"ARROW:extension:name"}, {ext_type->extension_name()}); + auto ext_field = field("", ext_type->storage_type(), true, std::move(ext_metadata)); + + struct ArrowSchema c_schema {}; + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK(ExportField(*ext_field, &c_schema)); + + ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema)); + ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION); + AssertTypeEqual(ext_type_roundtrip, ext_type); +} +
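The guarded cleanup that bridge.cc performs above can be sketched against the public arrow::KeyValueMetadata API alone. This is a minimal illustration rather than the SchemaImporter internals (which track the key indices while decoding); it assumes <arrow/util/key_value_metadata.h> and relies on FindKey() returning -1 for an absent key:

// Hypothetical helper: strip the extension annotations from imported
// metadata, tolerating a missing ARROW:extension:metadata key.
arrow::Status StripExtensionKeys(arrow::KeyValueMetadata* metadata) {
  int name_index = metadata->FindKey("ARROW:extension:name");
  if (name_index < 0) return arrow::Status::OK();  // not an extension field
  int serialized_index = metadata->FindKey("ARROW:extension:metadata");
  if (serialized_index >= 0) {
    // Both keys present: remove them together, as before this fix.
    return metadata->DeleteMany({name_index, serialized_index});
  }
  // Only the name key is present: remove just that one instead of erroring.
  return metadata->Delete(name_index);
}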
TEST_F(TestArrayRoundtrip, UnregisteredExtension) { auto StorageExtractor = [](ArrayFactory factory) { return [factory]() -> Result> { diff --git a/cpp/src/arrow/testing/extension_type.h b/cpp/src/arrow/testing/extension_type.h index 846e3c7a16578..6515631f202ae 100644 --- a/cpp/src/arrow/testing/extension_type.h +++ b/cpp/src/arrow/testing/extension_type.h @@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType { std::string Serialize() const override { return "dict-extension-serialized"; } }; +// A minimal extension type that does not error when passed blank extension information +class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType { + public: + MetadataOptionalExtensionType() : ExtensionType(null()) {} + std::string extension_name() const override { return "metadata.optional"; } + std::string Serialize() const override { return ""; } + std::shared_ptr MakeArray(std::shared_ptr data) const override { + return nullptr; + } + bool ExtensionEquals(const ExtensionType& other) const override { + return other.extension_name() == extension_name(); + } + Result> Deserialize( + std::shared_ptr storage_type, + const std::string& serialized_data) const override { + return std::make_shared(); + } +}; + class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray { public: using ExtensionArray::ExtensionArray; From 3a4fcff9a681eca258ef0eb2cc2ad23ddffcc207 Mon Sep 17 00:00:00 2001 From: William Ayd Date: Fri, 24 May 2024 12:07:38 -0400 Subject: [PATCH 168/261] GH-41478: [C++] Clean up more redundant move warnings (#41487) ### Rationale for this change Minor warning cleanup for downstream libraries trying to get warning-free builds ### What changes are included in this PR? Removed redundant std::move from return statements ### Are these changes tested? Builds cleanly ### Are there any user-facing changes? 
No * GitHub Issue: #41478 Authored-by: Will Ayd Signed-off-by: Benjamin Kietzman --- cpp/cmake_modules/SetupCxxFlags.cmake | 3 ++- cpp/src/arrow/acero/aggregate_internal.cc | 6 ++--- cpp/src/arrow/acero/backpressure_handler.h | 2 +- cpp/src/arrow/acero/hash_aggregate_test.cc | 2 +- cpp/src/arrow/acero/hash_join.cc | 2 +- cpp/src/arrow/acero/hash_join_node.cc | 4 +-- cpp/src/arrow/acero/order_by_impl.cc | 4 +-- cpp/src/arrow/acero/swiss_join.cc | 2 +- cpp/src/arrow/acero/task_util.cc | 2 +- cpp/src/arrow/acero/tpch_node.cc | 14 +++++----- cpp/src/arrow/adapters/orc/adapter.cc | 4 +-- cpp/src/arrow/adapters/orc/util.cc | 6 ++--- cpp/src/arrow/array/array_nested.cc | 2 +- cpp/src/arrow/array/builder_base.h | 6 ++--- cpp/src/arrow/array/builder_run_end.cc | 3 +-- cpp/src/arrow/array/concatenate.cc | 3 ++- cpp/src/arrow/array/util.cc | 3 ++- cpp/src/arrow/buffer.cc | 12 ++++++--- cpp/src/arrow/c/bridge_test.cc | 2 +- cpp/src/arrow/compute/exec.cc | 8 +++--- cpp/src/arrow/compute/expression.cc | 4 +-- cpp/src/arrow/compute/function_internal.h | 5 ++-- cpp/src/arrow/compute/kernel.cc | 2 +- .../arrow/compute/kernels/hash_aggregate.cc | 18 +++++++------ .../compute/kernels/scalar_arithmetic.cc | 4 +-- .../compute/kernels/scalar_string_ascii.cc | 10 +++---- cpp/src/arrow/compute/kernels/vector_hash.cc | 3 ++- cpp/src/arrow/compute/row/grouper.cc | 8 +++--- cpp/src/arrow/csv/reader.cc | 4 +-- cpp/src/arrow/dataset/dataset.cc | 2 +- cpp/src/arrow/dataset/dataset_writer.cc | 3 +-- cpp/src/arrow/dataset/file_csv.cc | 2 +- cpp/src/arrow/dataset/file_parquet.cc | 9 ++++--- cpp/src/arrow/dataset/scan_node.cc | 6 ++--- cpp/src/arrow/device.cc | 4 +-- .../engine/substrait/expression_internal.cc | 26 +++++++++---------- .../substrait/extended_expression_internal.cc | 6 ++--- .../arrow/engine/substrait/extension_set.cc | 10 +++---- .../arrow/engine/substrait/plan_internal.cc | 2 +- .../engine/substrait/relation_internal.cc | 10 +++---- cpp/src/arrow/engine/substrait/serde.cc | 2 +- .../arrow/engine/substrait/type_internal.cc | 4 +-- cpp/src/arrow/field_ref_test.cc | 2 +- cpp/src/arrow/filesystem/azurefs.cc | 4 +-- cpp/src/arrow/filesystem/localfs.cc | 2 +- cpp/src/arrow/filesystem/localfs_test.cc | 2 +- cpp/src/arrow/filesystem/s3fs.cc | 5 ++-- cpp/src/arrow/filesystem/util_internal.cc | 2 +- cpp/src/arrow/flight/client.cc | 4 +-- cpp/src/arrow/flight/sql/client.cc | 2 +- cpp/src/arrow/gpu/cuda_memory.cc | 2 +- cpp/src/arrow/integration/json_internal.cc | 4 +-- cpp/src/arrow/io/buffered.cc | 3 ++- cpp/src/arrow/io/compressed.cc | 5 ++-- cpp/src/arrow/io/compressed_test.cc | 2 +- cpp/src/arrow/io/file.cc | 6 +++-- cpp/src/arrow/io/hdfs.cc | 6 +++-- cpp/src/arrow/io/stdio.cc | 3 ++- cpp/src/arrow/ipc/message.cc | 18 ++++++------- cpp/src/arrow/ipc/metadata_internal.cc | 6 ++--- cpp/src/arrow/ipc/metadata_internal.h | 3 ++- cpp/src/arrow/ipc/reader.cc | 11 ++++---- cpp/src/arrow/ipc/writer.cc | 3 ++- cpp/src/arrow/scalar.cc | 8 +++--- cpp/src/arrow/table_builder.cc | 2 +- cpp/src/arrow/testing/builder.h | 2 +- cpp/src/arrow/type.cc | 4 +-- cpp/src/arrow/util/align_util.cc | 17 ++++++------ cpp/src/arrow/util/async_generator.h | 2 +- cpp/src/arrow/util/bit_util_benchmark.cc | 2 +- cpp/src/arrow/util/bitmap_builders.cc | 6 +++-- cpp/src/arrow/util/bitmap_reader_benchmark.cc | 2 +- cpp/src/arrow/util/compression.cc | 2 +- cpp/src/arrow/util/decimal.cc | 4 +-- cpp/src/arrow/util/future.cc | 2 +- cpp/src/arrow/util/future.h | 2 +- cpp/src/arrow/util/io_util.cc | 6 ++--- cpp/src/arrow/util/iterator.h | 4 +-- 
cpp/src/arrow/util/vector.h | 6 ++--- cpp/src/gandiva/function_registry.cc | 2 +- cpp/src/gandiva/llvm_generator.cc | 2 +- .../parquet/arrow/arrow_reader_writer_test.cc | 2 +- cpp/src/parquet/encoding.cc | 2 +- .../parquet/encryption/file_key_unwrapper.cc | 2 +- cpp/src/parquet/platform.cc | 2 +- cpp/src/parquet/properties.cc | 2 +- cpp/src/skyhook/cls/cls_skyhook.cc | 2 +- 87 files changed, 218 insertions(+), 198 deletions(-) diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake b/cpp/cmake_modules/SetupCxxFlags.cmake index ea357b47794ce..e2e1c4412abd0 100644 --- a/cpp/cmake_modules/SetupCxxFlags.cmake +++ b/cpp/cmake_modules/SetupCxxFlags.cmake @@ -330,8 +330,9 @@ if("${BUILD_WARNING_LEVEL}" STREQUAL "CHECKIN") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wall") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-conversion") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-sign-conversion") - set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result") set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wdate-time") + string(APPEND CXX_ONLY_FLAGS " -Wredundant-move") + set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wunused-result") elseif(CMAKE_CXX_COMPILER_ID STREQUAL "Intel" OR CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") if(WIN32) diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc index 9c4b7fe5ae98c..0c1bc3db365a6 100644 --- a/cpp/src/arrow/acero/aggregate_internal.cc +++ b/cpp/src/arrow/acero/aggregate_internal.cc @@ -102,7 +102,7 @@ Result> InitKernel(const HashAggregateKernel* kerne ARROW_ASSIGN_OR_RAISE( auto state, kernel->init(&kernel_ctx, KernelInitArgs{kernel, aggr_in_types, options})); - return std::move(state); + return state; } Result> GetKernels( @@ -129,7 +129,7 @@ Result>> InitKernels( ARROW_ASSIGN_OR_RAISE(states[i], InitKernel(kernels[i], ctx, aggregates[i], in_types[i])); } - return std::move(states); + return states; } Result ResolveKernels( @@ -242,7 +242,7 @@ Result> ExtractValues(const ExecBatch& input_batch, DCHECK(false); } } - return std::move(values); + return values; } } // namespace aggregate diff --git a/cpp/src/arrow/acero/backpressure_handler.h b/cpp/src/arrow/acero/backpressure_handler.h index 178272315d7fb..db6c3799354af 100644 --- a/cpp/src/arrow/acero/backpressure_handler.h +++ b/cpp/src/arrow/acero/backpressure_handler.h @@ -45,7 +45,7 @@ class BackpressureHandler { } BackpressureHandler backpressure_handler(input, low_threshold, high_threshold, std::move(backpressure_control)); - return std::move(backpressure_handler); + return backpressure_handler; } void Handle(size_t start_level, size_t end_level) { diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index d529f443319b9..743cb20d1960d 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -318,7 +318,7 @@ Result RunGroupBy(const BatchesWithSchema& input, { {"source", SourceNodeOptions{input.schema, input.gen(use_threads, /*slow=*/false)}}, - {"aggregate", AggregateNodeOptions{std::move(aggregates), std::move(keys), + {"aggregate", AggregateNodeOptions{aggregates, std::move(keys), std::move(segment_keys)}}, {"sink", SinkNodeOptions{&sink_gen}}, }) diff --git a/cpp/src/arrow/acero/hash_join.cc b/cpp/src/arrow/acero/hash_join.cc index 296b2c56e00f4..5aa70a23f7c9e 100644 --- a/cpp/src/arrow/acero/hash_join.cc +++ b/cpp/src/arrow/acero/hash_join.cc @@ -791,7 +791,7 @@ class HashJoinBasicImpl : public HashJoinImpl { Result> HashJoinImpl::MakeBasic() { std::unique_ptr impl{new 
HashJoinBasicImpl()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index 06405f16c8d4c..67f902e64be93 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -351,7 +351,7 @@ Result HashJoinSchema::BindFilter(Expression filter, const Schema& right_schema, ExecContext* exec_context) { if (filter.IsBound() || filter == literal(true)) { - return std::move(filter); + return filter; } // Step 1: Construct filter schema FieldVector fields; @@ -386,7 +386,7 @@ Result HashJoinSchema::BindFilter(Expression filter, filter.ToString(), " evaluates to ", filter.type()->ToString()); } - return std::move(filter); + return filter; } Expression HashJoinSchema::RewriteFilterToUseFilterSchema( diff --git a/cpp/src/arrow/acero/order_by_impl.cc b/cpp/src/arrow/acero/order_by_impl.cc index 2c624f6ab895f..1165799fc6610 100644 --- a/cpp/src/arrow/acero/order_by_impl.cc +++ b/cpp/src/arrow/acero/order_by_impl.cc @@ -93,14 +93,14 @@ Result> OrderByImpl::MakeSort( ExecContext* ctx, const std::shared_ptr& output_schema, const SortOptions& options) { std::unique_ptr impl{new SortBasicImpl(ctx, output_schema, options)}; - return std::move(impl); + return impl; } Result> OrderByImpl::MakeSelectK( ExecContext* ctx, const std::shared_ptr& output_schema, const SelectKOptions& options) { std::unique_ptr impl{new SelectKBasicImpl(ctx, output_schema, options)}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 17c5212697339..732deb72861d6 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -2985,7 +2985,7 @@ class SwissJoin : public HashJoinImpl { Result> HashJoinImpl::MakeSwiss() { std::unique_ptr impl{new SwissJoin()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/task_util.cc b/cpp/src/arrow/acero/task_util.cc index 4d8e9ecf76597..85378eaeeb27c 100644 --- a/cpp/src/arrow/acero/task_util.cc +++ b/cpp/src/arrow/acero/task_util.cc @@ -424,7 +424,7 @@ void TaskSchedulerImpl::Abort(AbortContinuationImpl impl) { std::unique_ptr TaskScheduler::Make() { std::unique_ptr impl{new TaskSchedulerImpl()}; - return std::move(impl); + return impl; } } // namespace acero diff --git a/cpp/src/arrow/acero/tpch_node.cc b/cpp/src/arrow/acero/tpch_node.cc index 9797a082b49d2..137b62ad38a95 100644 --- a/cpp/src/arrow/acero/tpch_node.cc +++ b/cpp/src/arrow/acero/tpch_node.cc @@ -336,7 +336,7 @@ Result TpchPseudotext::GenerateComments(size_t num_comments, size_t min_l } ArrayData ad(utf8(), num_comments, {nullptr, std::move(offset_buffer), std::move(comment_buffer)}); - return std::move(ad); + return ad; } bool TpchPseudotext::GenerateWord(int64_t& offset, random::pcg32_fast& rng, char* arr, @@ -611,7 +611,7 @@ Result RandomVString(random::pcg32_fast& rng, int64_t num_rows, int32_t m for (int32_t i = 0; i < offsets[num_rows]; i++) str[i] = alpha_numerics[char_dist(rng)]; ArrayData ad(utf8(), num_rows, {nullptr, std::move(offset_buff), std::move(str_buff)}); - return std::move(ad); + return ad; } void GeneratePhoneNumber(char* out, random::pcg32_fast& rng, int32_t country) { @@ -677,7 +677,7 @@ class PartAndPartSupplierGenerator { if (!part_output_queue_.empty()) { ExecBatch batch = std::move(part_output_queue_.front()); part_output_queue_.pop(); - return std::move(batch); + return batch; } else if 
(part_rows_generated_ == part_rows_to_generate_) { return std::nullopt; } else { @@ -732,7 +732,7 @@ class PartAndPartSupplierGenerator { if (!partsupp_output_queue_.empty()) { ExecBatch result = std::move(partsupp_output_queue_.front()); partsupp_output_queue_.pop(); - return std::move(result); + return result; } } { @@ -1337,7 +1337,7 @@ class OrdersAndLineItemGenerator { if (!orders_output_queue_.empty()) { ExecBatch batch = std::move(orders_output_queue_.front()); orders_output_queue_.pop(); - return std::move(batch); + return batch; } else if (orders_rows_generated_ == orders_rows_to_generate_) { return std::nullopt; } else { @@ -1401,12 +1401,12 @@ class OrdersAndLineItemGenerator { if (from_queue) { ARROW_DCHECK(queued.length <= batch_size_); tld.first_batch_offset = queued.length; - if (queued.length == batch_size_) return std::move(queued); + if (queued.length == batch_size_) return queued; } { std::lock_guard lock(orders_output_queue_mutex_); if (orders_rows_generated_ == orders_rows_to_generate_) { - if (from_queue) return std::move(queued); + if (from_queue) return queued; return std::nullopt; } diff --git a/cpp/src/arrow/adapters/orc/adapter.cc b/cpp/src/arrow/adapters/orc/adapter.cc index 98784450b3cce..25759f8471365 100644 --- a/cpp/src/arrow/adapters/orc/adapter.cc +++ b/cpp/src/arrow/adapters/orc/adapter.cc @@ -566,7 +566,7 @@ Result> ORCFileReader::Open( #endif auto result = std::unique_ptr(new ORCFileReader()); RETURN_NOT_OK(result->impl_->Open(file, pool)); - return std::move(result); + return result; } Result> ORCFileReader::ReadMetadata() { @@ -837,7 +837,7 @@ Result> ORCFileWriter::Open( std::unique_ptr(new ORCFileWriter()); Status status = result->impl_->Open(output_stream, writer_options); RETURN_NOT_OK(status); - return std::move(result); + return result; } Status ORCFileWriter::Write(const Table& table) { return impl_->Write(table); } diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc index 2a74bec1aa6fd..5bfe257ac7bad 100644 --- a/cpp/src/arrow/adapters/orc/util.cc +++ b/cpp/src/arrow/adapters/orc/util.cc @@ -1026,7 +1026,7 @@ Result> GetOrcType(const DataType& type) { SetAttributes(*it, orc_subtype.get()); out_type->addStructField(field_name, std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } case Type::type::MAP: { const auto& key_field = checked_cast(type).key_field(); @@ -1048,7 +1048,7 @@ Result> GetOrcType(const DataType& type) { SetAttributes(arrow_field, orc_subtype.get()); out_type->addUnionChild(std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } default: { return Status::NotImplemented("Unknown or unsupported Arrow type: ", @@ -1195,7 +1195,7 @@ Result> GetOrcType(const Schema& schema) { SetAttributes(field, orc_subtype.get()); out_type->addStructField(field->name(), std::move(orc_subtype)); } - return std::move(out_type); + return out_type; } Result> GetFieldMetadata( diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 1be771d8228d9..67a499c2b8277 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -1177,7 +1177,7 @@ void SparseUnionArray::SetData(std::shared_ptr data) { } void DenseUnionArray::SetData(const std::shared_ptr& data) { - this->UnionArray::SetData(std::move(data)); + this->UnionArray::SetData(data); ARROW_CHECK_EQ(data_->type->id(), Type::DENSE_UNION); ARROW_CHECK_EQ(data_->buffers.size(), 3); diff --git a/cpp/src/arrow/array/builder_base.h 
b/cpp/src/arrow/array/builder_base.h index 3a85318735f80..ecd2136f5d20b 100644 --- a/cpp/src/arrow/array/builder_base.h +++ b/cpp/src/arrow/array/builder_base.h @@ -332,7 +332,7 @@ inline Result> MakeBuilder( const std::shared_ptr& type, MemoryPool* pool = default_memory_pool()) { std::unique_ptr out; ARROW_RETURN_NOT_OK(MakeBuilder(pool, type, &out)); - return std::move(out); + return out; } /// \brief Construct an empty ArrayBuilder corresponding to the data @@ -346,7 +346,7 @@ inline Result> MakeBuilderExactIndex( const std::shared_ptr& type, MemoryPool* pool = default_memory_pool()) { std::unique_ptr out; ARROW_RETURN_NOT_OK(MakeBuilderExactIndex(pool, type, &out)); - return std::move(out); + return out; } /// \brief Construct an empty DictionaryBuilder initialized optionally @@ -365,7 +365,7 @@ inline Result> MakeDictionaryBuilder( MemoryPool* pool = default_memory_pool()) { std::unique_ptr out; ARROW_RETURN_NOT_OK(MakeDictionaryBuilder(pool, type, dictionary, &out)); - return std::move(out); + return out; } } // namespace arrow diff --git a/cpp/src/arrow/array/builder_run_end.cc b/cpp/src/arrow/array/builder_run_end.cc index cff8d72952385..ed384123d8b87 100644 --- a/cpp/src/arrow/array/builder_run_end.cc +++ b/cpp/src/arrow/array/builder_run_end.cc @@ -162,8 +162,7 @@ Status RunCompressorBuilder::FinishInternal(std::shared_ptr* out) { RunEndEncodedBuilder::ValueRunBuilder::ValueRunBuilder( MemoryPool* pool, const std::shared_ptr& value_builder, const std::shared_ptr& value_type, RunEndEncodedBuilder& ree_builder) - : RunCompressorBuilder(pool, std::move(value_builder), std::move(value_type)), - ree_builder_(ree_builder) {} + : RunCompressorBuilder(pool, value_builder, value_type), ree_builder_(ree_builder) {} RunEndEncodedBuilder::RunEndEncodedBuilder( MemoryPool* pool, const std::shared_ptr& run_end_builder, diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index 44d58cc0bdebc..87e55246c78fe 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -522,7 +522,8 @@ class ConcatenateImpl { } out_data += data->length * index_width; } - return std::move(out); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(out)); } Status Visit(const DictionaryType& d) { diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index 41cd6a1c0b260..b56ea25f9e421 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -125,7 +125,8 @@ class ArrayDataEndianSwapper { for (int64_t i = 0; i < length; i++) { out_data[i] = bit_util::ByteSwap(in_data[i]); } - return std::move(out_buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(out_buffer)); } template diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index 1bd789b7cafe6..0eb22a9d1553d 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -41,7 +41,8 @@ Result> Buffer::CopySlice(const int64_t start, ARROW_ASSIGN_OR_RAISE(auto new_buffer, AllocateResizableBuffer(nbytes, pool)); std::memcpy(new_buffer->mutable_data(), data() + start, static_cast(nbytes)); - return std::move(new_buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(new_buffer)); } Buffer::Buffer() : Buffer(memory_pool::internal::kZeroSizeArea, 0) {} @@ -185,7 +186,8 @@ Result> AllocateBitmap(int64_t length, MemoryPool* pool) if (buf->size() > 0) { 
buf->mutable_data()[buf->size() - 1] = 0; } - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } Result> AllocateEmptyBitmap(int64_t length, MemoryPool* pool) { @@ -197,7 +199,8 @@ Result> AllocateEmptyBitmap(int64_t length, int64_t alig ARROW_ASSIGN_OR_RAISE(auto buf, AllocateBuffer(bit_util::BytesForBits(length), alignment, pool)); memset(buf->mutable_data(), 0, static_cast(buf->size())); - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } Status AllocateEmptyBitmap(int64_t length, std::shared_ptr* out) { @@ -219,7 +222,8 @@ Result> ConcatenateBuffers( out_data += buffer->size(); } } - return std::move(out); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(out)); } } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index e3ec262422ba6..09bb524adbdf0 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -1362,7 +1362,7 @@ class MyMemoryManager : public CPUMemoryManager { if (buf.size() > 0) { memcpy(dest->mutable_data(), buf.data(), static_cast(buf.size())); } - return std::move(dest); + return dest; } }; diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index f2e4578383122..05c4936482b0b 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -923,7 +923,7 @@ class ScalarExecutor : public KernelExecutorImpl { DCHECK(output.is_array_data()); // Emit a result for each chunk - RETURN_NOT_OK(EmitResult(std::move(output.array_data()), listener)); + RETURN_NOT_OK(EmitResult(output.array_data(), listener)); } return Status::OK(); } @@ -1107,7 +1107,7 @@ class VectorExecutor : public KernelExecutorImpl { RETURN_NOT_OK(PropagateNulls(kernel_ctx_, span, out.array_data().get())); } RETURN_NOT_OK(kernel_->exec(kernel_ctx_, span, &out)); - return EmitResult(std::move(out.array_data()), listener); + return EmitResult(out.array_data(), listener); } Status ExecChunked(const ExecBatch& batch, ExecListener* listener) { @@ -1116,10 +1116,10 @@ class VectorExecutor : public KernelExecutorImpl { ARROW_ASSIGN_OR_RAISE(out.value, PrepareOutput(batch.length)); RETURN_NOT_OK(kernel_->exec_chunked(kernel_ctx_, batch, &out)); if (out.is_array()) { - return EmitResult(std::move(out.array()), listener); + return EmitResult(out.array(), listener); } else { DCHECK(out.is_chunked_array()); - return EmitResult(std::move(out.chunked_array()), listener); + return EmitResult(out.chunked_array(), listener); } } diff --git a/cpp/src/arrow/compute/expression.cc b/cpp/src/arrow/compute/expression.cc index 532869b3453a7..b1d914ce873cc 100644 --- a/cpp/src/arrow/compute/expression.cc +++ b/cpp/src/arrow/compute/expression.cc @@ -1645,7 +1645,7 @@ Expression and_(const std::vector& operands) { Expression folded = operands.front(); for (auto it = operands.begin() + 1; it != operands.end(); ++it) { - folded = and_(std::move(folded), std::move(*it)); + folded = and_(std::move(folded), *it); } return folded; } @@ -1659,7 +1659,7 @@ Expression or_(const std::vector& operands) { Expression folded = operands.front(); for (auto it = operands.begin() + 1; it != operands.end(); ++it) { - folded = or_(std::move(folded), std::move(*it)); + folded = or_(std::move(folded), *it); } return folded; } diff --git a/cpp/src/arrow/compute/function_internal.h 
b/cpp/src/arrow/compute/function_internal.h index 653273ef0fac2..9d8928466baa5 100644 --- a/cpp/src/arrow/compute/function_internal.h +++ b/cpp/src/arrow/compute/function_internal.h @@ -684,12 +684,13 @@ const FunctionOptionsType* GetFunctionOptionsType(const Properties&... propertie auto options = std::make_unique(); RETURN_NOT_OK( FromStructScalarImpl(options.get(), scalar, properties_).status_); - return std::move(options); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(options)); } std::unique_ptr Copy(const FunctionOptions& options) const override { auto out = std::make_unique(); CopyImpl(out.get(), checked_cast(options), properties_); - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index fd554ba3d83c5..9cc5cc10917ee 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -75,7 +75,7 @@ Result> ScalarAggregateKernel::MergeAll( for (auto& state : states) { RETURN_NOT_OK(kernel->merge(ctx, std::move(*state), out.get())); } - return std::move(out); + return out; } // ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/compute/kernels/hash_aggregate.cc b/cpp/src/arrow/compute/kernels/hash_aggregate.cc index 5052d8dd66694..54cd695421a93 100644 --- a/cpp/src/arrow/compute/kernels/hash_aggregate.cc +++ b/cpp/src/arrow/compute/kernels/hash_aggregate.cc @@ -83,7 +83,8 @@ Result> HashAggregateInit(KernelContext* ctx, const KernelInitArgs& args) { auto impl = std::make_unique(); RETURN_NOT_OK(impl->Init(ctx->exec_context(), args)); - return std::move(impl); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(impl)); } Status HashAggregateResize(KernelContext* ctx, int64_t num_groups) { @@ -813,7 +814,7 @@ struct GroupedMeanImpl (*null_count)++; bit_util::SetBitTo((*null_bitmap)->mutable_data(), i, false); } - return std::move(values); + return values; } std::shared_ptr out_type() const override { @@ -1114,7 +1115,8 @@ Result> VarStdInit(KernelContext* ctx, auto impl = std::make_unique>(); impl->result_type_ = result_type; RETURN_NOT_OK(impl->Init(ctx->exec_context(), args)); - return std::move(impl); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(impl)); } template @@ -1685,7 +1687,7 @@ Result> MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) { ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); static_cast*>(impl.get())->type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } template @@ -2188,7 +2190,7 @@ Result> FirstLastInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); static_cast*>(impl.get())->type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } template @@ -2597,7 +2599,7 @@ Result> GroupedDistinctInit(KernelContext* ctx, instance->out_type_ = args.inputs[0].GetSharedPtr(); ARROW_ASSIGN_OR_RAISE(instance->grouper_, Grouper::Make(args.inputs, ctx->exec_context())); - return std::move(impl); + return impl; } // ---------------------------------------------------------------------- @@ -2839,7 +2841,7 @@ Result> GroupedOneInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); auto instance = static_cast*>(impl.get()); instance->out_type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return 
impl; } struct GroupedOneFactory { @@ -3237,7 +3239,7 @@ Result> GroupedListInit(KernelContext* ctx, ARROW_ASSIGN_OR_RAISE(auto impl, HashAggregateInit>(ctx, args)); auto instance = static_cast*>(impl.get()); instance->out_type_ = args.inputs[0].GetSharedPtr(); - return std::move(impl); + return impl; } struct GroupedListFactory { diff --git a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc index efd25a8a20c80..eb243de4a765e 100644 --- a/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc +++ b/cpp/src/arrow/compute/kernels/scalar_arithmetic.cc @@ -503,7 +503,7 @@ Result ResolveDecimalBinaryOperationOutput( ToResult(getter(left_type.precision(), left_type.scale(), right_type.precision(), right_type.scale()))); ARROW_ASSIGN_OR_RAISE(auto type, DecimalType::Make(left_type.id(), precision, scale)); - return std::move(type); + return type; } Result ResolveDecimalAdditionOrSubtractionOutput( @@ -566,7 +566,7 @@ Result ResolveTemporalOutput(KernelContext*, } auto type = duration(right_type.unit()); - return std::move(type); + return type; } template diff --git a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc index 038e623b43c53..762b666c6a148 100644 --- a/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc +++ b/cpp/src/arrow/compute/kernels/scalar_string_ascii.cc @@ -1315,7 +1315,7 @@ struct RegexSubstringMatcher { const MatchSubstringOptions& options, bool is_utf8 = true, bool literal = false) { auto matcher = std::make_unique(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(matcher->regex_match_)); - return std::move(matcher); + return matcher; } explicit RegexSubstringMatcher(const MatchSubstringOptions& options, @@ -1685,7 +1685,7 @@ struct FindSubstringRegex { bool is_utf8 = true, bool literal = false) { auto matcher = FindSubstringRegex(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(*matcher.regex_match_)); - return std::move(matcher); + return matcher; } explicit FindSubstringRegex(const MatchSubstringOptions& options, bool is_utf8 = true, @@ -1832,7 +1832,7 @@ struct CountSubstringRegex { bool is_utf8 = true, bool literal = false) { CountSubstringRegex counter(options, is_utf8, literal); RETURN_NOT_OK(RegexStatus(*counter.regex_match_)); - return std::move(counter); + return counter; } template @@ -2055,7 +2055,7 @@ struct RegexSubstringReplacer { std::move(replacement_error)); } - return std::move(replacer); + return replacer; } // Using RE2::FindAndConsume we can only find the pattern if it is a group, therefore @@ -2203,7 +2203,7 @@ struct ExtractRegexData { } data.group_names.emplace_back(item->second); } - return std::move(data); + return data; } Result ResolveOutputType(const std::vector& types) const { diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 800deba3a5ed2..44bb7372c3f68 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -530,7 +530,8 @@ Result> HashInit(KernelContext* ctx, auto result = std::make_unique(args.inputs[0].GetSharedPtr(), args.options, ctx->memory_pool()); RETURN_NOT_OK(result->Reset()); - return std::move(result); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(result)); } template diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 3ed5411d0ba02..45b9ad5971e80 100644 --- 
a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -347,7 +347,7 @@ struct GrouperNoKeysImpl : Grouper { } std::shared_ptr array; RETURN_NOT_OK(builder->Finish(&array)); - return std::move(array); + return array; } Status Reset() override { return Status::OK(); } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { @@ -359,7 +359,7 @@ struct GrouperNoKeysImpl : Grouper { auto values = data->GetMutableValues(0); values[0] = 0; ExecBatch out({Datum(data)}, 1); - return std::move(out); + return out; } uint32_t num_groups() const override { return 1; } }; @@ -412,7 +412,7 @@ struct GrouperImpl : public Grouper { return Status::NotImplemented("Keys of type ", *key); } - return std::move(impl); + return impl; } Status Reset() override { @@ -596,7 +596,7 @@ struct GrouperFastImpl : public Grouper { impl->minibatch_hashes_.resize(impl->minibatch_size_max_ + kPaddingForSIMD / sizeof(uint32_t)); - return std::move(impl); + return impl; } Status Reset() override { diff --git a/cpp/src/arrow/csv/reader.cc b/cpp/src/arrow/csv/reader.cc index e981fafe8e780..e3033a81486e8 100644 --- a/cpp/src/arrow/csv/reader.cc +++ b/cpp/src/arrow/csv/reader.cc @@ -1053,8 +1053,8 @@ class AsyncThreadedTableReader auto self = shared_from_this(); return ProcessFirstBuffer().Then([self](const std::shared_ptr& first_buffer) { auto block_generator = ThreadedBlockReader::MakeAsyncIterator( - self->buffer_generator_, MakeChunker(self->parse_options_), - std::move(first_buffer), self->read_options_.skip_rows_after_names); + self->buffer_generator_, MakeChunker(self->parse_options_), first_buffer, + self->read_options_.skip_rows_after_names); std::function block_visitor = [self](CSVBlock maybe_block) -> Status { diff --git a/cpp/src/arrow/dataset/dataset.cc b/cpp/src/arrow/dataset/dataset.cc index 0e7bc3da84235..8a3df182474bf 100644 --- a/cpp/src/arrow/dataset/dataset.cc +++ b/cpp/src/arrow/dataset/dataset.cc @@ -402,7 +402,7 @@ class BasicFragmentEvolution : public FragmentEvolutionStrategy { } return compute::field_ref(FieldRef(std::move(modified_indices))); } - return std::move(expr); + return expr; }, [](compute::Expression expr, compute::Expression* old_expr) { return expr; }); }; diff --git a/cpp/src/arrow/dataset/dataset_writer.cc b/cpp/src/arrow/dataset/dataset_writer.cc index 754386275d60c..c60042dd6fef8 100644 --- a/cpp/src/arrow/dataset/dataset_writer.cc +++ b/cpp/src/arrow/dataset/dataset_writer.cc @@ -408,8 +408,7 @@ class DatasetWriterDirectoryQueue { write_options, writer_state); dir_queue->PrepareDirectory(); ARROW_ASSIGN_OR_RAISE(dir_queue->current_filename_, dir_queue->GetNextFilename()); - // std::move required to make RTools 3.5 mingw compiler happy - return std::move(dir_queue); + return dir_queue; } Status Finish() { diff --git a/cpp/src/arrow/dataset/file_csv.cc b/cpp/src/arrow/dataset/file_csv.cc index 09ab775727c98..6258a674deee5 100644 --- a/cpp/src/arrow/dataset/file_csv.cc +++ b/cpp/src/arrow/dataset/file_csv.cc @@ -106,7 +106,7 @@ class CsvFileScanner : public FragmentScanner { } convert_options.include_columns = std::move(columns); convert_options.column_types = std::move(column_types); - return std::move(convert_options); + return convert_options; } static Future> Make( diff --git a/cpp/src/arrow/dataset/file_parquet.cc b/cpp/src/arrow/dataset/file_parquet.cc index c17ba89be7907..1f8b6cc4882cf 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -279,7 +279,7 @@ Status ResolveOneFieldRef( // 
names) based on the dataset schema. Returns `false` if no conversion was needed. Result MaybeConvertFieldRef(FieldRef ref, const Schema& dataset_schema) { if (ARROW_PREDICT_TRUE(ref.IsNameSequence())) { - return std::move(ref); + return ref; } ARROW_ASSIGN_OR_RAISE(auto path, ref.FindOne(dataset_schema)); @@ -504,7 +504,8 @@ Result> ParquetFileFormat::GetReader std::unique_ptr arrow_reader; RETURN_NOT_OK(parquet::arrow::FileReader::Make( options->pool, std::move(reader), std::move(arrow_properties), &arrow_reader)); - return std::move(arrow_reader); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(arrow_reader)); } Future> ParquetFileFormat::GetReaderAsync( @@ -543,7 +544,9 @@ Future> ParquetFileFormat::GetReader reader)), std::move(arrow_properties), &arrow_reader)); - return std::move(arrow_reader); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr( + std::move(arrow_reader)); }, [path = source.path()](const Status& status) -> Result> { diff --git a/cpp/src/arrow/dataset/scan_node.cc b/cpp/src/arrow/dataset/scan_node.cc index c25c5b70ae1ec..4493332111429 100644 --- a/cpp/src/arrow/dataset/scan_node.cc +++ b/cpp/src/arrow/dataset/scan_node.cc @@ -166,7 +166,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode { return Status::Invalid("A scan filter must be a boolean expression"); } - return std::move(normalized); + return normalized; } static Result Make(acero::ExecPlan* plan, @@ -334,7 +334,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode { extracted.known_values.push_back({i, *maybe_casted}); } } - return std::move(extracted); + return extracted; } Future<> BeginScan(const std::shared_ptr& inspected_fragment) { @@ -427,7 +427,7 @@ class ScanNode : public acero::ExecNode, public acero::TracedNode { /*queue=*/nullptr, [this]() { return output_->InputFinished(this, num_batches_.load()); }); fragment_tasks->AddAsyncGenerator>( - std::move(frag_gen), + frag_gen, [this, fragment_tasks = std::move(fragment_tasks)](const std::shared_ptr& fragment) { fragment_tasks->AddTask(std::make_unique(this, fragment)); diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 01a2b8df5398d..c2195e89e75ee 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -215,7 +215,7 @@ Result> CPUMemoryManager::CopyNonOwnedFrom( if (buf.size() > 0) { memcpy(dest->mutable_data(), buf.data(), static_cast(buf.size())); } - return std::move(dest); + return dest; } Result> CPUMemoryManager::ViewBufferFrom( @@ -247,7 +247,7 @@ Result> CPUMemoryManager::CopyNonOwnedTo( if (buf.size() > 0) { memcpy(dest->mutable_data(), buf.data(), static_cast(buf.size())); } - return std::move(dest); + return dest; } Result> CPUMemoryManager::ViewBufferTo( diff --git a/cpp/src/arrow/engine/substrait/expression_internal.cc b/cpp/src/arrow/engine/substrait/expression_internal.cc index 480cf30d3033f..56d7956076bf8 100644 --- a/cpp/src/arrow/engine/substrait/expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/expression_internal.cc @@ -133,7 +133,7 @@ Result DecodeScalarFunction( for (const auto& opt : scalar_fn.options()) { ARROW_RETURN_NOT_OK(DecodeOption(opt, &call)); } - return std::move(call); + return call; } std::string EnumToString(int value, const google::protobuf::EnumDescriptor* descriptor) { @@ -279,7 +279,7 @@ Result FromProto(const substrait::AggregateFunction& func, bool i for (int i = 0; i < func.options_size(); i++) { 
ARROW_RETURN_NOT_OK(DecodeOption(func.options(i), &call)); } - return std::move(call); + return call; } Result FromProto(const substrait::Expression& expr, @@ -1153,7 +1153,7 @@ Result> ToProto( out->set_allocated_null(type.release()); } - return std::move(out); + return out; } static Status AddChildToReferenceSegment( @@ -1226,7 +1226,7 @@ static Result> MakeDirectReference( auto out = std::make_unique(); out->set_allocated_selection(selection.release()); - return std::move(out); + return out; } // Indexes the given Substrait struct-typed expression or root (if expr is empty) using @@ -1292,7 +1292,7 @@ Result> EncodeSubstraitCa } } - return std::move(scalar_fn); + return scalar_fn; } Result>> DatumToLiterals( @@ -1356,7 +1356,7 @@ Result> ToProto( if (auto datum = expr.literal()) { ARROW_ASSIGN_OR_RAISE(auto literal, ToProto(*datum, ext_set, conversion_options)); out->set_allocated_literal(literal.release()); - return std::move(out); + return out; } if (auto param = expr.parameter()) { @@ -1367,7 +1367,7 @@ Result> ToProto( ARROW_ASSIGN_OR_RAISE(out, MakeStructFieldReference(std::move(out), index)); } - return std::move(out); + return out; } auto call = CallNotNull(expr); @@ -1399,7 +1399,7 @@ Result> ToProto( if_then_->set_allocated_else_(arguments.back().release()); out->set_allocated_if_then(if_then_.release()); - return std::move(out); + return out; } } @@ -1423,7 +1423,7 @@ Result> ToProto( for (int index : field_path.indices()) { ARROW_ASSIGN_OR_RAISE(out, MakeStructFieldReference(std::move(out), index)); } - return std::move(out); + return out; } if (call->function_name == "list_element") { @@ -1449,7 +1449,7 @@ Result> ToProto( if_then->set_allocated_else_(arguments[2].release()); out->set_allocated_if_then(if_then.release()); - return std::move(out); + return out; } else if (call->function_name == "cast") { auto cast = std::make_unique(); @@ -1478,7 +1478,7 @@ Result> ToProto( cast->set_allocated_type(to_type.release()); out->set_allocated_cast(cast.release()); - return std::move(out); + return out; } else if (call->function_name == "is_in") { auto or_list = std::make_unique(); @@ -1499,7 +1499,7 @@ Result> ToProto( or_list->mutable_options()->AddAllocated(option.release()); } out->set_allocated_singular_or_list(or_list.release()); - return std::move(out); + return out; } // other expression types dive into extensions immediately @@ -1534,7 +1534,7 @@ Result> ToProto( return maybe_converter.status(); } out->set_allocated_scalar_function(scalar_fn.release()); - return std::move(out); + return out; } } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc index 225901c910f25..e2e6d934372dc 100644 --- a/cpp/src/arrow/engine/substrait/extended_expression_internal.cc +++ b/cpp/src/arrow/engine/substrait/extended_expression_internal.cc @@ -143,7 +143,7 @@ Result> CreateExpressionReferenc ARROW_ASSIGN_OR_RAISE(std::unique_ptr expression, ToProto(expr, ext_set, conversion_options)); expr_ref->set_allocated_expression(expression.release()); - return std::move(expr_ref); + return expr_ref; } } // namespace @@ -178,7 +178,7 @@ Result FromProto(const substrait::ExtendedExpression& expressi *ext_set_out = std::move(ext_set); } - return std::move(bound_expressions); + return bound_expressions; } Result> ToProto( @@ -203,7 +203,7 @@ Result> ToProto( expression->mutable_referred_expr()->AddAllocated(expr_ref.release()); } RETURN_NOT_OK(AddExtensionSetToExtendedExpression(*ext_set, 
expression.get())); - return std::move(expression); + return expression; } } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/extension_set.cc b/cpp/src/arrow/engine/substrait/extension_set.cc index e955084dcdfbb..cefe53d2847ca 100644 --- a/cpp/src/arrow/engine/substrait/extension_set.cc +++ b/cpp/src/arrow/engine/substrait/extension_set.cc @@ -288,7 +288,7 @@ Result ExtensionSet::Make( } } - return std::move(set); + return set; } Result ExtensionSet::DecodeType(uint32_t anchor) const { @@ -799,7 +799,7 @@ Result> GetValueArgs(const SubstraitCall& call, ARROW_ASSIGN_OR_RAISE(compute::Expression arg, call.GetValueArg(index)); expressions.push_back(arg); } - return std::move(expressions); + return expressions; } ExtensionIdRegistry::SubstraitCallToArrow DecodeOptionlessOverflowableArithmetic( @@ -874,7 +874,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeOptionlessOverflowableArithmetic for (std::size_t i = 0; i < call.arguments.size(); i++) { substrait_call.SetValueArg(static_cast(i), call.arguments[i]); } - return std::move(substrait_call); + return substrait_call; }; } @@ -887,7 +887,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeBasic(Id substrait_fn_id) { for (std::size_t i = 0; i < call.arguments.size(); i++) { substrait_call.SetValueArg(static_cast(i), call.arguments[i]); } - return std::move(substrait_call); + return substrait_call; }; } @@ -907,7 +907,7 @@ ExtensionIdRegistry::ArrowToSubstraitCall EncodeIsNull(Id substrait_fn_id) { for (std::size_t i = 0; i < call.arguments.size(); i++) { substrait_call.SetValueArg(static_cast(i), call.arguments[i]); } - return std::move(substrait_call); + return substrait_call; }; } diff --git a/cpp/src/arrow/engine/substrait/plan_internal.cc b/cpp/src/arrow/engine/substrait/plan_internal.cc index cc4806878c404..4473b0443eb19 100644 --- a/cpp/src/arrow/engine/substrait/plan_internal.cc +++ b/cpp/src/arrow/engine/substrait/plan_internal.cc @@ -65,7 +65,7 @@ Result> PlanToProto( plan_rel->set_allocated_root(rel_root.release()); subs_plan->mutable_relations()->AddAllocated(plan_rel.release()); RETURN_NOT_OK(AddExtensionSetToPlan(*ext_set, subs_plan.get())); - return std::move(subs_plan); + return subs_plan; } } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index 7c462c418f81b..6a25bd89f0128 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -91,7 +91,7 @@ Result GetEmitInfo(const RelMessage& rel, } emit_info.expressions = std::move(proj_field_refs); emit_info.schema = schema(std::move(emit_fields)); - return std::move(emit_info); + return emit_info; } Result ProcessEmitProject( @@ -1024,7 +1024,7 @@ Result> NamedTableRelationConverter( } read_rel->set_allocated_named_table(read_rel_tn.release()); - return std::move(read_rel); + return read_rel; } Result> ScanRelationConverter( @@ -1068,7 +1068,7 @@ Result> ScanRelationConverter( read_rel_lfs->mutable_items()->AddAllocated(read_rel_lfs_ffs.release()); } read_rel->set_allocated_local_files(read_rel_lfs.release()); - return std::move(read_rel); + return read_rel; } Result> FilterRelationConverter( @@ -1097,7 +1097,7 @@ Result> FilterRelationConverter( ARROW_ASSIGN_OR_RAISE(auto subs_expr, ToProto(bound_expression, ext_set, conversion_options)); filter_rel->set_allocated_condition(subs_expr.release()); - return std::move(filter_rel); + return filter_rel; } } // namespace @@ -1146,7 +1146,7 @@ Result> ToProto( const 
ConversionOptions& conversion_options) { auto rel = std::make_unique(); RETURN_NOT_OK(SerializeAndCombineRelations(declr, ext_set, &rel, conversion_options)); - return std::move(rel); + return rel; } } // namespace engine diff --git a/cpp/src/arrow/engine/substrait/serde.cc b/cpp/src/arrow/engine/substrait/serde.cc index 9e670f121778e..16d2ace4ac0d7 100644 --- a/cpp/src/arrow/engine/substrait/serde.cc +++ b/cpp/src/arrow/engine/substrait/serde.cc @@ -256,7 +256,7 @@ Result> MakeSingleDeclarationPlan( } else { ARROW_ASSIGN_OR_RAISE(auto plan, acero::ExecPlan::Make()); ARROW_RETURN_NOT_OK(declarations[0].AddToPlan(plan.get())); - return std::move(plan); + return plan; } } diff --git a/cpp/src/arrow/engine/substrait/type_internal.cc b/cpp/src/arrow/engine/substrait/type_internal.cc index 5e7e364fe00c5..b469f5fa0baab 100644 --- a/cpp/src/arrow/engine/substrait/type_internal.cc +++ b/cpp/src/arrow/engine/substrait/type_internal.cc @@ -506,7 +506,7 @@ Result> ToProto( auto out = std::make_unique(); RETURN_NOT_OK( (DataTypeToProtoImpl{out.get(), nullable, ext_set, conversion_options})(type)); - return std::move(out); + return out; } Result> FromProto(const substrait::NamedStruct& named_struct, @@ -583,7 +583,7 @@ Result> ToProto( } named_struct->set_allocated_struct_(struct_.release()); - return std::move(named_struct); + return named_struct; } } // namespace engine diff --git a/cpp/src/arrow/field_ref_test.cc b/cpp/src/arrow/field_ref_test.cc index 0cb2da4f709a1..25c8aa0b71623 100644 --- a/cpp/src/arrow/field_ref_test.cc +++ b/cpp/src/arrow/field_ref_test.cc @@ -199,7 +199,7 @@ struct FieldPathTestCase { Table::Make(out.schema, {out.v0.chunked_array, out.v1.chunked_array}, kNumRows); ARROW_RETURN_NOT_OK(out.table->ValidateFull()); - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index ac563b134586e..7462827d80f1e 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -845,7 +845,7 @@ class ObjectInputFile final : public io::RandomAccessFile { DCHECK_LE(bytes_read, nbytes); RETURN_NOT_OK(buffer->Resize(bytes_read)); } - return std::move(buffer); + return buffer; } Result Read(int64_t nbytes, void* out) override { @@ -857,7 +857,7 @@ class ObjectInputFile final : public io::RandomAccessFile { Result> Read(int64_t nbytes) override { ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); pos_ += buffer->size(); - return std::move(buffer); + return buffer; } private: diff --git a/cpp/src/arrow/filesystem/localfs.cc b/cpp/src/arrow/filesystem/localfs.cc index 25ac04b758f9b..22d802d8f9f7f 100644 --- a/cpp/src/arrow/filesystem/localfs.cc +++ b/cpp/src/arrow/filesystem/localfs.cc @@ -524,7 +524,7 @@ class AsyncStatSelector { ARROW_ASSIGN_OR_RAISE( auto gen, MakeBackgroundGenerator(Iterator(DiscoveryImplIterator( - std::move(dir_fn), nesting_depth, std::move(selector), + dir_fn, nesting_depth, std::move(selector), discovery_state, io_context, file_info_batch_size)), io_context.executor())); gen = MakeTransferredGenerator(std::move(gen), io_context.executor()); diff --git a/cpp/src/arrow/filesystem/localfs_test.cc b/cpp/src/arrow/filesystem/localfs_test.cc index d68c992dff863..6dd7a8c75586c 100644 --- a/cpp/src/arrow/filesystem/localfs_test.cc +++ b/cpp/src/arrow/filesystem/localfs_test.cc @@ -543,7 +543,7 @@ struct DirTreeCreator { Result Create(const std::string& base) { FileInfoVector infos; RETURN_NOT_OK(Create(base, 0, &infos)); - return std::move(infos); + return infos; } 
Status Create(const std::string& base, int depth, FileInfoVector* infos) { diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 640888e1c4fa5..43666f32b3da6 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -1510,7 +1510,8 @@ class ObjectInputFile final : public io::RandomAccessFile { DCHECK_LE(bytes_read, nbytes); RETURN_NOT_OK(buf->Resize(bytes_read)); } - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } Result Read(int64_t nbytes, void* out) override { @@ -1522,7 +1523,7 @@ class ObjectInputFile final : public io::RandomAccessFile { Result> Read(int64_t nbytes) override { ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); pos_ += buffer->size(); - return std::move(buffer); + return buffer; } protected: diff --git a/cpp/src/arrow/filesystem/util_internal.cc b/cpp/src/arrow/filesystem/util_internal.cc index d69f6c896d08e..be43e14e84337 100644 --- a/cpp/src/arrow/filesystem/util_internal.cc +++ b/cpp/src/arrow/filesystem/util_internal.cc @@ -103,7 +103,7 @@ Result ParseFileSystemUri(const std::string& uri_string) { return status; #endif } - return std::move(uri); + return uri; } #ifdef _WIN32 diff --git a/cpp/src/arrow/flight/client.cc b/cpp/src/arrow/flight/client.cc index 4d4f13a09fb26..58a3ba4ab83e5 100644 --- a/cpp/src/arrow/flight/client.cc +++ b/cpp/src/arrow/flight/client.cc @@ -591,7 +591,7 @@ arrow::Result FlightClient::CancelFlightInfo( ARROW_ASSIGN_OR_RAISE(auto cancel_result, CancelFlightInfoResult::Deserialize( std::string_view(*result->body))); ARROW_RETURN_NOT_OK(stream->Drain()); - return std::move(cancel_result); + return cancel_result; } arrow::Result FlightClient::RenewFlightEndpoint( @@ -603,7 +603,7 @@ arrow::Result FlightClient::RenewFlightEndpoint( ARROW_ASSIGN_OR_RAISE(auto renewed_endpoint, FlightEndpoint::Deserialize(std::string_view(*result->body))); ARROW_RETURN_NOT_OK(stream->Drain()); - return std::move(renewed_endpoint); + return renewed_endpoint; } arrow::Result> FlightClient::ListActions( diff --git a/cpp/src/arrow/flight/sql/client.cc b/cpp/src/arrow/flight/sql/client.cc index 37b6a0b32e45d..86fd4868bad2d 100644 --- a/cpp/src/arrow/flight/sql/client.cc +++ b/cpp/src/arrow/flight/sql/client.cc @@ -682,7 +682,7 @@ arrow::Result> PreparedStatement::Execute( parameter_binding_.get())); } ARROW_ASSIGN_OR_RAISE(auto flight_info, client_->GetFlightInfo(options, descriptor)); - return std::move(flight_info); + return flight_info; } arrow::Result PreparedStatement::ExecuteUpdate( diff --git a/cpp/src/arrow/gpu/cuda_memory.cc b/cpp/src/arrow/gpu/cuda_memory.cc index dcf0a31963e45..148de68434272 100644 --- a/cpp/src/arrow/gpu/cuda_memory.cc +++ b/cpp/src/arrow/gpu/cuda_memory.cc @@ -89,7 +89,7 @@ Result> CudaIpcMemHandle::Serialize(MemoryPool* pool) co memcpy(buffer->mutable_data() + sizeof(impl_->memory_size), &impl_->ipc_handle, sizeof(impl_->ipc_handle)); } - return std::move(buffer); + return buffer; } const void* CudaIpcMemHandle::handle() const { return &impl_->ipc_handle; } diff --git a/cpp/src/arrow/integration/json_internal.cc b/cpp/src/arrow/integration/json_internal.cc index 4b75e84bfccb6..89719b4ba4b2e 100644 --- a/cpp/src/arrow/integration/json_internal.cc +++ b/cpp/src/arrow/integration/json_internal.cc @@ -1069,9 +1069,9 @@ Result> GetUnion(const RjObject& json_type, } if (mode == UnionMode::SPARSE) { - return sparse_union(std::move(children), std::move(type_codes)); + return 
sparse_union(children, std::move(type_codes)); } else { - return dense_union(std::move(children), std::move(type_codes)); + return dense_union(children, std::move(type_codes)); } } diff --git a/cpp/src/arrow/io/buffered.cc b/cpp/src/arrow/io/buffered.cc index 21cce478d3fa5..c53b3d223d4c0 100644 --- a/cpp/src/arrow/io/buffered.cc +++ b/cpp/src/arrow/io/buffered.cc @@ -423,7 +423,8 @@ class BufferedInputStream::Impl : public BufferedBase { RETURN_NOT_OK(buffer->Resize(bytes_read, false /* shrink_to_fit */)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } // For providing access to the raw file handles diff --git a/cpp/src/arrow/io/compressed.cc b/cpp/src/arrow/io/compressed.cc index 6a6fbf40f9628..306f7c4bf8519 100644 --- a/cpp/src/arrow/io/compressed.cc +++ b/cpp/src/arrow/io/compressed.cc @@ -411,9 +411,8 @@ class CompressedInputStream::Impl { ARROW_ASSIGN_OR_RAISE(auto buf, AllocateResizableBuffer(nbytes, pool_)); ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buf->mutable_data())); RETURN_NOT_OK(buf->Resize(bytes_read)); - // Using std::move because some compiler might has issue below: - // https://wg21.cmeerw.net/cwg/issue1579 - return std::move(buf); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buf)); } const std::shared_ptr& raw() const { return raw_; } diff --git a/cpp/src/arrow/io/compressed_test.cc b/cpp/src/arrow/io/compressed_test.cc index bd414149d5345..12d116e3395d4 100644 --- a/cpp/src/arrow/io/compressed_test.cc +++ b/cpp/src/arrow/io/compressed_test.cc @@ -77,7 +77,7 @@ std::shared_ptr CompressDataOneShot(Codec* codec, compressed_len = *codec->Compress(data.size(), data.data(), max_compressed_len, compressed->mutable_data()); ABORT_NOT_OK(compressed->Resize(compressed_len)); - return std::move(compressed); + return compressed; } Status RunCompressedInputStream(Codec* codec, std::shared_ptr compressed, diff --git a/cpp/src/arrow/io/file.cc b/cpp/src/arrow/io/file.cc index cc3a5187059e9..a22accf65660a 100644 --- a/cpp/src/arrow/io/file.cc +++ b/cpp/src/arrow/io/file.cc @@ -230,7 +230,8 @@ class ReadableFile::ReadableFileImpl : public OSFile { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result> ReadBufferAt(int64_t position, int64_t nbytes) { @@ -242,7 +243,8 @@ class ReadableFile::ReadableFileImpl : public OSFile { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Status WillNeed(const std::vector& ranges) { diff --git a/cpp/src/arrow/io/hdfs.cc b/cpp/src/arrow/io/hdfs.cc index 5d3edcd3ba63a..5fb762d076376 100644 --- a/cpp/src/arrow/io/hdfs.cc +++ b/cpp/src/arrow/io/hdfs.cc @@ -172,7 +172,8 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { RETURN_NOT_OK(buffer->Resize(bytes_read)); buffer->ZeroPadding(); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result Read(int64_t nbytes, void* buffer) { @@ -200,7 +201,8 @@ class HdfsReadableFile::HdfsReadableFileImpl : public HdfsAnyFileImpl { if (bytes_read < nbytes) { 
RETURN_NOT_OK(buffer->Resize(bytes_read)); } - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result GetSize() { diff --git a/cpp/src/arrow/io/stdio.cc b/cpp/src/arrow/io/stdio.cc index ac6ca3a63a604..ba4a66a2f340e 100644 --- a/cpp/src/arrow/io/stdio.cc +++ b/cpp/src/arrow/io/stdio.cc @@ -85,7 +85,8 @@ Result> StdinStream::Read(int64_t nbytes) { ARROW_ASSIGN_OR_RAISE(int64_t bytes_read, Read(nbytes, buffer->mutable_data())); ARROW_RETURN_NOT_OK(buffer->Resize(bytes_read, false)); buffer->ZeroPadding(); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } } // namespace io diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc index e196dd7bf5389..27ded52861eaf 100644 --- a/cpp/src/arrow/ipc/message.cc +++ b/cpp/src/arrow/ipc/message.cc @@ -130,7 +130,7 @@ Result> Message::Open(std::shared_ptr metadata, std::shared_ptr body) { std::unique_ptr result(new Message(std::move(metadata), std::move(body))); RETURN_NOT_OK(result->impl_->Open()); - return std::move(result); + return result; } Message::~Message() {} @@ -208,7 +208,7 @@ Result> Message::ReadFrom(std::shared_ptr metad " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } Result> Message::ReadFrom(const int64_t offset, @@ -225,7 +225,7 @@ Result> Message::ReadFrom(const int64_t offset, " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } Status WritePadding(io::OutputStream* stream, int64_t nbytes) { @@ -329,7 +329,7 @@ Result> ReadMessage(std::shared_ptr metadata, case MessageDecoder::State::INITIAL: // Metadata did not request a body so we better not have provided one DCHECK_EQ(body, nullptr); - return std::move(result); + return result; case MessageDecoder::State::METADATA_LENGTH: return Status::Invalid("metadata length is missing from the metadata buffer"); case MessageDecoder::State::METADATA: @@ -338,7 +338,7 @@ Result> ReadMessage(std::shared_ptr metadata, case MessageDecoder::State::BODY: { if (body == nullptr) { // Caller didn't give a body so just give them a message without body - return std::move(result); + return result; } if (body->size() != decoder.next_required_size()) { return Status::IOError("Expected body buffer to be ", @@ -346,7 +346,7 @@ Result> ReadMessage(std::shared_ptr metadata, " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } case MessageDecoder::State::EOS: return Status::Invalid("Unexpected empty message in IPC file format"); @@ -376,7 +376,7 @@ Result> ReadMessage(int64_t offset, int32_t metadata_le switch (decoder.state()) { case MessageDecoder::State::INITIAL: - return std::move(result); + return result; case MessageDecoder::State::METADATA_LENGTH: return Status::Invalid("metadata length is missing. 
File offset: ", offset, ", metadata length: ", metadata_length); @@ -401,7 +401,7 @@ Result> ReadMessage(int64_t offset, int32_t metadata_le " bytes for message body, got ", body->size()); } RETURN_NOT_OK(decoder.Consume(body)); - return std::move(result); + return result; } case MessageDecoder::State::EOS: return Status::Invalid("Unexpected empty message in IPC file format"); @@ -551,7 +551,7 @@ Result> ReadMessage(io::InputStream* file, MemoryPool* if (!message) { return nullptr; } else { - return std::move(message); + return message; } } diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index eed426d9337dd..be8d1ccc35f1a 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -191,11 +191,9 @@ Status UnionFromFlatbuffer(const flatbuf::Union* union_data, } if (mode == UnionMode::SPARSE) { - ARROW_ASSIGN_OR_RAISE( - *out, SparseUnionType::Make(std::move(children), std::move(type_codes))); + ARROW_ASSIGN_OR_RAISE(*out, SparseUnionType::Make(children, std::move(type_codes))); } else { - ARROW_ASSIGN_OR_RAISE( - *out, DenseUnionType::Make(std::move(children), std::move(type_codes))); + ARROW_ASSIGN_OR_RAISE(*out, DenseUnionType::Make(children, std::move(type_codes))); } return Status::OK(); } diff --git a/cpp/src/arrow/ipc/metadata_internal.h b/cpp/src/arrow/ipc/metadata_internal.h index 631a336f75a9a..c0aca44644a40 100644 --- a/cpp/src/arrow/ipc/metadata_internal.h +++ b/cpp/src/arrow/ipc/metadata_internal.h @@ -238,7 +238,8 @@ static inline Result> WriteFlatbufferBuilder( uint8_t* dst = result->mutable_data(); memcpy(dst, fbb.GetBufferPointer(), size); - return std::move(result); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(result)); } ARROW_EXPORT diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc index d272c78560f82..da84f2f2dc87d 100644 --- a/cpp/src/arrow/ipc/reader.cc +++ b/cpp/src/arrow/ipc/reader.cc @@ -540,7 +540,8 @@ Result> DecompressBuffer(const std::shared_ptr& actual_decompressed); } - return std::move(uncompressed); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(uncompressed)); } Status DecompressBuffers(Compression::type compression, const IpcReadOptions& options, @@ -1174,7 +1175,7 @@ static Result> ReadMessageFromBlock( ARROW_ASSIGN_OR_RAISE(auto message, ReadMessage(block.offset, block.metadata_length, file, fields_loader)); - return std::move(message); + return message; } static Future> ReadMessageFromBlockAsync( @@ -1536,7 +1537,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { ARROW_ASSIGN_OR_RAISE(auto message, arrow::ipc::ReadMessageFromBlock(block, file_, fields_loader)); stats_.num_messages.fetch_add(1, std::memory_order_relaxed); - return std::move(message); + return message; } Status ReadDictionaries() { @@ -1632,7 +1633,7 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader { } context.compression = compression; context.metadata_version = internal::GetMetadataVersion(message->version()); - return std::move(context); + return context; } Result GetBatchFromMessage( @@ -2704,7 +2705,7 @@ Result> IoRecordedRandomAccessFile::Read(int64_t nbytes) ARROW_ASSIGN_OR_RAISE(std::shared_ptr buffer, ReadAt(position_, nbytes)); auto num_bytes_read = std::min(file_size_, position_ + nbytes) - position_; position_ += num_bytes_read; - return std::move(buffer); + return buffer; } const io::IOContext& 
IoRecordedRandomAccessFile::io_context() const { diff --git a/cpp/src/arrow/ipc/writer.cc b/cpp/src/arrow/ipc/writer.cc index 93256440f4a7a..e3dd36efe0543 100644 --- a/cpp/src/arrow/ipc/writer.cc +++ b/cpp/src/arrow/ipc/writer.cc @@ -1561,7 +1561,8 @@ Result> OpenRecordBatchWriter( auto writer = std::make_unique( std::move(sink), schema, options, /*is_file_format=*/false); RETURN_NOT_OK(writer->Start()); - return std::move(writer); + // R build with openSUSE155 requires an explicit unique_ptr construction + return std::unique_ptr(std::move(writer)); } Result> MakePayloadStreamWriter( diff --git a/cpp/src/arrow/scalar.cc b/cpp/src/arrow/scalar.cc index 7d8084e17c279..252706fd0b387 100644 --- a/cpp/src/arrow/scalar.cc +++ b/cpp/src/arrow/scalar.cc @@ -1223,7 +1223,7 @@ Result> CastImpl(const StringScalar& from, ARROW_ASSIGN_OR_RAISE(auto out, Scalar::Parse(std::move(to_type), std::string_view(*from.value))); DCHECK(checked_pointer_cast(out) != nullptr); - return std::move(out); + return out; } // binary/large binary/large string to string @@ -1347,7 +1347,7 @@ struct FromTypeVisitor : CastImplVisitor { ARROW_ASSIGN_OR_RAISE( out_, CastImpl( checked_cast::ScalarType&>(from_), - std::move(to_type_))); + to_type_)); return Status::OK(); } @@ -1355,8 +1355,8 @@ struct FromTypeVisitor : CastImplVisitor { template typename std::enable_if_t::is_parameter_free, Status> Visit( const ToType&) { - ARROW_ASSIGN_OR_RAISE(out_, MakeScalar(std::move(to_type_), - checked_cast(from_).value)); + ARROW_ASSIGN_OR_RAISE( + out_, MakeScalar(to_type_, checked_cast(from_).value)); return Status::OK(); } diff --git a/cpp/src/arrow/table_builder.cc b/cpp/src/arrow/table_builder.cc index 19ca151ac200f..8dc2efd19d90d 100644 --- a/cpp/src/arrow/table_builder.cc +++ b/cpp/src/arrow/table_builder.cc @@ -47,7 +47,7 @@ Result> RecordBatchBuilder::Make( new RecordBatchBuilder(schema, pool, initial_capacity)); RETURN_NOT_OK(builder->CreateBuilders()); RETURN_NOT_OK(builder->InitBuilders()); - return std::move(builder); + return builder; } Result> RecordBatchBuilder::Flush(bool reset_builders) { diff --git a/cpp/src/arrow/testing/builder.h b/cpp/src/arrow/testing/builder.h index 09e8f49dea9eb..6beb7760e3bbf 100644 --- a/cpp/src/arrow/testing/builder.h +++ b/cpp/src/arrow/testing/builder.h @@ -195,7 +195,7 @@ Result> ArrayFromBuilderVisitor( std::shared_ptr out; RETURN_NOT_OK(builder->Finish(&out)); - return std::move(out); + return out; } template diff --git a/cpp/src/arrow/type.cc b/cpp/src/arrow/type.cc index edf8f0496628c..8ce03a91c70ae 100644 --- a/cpp/src/arrow/type.cc +++ b/cpp/src/arrow/type.cc @@ -729,7 +729,7 @@ Result> MaybeMergeListTypes( auto item_field, left.item_field()->MergeWith( *right.item_field()->WithName(left.item_field()->name()), options)); - return map(std::move(key_field->type()), std::move(item_field), + return map(key_field->type(), std::move(item_field), /*keys_sorted=*/left.keys_sorted() && right.keys_sorted()); } else if (promoted_type->id() == Type::STRUCT && other_type->id() == Type::STRUCT) { return MergeStructs(promoted_type, other_type, options); @@ -1696,7 +1696,7 @@ class NestedSelector { } } - return std::move(child_data); + return child_data; } static Result> GetChild(const Array& array, int i, diff --git a/cpp/src/arrow/util/align_util.cc b/cpp/src/arrow/util/align_util.cc index 7bc687b155052..a327afa7a5cc3 100644 --- a/cpp/src/arrow/util/align_util.cc +++ b/cpp/src/arrow/util/align_util.cc @@ -159,9 +159,10 @@ Result> EnsureAlignment(std::shared_ptr buffer, auto new_buffer, 
AllocateBuffer(buffer->size(), minimum_desired_alignment, memory_pool)); std::memcpy(new_buffer->mutable_data(), buffer->data(), buffer->size()); - return std::move(new_buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(new_buffer)); } else { - return std::move(buffer); + return buffer; } } @@ -197,9 +198,9 @@ Result> EnsureAlignment(std::shared_ptr ar auto new_array_data = ArrayData::Make( array_data->type, array_data->length, std::move(buffers), array_data->child_data, array_data->dictionary, array_data->GetNullCount(), array_data->offset); - return std::move(new_array_data); + return new_array_data; } else { - return std::move(array_data); + return array_data; } } @@ -210,7 +211,7 @@ Result> EnsureAlignment(std::shared_ptr array, EnsureAlignment(array->data(), alignment, memory_pool)); if (new_array_data.get() == array->data().get()) { - return std::move(array); + return array; } else { return MakeArray(std::move(new_array_data)); } @@ -230,7 +231,7 @@ Result> EnsureAlignment(std::shared_ptrtype()); } else { - return std::move(array); + return array; } } @@ -248,7 +249,7 @@ Result> EnsureAlignment(std::shared_ptrschema(), batch->num_rows(), std::move(columns_)); } else { - return std::move(batch); + return batch; } } @@ -275,7 +276,7 @@ Result> EnsureAlignment(std::shared_ptr

table, } return Table::Make(table->schema(), std::move(columns_), table->num_rows()); } else { - return std::move(table); + return table; } } diff --git a/cpp/src/arrow/util/async_generator.h b/cpp/src/arrow/util/async_generator.h index f9bcd534567c6..fd66298d1a9d6 100644 --- a/cpp/src/arrow/util/async_generator.h +++ b/cpp/src/arrow/util/async_generator.h @@ -1962,7 +1962,7 @@ AsyncGenerator MakeFailingGenerator(Status st) { return [state]() -> Future { auto st = std::move(*state); if (!st.ok()) { - return std::move(st); + return st; } else { return AsyncGeneratorEnd(); } diff --git a/cpp/src/arrow/util/bit_util_benchmark.cc b/cpp/src/arrow/util/bit_util_benchmark.cc index 0bf2c26f12486..43f3fb33cd7fd 100644 --- a/cpp/src/arrow/util/bit_util_benchmark.cc +++ b/cpp/src/arrow/util/bit_util_benchmark.cc @@ -107,7 +107,7 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); memset(buffer->mutable_data(), 0, nbytes); random_bytes(nbytes, /*seed=*/0, buffer->mutable_data()); - return std::move(buffer); + return buffer; } static std::shared_ptr CreateRandomBitsBuffer(int64_t nbits, diff --git a/cpp/src/arrow/util/bitmap_builders.cc b/cpp/src/arrow/util/bitmap_builders.cc index 0348b1303b96b..c5cf3d2bc72b5 100644 --- a/cpp/src/arrow/util/bitmap_builders.cc +++ b/cpp/src/arrow/util/bitmap_builders.cc @@ -51,7 +51,8 @@ Result> BytesToBits(const std::vector& bytes, uint8_t* out_buf = buffer->mutable_data(); memset(out_buf, 0, static_cast(buffer->capacity())); FillBitsFromBytes(bytes, out_buf); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } Result> BitmapAllButOne(MemoryPool* pool, int64_t length, @@ -66,7 +67,8 @@ Result> BitmapAllButOne(MemoryPool* pool, int64_t length auto bitmap_data = buffer->mutable_data(); bit_util::SetBitsTo(bitmap_data, 0, length, value); bit_util::SetBitTo(bitmap_data, straggler_pos, !value); - return std::move(buffer); + // R build with openSUSE155 requires an explicit shared_ptr construction + return std::shared_ptr(std::move(buffer)); } } // namespace internal diff --git a/cpp/src/arrow/util/bitmap_reader_benchmark.cc b/cpp/src/arrow/util/bitmap_reader_benchmark.cc index 1427adb13e131..b3c199ec3bd55 100644 --- a/cpp/src/arrow/util/bitmap_reader_benchmark.cc +++ b/cpp/src/arrow/util/bitmap_reader_benchmark.cc @@ -45,7 +45,7 @@ static std::shared_ptr CreateRandomBuffer(int64_t nbytes) { auto buffer = *AllocateBuffer(nbytes); memset(buffer->mutable_data(), 0, nbytes); random_bytes(nbytes, /*seed=*/0, buffer->mutable_data()); - return std::move(buffer); + return buffer; } static void BitBlockCounterBench(benchmark::State& state) { diff --git a/cpp/src/arrow/util/compression.cc b/cpp/src/arrow/util/compression.cc index b63aec0aae8f9..7e2a3de30306a 100644 --- a/cpp/src/arrow/util/compression.cc +++ b/cpp/src/arrow/util/compression.cc @@ -216,7 +216,7 @@ Result> Codec::Create(Compression::type codec_type, DCHECK_NE(codec, nullptr); RETURN_NOT_OK(codec->Init()); - return std::move(codec); + return codec; } // use compression level to create Codec diff --git a/cpp/src/arrow/util/decimal.cc b/cpp/src/arrow/util/decimal.cc index ce71def497161..c8457eae8ed33 100644 --- a/cpp/src/arrow/util/decimal.cc +++ b/cpp/src/arrow/util/decimal.cc @@ -717,7 +717,7 @@ Status Decimal128::FromString(const char* s, Decimal128* out, int32_t* precision Result Decimal128::FromString(std::string_view s) { Decimal128 out; RETURN_NOT_OK(FromString(s, &out, 
nullptr, nullptr)); - return std::move(out); + return out; } Result Decimal128::FromString(const std::string& s) { @@ -850,7 +850,7 @@ Status Decimal256::FromString(const char* s, Decimal256* out, int32_t* precision Result Decimal256::FromString(std::string_view s) { Decimal256 out; RETURN_NOT_OK(FromString(s, &out, nullptr, nullptr)); - return std::move(out); + return out; } Result Decimal256::FromString(const std::string& s) { diff --git a/cpp/src/arrow/util/future.cc b/cpp/src/arrow/util/future.cc index a5426f949e721..60687172fe8d7 100644 --- a/cpp/src/arrow/util/future.cc +++ b/cpp/src/arrow/util/future.cc @@ -212,7 +212,7 @@ std::unique_ptr FutureImpl::Make() { std::unique_ptr FutureImpl::MakeFinished(FutureState state) { std::unique_ptr ptr(new ConcreteFutureImpl()); ptr->state_ = state; - return std::move(ptr); + return ptr; } FutureImpl::FutureImpl() : state_(FutureState::PENDING) {} diff --git a/cpp/src/arrow/util/future.h b/cpp/src/arrow/util/future.h index 283b581a5100a..0aa2842703712 100644 --- a/cpp/src/arrow/util/future.h +++ b/cpp/src/arrow/util/future.h @@ -871,7 +871,7 @@ Future ToFuture(Result maybe_value) { template Future ToFuture(Future fut) { - return std::move(fut); + return fut; } template diff --git a/cpp/src/arrow/util/io_util.cc b/cpp/src/arrow/util/io_util.cc index d48f9eb97d562..2eefe96f0d5c4 100644 --- a/cpp/src/arrow/util/io_util.cc +++ b/cpp/src/arrow/util/io_util.cc @@ -1084,7 +1084,7 @@ Result FileOpenReadable(const PlatformFilename& file_name) { } #endif - return std::move(fd); + return fd; } Result FileOpenWritable(const PlatformFilename& file_name, @@ -1148,7 +1148,7 @@ Result FileOpenWritable(const PlatformFilename& file_name, // Seek to end, as O_APPEND does not necessarily do it RETURN_NOT_OK(lseek64_compat(fd.fd(), 0, SEEK_END)); } - return std::move(fd); + return fd; } Result FileTell(int fd) { @@ -1967,7 +1967,7 @@ Result> TemporaryDir::Make(const std::string& pref for (const auto& base_dir : base_dirs) { ARROW_ASSIGN_OR_RAISE(auto ptr, TryCreatingDirectory(base_dir)); if (ptr) { - return std::move(ptr); + return ptr; } // Cannot create in this directory, try the next one } diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h index 5e716d0fd113d..4da8394a0299c 100644 --- a/cpp/src/arrow/util/iterator.h +++ b/cpp/src/arrow/util/iterator.h @@ -180,9 +180,7 @@ class Iterator : public util::EqualityComparable> { ARROW_ASSIGN_OR_RAISE(auto element, maybe_element); out.push_back(std::move(element)); } - // ARROW-8193: On gcc-4.8 without the explicit move it tries to use the - // copy constructor, which may be deleted on the elements of type T - return std::move(out); + return out; } private: diff --git a/cpp/src/arrow/util/vector.h b/cpp/src/arrow/util/vector.h index e3c0a67cf46c4..74b6a2403a2bb 100644 --- a/cpp/src/arrow/util/vector.h +++ b/cpp/src/arrow/util/vector.h @@ -113,7 +113,7 @@ Result> MaybeMapVector(Fn&& map, const std::vector& source out.reserve(source.size()); ARROW_RETURN_NOT_OK(MaybeTransform(source.begin(), source.end(), std::back_inserter(out), std::forward(map))); - return std::move(out); + return out; } template , @@ -152,7 +152,7 @@ Result> UnwrapOrRaise(std::vector>&& results) { } out.push_back(it->MoveValueUnsafe()); } - return std::move(out); + return out; } template @@ -165,7 +165,7 @@ Result> UnwrapOrRaise(const std::vector>& results) { } out.push_back(result.ValueUnsafe()); } - return std::move(out); + return out; } } // namespace internal diff --git a/cpp/src/gandiva/function_registry.cc 
b/cpp/src/gandiva/function_registry.cc index 2e392630ee009..0955a2e47fcaf 100644 --- a/cpp/src/gandiva/function_registry.cc +++ b/cpp/src/gandiva/function_registry.cc @@ -147,7 +147,7 @@ arrow::Result> MakeDefaultFunctionRegistry() { ARROW_RETURN_NOT_OK(registry->Add(func_signature)); } } - return std::move(registry); + return registry; } std::shared_ptr default_function_registry() { diff --git a/cpp/src/gandiva/llvm_generator.cc b/cpp/src/gandiva/llvm_generator.cc index 62ebab08f4d6b..4afa2935ace33 100644 --- a/cpp/src/gandiva/llvm_generator.cc +++ b/cpp/src/gandiva/llvm_generator.cc @@ -1251,7 +1251,7 @@ LValuePtr LLVMGenerator::Visitor::BuildFunctionCall(const NativeFunction* func, // Make the function call auto out = decimalIR.CallDecimalFunction(func->pc_name(), llvm_return_type, *params); ret_lvalue->set_data(out); - return std::move(ret_lvalue); + return ret_lvalue; } else { bool isDecimalFunction = false; for (auto& arg : *params) { diff --git a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc index dd0b19c2ce048..aad1e933c4f25 100644 --- a/cpp/src/parquet/arrow/arrow_reader_writer_test.cc +++ b/cpp/src/parquet/arrow/arrow_reader_writer_test.cc @@ -4469,7 +4469,7 @@ class TestArrowReadDictionary : public ::testing::TestWithParam { RETURN_NOT_OK(builder.Open(std::make_shared(buffer_))); RETURN_NOT_OK(builder.properties(properties_)->Build(&reader)); - return std::move(reader); + return reader; } }; diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 004cb746b3a89..903faa92b6370 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -551,7 +551,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder { int result_size = WriteIndices(buffer->mutable_data(), static_cast(EstimatedDataEncodedSize())); PARQUET_THROW_NOT_OK(buffer->Resize(result_size, false)); - return std::move(buffer); + return buffer; } /// Writes out the encoded dictionary to buffer. 
buffer must be preallocated to diff --git a/cpp/src/parquet/encryption/file_key_unwrapper.cc b/cpp/src/parquet/encryption/file_key_unwrapper.cc index 02bea127fd1c3..d88aa6c52ac12 100644 --- a/cpp/src/parquet/encryption/file_key_unwrapper.cc +++ b/cpp/src/parquet/encryption/file_key_unwrapper.cc @@ -124,7 +124,7 @@ KeyWithMasterId FileKeyUnwrapper::GetDataEncryptionKey(const KeyMaterial& key_ma data_key = internal::DecryptKeyLocally(encoded_wrapped_dek, kek_bytes, aad); } - return KeyWithMasterId(std::move(data_key), std::move(master_key_id)); + return KeyWithMasterId(std::move(data_key), master_key_id); } std::shared_ptr FileKeyUnwrapper::GetKmsClientFromConfigOrKeyMaterial( diff --git a/cpp/src/parquet/platform.cc b/cpp/src/parquet/platform.cc index 5c355c28be1c3..98946029fb866 100644 --- a/cpp/src/parquet/platform.cc +++ b/cpp/src/parquet/platform.cc @@ -35,7 +35,7 @@ std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(MemoryPool* std::shared_ptr AllocateBuffer(MemoryPool* pool, int64_t size) { PARQUET_ASSIGN_OR_THROW(auto result, ::arrow::AllocateResizableBuffer(size, pool)); - return std::move(result); + return result; } } // namespace parquet diff --git a/cpp/src/parquet/properties.cc b/cpp/src/parquet/properties.cc index 2267efdf8a44a..4e6c558e064d4 100644 --- a/cpp/src/parquet/properties.cc +++ b/cpp/src/parquet/properties.cc @@ -38,7 +38,7 @@ std::shared_ptr ReaderProperties::GetStream( PARQUET_ASSIGN_OR_THROW( auto stream, ::arrow::io::BufferedInputStream::Create(buffer_size_, pool_, safe_stream, num_bytes)); - return std::move(stream); + return stream; } else { PARQUET_ASSIGN_OR_THROW(auto data, source->ReadAt(start, num_bytes)); diff --git a/cpp/src/skyhook/cls/cls_skyhook.cc b/cpp/src/skyhook/cls/cls_skyhook.cc index e021cb3c8248a..632b82f1d1a6c 100644 --- a/cpp/src/skyhook/cls/cls_skyhook.cc +++ b/cpp/src/skyhook/cls/cls_skyhook.cc @@ -95,7 +95,7 @@ class RandomAccessObject : public arrow::io::RandomAccessFile { arrow::Result> Read(int64_t nbytes) override { ARROW_ASSIGN_OR_RAISE(auto buffer, ReadAt(pos_, nbytes)); pos_ += buffer->size(); - return std::move(buffer); + return buffer; } /// Read a specified number of bytes from the current position into an output stream. From 54dfb82a401fa4c1e53fccb2f152b04f7bb85f4d Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Sat, 25 May 2024 07:25:11 +0530 Subject: [PATCH 169/261] GH-40933: [Java] Enhance the copyFrom* functionality in StringView (#41752) ### Rationale for this change Initial implementation of StringView doesn't contain `copy` functionality. This PR adds that feature. ### What changes are included in this PR? This PR adds `copyFrom` and `copyFromSafe` functions to `BaseVariableWidthViewVector`. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #40933 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: Vibhatha Lakmal Abeykoon Signed-off-by: David Li --- .../vector/BaseVariableWidthViewVector.java | 52 ++++- .../org/apache/arrow/vector/types/Types.java | 2 +- .../arrow/vector/TestVarCharViewVector.java | 197 ++++++++++++++++++ 3 files changed, 243 insertions(+), 8 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index b3e86fab05462..aaa8098b690fd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -33,6 +33,7 @@ import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.compare.VectorVisitor; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.Field; @@ -1334,30 +1335,67 @@ protected final void handleSafe(int index, int dataLength) { /** * Copy a cell value from a particular index in source vector to a particular position in this * vector. - * TODO: Improve functionality to support copying views. - * Enhance CopyFrom - * * @param fromIndex position to copy from in source vector * @param thisIndex position to copy to in this vector * @param from source vector */ @Override public void copyFrom(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException("copyFrom is not supported for VariableWidthVector"); + Preconditions.checkArgument(getMinorType() == from.getMinorType()); + if (from.isNull(fromIndex)) { + BitVectorHelper.unsetBit(validityBuffer, thisIndex); + } else { + final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); + BitVectorHelper.setBit(validityBuffer, thisIndex); + final int start = thisIndex * ELEMENT_SIZE; + final int copyStart = fromIndex * ELEMENT_SIZE; + from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE); + if (viewLength > INLINE_SIZE) { + final int bufIndex = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex); + final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength); + thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength); + thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength); + } + } + lastSet = thisIndex; } /** * Same as {@link #copyFrom(int, int, ValueVector)} except that it handles the case when the * capacity of the vector needs to be expanded before copy. - * TODO: Improve functionality to support copying views. 
- * Enhance CopyFrom * @param fromIndex position to copy from in source vector * @param thisIndex position to copy to in this vector * @param from source vector */ @Override public void copyFromSafe(int fromIndex, int thisIndex, ValueVector from) { - throw new UnsupportedOperationException("copyFromSafe is not supported for VariableWidthVector"); + Preconditions.checkArgument(getMinorType() == from.getMinorType()); + if (from.isNull(fromIndex)) { + handleSafe(thisIndex, 0); + BitVectorHelper.unsetBit(validityBuffer, thisIndex); + } else { + final int viewLength = from.getDataBuffer().getInt((long) fromIndex * ELEMENT_SIZE); + handleSafe(thisIndex, viewLength); + BitVectorHelper.setBit(validityBuffer, thisIndex); + final int start = thisIndex * ELEMENT_SIZE; + final int copyStart = fromIndex * ELEMENT_SIZE; + from.getDataBuffer().getBytes(start, viewBuffer, copyStart, ELEMENT_SIZE); + if (viewLength > INLINE_SIZE) { + final int bufIndex = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int dataOffset = from.getDataBuffer().getInt(((long) fromIndex * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = ((BaseVariableWidthViewVector) from).dataBuffers.get(bufIndex); + final ArrowBuf thisDataBuf = allocateOrGetLastDataBuffer(viewLength); + thisDataBuf.setBytes(thisDataBuf.writerIndex(), dataBuf, dataOffset, viewLength); + thisDataBuf.writerIndex(thisDataBuf.writerIndex() + viewLength); + } + } + lastSet = thisIndex; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java index e10a65e3b2c53..abed4d1ff0143 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java @@ -568,7 +568,7 @@ public FieldWriter getNewFieldWriter(ValueVector vector) { return new VarBinaryWriterImpl((VarBinaryVector) vector); } }, - VIEWVARBINARY(Binary.INSTANCE) { + VIEWVARBINARY(BinaryView.INSTANCE) { @Override public FieldVector getNewVector( Field field, diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index 2d37b0b4eb9ad..17bc08c7d398c 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -36,6 +36,8 @@ import java.util.List; import java.util.Objects; import java.util.Random; +import java.util.function.Function; +import java.util.stream.Stream; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -52,6 +54,9 @@ import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; public class TestVarCharViewVector { @@ -1517,6 +1522,198 @@ public void testVectorLoadUnload() { } } + static Stream vectorCreatorProvider() { + return Stream.of( + Arguments.of((Function) + (allocator -> newVector(ViewVarBinaryVector.class, EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARBINARY, allocator))), + Arguments.of((Function) + (allocator -> newVector(ViewVarCharVector.class, EMPTY_SCHEMA_PATH, + Types.MinorType.VIEWVARCHAR, allocator))) + ); + } + + 
@ParameterizedTest + @MethodSource({"vectorCreatorProvider"}) + public void testCopyFromWithNulls(Function vectorCreator) { + try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator); + final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) { + final int initialCapacity = 1024; + vector.setInitialCapacity(initialCapacity); + vector.allocateNew(); + int capacity = vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity); + + // setting number of values such that we have enough space in the initial allocation + // to avoid re-allocation. This is to test copyFrom() without re-allocation. + final int numberOfValues = initialCapacity / 2 / ViewVarCharVector.ELEMENT_SIZE; + + final String prefixString = generateRandomString(12); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + // null values + vector.setNull(i); + } else if (i % 3 == 1) { + // short strings + byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); + vector.set(i, b, 0, b.length); + } else { + // long strings + byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); + vector.set(i, b, 0, b.length); + } + } + + assertEquals(capacity, vector.getValueCapacity()); + + vector.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + vector2.setInitialCapacity(initialCapacity); + vector2.allocateNew(); + int capacity2 = vector2.getValueCapacity(); + assertEquals(capacity2, capacity); + + for (int i = 0; i < numberOfValues; i++) { + vector2.copyFrom(i, i, vector); + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + assertEquals(capacity, vector2.getValueCapacity()); + + vector2.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + } + } + + @ParameterizedTest + @MethodSource("vectorCreatorProvider") + public void testCopyFromSafeWithNulls(Function vectorCreator) { + try (final BaseVariableWidthViewVector vector = vectorCreator.apply(allocator); + final BaseVariableWidthViewVector vector2 = vectorCreator.apply(allocator)) { + + final int initialCapacity = 4096; + vector.setInitialCapacity(initialCapacity); + vector.allocateNew(); + int capacity = vector.getValueCapacity(); + assertTrue(capacity >= initialCapacity); + + final int numberOfValues = initialCapacity / ViewVarCharVector.ELEMENT_SIZE; + + final String prefixString = generateRandomString(12); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + // null values + vector.setNull(i); + } else if (i % 3 == 1) { + // 
short strings + byte[] b = Integer.toString(i).getBytes(StandardCharsets.UTF_8); + vector.setSafe(i, b, 0, b.length); + } else { + // long strings + byte[] b = (i + prefixString).getBytes(StandardCharsets.UTF_8); + vector.setSafe(i, b, 0, b.length); + } + } + + /* NO reAlloc() should have happened in setSafe() */ + assertEquals(capacity, vector.getValueCapacity()); + + vector.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + vector2.setInitialCapacity(initialCapacity); + vector2.allocateNew(); + int capacity2 = vector2.getValueCapacity(); + assertEquals(capacity2, capacity); + + for (int i = 0; i < numberOfValues; i++) { + vector2.copyFromSafe(i, i, vector); + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + + /* NO reAlloc() should have happened in setSafe() */ + assertEquals(capacity, vector2.getValueCapacity()); + + vector2.setValueCount(numberOfValues); + + for (int i = 0; i < numberOfValues; i++) { + if (i % 3 == 0) { + assertNull(vector2.getObject(i)); + } else if (i % 3 == 1) { + assertArrayEquals(Integer.toString(i).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } else { + assertArrayEquals((i + prefixString).getBytes(StandardCharsets.UTF_8), + vector.get(i), + "unexpected value at index: " + i); + } + } + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); From ad711ec4590170ad225f2cbb7b6a1113a87c6e67 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 25 May 2024 11:06:19 +0900 Subject: [PATCH 170/261] MINOR: [Java] Bump checker.framework.version from 3.42.0 to 3.43.0 in /java (#41520) Bumps `checker.framework.version` from 3.42.0 to 3.43.0. Updates `org.checkerframework:checker-qual` from 3.42.0 to 3.43.0
Release notes

Sourced from org.checkerframework:checker-qual's releases.

Checker Framework 3.43.0

Version 3.43.0 (May 1, 2024)

User-visible changes:

Method, constructor, lambda, and method reference type inference has been greatly improved. The -AconservativeUninferredTypeArguments option is no longer necessary and has been removed.

Renamed command-line arguments:

  • -AskipDirs has been renamed to -AskipFiles. -AskipDirs will continue to work for the time being.

New command-line arguments:

  • -AonlyFiles complements -AskipFiles

A specialized inference algorithm for the Resource Leak Checker runs automatically as part of whole-program inference.

Implementation details:

Deprecated ObjectCreationNode#getConstructor in favor of new ObjectCreationNode#getTypeToInstantiate().

Renamed AbstractCFGVisualizer.visualizeBlockHelper() to visualizeBlockWithSeparator().

Moved methods from TreeUtils to subclasses of TreeUtilsAfterJava11:

  • isConstantCaseLabelTree
  • isDefaultCaseLabelTree
  • isPatternCaseLabelTree

Renamed BaseTypeVisitor.checkForPolymorphicQualifiers() to warnInvalidPolymorphicQualifier().

Closed issues:

#979, #4559, #4593, #5058, #5734, #5781, #6071, #6093, #6239, #6297, #6317, #6322, #6346, #6373, #6376, #6378, #6379, #6380, #6389, #6393, #6396, #6402, #6406, #6407, #6417, #6421, #6430, #6433, #6438, #6442, #6473, #6480, #6507, #6531, #6535.
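
Of these changes, the `-AskipDirs` → `-AskipFiles` rename is the one most likely to touch consuming builds. As a hedged sketch only — Arrow's own `java/pom.xml` change below merely bumps the version property and does not pass these flags — a Maven build that did use the old option could migrate roughly like this (the plugin block and the skip pattern are hypothetical, not taken from this PR):

```xml
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-compiler-plugin</artifactId>
  <configuration>
    <compilerArgs>
      <!-- Checker Framework annotation processor flags (hypothetical usage). -->
      <!-- Pre-3.43.0 spelling; still accepted "for the time being": -->
      <!--   <arg>-AskipDirs=.*/generated/.*</arg> -->
      <!-- 3.43.0 spelling; -AonlyFiles is the new complementary option: -->
      <arg>-AskipFiles=.*/generated/.*</arg>
    </compilerArgs>
  </configuration>
</plugin>
```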


Updates `org.checkerframework:checker` from 3.42.0 to 3.43.0
Release notes

Sourced from org.checkerframework:checker's releases.

Checker Framework 3.43.0

Version 3.43.0 (May 1, 2024)

User-visible changes:

Method, constructor, lambda, and method reference type inference has been greatly improved. The -AconservativeUninferredTypeArguments option is no longer necessary and has been removed.

Renamed command-line arguments:

  • -AskipDirs has been renamed to -AskipFiles. -AskipDirs will continue to work for the time being.

New command-line arguments:

  • -AonlyFiles complements -AskipFiles

A specialized inference algorithm for the Resource Leak Checker runs automatically as part of whole-program inference.

Implementation details:

Deprecated ObjectCreationNode#getConstructor in favor of new ObjectCreationNode#getTypeToInstantiate().

Renamed AbstractCFGVisualizer.visualizeBlockHelper() to visualizeBlockWithSeparator().

Moved methods from TreeUtils to subclasses of TreeUtilsAfterJava11:

  • isConstantCaseLabelTree
  • isDefaultCaseLabelTree
  • isPatternCaseLabelTree

Renamed BaseTypeVisitor.checkForPolymorphicQualifiers() to warnInvalidPolymorphicQualifier().

Closed issues:

#979, #4559, #4593, #5058, #5734, #5781, #6071, #6093, #6239, #6297, #6317, #6322, #6346, #6373, #6376, #6378, #6379, #6380, #6389, #6393, #6396, #6402, #6406, #6407, #6417, #6421, #6430, #6433, #6438, #6442, #6473, #6480, #6507, #6531, #6535.


Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR:

- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: David Li --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index 925ec585152bc..289810daba3ac 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -98,7 +98,7 @@ 3.12.1 5.11.0 5.2.0 - 3.42.0 + 3.43.0 From 7c8ce4589ae9e3c4a9c0cd54cff81a54ac003079 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sat, 25 May 2024 12:34:04 +0900 Subject: [PATCH 171/261] GH-41770: [CI][GLib] Remove temporary files explicitly (#41807) ### Rationale for this change If we remove temporary files by GC, "`unlink': Permission denied" warnings are happen on Windows. ### What changes are included in this PR? Use `Tempfile.create {...}` to remove temporary files explicitly. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #41770 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/test/parquet/test-arrow-file-reader.rb | 27 +++++--- c_glib/test/parquet/test-arrow-file-writer.rb | 27 +++++--- .../test/parquet/test-boolean-statistics.rb | 24 +++++--- .../parquet/test-byte-array-statistics.rb | 24 +++++--- .../parquet/test-column-chunk-metadata.rb | 61 +++++++++++-------- c_glib/test/parquet/test-double-statistics.rb | 24 +++++--- c_glib/test/parquet/test-file-metadata.rb | 61 +++++++++++-------- ...test-fixed-length-byte-array-statistics.rb | 28 ++++++--- c_glib/test/parquet/test-float-statistics.rb | 24 +++++--- c_glib/test/parquet/test-int32-statistics.rb | 24 +++++--- c_glib/test/parquet/test-int64-statistics.rb | 26 +++++--- .../test/parquet/test-row-group-metadata.rb | 61 +++++++++++-------- c_glib/test/parquet/test-statistics.rb | 36 +++++++---- 13 files changed, 281 insertions(+), 166 deletions(-) diff --git a/c_glib/test/parquet/test-arrow-file-reader.rb b/c_glib/test/parquet/test-arrow-file-reader.rb index 45eb335965434..eff5ad966aea6 100644 --- a/c_glib/test/parquet/test-arrow-file-reader.rb +++ b/c_glib/test/parquet/test-arrow-file-reader.rb @@ -20,16 +20,23 @@ class TestParquetArrowFileReader < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @a_array = build_string_array(["foo", "bar"]) - @b_array = build_int32_array([123, 456]) - @table = build_table("a" => @a_array, - "b" => @b_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - @reader = Parquet::ArrowFileReader.new(@file.path) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @a_array = build_string_array(["foo", "bar"]) + @b_array = build_int32_array([123, 456]) + @table = build_table("a" => @a_array, + "b" => @b_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + @reader = Parquet::ArrowFileReader.new(@file.path) + begin + yield + ensure + @reader.unref + end + end end def test_schema diff --git a/c_glib/test/parquet/test-arrow-file-writer.rb b/c_glib/test/parquet/test-arrow-file-writer.rb index 855527444d063..f899e7273b2a2 100644 --- a/c_glib/test/parquet/test-arrow-file-writer.rb +++ b/c_glib/test/parquet/test-arrow-file-writer.rb @@ -20,7 +20,10 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) + Tempfile.create(["data", ".parquet"]) do 
|file| + @file = file + yield + end end def test_write @@ -33,14 +36,18 @@ def test_write writer.close reader = Parquet::ArrowFileReader.new(@file.path) - reader.use_threads = true - assert_equal([ - enabled_values.length / chunk_size, - true, - ], - [ - reader.n_row_groups, - table.equal_metadata(reader.read_table, false), - ]) + begin + reader.use_threads = true + assert_equal([ + enabled_values.length / chunk_size, + true, + ], + [ + reader.n_row_groups, + table.equal_metadata(reader.read_table, false), + ]) + ensure + reader.unref + end end end diff --git a/c_glib/test/parquet/test-boolean-statistics.rb b/c_glib/test/parquet/test-boolean-statistics.rb index 6131a22195cb8..244348641320e 100644 --- a/c_glib/test/parquet/test-boolean-statistics.rb +++ b/c_glib/test/parquet/test-boolean-statistics.rb @@ -20,14 +20,22 @@ class TestParquetBooleanStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("boolean" => build_boolean_array([nil, false, true])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("boolean" => build_boolean_array([nil, false, true])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-byte-array-statistics.rb b/c_glib/test/parquet/test-byte-array-statistics.rb index 50ec409dbce7c..b9693a77fff13 100644 --- a/c_glib/test/parquet/test-byte-array-statistics.rb +++ b/c_glib/test/parquet/test-byte-array-statistics.rb @@ -20,14 +20,22 @@ class TestParquetByteArrayStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("string" => build_string_array([nil, "abc", "xyz"])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("string" => build_string_array([nil, "abc", "xyz"])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-column-chunk-metadata.rb b/c_glib/test/parquet/test-column-chunk-metadata.rb index a93fe85bbfbf1..f0012f0124577 100644 --- a/c_glib/test/parquet/test-column-chunk-metadata.rb +++ b/c_glib/test/parquet/test-column-chunk-metadata.rb @@ -20,35 +20,46 @@ class TestParquetColumnChunkMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless 
defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata.get_row_group(0).get_column_chunk(0) - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata.get_row_group(0).get_column_chunk(0) + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-double-statistics.rb b/c_glib/test/parquet/test-double-statistics.rb index a610fb24a9bdf..6c7a95824570d 100644 --- a/c_glib/test/parquet/test-double-statistics.rb +++ b/c_glib/test/parquet/test-double-statistics.rb @@ -20,14 +20,22 @@ class TestParquetDoubleStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("double" => build_double_array([nil, -2.9, 2.9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("double" => build_double_array([nil, -2.9, 2.9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-file-metadata.rb b/c_glib/test/parquet/test-file-metadata.rb index 2bca7e66e0b07..aec3f4ab829b9 100644 --- a/c_glib/test/parquet/test-file-metadata.rb +++ b/c_glib/test/parquet/test-file-metadata.rb @@ -20,35 +20,46 @@ class TestParquetFileMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = 
build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb b/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb index 87a96d009c509..c2f179627d06a 100644 --- a/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb +++ b/c_glib/test/parquet/test-fixed-length-byte-array-statistics.rb @@ -20,16 +20,24 @@ class TestParquetFixedLengthByteArrayStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - data_type = Arrow::FixedSizeBinaryDataType.new(3) - array = build_fixed_size_binary_array(data_type, [nil, "abc", "xyz"]) - @table = build_table("binary" => array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + data_type = Arrow::FixedSizeBinaryDataType.new(3) + array = build_fixed_size_binary_array(data_type, [nil, "abc", "xyz"]) + @table = build_table("binary" => array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-float-statistics.rb b/c_glib/test/parquet/test-float-statistics.rb index 2622a2bb36fe6..7d1a233f53ca0 100644 --- a/c_glib/test/parquet/test-float-statistics.rb +++ b/c_glib/test/parquet/test-float-statistics.rb @@ -20,14 +20,22 @@ class TestParquetFloatStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", 
".parquet"]) - @table = build_table("float" => build_float_array([nil, -2.9, 2.9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("float" => build_float_array([nil, -2.9, 2.9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-int32-statistics.rb b/c_glib/test/parquet/test-int32-statistics.rb index 041f07c74292f..8d41327f88014 100644 --- a/c_glib/test/parquet/test-int32-statistics.rb +++ b/c_glib/test/parquet/test-int32-statistics.rb @@ -20,14 +20,22 @@ class TestParquetInt32Statistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("int32" => build_int32_array([nil, -2, 9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("int32" => build_int32_array([nil, -2, 9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-int64-statistics.rb b/c_glib/test/parquet/test-int64-statistics.rb index 0a014573c1144..81fce8a0bbbbd 100644 --- a/c_glib/test/parquet/test-int64-statistics.rb +++ b/c_glib/test/parquet/test-int64-statistics.rb @@ -20,15 +20,23 @@ class TestParquetInt64Statistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - array = build_int64_array([nil, -(2 ** 32), 2 ** 32]) - @table = build_table("int64" => array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + array = build_int64_array([nil, -(2 ** 32), 2 ** 32]) + @table = build_table("int64" => array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#min") do diff --git a/c_glib/test/parquet/test-row-group-metadata.rb b/c_glib/test/parquet/test-row-group-metadata.rb index 
e68cb9d11ee62..f238dd3b5774e 100644 --- a/c_glib/test/parquet/test-row-group-metadata.rb +++ b/c_glib/test/parquet/test-row-group-metadata.rb @@ -20,35 +20,46 @@ class TestParquetRowGroupMetadata < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @string_array = build_string_array([nil, "hello"]) - fields = [ - Arrow::Field.new("int8", Arrow::Int8DataType.new), - Arrow::Field.new("boolean", Arrow::BooleanDataType.new), - ] - structs = [ - { - "int8" => -29, - "boolean" => true, - }, - nil, - ] - @struct_array = build_struct_array(fields, structs) - @table = build_table("string" => @string_array, - "struct" => @struct_array) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @metadata = reader.metadata.get_row_group(0) + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @string_array = build_string_array([nil, "hello"]) + fields = [ + Arrow::Field.new("int8", Arrow::Int8DataType.new), + Arrow::Field.new("boolean", Arrow::BooleanDataType.new), + ] + structs = [ + { + "int8" => -29, + "boolean" => true, + }, + nil, + ] + @struct_array = build_struct_array(fields, structs) + @table = build_table("string" => @string_array, + "struct" => @struct_array) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @metadata = reader.metadata.get_row_group(0) + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_metadata = reader.metadata.get_row_group(0) - assert do - @metadata == other_metadata + begin + other_metadata = reader.metadata.get_row_group(0) + assert do + @metadata == other_metadata + end + ensure + reader.unref end end diff --git a/c_glib/test/parquet/test-statistics.rb b/c_glib/test/parquet/test-statistics.rb index 0367084c88a49..09a47ac255927 100644 --- a/c_glib/test/parquet/test-statistics.rb +++ b/c_glib/test/parquet/test-statistics.rb @@ -20,22 +20,34 @@ class TestParquetStatistics < Test::Unit::TestCase def setup omit("Parquet is required") unless defined?(::Parquet) - @file = Tempfile.open(["data", ".parquet"]) - @table = build_table("int32" => build_int32_array([nil, 2, 2, 9])) - writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) - chunk_size = 1024 - writer.write_table(@table, chunk_size) - writer.close - reader = Parquet::ArrowFileReader.new(@file.path) - @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics + Tempfile.create(["data", ".parquet"]) do |file| + @file = file + @table = build_table("int32" => build_int32_array([nil, 2, 2, 9])) + writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path) + chunk_size = 1024 + writer.write_table(@table, chunk_size) + writer.close + reader = Parquet::ArrowFileReader.new(@file.path) + begin + @statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + yield + ensure + reader.unref + end + end end test("#==") do reader = Parquet::ArrowFileReader.new(@file.path) - other_statistics = - reader.metadata.get_row_group(0).get_column_chunk(0).statistics - assert do - @statistics == other_statistics + begin + other_statistics = + reader.metadata.get_row_group(0).get_column_chunk(0).statistics + assert do + @statistics == 
other_statistics + end + ensure + reader.unref end end From 283f66f396401d6371b14a8ff66836de889bcfb0 Mon Sep 17 00:00:00 2001 From: Nic Crane Date: Sat, 25 May 2024 11:33:12 +0100 Subject: [PATCH 172/261] GH-41420: [R] Update NEWS.md for 16.1.0 (#41422) ### Rationale for this change Update NEWS.md ### What changes are included in this PR? News updates ### Are these changes tested? No ### Are there any user-facing changes? No * GitHub Issue: #41420 Authored-by: Nic Crane Signed-off-by: Nic Crane --- r/NEWS.md | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/r/NEWS.md b/r/NEWS.md index 47c4ac1571dad..dc89fa266e3ef 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -25,13 +25,22 @@ * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. * The `na_matches` argument to the `dplyr::*_join()` functions is now supported. This argument controls whether `NA` values are considered equal when joining. (#41358) -# arrow 16.0.0 +# arrow 16.1.0 + +## New features -# arrow 15.0.2 +* Streams can now be written to socket connections (#38897) +* The Arrow R package now can be built with older versions of the Arrow C++ library (back to 13.0.0) (#39738) -# arrow 15.0.1 +## Minor improvements and fixes + +* Dataset and table output printing now truncates schemas longer than 20 items long (#38916) +* Fixed pointer conversion to Python for latest reticulate to ensure data can be passed between Arrow and PyArrow (#39969) +* Check on macOS if we are using GNU libtool is and ensure we use macOS libtool instead (#40259) +* Fix an error where creating a bundled tarball with all dependencies was failing on Windows (@hutch3232, #40232) -# arrow 15.0.0 + +# arrow 15.0.1 ## New features From 1c9e393b73195840960dfb9eca8c0dc390be751a Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Sun, 26 May 2024 09:43:52 +1200 Subject: [PATCH 173/261] GH-41749: [GLib] Allow getting a RecordBatchReader from a Dataset or Scanner (#41750) ### Rationale for this change See #41749 ### What changes are included in this PR? Adds `to_reader` methods to `GADatasetDataset` and `GADatasetScanner`. ### Are these changes tested? Yes I've added new unit tests. ### Are there any user-facing changes? Yes this is a new feature. * GitHub Issue: #41749 Lead-authored-by: Adam Reeve Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- c_glib/arrow-dataset-glib/dataset.cpp | 37 ++++++++++++++++++- c_glib/arrow-dataset-glib/dataset.h | 3 ++ c_glib/arrow-dataset-glib/scanner.cpp | 22 +++++++++++ c_glib/arrow-dataset-glib/scanner.h | 4 ++ .../test/dataset/test-file-system-dataset.rb | 24 ++++++++++-- c_glib/test/dataset/test-scanner.rb | 10 +++++ 6 files changed, 96 insertions(+), 4 deletions(-) diff --git a/c_glib/arrow-dataset-glib/dataset.cpp b/c_glib/arrow-dataset-glib/dataset.cpp index 704d6b589ee94..f84e4e3db380a 100644 --- a/c_glib/arrow-dataset-glib/dataset.cpp +++ b/c_glib/arrow-dataset-glib/dataset.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -152,12 +153,46 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error) } auto arrow_scanner = *arrow_scanner_result; auto arrow_table_result = arrow_scanner->ToTable(); - if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) { + if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) { return NULL; } return garrow_table_new_raw(&(*arrow_table_result)); } +/** + * gadataset_dataset_to_record_batch_reader: + * @dataset: A #GADatasetDataset. 
+ * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A #GArrowRecordBatchReader on success, %NULL on error. + * + * Since: 17.0.0 + */ +GArrowRecordBatchReader * +gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error) +{ + auto arrow_dataset = gadataset_dataset_get_raw(dataset); + auto arrow_scanner_builder_result = arrow_dataset->NewScan(); + if (!garrow::check(error, + arrow_scanner_builder_result, + "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto arrow_scanner_builder = *arrow_scanner_builder_result; + auto arrow_scanner_result = arrow_scanner_builder->Finish(); + if (!garrow::check(error, arrow_scanner_result, "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto arrow_scanner = *arrow_scanner_result; + auto arrow_reader_result = arrow_scanner->ToRecordBatchReader(); + if (!garrow::check(error, arrow_reader_result, "[dataset][to-record-batch-reader]")) { + return nullptr; + } + auto sources = g_list_prepend(nullptr, dataset); + return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources); +} + /** * gadataset_dataset_get_type_name: * @dataset: A #GADatasetDataset. diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h index 657de330e6c49..5b957f0538a2a 100644 --- a/c_glib/arrow-dataset-glib/dataset.h +++ b/c_glib/arrow-dataset-glib/dataset.h @@ -34,6 +34,9 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error); GADATASET_AVAILABLE_IN_5_0 gchar * gadataset_dataset_get_type_name(GADatasetDataset *dataset); +GADATASET_AVAILABLE_IN_17_0 +GArrowRecordBatchReader * +gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error); #define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \ (gadataset_file_system_dataset_write_options_get_type()) diff --git a/c_glib/arrow-dataset-glib/scanner.cpp b/c_glib/arrow-dataset-glib/scanner.cpp index 717532db9220f..28af1f16e5968 100644 --- a/c_glib/arrow-dataset-glib/scanner.cpp +++ b/c_glib/arrow-dataset-glib/scanner.cpp @@ -128,6 +128,28 @@ gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error) } } +/** + * gadataset_scanner_to_record_batch_reader: + * @scanner: A #GADatasetScanner. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: (transfer full) (nullable): + * A #GArrowRecordBatchReader on success, %NULL on error. 
+ * + * Since: 17.0.0 + */ +GArrowRecordBatchReader * +gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error) +{ + auto arrow_scanner = gadataset_scanner_get_raw(scanner); + auto arrow_reader_result = arrow_scanner->ToRecordBatchReader(); + if (!garrow::check(error, arrow_reader_result, "[scanner][to-record-batch-reader]")) { + return nullptr; + } + auto sources = g_list_prepend(nullptr, scanner); + return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources); +} + typedef struct GADatasetScannerBuilderPrivate_ { std::shared_ptr scanner_builder; diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h index ad462391568a3..d92eca5ab8420 100644 --- a/c_glib/arrow-dataset-glib/scanner.h +++ b/c_glib/arrow-dataset-glib/scanner.h @@ -37,6 +37,10 @@ GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error); +GADATASET_AVAILABLE_IN_17_0 +GArrowRecordBatchReader * +gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error); + #define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( diff --git a/c_glib/test/dataset/test-file-system-dataset.rb b/c_glib/test/dataset/test-file-system-dataset.rb index 0e856b678f860..96deedf6b4eb0 100644 --- a/c_glib/test/dataset/test-file-system-dataset.rb +++ b/c_glib/test/dataset/test-file-system-dataset.rb @@ -56,6 +56,22 @@ def test_partitioning end def test_read_write + dataset, expected_table = create_dataset + assert_equal(expected_table, dataset.to_table) + end + + def test_to_record_batch_reader + dataset, expected_table = create_dataset + reader = dataset.to_record_batch_reader + begin + assert_equal(expected_table, reader.read_all) + ensure + # Unref to ensure the reader closes files and we can delete the temp directory + reader.unref + end + end + + def create_dataset table = build_table(label: build_string_array(["a", "a", "b", "c"]), count: build_int32_array([1, 10, 2, 3])) table_reader = Arrow::TableBatchReader.new(table) @@ -73,7 +89,8 @@ def test_read_write end @factory.partition_base_dir = @dir dataset = @factory.finish - assert_equal(build_table(count: [ + + expected_table = build_table(count: [ build_int32_array([1, 10]), build_int32_array([2]), build_int32_array([3]), @@ -82,7 +99,8 @@ def test_read_write build_string_array(["a", "a"]), build_string_array(["b"]), build_string_array(["c"]), - ]), - dataset.to_table) + ]) + + return dataset, expected_table end end diff --git a/c_glib/test/dataset/test-scanner.rb b/c_glib/test/dataset/test-scanner.rb index f7702d4905fb6..5dc31eefc5f4c 100644 --- a/c_glib/test/dataset/test-scanner.rb +++ b/c_glib/test/dataset/test-scanner.rb @@ -45,4 +45,14 @@ def setup def test_to_table assert_equal(@table, @scanner.to_table) end + + def test_to_record_batch_reader + reader = @scanner.to_record_batch_reader + begin + assert_equal(@table, reader.read_all) + ensure + # Unref to ensure the reader closes files and we can delete the temp directory + reader.unref + end + end end From ff9921ffa89585be69ae85674bb365d03cb22ba4 Mon Sep 17 00:00:00 2001 From: h-vetinari Date: Mon, 27 May 2024 18:59:52 +1100 Subject: [PATCH 174/261] GH-41755: [C++][ORC] Ensure setting detected ORC version (#41767) `FindorcAlt.cmake` doesn't set `orcAlt_VERSION` when it finds ORC by `find_library()`/`find_path()`. If `orcAlt_VERSION` isn't set, ORC version detection by the caller fails.
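For illustration, a minimal caller-side sketch of why that variable matters (hypothetical consumer code, not part of this patch; it assumes FindorcAlt.cmake is on CMAKE_MODULE_PATH):

```cmake
# Hypothetical consumer of FindorcAlt.cmake. The branch below can only be
# taken when the find module actually sets orcAlt_VERSION; before this fix,
# the variable stayed empty when ORC was found via find_library()/find_path().
# The 2.0.0 threshold here is only an example.
find_package(orcAlt REQUIRED)
if(orcAlt_VERSION VERSION_GREATER_EQUAL "2.0.0")
  message(STATUS "ORC ${orcAlt_VERSION} detected; version-dependent checks can be skipped")
endif()
```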
`cpp/src/arrow/adapters/orc/adapter.cc` uses the detected ORC version. If the detected version isn't correct, a needless time zone database check is performed. Deployed in conda-forge through https://github.com/conda-forge/arrow-cpp-feedstock/pull/1424 and confirmed as working in https://github.com/conda-forge/pyarrow-feedstock/pull/122 * GitHub Issue: #41755 Authored-by: H. Vetinari Signed-off-by: Sutou Kouhei --- cpp/cmake_modules/FindorcAlt.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/cmake_modules/FindorcAlt.cmake b/cpp/cmake_modules/FindorcAlt.cmake index 289416678ad39..ce8cd11b4c3f0 100644 --- a/cpp/cmake_modules/FindorcAlt.cmake +++ b/cpp/cmake_modules/FindorcAlt.cmake @@ -71,4 +71,5 @@ if(orcAlt_FOUND) PROPERTIES IMPORTED_LOCATION "${ORC_STATIC_LIB}" INTERFACE_INCLUDE_DIRECTORIES "${ORC_INCLUDE_DIR}") endif() + set(orcAlt_VERSION ${ORC_VERSION}) endif() From f8fe2ae3279f71cb2256024a9ab5d70dd9432f89 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 28 May 2024 08:44:56 +0900 Subject: [PATCH 175/261] MINOR: [Go] Bump github.com/goccy/go-json from 0.10.2 to 0.10.3 in /go (#41850) Bumps [github.com/goccy/go-json](https://github.com/goccy/go-json) from 0.10.2 to 0.10.3.
Release notes (sourced from github.com/goccy/go-json's releases, v0.10.3; details truncated). Full changelog: https://github.com/goccy/go-json/compare/v0.10.2...v0.10.3

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index e846c61033f47..9c70544539b16 100644 --- a/go/go.mod +++ b/go/go.mod @@ -23,7 +23,7 @@ require ( github.com/andybalholm/brotli v1.1.0 github.com/apache/thrift v0.20.0 github.com/docopt/docopt-go v0.0.0-20180111231733-ee0de3bc6815 - github.com/goccy/go-json v0.10.2 + github.com/goccy/go-json v0.10.3 github.com/golang/snappy v0.0.4 github.com/google/flatbuffers v24.3.25+incompatible github.com/klauspost/asmfmt v1.3.2 diff --git a/go/go.sum b/go/go.sum index 6bceb4e5877ca..9e11041c333ac 100644 --- a/go/go.sum +++ b/go/go.sum @@ -26,8 +26,8 @@ github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD87 github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-playground/validator/v10 v10.4.1 h1:pH2c5ADXtd66mxoE0Zm9SUhxE20r7aM3F26W0hOn+GE= github.com/go-playground/validator/v10 v10.4.1/go.mod h1:nlOn6nFhuKACm19sB/8EGNn9GlaMV7XkbRSipzJ0Ii4= -github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= -github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA= +github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/goccy/go-yaml v1.11.0 h1:n7Z+zx8S9f9KgzG6KtQKf+kwqXZlLNR2F6018Dgau54= github.com/goccy/go-yaml v1.11.0/go.mod h1:H+mJrWtjPTJAHvRbV09MCK9xYwODM+wRTVFFTWckfng= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= From ef3d4670e70a40f46e6acdd7041b5ee7edb16a42 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 16:58:31 -0700 Subject: [PATCH 176/261] MINOR: [C#] Bump xunit from 2.8.0 to 2.8.1 in /csharp (#41842) Bumps [xunit](https://github.com/xunit/xunit) from 2.8.0 to 2.8.1.
Commits
  • ba2ae9b v2.8.1
  • 151b8d0 Use 'dotnet format' instead of 'dotnet dotnet-format'
  • be6db6f #2931: Tighten up types to prevent accidentically calling AddOrGet with a Con...
  • f466d81 #2927: Misleading error message when class used in IClassFixture<> throws exc...
  • 1911ea7 Missed unit test updates
  • f497d65 Cannot use full assembly path as dictionary key for execution options lookup,...
  • c2f2d47 Add ability to provide live output messages from running tests
  • 013093b Polyfill in StringSyntaxAttribute
  • e1e4c2e #2719: Class with custom Fact with throwing Skip should fail appropriately
  • 8b0b13c Clarify naming
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Sql.Tests.csproj | 2 +- .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +- csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 2b1720561004e..a02dc383fb4e8 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,7 +8,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index c8fb40f2d6702..50e5cea7a1dd0 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index ba60451f25f68..7b4d063763fde 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,7 +7,7 @@ - + diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index 90b498d4e9b03..776f37db3a6d7 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -17,7 +17,7 @@ - + all runtime; build; native; contentfiles; analyzers From 85a2ac9168ac5f003e0a38c9458ed10f3534abe6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 16:59:51 -0700 Subject: [PATCH 177/261] MINOR: [C#] Bump Grpc.AspNetCore, Grpc.AspNetCore.Server, System.Runtime.CompilerServices.Unsafe and Grpc.Net.Client in /csharp (#41843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [Grpc.AspNetCore](https://github.com/grpc/grpc-dotnet), [Grpc.AspNetCore.Server](https://github.com/grpc/grpc-dotnet), [System.Runtime.CompilerServices.Unsafe](https://github.com/dotnet/runtime) and [Grpc.Net.Client](https://github.com/grpc/grpc-dotnet). These dependencies needed to be updated together. Updates `Grpc.AspNetCore` from 2.62.0 to 2.63.0
Release notes (sourced from Grpc.AspNetCore's releases): Release v2.63.0 and Release v2.63.0-pre1; details truncated. Full changelog: https://github.com/grpc/grpc-dotnet/compare/v2.62.0...v2.63.0

Commits
  • 6f1271f bump to stable release (#2440)
  • ac75fe3 build: configure rollForward in global.json (#2442)
  • 48aeb80 Bump 2.63.x branch to 2.63.0-pre1 (#2430)
  • 989420e Update Grpc.Tools to 2.63 (#2429)
  • c80f459 Don't capture async locals in resolver (#2426)
  • 63914f2 Add semaphore to limit subchannel connect to prevent race conditions (#2422)
  • 8199f66 Fix HTTP/3 test errors on .NET 6 (#2423)
  • 2d9df58 Fix memory leak when using call context propagation with cancellation token (...
  • c9c902c Enable multiple connections with WinHttpHandler by default (#2416)
  • 2a36215 Fix ObjectDisposedException message (#2415)
  • Additional commits viewable in compare view

Updates `Grpc.AspNetCore.Server` from 2.62.0 to 2.63.0
Release notes (sourced from Grpc.AspNetCore.Server's releases): Release v2.63.0 and Release v2.63.0-pre1; details truncated. Full changelog: https://github.com/grpc/grpc-dotnet/compare/v2.62.0...v2.63.0

Commits: same grpc-dotnet commit list as for Grpc.AspNetCore above.

Updates `System.Runtime.CompilerServices.Unsafe` from 4.7.1 to 6.0.0
Release notes (sourced from System.Runtime.CompilerServices.Unsafe's releases): per-release entries for .NET 6.0 (GA, RC, and preview builds) and .NET 5.0.x servicing releases; .NET 5 is now out of support and .NET 6 is recommended. Remaining entries and commit links truncated.

Updates `Grpc.Net.Client` from 2.59.0 to 2.63.0
Release notes (sourced from Grpc.Net.Client's releases): Release v2.63.0 and Release v2.63.0-pre1; details truncated. Full changelog: https://github.com/grpc/grpc-dotnet/compare/v2.62.0...v2.63.0

Commits: same grpc-dotnet commit list as for Grpc.AspNetCore above.

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Flight.TestWeb.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj index bd6425e7ed99b..789fb9569edba 100644 --- a/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj +++ b/csharp/test/Apache.Arrow.Flight.TestWeb/Apache.Arrow.Flight.TestWeb.csproj @@ -5,7 +5,7 @@ - + From 95c8f0a031bb5659feb42f526fb5710282165de9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 17:01:07 -0700 Subject: [PATCH 178/261] MINOR: [C#] Bump Grpc.Tools from 2.63.0 to 2.64.0 in /csharp (#41844) Bumps [Grpc.Tools](https://github.com/grpc/grpc) from 2.63.0 to 2.64.0.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj | 2 +- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj index 7314b8207fef6..ee6d42c8d17fc 100644 --- a/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj +++ b/csharp/src/Apache.Arrow.Flight.Sql/Apache.Arrow.Flight.Sql.csproj @@ -5,7 +5,7 @@ - + diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index 780da3ad39081..d68cdfae30010 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -7,7 +7,7 @@ - + From aa00b8b7e5cc82316ad3c2a3d625784536cd9de9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 17:02:33 -0700 Subject: [PATCH 179/261] MINOR: [C#] Bump Grpc.AspNetCore.Server from 2.62.0 to 2.63.0 in /csharp (#41846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [Grpc.AspNetCore.Server](https://github.com/grpc/grpc-dotnet) from 2.62.0 to 2.63.0.
Release notes (sourced from Grpc.AspNetCore.Server's releases): Release v2.63.0 and Release v2.63.0-pre1; details truncated. Full changelog: https://github.com/grpc/grpc-dotnet/compare/v2.62.0...v2.63.0

Commits
  • 6f1271f bump to stable release (#2440)
  • ac75fe3 build: configure rollForward in global.json (#2442)
  • 48aeb80 Bump 2.63.x branch to 2.63.0-pre1 (#2430)
  • 989420e Update Grpc.Tools to 2.63 (#2429)
  • c80f459 Don't capture async locals in resolver (#2426)
  • 63914f2 Add semaphore to limit subchannel connect to prevent race conditions (#2422)
  • 8199f66 Fix HTTP/3 test errors on .NET 6 (#2423)
  • 2d9df58 Fix memory leak when using call context propagation with cancellation token (...
  • c9c902c Enable multiple connections with WinHttpHandler by default (#2416)
  • 2a36215 Fix ObjectDisposedException message (#2415)
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Flight.AspNetCore.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj index 55497203a12be..2dd1d9d8f98e2 100644 --- a/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj +++ b/csharp/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj @@ -5,7 +5,7 @@ - + From 4b49f50ba7ff649b6b83bb4b849b4fc33eb77adb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 17:03:31 -0700 Subject: [PATCH 180/261] MINOR: [C#] Bump Google.Protobuf from 3.26.1 to 3.27.0 in /csharp (#41847) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [Google.Protobuf](https://github.com/protocolbuffers/protobuf) from 3.26.1 to 3.27.0.
Commits
  • a978b75 Updating version.json and repo version numbers to: 27.0
  • f396506 MODULE.bazel fixes for protobuf BCR release. (#16927)
  • 4baa11f Merge pull request #16906 from protocolbuffers/editions-27
  • 4483c6b Lazily resolve features for proto2 and proto3 for compatibility with old open...
  • f8a4a68 Fix whitespace merge issue in gencode
  • c3417f5 Regenerate stale files
  • fea6847 Future-proof edition 2023 against feature value support windows.
  • d2da463 Mark deleted fields in descriptor.proto reserved
  • 0a05aa8 Merge pull request #16875 from protocolbuffers/ban-recursive-features
  • 8c5f3a7 Prohibit using features in the same file they're defined in.
  • Additional commits viewable in compare view

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj index d68cdfae30010..d3892a6de16ee 100644 --- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj +++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj @@ -5,7 +5,7 @@ - + From cf18331480d7f086a418bac0fa569a29d3fb7664 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 May 2024 20:41:18 -0700 Subject: [PATCH 181/261] MINOR: [C#] Bump Grpc.Net.Client and System.Runtime.CompilerServices.Unsafe in /csharp (#41845) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [Grpc.Net.Client](https://github.com/grpc/grpc-dotnet) and [System.Runtime.CompilerServices.Unsafe](https://github.com/dotnet/runtime). These dependencies needed to be updated together. Updates `Grpc.Net.Client` from 2.59.0 to 2.63.0
Release notes (sourced from Grpc.Net.Client's releases): Release v2.63.0 and Release v2.63.0-pre1; details truncated. Full changelog: https://github.com/grpc/grpc-dotnet/compare/v2.62.0...v2.63.0

Commits
  • 6f1271f bump to stable release (#2440)
  • ac75fe3 build: configure rollForward in global.json (#2442)
  • 48aeb80 Bump 2.63.x branch to 2.63.0-pre1 (#2430)
  • 989420e Update Grpc.Tools to 2.63 (#2429)
  • c80f459 Don't capture async locals in resolver (#2426)
  • 63914f2 Add semaphore to limit subchannel connect to prevent race conditions (#2422)
  • 8199f66 Fix HTTP/3 test errors on .NET 6 (#2423)
  • 2d9df58 Fix memory leak when using call context propagation with cancellation token (...
  • c9c902c Enable multiple connections with WinHttpHandler by default (#2416)
  • 2a36215 Fix ObjectDisposedException message (#2415)
  • Additional commits viewable in compare view

Updates `System.Runtime.CompilerServices.Unsafe` from 4.7.1 to 6.0.0
Release notes

Sourced from System.Runtime.CompilerServices.Unsafe's releases.

Release links for .NET 6.0, .NET 6.0 RC 2, .NET 6.0 RC 1, and .NET 6.0 Previews 7 through 1, followed by .NET 5.0.17, 5.0.16, 5.0.15, 5.0.14, 5.0.13, and 5.0.11.

.NET 5 is now out of support. We recommend using .NET 6.

... (truncated)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher
---
 csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
index d3892a6de16ee..21b0df349e2d8 100644
--- a/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
+++ b/csharp/src/Apache.Arrow.Flight/Apache.Arrow.Flight.csproj
@@ -6,7 +6,7 @@
-
+

From a34d995af8510e24654ccdb347343bd793ac1fed Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 May 2024 20:42:00 -0700
Subject: [PATCH 182/261] MINOR: [C#] Bump Microsoft.NET.Test.Sdk from 17.9.0
 to 17.10.0 in /csharp (#41848)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [Microsoft.NET.Test.Sdk](https://github.com/microsoft/vstest) from 17.9.0 to 17.10.0.
Release notes

Sourced from Microsoft.NET.Test.Sdk's releases.

v17.10.0

What's Changed

And many infrastructure related changes and updates.

Full Changelog: https://github.com/microsoft/vstest/compare/v17.9.0...v17.10.0

v17.10.0-release-24177-07

... (truncated)

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=Microsoft.NET.Test.Sdk&package-manager=nuget&previous-version=17.9.0&new-version=17.10.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher
---
 .../Apache.Arrow.Compression.Tests.csproj                      | 2 +-
 .../Apache.Arrow.Flight.Sql.Tests.csproj                       | 2 +-
 .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +-
 csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
index a02dc383fb4e8..f4780c0dad194 100644
--- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
@@ -7,7 +7,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
index 50e5cea7a1dd0..d3caaf9ca0fa8 100644
--- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
@@ -6,7 +6,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
index 7b4d063763fde..37726b85f8fe4 100644
--- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
@@ -6,7 +6,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index 776f37db3a6d7..067d41a42524f 100644
--- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -16,7 +16,7 @@
-
+
 all

From 5a667281efec2d5f8789fbadc2b27924ab5b103f Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 May 2024 20:42:54 -0700
Subject: [PATCH 183/261] MINOR: [C#] Bump xunit.runner.visualstudio from
 2.8.0 to 2.8.1 in /csharp (#41849)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [xunit.runner.visualstudio](https://github.com/xunit/visualstudio.xunit) from 2.8.0 to 2.8.1.
[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=xunit.runner.visualstudio&package-manager=nuget&previous-version=2.8.0&new-version=2.8.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: Curt Hagenlocher
---
 .../Apache.Arrow.Compression.Tests.csproj                      | 2 +-
 .../Apache.Arrow.Flight.Sql.Tests.csproj                       | 2 +-
 .../Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj | 2 +-
 csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj       | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
index f4780c0dad194..bd97372d1021b 100644
--- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj
@@ -9,7 +9,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
index d3caaf9ca0fa8..5a5a92ccd2c7f 100644
--- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj
@@ -8,7 +8,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
index 37726b85f8fe4..132f17fa212a5 100644
--- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj
@@ -8,7 +8,7 @@
-
+

diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index 067d41a42524f..a3290e3be14ee 100644
--- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -18,7 +18,7 @@
-
+
 all
 runtime; build; native; contentfiles; analyzers

From f63c9943d6b8cd2d40b0d3dab69de7b6e4abe6e2 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Mon, 27 May 2024 20:44:35 -0700
Subject: [PATCH 184/261] MINOR: [C#] Bump ZstdSharp.Port and
 System.Runtime.CompilerServices.Unsafe in /csharp (#41742)

Bumps [ZstdSharp.Port](https://github.com/oleg-st/ZstdSharp) and [System.Runtime.CompilerServices.Unsafe](https://github.com/dotnet/runtime). These dependencies needed to be updated together.

Updates `ZstdSharp.Port` from 0.8.0 to 0.8.1
Release notes

Sourced from ZstdSharp.Port's releases.

0.8.1

Better decompression speed in .NET 8 (~5%)

Updates `System.Runtime.CompilerServices.Unsafe` from 4.7.1 to 6.0.0
Release notes

Sourced from System.Runtime.CompilerServices.Unsafe's releases.

Release links for .NET 6.0, .NET 6.0 RC 2, .NET 6.0 RC 1, and .NET 6.0 Previews 7 through 1, followed by .NET 5.0.17, 5.0.16, 5.0.15, 5.0.14, 5.0.13, and 5.0.11.

.NET 5 is now out of support. We recommend using .NET 6.

... (truncated)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Curt Hagenlocher --- .../Apache.Arrow.Compression/Apache.Arrow.Compression.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj index c34d880f90060..b8f69672cbc7c 100644 --- a/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj +++ b/csharp/src/Apache.Arrow.Compression/Apache.Arrow.Compression.csproj @@ -13,7 +13,7 @@ - + From f904928054fad89360d83015db5c23ac1ef86d05 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 28 May 2024 13:17:35 +0900 Subject: [PATCH 185/261] GH-41784: [Packaging][RPM] Use SO version for -libs package name (#41838) ### Rationale for this change We should use `arrow${SO_VERSION}-libs` not `arrow${MAJOR_VERSION}-libs` to co-exist newer versions and older versions. ### What changes are included in this PR? Use SO version not major version. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #41784 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .../apache-arrow/yum/arrow.spec.in | 148 +++++++++--------- 1 file changed, 75 insertions(+), 73 deletions(-) diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index c6148e9260586..d5e6c3a332eb3 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -27,7 +27,9 @@ %define is_centos_7 (%{_rhel} == 7 && !%{is_amazon_linux}) -%define major_version %(echo @VERSION@ | grep -o '^[0-9]*') +%define major_version %(echo @VERSION@ | cut -d. -f 1) +%define minor_version %(echo @VERSION@ | cut -d. -f 2) +%define so_version %(expr %{major_version} '*' 100 + %{minor_version}) %define boost_version %( \ if [ %{_rhel} -eq 7 ]; then \ @@ -239,7 +241,7 @@ cd cpp rm -rf %{buildroot}%{_docdir}/arrow/ cd - -%package -n %{name}%{major_version}-libs +%package -n %{name}%{so_version}-libs Summary: Runtime libraries for Apache Arrow C++ License: Apache-2.0 %if %{have_lz4_libs} @@ -248,10 +250,10 @@ Requires: lz4-libs %{lz4_requirement} Requires: lz4 %{lz4_requirement} %endif -%description -n %{name}%{major_version}-libs +%description -n %{name}%{so_version}-libs This package contains the libraries for Apache Arrow C++. -%files -n %{name}%{major_version}-libs +%files -n %{name}%{so_version}-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -260,7 +262,7 @@ This package contains the libraries for Apache Arrow C++. %package tools Summary: Tools for Apache Arrow C++ License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} %description tools Tools for Apache Arrow C++. @@ -274,7 +276,7 @@ Tools for Apache Arrow C++. %package devel Summary: Libraries and header files for Apache Arrow C++ License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} Requires: brotli-devel Requires: bzip2-devel Requires: curl-devel @@ -322,15 +324,15 @@ Libraries and header files for Apache Arrow C++. 
%{_libdir}/pkgconfig/arrow-orc.pc %{_libdir}/pkgconfig/arrow.pc -%package -n %{name}%{major_version}-acero-libs +%package -n %{name}%{so_version}-acero-libs Summary: C++ library to execute a query in streaming License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} -%description -n %{name}%{major_version}-acero-libs +%description -n %{name}%{so_version}-acero-libs This package contains the libraries for Apache Arrow Acero. -%files -n %{name}%{major_version}-acero-libs +%files -n %{name}%{so_version}-acero-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -339,7 +341,7 @@ This package contains the libraries for Apache Arrow Acero. %package acero-devel Summary: Libraries and header files for Apache Arrow Acero License: Apache-2.0 -Requires: %{name}%{major_version}-acero-libs = %{version}-%{release} +Requires: %{name}%{so_version}-acero-libs = %{version}-%{release} Requires: %{name}-devel = %{version}-%{release} %description acero-devel @@ -355,16 +357,16 @@ Libraries and header files for Apache Arrow Acero %{_libdir}/libarrow_acero.so %{_libdir}/pkgconfig/arrow-acero.pc -%package -n %{name}%{major_version}-dataset-libs +%package -n %{name}%{so_version}-dataset-libs Summary: C++ library to read and write semantic datasets stored in different locations and formats License: Apache-2.0 -Requires: %{name}%{major_version}-acero-libs = %{version}-%{release} -Requires: parquet%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-acero-libs = %{version}-%{release} +Requires: parquet%{so_version}-libs = %{version}-%{release} -%description -n %{name}%{major_version}-dataset-libs +%description -n %{name}%{so_version}-dataset-libs This package contains the libraries for Apache Arrow dataset. -%files -n %{name}%{major_version}-dataset-libs +%files -n %{name}%{so_version}-dataset-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -373,7 +375,7 @@ This package contains the libraries for Apache Arrow dataset. %package dataset-devel Summary: Libraries and header files for Apache Arrow dataset. License: Apache-2.0 -Requires: %{name}%{major_version}-dataset-libs = %{version}-%{release} +Requires: %{name}%{so_version}-dataset-libs = %{version}-%{release} Requires: %{name}-acero-devel = %{version}-%{release} Requires: parquet-devel = %{version}-%{release} @@ -391,15 +393,15 @@ Libraries and header files for Apache Arrow dataset. %{_libdir}/pkgconfig/arrow-dataset.pc %if %{use_flight} -%package -n %{name}%{major_version}-flight-libs +%package -n %{name}%{so_version}-flight-libs Summary: C++ library for fast data transport. License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} -%description -n %{name}%{major_version}-flight-libs +%description -n %{name}%{so_version}-flight-libs This package contains the libraries for Apache Arrow Flight. -%files -n %{name}%{major_version}-flight-libs +%files -n %{name}%{so_version}-flight-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -408,7 +410,7 @@ This package contains the libraries for Apache Arrow Flight. %package flight-devel Summary: Libraries and header files for Apache Arrow Flight. 
License: Apache-2.0 -Requires: %{name}%{major_version}-flight-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-libs = %{version}-%{release} Requires: %{name}-devel = %{version}-%{release} Requires: c-ares-devel %if %{have_grpc} @@ -430,15 +432,15 @@ Libraries and header files for Apache Arrow Flight. %{_libdir}/libarrow_flight.so %{_libdir}/pkgconfig/arrow-flight.pc -%package -n %{name}%{major_version}-flight-sql-libs +%package -n %{name}%{so_version}-flight-sql-libs Summary: C++ library for interacting with SQL databases. License: Apache-2.0 -Requires: %{name}%{major_version}-flight-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-libs = %{version}-%{release} -%description -n %{name}%{major_version}-flight-sql-libs +%description -n %{name}%{so_version}-flight-sql-libs This package contains the libraries for Apache Arrow Flight SQL. -%files -n %{name}%{major_version}-flight-sql-libs +%files -n %{name}%{so_version}-flight-sql-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -447,7 +449,7 @@ This package contains the libraries for Apache Arrow Flight SQL. %package flight-sql-devel Summary: Libraries and header files for Apache Arrow Flight SQL. License: Apache-2.0 -Requires: %{name}%{major_version}-flight-sql-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-sql-libs = %{version}-%{release} Requires: %{name}-devel = %{version}-%{release} %description flight-sql-devel @@ -465,15 +467,15 @@ Libraries and header files for Apache Arrow Flight SQL. %endif %if %{use_gandiva} -%package -n gandiva%{major_version}-libs +%package -n gandiva%{so_version}-libs Summary: C++ library for compiling and evaluating expressions on Apache Arrow data. License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} -%description -n gandiva%{major_version}-libs +%description -n gandiva%{so_version}-libs This package contains the libraries for Gandiva. -%files -n gandiva%{major_version}-libs +%files -n gandiva%{so_version}-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -483,7 +485,7 @@ This package contains the libraries for Gandiva. Summary: Libraries and header files for Gandiva. License: Apache-2.0 Requires: %{name}-devel = %{version}-%{release} -Requires: gandiva%{major_version}-libs = %{version}-%{release} +Requires: gandiva%{so_version}-libs = %{version}-%{release} Requires: llvm-devel %description -n gandiva-devel @@ -500,15 +502,15 @@ Libraries and header files for Gandiva. %{_libdir}/pkgconfig/gandiva.pc %endif -%package -n parquet%{major_version}-libs +%package -n parquet%{so_version}-libs Summary: Runtime libraries for Apache Parquet C++ License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} -%description -n parquet%{major_version}-libs +%description -n parquet%{so_version}-libs This package contains the libraries for Apache Parquet C++. -%files -n parquet%{major_version}-libs +%files -n parquet%{so_version}-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -517,7 +519,7 @@ This package contains the libraries for Apache Parquet C++. 
%package -n parquet-tools Summary: Tools for Apache Parquet C++ License: Apache-2.0 -Requires: parquet%{major_version}-libs = %{version}-%{release} +Requires: parquet%{so_version}-libs = %{version}-%{release} %description -n parquet-tools Tools for Apache Parquet C++. @@ -532,7 +534,7 @@ Tools for Apache Parquet C++. Summary: Libraries and header files for Apache Parquet C++ License: Apache-2.0 Requires: %{name}-devel = %{version}-%{release} -Requires: parquet%{major_version}-libs = %{version}-%{release} +Requires: parquet%{so_version}-libs = %{version}-%{release} Requires: zlib-devel %description -n parquet-devel @@ -548,15 +550,15 @@ Libraries and header files for Apache Parquet C++. %{_libdir}/libparquet.so %{_libdir}/pkgconfig/parquet*.pc -%package -n %{name}%{major_version}-glib-libs +%package -n %{name}%{so_version}-glib-libs Summary: Runtime libraries for Apache Arrow GLib License: Apache-2.0 -Requires: %{name}%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-libs = %{version}-%{release} -%description -n %{name}%{major_version}-glib-libs +%description -n %{name}%{so_version}-glib-libs This package contains the libraries for Apache Arrow GLib. -%files -n %{name}%{major_version}-glib-libs +%files -n %{name}%{so_version}-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -567,7 +569,7 @@ This package contains the libraries for Apache Arrow GLib. Summary: Libraries and header files for Apache Arrow GLib License: Apache-2.0 Requires: %{name}-acero-devel = %{version}-%{release} -Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-glib-libs = %{version}-%{release} Requires: glib2-devel Requires: gobject-introspection-devel @@ -606,16 +608,16 @@ Documentation for Apache Arrow GLib. %{_docdir}/arrow-glib/ %endif -%package -n %{name}%{major_version}-dataset-glib-libs +%package -n %{name}%{so_version}-dataset-glib-libs Summary: Runtime libraries for Apache Arrow Dataset GLib License: Apache-2.0 -Requires: %{name}%{major_version}-dataset-libs = %{version}-%{release} -Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-dataset-libs = %{version}-%{release} +Requires: %{name}%{so_version}-glib-libs = %{version}-%{release} -%description -n %{name}%{major_version}-dataset-glib-libs +%description -n %{name}%{so_version}-dataset-glib-libs This package contains the libraries for Apache Arrow Dataset GLib. -%files -n %{name}%{major_version}-dataset-glib-libs +%files -n %{name}%{so_version}-dataset-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -625,7 +627,7 @@ This package contains the libraries for Apache Arrow Dataset GLib. %package dataset-glib-devel Summary: Libraries and header files for Apache Arrow Dataset GLib License: Apache-2.0 -Requires: %{name}%{major_version}-dataset-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-dataset-glib-libs = %{version}-%{release} Requires: %{name}-dataset-devel = %{version}-%{release} Requires: %{name}-glib-devel = %{version}-%{release} @@ -661,16 +663,16 @@ Documentation for Apache Arrow dataset GLib. 
%endif %if %{use_flight} -%package -n %{name}%{major_version}-flight-glib-libs +%package -n %{name}%{so_version}-flight-glib-libs Summary: Runtime libraries for Apache Arrow Flight GLib License: Apache-2.0 -Requires: %{name}%{major_version}-flight-libs = %{version}-%{release} -Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-libs = %{version}-%{release} +Requires: %{name}%{so_version}-glib-libs = %{version}-%{release} -%description -n %{name}%{major_version}-flight-glib-libs +%description -n %{name}%{so_version}-flight-glib-libs This package contains the libraries for Apache Arrow Flight GLib. -%files -n %{name}%{major_version}-flight-glib-libs +%files -n %{name}%{so_version}-flight-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -680,7 +682,7 @@ This package contains the libraries for Apache Arrow Flight GLib. %package flight-glib-devel Summary: Libraries and header files for Apache Arrow Flight GLib License: Apache-2.0 -Requires: %{name}%{major_version}-flight-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-glib-libs = %{version}-%{release} Requires: %{name}-flight-devel = %{version}-%{release} Requires: %{name}-glib-devel = %{version}-%{release} @@ -715,16 +717,16 @@ Documentation for Apache Arrow Flight GLib. %{_docdir}/arrow-flight-glib/ %endif -%package -n %{name}%{major_version}-flight-sql-glib-libs +%package -n %{name}%{so_version}-flight-sql-glib-libs Summary: Runtime libraries for Apache Arrow Flight SQL GLib License: Apache-2.0 -Requires: %{name}%{major_version}-flight-sql-libs = %{version}-%{release} -Requires: %{name}%{major_version}-flight-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-sql-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-glib-libs = %{version}-%{release} -%description -n %{name}%{major_version}-flight-sql-glib-libs +%description -n %{name}%{so_version}-flight-sql-glib-libs This package contains the libraries for Apache Arrow Flight SQL GLib. -%files -n %{name}%{major_version}-flight-sql-glib-libs +%files -n %{name}%{so_version}-flight-sql-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -734,7 +736,7 @@ This package contains the libraries for Apache Arrow Flight SQL GLib. %package flight-sql-glib-devel Summary: Libraries and header files for Apache Arrow Flight SQL GLib License: Apache-2.0 -Requires: %{name}%{major_version}-flight-sql-glib-libs = %{version}-%{release} +Requires: %{name}%{so_version}-flight-sql-glib-libs = %{version}-%{release} Requires: %{name}-flight-sql-devel = %{version}-%{release} Requires: %{name}-flight-glib-devel = %{version}-%{release} @@ -771,16 +773,16 @@ Documentation for Apache Arrow Flight SQL GLib. %endif %if %{use_gandiva} -%package -n gandiva%{major_version}-glib-libs +%package -n gandiva%{so_version}-glib-libs Summary: Runtime libraries for Gandiva GLib License: Apache-2.0 -Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} -Requires: gandiva%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-glib-libs = %{version}-%{release} +Requires: gandiva%{so_version}-libs = %{version}-%{release} -%description -n gandiva%{major_version}-glib-libs +%description -n gandiva%{so_version}-glib-libs This package contains the libraries for Gandiva GLib. 
-%files -n gandiva%{major_version}-glib-libs +%files -n gandiva%{so_version}-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -791,7 +793,7 @@ This package contains the libraries for Gandiva GLib. Summary: Libraries and header files for Gandiva GLib License: Apache-2.0 Requires: %{name}-glib-devel = %{version}-%{release} -Requires: gandiva%{major_version}-glib-libs = %{version}-%{release} +Requires: gandiva%{so_version}-glib-libs = %{version}-%{release} Requires: gandiva-devel = %{version}-%{release} %description -n gandiva-glib-devel @@ -826,16 +828,16 @@ Documentation for Gandiva GLib. %endif %endif -%package -n parquet%{major_version}-glib-libs +%package -n parquet%{so_version}-glib-libs Summary: Runtime libraries for Apache Parquet GLib License: Apache-2.0 -Requires: %{name}%{major_version}-glib-libs = %{version}-%{release} -Requires: parquet%{major_version}-libs = %{version}-%{release} +Requires: %{name}%{so_version}-glib-libs = %{version}-%{release} +Requires: parquet%{so_version}-libs = %{version}-%{release} -%description -n parquet%{major_version}-glib-libs +%description -n parquet%{so_version}-glib-libs This package contains the libraries for Apache Parquet GLib. -%files -n parquet%{major_version}-glib-libs +%files -n parquet%{so_version}-glib-libs %defattr(-,root,root,-) %doc README.md %license LICENSE.txt NOTICE.txt @@ -846,7 +848,7 @@ This package contains the libraries for Apache Parquet GLib. Summary: Libraries and header files for Apache Parquet GLib License: Apache-2.0 Requires: %{name}-glib-devel = %{version}-%{release} -Requires: parquet%{major_version}-glib-libs = %{version}-%{release} +Requires: parquet%{so_version}-glib-libs = %{version}-%{release} Requires: parquet-devel = %{version}-%{release} %description -n parquet-glib-devel From fe2d926ef385be58833f0e5e09d1860c63f800e6 Mon Sep 17 00:00:00 2001 From: Sarah Gilmore <74676073+sgilmore10@users.noreply.github.com> Date: Tue, 28 May 2024 09:37:54 -0400 Subject: [PATCH 186/261] GH-41803: [MATLAB] Add C Data Interface format import/export functionality for `arrow.tabular.RecordBatch` (#41817) ### Rationale for this change This pull requests adds two new APIs for importing and exporting `arrow.tabular.RecordBatch` instances using the C Data Interface format. **Example:** ```matlab >> T = table((1:3)', ["A"; "B"; "C"]); >> expected = arrow.recordBatch(T) expected = Arrow RecordBatch with 3 rows and 2 columns: Schema: Var1: Float64 | Var2: String First Row: 1 | "A" >> cArray = arrow.c.Array(); >> cSchema = arrow.c.Schema(); % Export the RecordBatch to C Data Interface Format >> expected.export(cArray.Address, cSchema.Address); % Import the RecordBatch from C Data Interface Format >> actual = arrow.tabular.RecordBatch.import(cArray, cSchema) actual = Arrow RecordBatch with 3 rows and 2 columns: Schema: Var1: Float64 | Var2: String First Row: 1 | "A" % The RecordBatch is the same after round-tripping to the C Data Interface format >> isequal(actual, expected) ans = logical 1 ``` ### What changes are included in this PR? 1. Added a new method `arrow.tabular.RecordBatch.export` for exporting `RecordBatch` objects to the C Data Interface format. 2. Added a new static method `arrow.tabular.RecordBatch.import` for importing `RecordBatch` objects from the C Data Interface format. 3. Added a new internal class `arrow.c.internal.RecordBatchImporter` for importing `RecordBatch` objects from the C Data Interface format. ### Are these changes tested? Yes. 1. 
Added a new test file `matlab/test/arrow/c/tRoundtripRecordBatch.m` which has basic round-trip tests for importing and exporting `RecordBatch` objects. ### Are there any user-facing changes? Yes. 1. Two new user-facing methods were added to `arrow.tabular.RecordBatch`. The first is `arrow.tabular.RecordBatch.export(cArrowArrayAddress, cArrowSchemaAddress)`. The second is `arrow.tabular.RecordBatch.import(cArray, cSchema)`. These APIs can be used to export/import `RecordBatch` objects using the C Data Interface format. ### Future Directions 1. Add integration tests for sharing data between MATLAB/mlarrow and Python/pyarrow running in the same process using the [MATLAB interface to Python](https://www.mathworks.com/help/matlab/call-python-libraries.html). 2. Add support for the Arrow [C stream interface format](https://arrow.apache.org/docs/format/CStreamInterface.html). ### Notes 1. Thanks to @ kevingurney for the help with this feature! * GitHub Issue: #41803 Authored-by: Sarah Gilmore Signed-off-by: Sarah Gilmore --- .../matlab/c/proxy/record_batch_importer.cc | 66 +++++++ .../matlab/c/proxy/record_batch_importer.h | 37 ++++ matlab/src/cpp/arrow/matlab/proxy/factory.cc | 104 +++++------ .../matlab/tabular/proxy/record_batch.cc | 19 +- .../arrow/matlab/tabular/proxy/record_batch.h | 1 + .../+arrow/+c/+internal/RecordBatchImporter.m | 52 ++++++ .../src/matlab/+arrow/+tabular/RecordBatch.m | 22 +++ matlab/test/arrow/c/tRoundTripRecordBatch.m | 170 ++++++++++++++++++ .../cmake/BuildMatlabArrowInterface.cmake | 3 +- 9 files changed, 420 insertions(+), 54 deletions(-) create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc create mode 100644 matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h create mode 100644 matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m create mode 100644 matlab/test/arrow/c/tRoundTripRecordBatch.m diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc new file mode 100644 index 0000000000000..ed9ba14cfbe01 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/c/bridge.h" + +#include "arrow/matlab/c/proxy/record_batch_importer.h" +#include "arrow/matlab/error/error.h" +#include "arrow/matlab/tabular/proxy/record_batch.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::c::proxy { + +RecordBatchImporter::RecordBatchImporter() { + REGISTER_METHOD(RecordBatchImporter, import); +} + +libmexclass::proxy::MakeResult RecordBatchImporter::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void RecordBatchImporter::import(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + using RecordBatchProxy = arrow::matlab::tabular::proxy::RecordBatch; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray arrow_array_address_mda = args[0]["ArrowArrayAddress"]; + const mda::TypedArray arrow_schema_address_mda = + args[0]["ArrowSchemaAddress"]; + + const auto arrow_array_address = uint64_t(arrow_array_address_mda[0]); + const auto arrow_schema_address = uint64_t(arrow_schema_address_mda[0]); + + auto arrow_array = reinterpret_cast(arrow_array_address); + auto arrow_schema = reinterpret_cast(arrow_schema_address); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto record_batch, + arrow::ImportRecordBatch(arrow_array, arrow_schema), + context, error::C_IMPORT_FAILED); + + auto record_batch_proxy = std::make_shared(std::move(record_batch)); + + mda::ArrayFactory factory; + const auto record_batch_proxy_id = ProxyManager::manageProxy(record_batch_proxy); + const auto record_batch_proxy_id_mda = factory.createScalar(record_batch_proxy_id); + + context.outputs[0] = record_batch_proxy_id_mda; +} + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h new file mode 100644 index 0000000000000..0f697db0d25b0 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/record_batch_importer.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class RecordBatchImporter : public libmexclass::proxy::Proxy { + public: + RecordBatchImporter(); + + ~RecordBatchImporter() = default; + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void import(libmexclass::proxy::method::Context& context); +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 9b95fcf128090..53a19da82e334 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -27,6 +27,7 @@ #include "arrow/matlab/buffer/proxy/buffer.h" #include "arrow/matlab/c/proxy/array.h" #include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/c/proxy/record_batch_importer.h" #include "arrow/matlab/c/proxy/schema.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" @@ -54,57 +55,58 @@ namespace arrow::matlab::proxy { libmexclass::proxy::MakeResult Factory::make_proxy( const ClassName& class_name, const FunctionArguments& constructor_arguments) { // clang-format off - REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); - REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); - REGISTER_PROXY(arrow.array.proxy.StructArray , arrow::matlab::array::proxy::StructArray); - REGISTER_PROXY(arrow.array.proxy.ListArray , arrow::matlab::array::proxy::ListArray); - REGISTER_PROXY(arrow.array.proxy.TimestampArray, arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Time32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Time64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Date32Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.Date64Array , arrow::matlab::array::proxy::NumericArray); - REGISTER_PROXY(arrow.array.proxy.ChunkedArray , arrow::matlab::array::proxy::ChunkedArray); - REGISTER_PROXY(arrow.buffer.proxy.Buffer , arrow::matlab::buffer::proxy::Buffer); - REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); - REGISTER_PROXY(arrow.tabular.proxy.Table , arrow::matlab::tabular::proxy::Table); - REGISTER_PROXY(arrow.tabular.proxy.Schema , arrow::matlab::tabular::proxy::Schema); - REGISTER_PROXY(arrow.type.proxy.Field , arrow::matlab::type::proxy::Field); - 
REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.UInt8Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.UInt16Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.UInt32Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.UInt64Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.Int8Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.Int16Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.Int32Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.Int64Type , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); - REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); - REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); - REGISTER_PROXY(arrow.type.proxy.Time32Type , arrow::matlab::type::proxy::Time32Type); - REGISTER_PROXY(arrow.type.proxy.Time64Type , arrow::matlab::type::proxy::Time64Type); - REGISTER_PROXY(arrow.type.proxy.Date32Type , arrow::matlab::type::proxy::Date32Type); - REGISTER_PROXY(arrow.type.proxy.Date64Type , arrow::matlab::type::proxy::Date64Type); - REGISTER_PROXY(arrow.type.proxy.StructType , arrow::matlab::type::proxy::StructType); - REGISTER_PROXY(arrow.type.proxy.ListType , arrow::matlab::type::proxy::ListType); - REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer); - REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); - REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); - REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); - REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); - REGISTER_PROXY(arrow.c.proxy.ArrayImporter , arrow::matlab::c::proxy::ArrayImporter); - REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema); + REGISTER_PROXY(arrow.array.proxy.Float32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Float64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.UInt64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int8Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int16Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Int64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.BooleanArray , arrow::matlab::array::proxy::BooleanArray); + REGISTER_PROXY(arrow.array.proxy.StringArray , arrow::matlab::array::proxy::StringArray); + REGISTER_PROXY(arrow.array.proxy.StructArray , arrow::matlab::array::proxy::StructArray); + 
REGISTER_PROXY(arrow.array.proxy.ListArray , arrow::matlab::array::proxy::ListArray); + REGISTER_PROXY(arrow.array.proxy.TimestampArray , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Time32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Time64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Date32Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.Date64Array , arrow::matlab::array::proxy::NumericArray); + REGISTER_PROXY(arrow.array.proxy.ChunkedArray , arrow::matlab::array::proxy::ChunkedArray); + REGISTER_PROXY(arrow.buffer.proxy.Buffer , arrow::matlab::buffer::proxy::Buffer); + REGISTER_PROXY(arrow.tabular.proxy.RecordBatch , arrow::matlab::tabular::proxy::RecordBatch); + REGISTER_PROXY(arrow.tabular.proxy.Table , arrow::matlab::tabular::proxy::Table); + REGISTER_PROXY(arrow.tabular.proxy.Schema , arrow::matlab::tabular::proxy::Schema); + REGISTER_PROXY(arrow.type.proxy.Field , arrow::matlab::type::proxy::Field); + REGISTER_PROXY(arrow.type.proxy.Float32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Float64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.UInt64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int8Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int16Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int32Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.Int64Type , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.BooleanType , arrow::matlab::type::proxy::PrimitiveCType); + REGISTER_PROXY(arrow.type.proxy.StringType , arrow::matlab::type::proxy::StringType); + REGISTER_PROXY(arrow.type.proxy.TimestampType , arrow::matlab::type::proxy::TimestampType); + REGISTER_PROXY(arrow.type.proxy.Time32Type , arrow::matlab::type::proxy::Time32Type); + REGISTER_PROXY(arrow.type.proxy.Time64Type , arrow::matlab::type::proxy::Time64Type); + REGISTER_PROXY(arrow.type.proxy.Date32Type , arrow::matlab::type::proxy::Date32Type); + REGISTER_PROXY(arrow.type.proxy.Date64Type , arrow::matlab::type::proxy::Date64Type); + REGISTER_PROXY(arrow.type.proxy.StructType , arrow::matlab::type::proxy::StructType); + REGISTER_PROXY(arrow.type.proxy.ListType , arrow::matlab::type::proxy::ListType); + REGISTER_PROXY(arrow.io.feather.proxy.Writer , arrow::matlab::io::feather::proxy::Writer); + REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); + REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); + REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); + REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); + REGISTER_PROXY(arrow.c.proxy.ArrayImporter , arrow::matlab::c::proxy::ArrayImporter); + REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema); + REGISTER_PROXY(arrow.c.proxy.RecordBatchImporter , arrow::matlab::c::proxy::RecordBatchImporter); // clang-format on return 
libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc index 298ac4b595139..f3cee25a3a8ee 100644 --- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc +++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "libmexclass/proxy/ProxyManager.h" - +#include "arrow/c/bridge.h" #include "arrow/matlab/array/proxy/array.h" #include "arrow/matlab/array/proxy/wrap.h" @@ -66,6 +65,7 @@ RecordBatch::RecordBatch(std::shared_ptr record_batch) REGISTER_METHOD(RecordBatch, getColumnByName); REGISTER_METHOD(RecordBatch, getSchema); REGISTER_METHOD(RecordBatch, getRowAsString); + REGISTER_METHOD(RecordBatch, exportToC); } std::shared_ptr RecordBatch::unwrap() { return record_batch; } @@ -259,4 +259,19 @@ void RecordBatch::getRowAsString(libmexclass::proxy::method::Context& context) { context.outputs[0] = factory.createScalar(row_str_utf16); } +void RecordBatch::exportToC(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::StructArray opts = context.inputs[0]; + const mda::TypedArray array_address_mda = opts[0]["ArrowArrayAddress"]; + const mda::TypedArray schema_address_mda = opts[0]["ArrowSchemaAddress"]; + + auto arrow_array = reinterpret_cast(uint64_t(array_address_mda[0])); + auto arrow_schema = + reinterpret_cast(uint64_t(schema_address_mda[0])); + + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + arrow::ExportRecordBatch(*record_batch, arrow_array, arrow_schema), context, + error::C_EXPORT_FAILED); +} + } // namespace arrow::matlab::tabular::proxy diff --git a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h index c8285c9b095d5..4a1675a8a438a 100644 --- a/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h +++ b/matlab/src/cpp/arrow/matlab/tabular/proxy/record_batch.h @@ -43,6 +43,7 @@ class RecordBatch : public libmexclass::proxy::Proxy { void getColumnByName(libmexclass::proxy::method::Context& context); void getSchema(libmexclass::proxy::method::Context& context); void getRowAsString(libmexclass::proxy::method::Context& context); + void exportToC(libmexclass::proxy::method::Context& context); std::shared_ptr record_batch; }; diff --git a/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m b/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m new file mode 100644 index 0000000000000..120763bb46e7b --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/+internal/RecordBatchImporter.m @@ -0,0 +1,52 @@ +%RECORDBATCHIMPORTER Imports Arrow RecordBatch using the C Data Interface +% Format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. 
See the License for the specific language governing +% permissions and limitations under the License. + +classdef RecordBatchImporter + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + + function obj = RecordBatchImporter() + proxyName = "arrow.c.proxy.RecordBatchImporter"; + proxy = arrow.internal.proxy.create(proxyName, struct()); + obj.Proxy = proxy; + end + + function recordBatch = import(obj, cArray, cSchema) + arguments + obj(1, 1) arrow.c.internal.RecordBatchImporter + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + args = struct(... + ArrowArrayAddress=cArray.Address,... + ArrowSchemaAddress=cSchema.Address... + ); + proxyID = obj.Proxy.import(args); + proxyName = "arrow.tabular.proxy.RecordBatch"; + proxy = libmexclass.proxy.Proxy(Name=proxyName, ID=proxyID); + recordBatch = arrow.tabular.RecordBatch(proxy); + end + + end + +end + diff --git a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m index 0225f3d771181..da5c1fc1c3764 100644 --- a/matlab/src/matlab/+arrow/+tabular/RecordBatch.m +++ b/matlab/src/matlab/+arrow/+tabular/RecordBatch.m @@ -102,6 +102,19 @@ function tf = isequal(obj, varargin) tf = arrow.tabular.internal.isequal(obj, varargin{:}); end + + function export(obj, cArrowArrayAddress, cArrowSchemaAddress) + arguments + obj(1, 1) arrow.tabular.RecordBatch + cArrowArrayAddress(1, 1) uint64 + cArrowSchemaAddress(1, 1) uint64 + end + args = struct(... + ArrowArrayAddress=cArrowArrayAddress,... + ArrowSchemaAddress=cArrowSchemaAddress... + ); + obj.Proxy.exportToC(args); + end end methods (Access = private) @@ -141,5 +154,14 @@ function displayScalarObject(obj) proxy = arrow.internal.proxy.create(proxyName, args); recordBatch = arrow.tabular.RecordBatch(proxy); end + + function recordBatch = import(cArray, cSchema) + arguments + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + importer = arrow.c.internal.RecordBatchImporter(); + recordBatch = importer.import(cArray, cSchema); + end end end diff --git a/matlab/test/arrow/c/tRoundTripRecordBatch.m b/matlab/test/arrow/c/tRoundTripRecordBatch.m new file mode 100644 index 0000000000000..5d95aecbe1603 --- /dev/null +++ b/matlab/test/arrow/c/tRoundTripRecordBatch.m @@ -0,0 +1,170 @@ +%TROUNDTRIPRECORDBATCH Tests for roundtripping RecordBatches using +% the C Data Interface format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
+classdef tRoundTripRecordBatch < matlab.unittest.TestCase + + methods (Test) + function ZeroColumnRecordBatch(testCase) + expected = arrow.recordBatch(table()); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ZeroRowRecordBatch(testCase) + doubleArray = arrow.array([]); + stringArray = arrow.array(string.empty(0, 0)); + expected = arrow.tabular.RecordBatch.fromArrays(doubleArray, stringArray); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function OneRowRecordBatch(testCase) + varNames = ["Col1" "Col2" "Col3"]; + t = table(1, "A", false, VariableNames=varNames); + expected = arrow.recordBatch(t); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function MultiRowRecordBatch(testCase) + varNames = ["Col1" "Col2" "Col3"]; + t = table((1:3)', ["A"; "B"; "C"], [false; true; false],... + VariableNames=varNames); + expected = arrow.recordBatch(t); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + expected.export(cArray.Address, cSchema.Address); + actual = arrow.tabular.RecordBatch.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ExportErrorWrongInputTypes(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export("cArray.Address", "cSchema.Address"); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ExportTooFewInputs(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ExportTooManyInputs(testCase) + rb = arrow.recordBatch(table([1; 2; 3])); + fcn = @() rb.export("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorWrongInputTypes(testCase) + cArray = "arrow.c.Array"; + cSchema = "arrow.c.Schema"; + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ImportTooFewInputs(testCase) + fcn = @() arrow.tabular.RecordBatch.import(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ImportTooManyInputs(testCase) + fcn = @() arrow.tabular.RecordBatch.import("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorImportFailed(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Array and arrow.c.Schema were + % never populated previously from an exported Array. + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + function ImportErrorInvalidSchema(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Schema was not populated from a + % struct-like type (i.e. StructArray or RecordBatch). 
+ a = arrow.array(1:3); + a.export(cArray.Address, cSchema.Address); + fcn = @() arrow.tabular.RecordBatch.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + function ImportFromStructArray(testCase) + % Verify a StructArray exported via the C Data Interface format + % can be imported as a RecordBatch. + field1 = arrow.array(1:3); + + field2 = arrow.array(["A" "B" "C"]); + structArray = arrow.array.StructArray.fromArrays(field1, field2, ... + FieldNames=["Number" "Text"]); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + structArray.export(cArray.Address, cSchema.Address) + rb = arrow.tabular.RecordBatch.import(cArray, cSchema); + + expected = arrow.tabular.RecordBatch.fromArrays(field1, field2, ... + ColumnNames=["Number" "Text"]); + + testCase.verifyEqual(rb, expected); + end + + function ExportToStructArray(testCase) + % Verify a RecordBatch exported via the C Data Interface + % format can be imported as a StructArray. + column1 = arrow.array(1:3); + column2 = arrow.array(["A" "B" "C"]); + rb = arrow.tabular.RecordBatch.fromArrays(column1, column2, ... + ColumnNames=["Number" "Text"]); + + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + rb.export(cArray.Address, cSchema.Address) + structArray = arrow.array.Array.import(cArray, cSchema); + + expected = arrow.array.StructArray.fromArrays(column1, column2, ... + FieldNames=["Number" "Text"]); + + testCase.verifyEqual(structArray, expected); + end + + end + +end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index 92e9f59145acc..0a747e648cd84 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -78,7 +78,8 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/record_batch_importer.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") From e6e00e7f5fb03cf4397496e2f7a0e9dc4da5126d Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 29 May 2024 00:02:19 +0900 Subject: [PATCH 187/261] GH-41771: [C++] Iterator releases its resource immediately when it reads all values (#41824) ### Rationale for this change `Iterator` keeps its resource (`ptr_`) until it's deleted but we can release its resource immediately when it reads all values. If `Iterator` keeps its resource until it's deleted, it may block closing a file. See GH-41771 for this case. ### What changes are included in this PR? Releases `ptr_` when `Next()` returns the end. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. 
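For illustration (this snippet is not part of the change), here is a minimal sketch of the user-visible effect, using the existing `MakeFunctionIterator`, `IsIterationEnd`, and `IterationTraits` helpers from `arrow/util/iterator.h`; the `probe` bookkeeping exists only to observe when the wrapped state is destroyed:

```cpp
#include <iostream>
#include <memory>

#include "arrow/result.h"
#include "arrow/util/iterator.h"

int main() {
  // `resource` stands in for something the iterator pins, e.g. an open
  // file. The lambda below holds the only strong reference once
  // `resource` is moved into the capture; `probe` observes its lifetime.
  auto resource = std::make_shared<int>(42);
  std::weak_ptr<int> probe = resource;

  auto it = arrow::MakeFunctionIterator(
      [resource = std::move(resource), remaining = 3]() mutable
          -> arrow::Result<std::shared_ptr<int>> {
        if (remaining == 0) {
          // For std::shared_ptr, the end sentinel is a null pointer.
          return arrow::IterationTraits<std::shared_ptr<int>>::End();
        }
        --remaining;
        return std::make_shared<int>(*resource);
      });

  // Drain the iterator.
  while (true) {
    std::shared_ptr<int> element = it.Next().ValueOrDie();
    if (arrow::IsIterationEnd(element)) break;
  }

  // With this change, the wrapped callable (and the resource it
  // captured) is destroyed as soon as Next() returns the end sentinel,
  // even though `it` itself is still alive here.
  std::cout << std::boolalpha << probe.expired() << std::endl;  // true
  return 0;
}
```

Before this change, `probe.expired()` would only become true once `it` itself was destroyed; now the wrapped state, and anything it pins such as an open file, is released as soon as iteration reaches the end.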
* GitHub Issue: #41771 Authored-by: Sutou Kouhei Signed-off-by: Benjamin Kietzman --- cpp/src/arrow/util/iterator.h | 15 ++++++++-- cpp/src/arrow/util/iterator_test.cc | 43 +++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/iterator.h b/cpp/src/arrow/util/iterator.h index 4da8394a0299c..5025799b9a372 100644 --- a/cpp/src/arrow/util/iterator.h +++ b/cpp/src/arrow/util/iterator.h @@ -105,9 +105,18 @@ class Iterator : public util::EqualityComparable> { Iterator() : ptr_(NULLPTR, [](void*) {}) {} /// \brief Return the next element of the sequence, IterationTraits::End() when the - /// iteration is completed. Calling this on a default constructed Iterator - /// will result in undefined behavior. - Result Next() { return next_(ptr_.get()); } + /// iteration is completed. + Result Next() { + if (ptr_) { + auto next_result = next_(ptr_.get()); + if (next_result.ok() && IsIterationEnd(next_result.ValueUnsafe())) { + ptr_.reset(NULLPTR); + } + return next_result; + } else { + return IterationTraits::End(); + } + } /// Pass each element of the sequence to a visitor. Will return any error status /// returned by the visitor, terminating iteration. diff --git a/cpp/src/arrow/util/iterator_test.cc b/cpp/src/arrow/util/iterator_test.cc index ba21ddcced209..a247ba13aef73 100644 --- a/cpp/src/arrow/util/iterator_test.cc +++ b/cpp/src/arrow/util/iterator_test.cc @@ -146,6 +146,49 @@ void AssertIteratorNext(T expected, Iterator& it) { ASSERT_EQ(expected, actual); } +template +class DeleteDetectableIterator { + public: + explicit DeleteDetectableIterator(std::vector values, bool* deleted) + : values_(std::move(values)), i_(0), deleted_(deleted) {} + + DeleteDetectableIterator(DeleteDetectableIterator&& source) + : values_(std::move(source.values_)), i_(source.i_), deleted_(source.deleted_) { + source.deleted_ = nullptr; + } + + ~DeleteDetectableIterator() { + if (deleted_) { + *deleted_ = true; + } + } + + Result Next() { + if (i_ == values_.size()) { + return IterationTraits::End(); + } + return std::move(values_[i_++]); + } + + private: + std::vector values_; + size_t i_; + bool* deleted_; +}; + +// Generic iterator tests + +TEST(TestIterator, DeleteOnEnd) { + bool deleted = false; + Iterator it(DeleteDetectableIterator({1}, &deleted)); + ASSERT_FALSE(deleted); + AssertIteratorNext({1}, it); + ASSERT_FALSE(deleted); + ASSERT_OK_AND_ASSIGN(auto value, it.Next()); + ASSERT_TRUE(IsIterationEnd(value)); + ASSERT_TRUE(deleted); +} + // -------------------------------------------------------------------- // Synchronous iterator tests From 8f3bf67cca32902e241b1857502247918861a3f8 Mon Sep 17 00:00:00 2001 From: Jonathan Keane Date: Tue, 28 May 2024 17:26:09 -0500 Subject: [PATCH 188/261] GH-41841: [R][CI] Remove more defunct rhub containers (#41828) Testing CI to see if we can replicate the incoming NOTEs: ``` Found the following (possibly) invalid file URIs: URI: articles/read_write.html From: README.md URI: articles/data_wrangling.html From: README.md URI: reference/acero.html From: README.md URI: articles/install.html From: README.md URI: articles/install_nightly.html From: README.md ``` I wasn't able to replicate them in CI (even with `_R_CHECK_CRAN_INCOMING_REMOTE_` set to true, and installing pandoc so that the docs could be munged.) But in the process realized we were running old rhub images that aren't updated anymore (thanks, @ thisisnic). 
Also did a bit of cleanup of `--run-donttest` which is now no longer needed (was removed in favor of the env var in 4.0) * GitHub Issue: #41841 Authored-by: Jonathan Keane Signed-off-by: Jonathan Keane --- .github/workflows/r.yml | 5 ++- ci/scripts/r_install_system_dependencies.sh | 43 +++++++++++---------- ci/scripts/r_test.sh | 9 ++--- dev/tasks/r/github.linux.cran.yml | 9 ++--- r/Makefile | 4 +- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml index aba77347659cd..6bd940f806775 100644 --- a/.github/workflows/r.yml +++ b/.github/workflows/r.yml @@ -370,11 +370,12 @@ jobs: MAKEFLAGS = paste0("-j", parallel::detectCores()), ARROW_R_DEV = TRUE, "_R_CHECK_FORCE_SUGGESTS_" = FALSE, - "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE + "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE, + "_R_CHECK_DONTTEST_EXAMPLES_" = TRUE ) rcmdcheck::rcmdcheck(".", build_args = '--no-build-vignettes', - args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'), + args = c('--no-manual', '--as-cran', '--ignore-vignettes'), error_on = 'warning', check_dir = 'check', timeout = 3600 diff --git a/ci/scripts/r_install_system_dependencies.sh b/ci/scripts/r_install_system_dependencies.sh index be0d75ef235e6..7ddc2604f661a 100755 --- a/ci/scripts/r_install_system_dependencies.sh +++ b/ci/scripts/r_install_system_dependencies.sh @@ -21,29 +21,30 @@ set -ex : ${ARROW_SOURCE_HOME:=/arrow} -if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then - # Figure out what package manager we have - if [ "`which dnf`" ]; then - PACKAGE_MANAGER=dnf - elif [ "`which yum`" ]; then - PACKAGE_MANAGER=yum - elif [ "`which zypper`" ]; then - PACKAGE_MANAGER=zypper - else - PACKAGE_MANAGER=apt-get - apt-get update - fi +# Figure out what package manager we have +if [ "`which dnf`" ]; then + PACKAGE_MANAGER=dnf +elif [ "`which yum`" ]; then + PACKAGE_MANAGER=yum +elif [ "`which zypper`" ]; then + PACKAGE_MANAGER=zypper +else + PACKAGE_MANAGER=apt-get + apt-get update +fi - # Install curl and OpenSSL for S3/GCS support - case "$PACKAGE_MANAGER" in - apt-get) - apt-get install -y libcurl4-openssl-dev libssl-dev - ;; - *) - $PACKAGE_MANAGER install -y libcurl-devel openssl-devel - ;; - esac +# Install curl and OpenSSL (technically, only needed for S3/GCS support, but +# installing the R curl package fails without it) +case "$PACKAGE_MANAGER" in + apt-get) + apt-get install -y libcurl4-openssl-dev libssl-dev + ;; + *) + $PACKAGE_MANAGER install -y libcurl-devel openssl-devel + ;; +esac +if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then # The Dockerfile should have put this file here if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh index e13da45e2d296..fe9d18edb8cbb 100755 --- a/ci/scripts/r_test.sh +++ b/ci/scripts/r_test.sh @@ -110,16 +110,15 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true') on.exit(tools::pskill(pid_flight), add = TRUE) } - run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true') - if (run_donttest) { - args <- c(args, '--run-donttest') - } - install_args <- Sys.getenv('INSTALL_ARGS') if (nzchar(install_args)) { args <- c(args, paste0('--install-args=\"', install_args, '\"')) } + message('Running 
rcmdcheck with:\n') + print(build_args) + print(args) + rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)" echo "$SCRIPT" | ${R_BIN} --no-save diff --git a/dev/tasks/r/github.linux.cran.yml b/dev/tasks/r/github.linux.cran.yml index 0aeb7cfa2b434..34cb4b9446a0b 100644 --- a/dev/tasks/r/github.linux.cran.yml +++ b/dev/tasks/r/github.linux.cran.yml @@ -28,11 +28,10 @@ jobs: matrix: # See https://hub.docker.com/r/rhub r_image: - - debian-gcc-devel - - debian-gcc-patched - - debian-gcc-release - - fedora-gcc-devel - - fedora-clang-devel + - ubuntu-gcc12 # ~ r-devel-linux-x86_64-debian-gcc + - ubuntu-clang # ~ r-devel-linux-x86_64-debian-clang + - ubuntu-next # ~ r-patched-linux-x86_64 + - ubuntu-release # ~ r-release-linux-x86_64 env: R_ORG: "rhub" R_IMAGE: {{ MATRIX }} diff --git a/r/Makefile b/r/Makefile index c3267e8cfe45b..785e9e1214d4f 100644 --- a/r/Makefile +++ b/r/Makefile @@ -52,11 +52,11 @@ build: doc sync-cpp R CMD build ${args} . check: build - -export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE && export ARROW_R_DEV=$(ARROW_R_DEV) && export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran --run-donttest arrow_$(VERSION).tar.gz + -export _R_CHECK_CRAN_INCOMING_REMOTE_=FALSE && export ARROW_R_DEV=$(ARROW_R_DEV) && export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran arrow_$(VERSION).tar.gz rm -rf arrow.Rcheck/ release: build - -export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran --run-donttest arrow_$(VERSION).tar.gz + -export _R_CHECK_TESTS_NLINES_=0 && R CMD check --as-cran arrow_$(VERSION).tar.gz rm -rf arrow.Rcheck/ clean: From 235608beb68332d8e00f70afced0d2a7a52b2d98 Mon Sep 17 00:00:00 2001 From: Matt Topol Date: Tue, 28 May 2024 19:23:47 -0400 Subject: [PATCH 189/261] MINOR: [C++] Slight improvement for ArrayData device_type (#41814) Responding to feedback on #40807: This condition implies that, conversely, in non-debug mode we could immediately return when we encounter a buffer? Instead of continue looping on all buffers and children... _Originally posted in https://github.com/apache/arrow/pull/40807#discussion_r1611641179_ Authored-by: Matt Topol Signed-off-by: Matt Topol --- cpp/src/arrow/array/data.cc | 12 ++++++++++++ cpp/src/arrow/array/data.h | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index 76a43521394c1..83eeb56c496cf 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -233,28 +233,40 @@ DeviceAllocationType ArrayData::device_type() const { int type = 0; for (const auto& buf : buffers) { if (!buf) continue; +#ifdef NDEBUG + return buf->device_type(); +#else if (type == 0) { type = static_cast(buf->device_type()); } else { DCHECK_EQ(type, static_cast(buf->device_type())); } +#endif } for (const auto& child : child_data) { if (!child) continue; +#ifdef NDEBUG + return child->device_type(); +#else if (type == 0) { type = static_cast(child->device_type()); } else { DCHECK_EQ(type, static_cast(child->device_type())); } +#endif } if (dictionary) { +#ifdef NDEBUG + return dictionary->device_type(); +#else if (type == 0) { type = static_cast(dictionary->device_type()); } else { DCHECK_EQ(type, static_cast(dictionary->device_type())); } +#endif } return type == 0 ? 
DeviceAllocationType::kCPU : static_cast(type); diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index 0c49f36229a40..e0508fe6980a7 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -369,7 +369,7 @@ struct ARROW_EXPORT ArrayData { /// \see GetNullCount int64_t ComputeLogicalNullCount() const; - /// \brief Returns the device_type of the underlying buffers and children + /// \brief Return the device_type of the underlying buffers and children /// /// If there are no buffers in this ArrayData object, it just returns /// DeviceAllocationType::kCPU as a default. We also assume that all buffers From 0b5c53ba0f37b0687ff64b78cbf4f71a11bfdbec Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Tue, 28 May 2024 16:41:13 -0700 Subject: [PATCH 190/261] MINOR: [Java] Fix develocity cache directory name in .gitignore (#41866) ### Rationale for this change `.gitignore` is not referencing correctly the develocity cache directory ### What changes are included in this PR? Changing from `.mvn/.develocity.xml` to `.mvn/.develocity/` ### Are these changes tested? No (Checking local git output) ### Are there any user-facing changes? No Authored-by: Laurent Goujon Signed-off-by: David Li --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e2e84fee57e3c..a482f5503c2b9 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,4 @@ __debug_bin .envrc # Develocity -.mvn/.develocity.xml +.mvn/.develocity/ From 13630c7a8316bccce4a119d193e3a46f5a9a35bc Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 29 May 2024 08:44:43 +0900 Subject: [PATCH 191/261] MINOR: [Java] Bump org.apache.commons:commons-compress from 1.26.0 to 1.26.2 in /java (#41853) Bumps org.apache.commons:commons-compress from 1.26.0 to 1.26.2. [![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.commons:commons-compress&package-manager=maven&previous-version=1.26.0&new-version=1.26.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/compression/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 26467dbaf2db3..6ed0be6815ca3 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -42,7 +42,7 @@
       org.apache.commons
       commons-compress
-      1.26.0
+      1.26.2
       com.github.luben

From 4d524eb40161579c96d1ac11d8e296ab3507f889 Mon Sep 17 00:00:00 2001
From: abandy
Date: Tue, 28 May 2024 22:58:22 -0400
Subject: [PATCH 192/261] GH-37938: [Swift] Add initial C data interface
 implementation (#41342)

Continuation for PR: #39091

This adds an initial implementation of the C Data interface for Swift.
During development, it was found that the null count was not being
properly maintained on the Arrow buffers; a fix for this is included as
well. Also, some minor refactoring was done to existing sources to
enable this feature.

This has been tested from Swift calling into C to import data, but not
yet from Swift exporting data to C. Testing is currently ongoing.

* GitHub Issue: #37938

Authored-by: Alva Bandy
Signed-off-by: Sutou Kouhei
---
 ci/docker/ubuntu-swift.dockerfile             |   2 +-
 dev/release/rat_exclude_files.txt             |   1 +
 swift/.swiftlint.yml                          |   4 +
 swift/Arrow/Package.swift                     |  22 ++-
 swift/Arrow/Sources/Arrow/ArrowArray.swift    |  39 ++--
 swift/Arrow/Sources/Arrow/ArrowBuffer.swift   |  17 +-
 .../Arrow/Sources/Arrow/ArrowCExporter.swift  | 135 +++++++++++++
 .../Arrow/Sources/Arrow/ArrowCImporter.swift  | 179 ++++++++++++++++++
 .../Sources/Arrow/ArrowReaderHelper.swift     |  16 +-
 swift/Arrow/Sources/Arrow/ArrowSchema.swift   |   6 +-
 swift/Arrow/Sources/Arrow/ArrowTable.swift    |  18 +-
 swift/Arrow/Sources/Arrow/ArrowType.swift     | 116 ++++++++++++
 swift/Arrow/Sources/Arrow/ChunkedArray.swift  |   5 +
 swift/Arrow/Sources/ArrowC/ArrowCData.c       |  30 +++
 .../Arrow/Sources/ArrowC/include/ArrowCData.h |  81 ++++++++
 swift/Arrow/Tests/ArrowTests/CDataTests.swift | 125 ++++++++++++
 swift/Arrow/Tests/ArrowTests/IPCTests.swift   |  16 +-
 .../Tests/ArrowTests/RecordBatchTests.swift   |   4 +-
 swift/Arrow/Tests/ArrowTests/TableTests.swift |   4 +-
 swift/ArrowFlight/Package.swift               |   4 +-
 .../Tests/ArrowFlightTests/FlightTest.swift   |   6 +-
 swift/CDataWGo/.gitignore                     |   8 +
 swift/CDataWGo/Package.swift                  |  43 +++++
 .../CDataWGo/Sources/go-swift/CDataTest.swift | 132 +++++++++++++
 swift/CDataWGo/go.mod                         |  41 ++++
 swift/CDataWGo/go.sum                         |  75 ++++++++
 swift/CDataWGo/include/go_swift.h             |  30 +++
 swift/CDataWGo/main.go                        | 127 +++++++++++++
 28 files changed, 1231 insertions(+), 55 deletions(-)
 create mode 100644 swift/Arrow/Sources/Arrow/ArrowCExporter.swift
 create mode 100644 swift/Arrow/Sources/Arrow/ArrowCImporter.swift
 create mode 100644 swift/Arrow/Sources/ArrowC/ArrowCData.c
 create mode 100644 swift/Arrow/Sources/ArrowC/include/ArrowCData.h
 create mode 100644 swift/Arrow/Tests/ArrowTests/CDataTests.swift
 create mode 100644 swift/CDataWGo/.gitignore
 create mode 100644 swift/CDataWGo/Package.swift
 create mode 100644 swift/CDataWGo/Sources/go-swift/CDataTest.swift
 create mode 100644 swift/CDataWGo/go.mod
 create mode 100644 swift/CDataWGo/go.sum
 create mode 100644 swift/CDataWGo/include/go_swift.h
 create mode 100644 swift/CDataWGo/main.go

diff --git a/ci/docker/ubuntu-swift.dockerfile b/ci/docker/ubuntu-swift.dockerfile
index 4789c9188c226..26950b806d1bc 100644
--- a/ci/docker/ubuntu-swift.dockerfile
+++ b/ci/docker/ubuntu-swift.dockerfile
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.

-FROM swift:5.7.3
+FROM swift:5.9.0

 # Go is needed for generating test data
 RUN apt-get update -y -q && \
diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt
index f4d7b411c4dc2..0cc1348f50b95 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -150,3 +150,4 @@ r/tools/nixlibs-allowlist.txt
 ruby/red-arrow/.yardopts
 .github/pull_request_template.md
 swift/data-generator/swift-datagen/go.sum
+swift/CDataWGo/go.sum
diff --git a/swift/.swiftlint.yml b/swift/.swiftlint.yml
index d447bf9d5d97c..7e4da29f3741c 100644
--- a/swift/.swiftlint.yml
+++ b/swift/.swiftlint.yml
@@ -16,10 +16,14 @@
 # under the License.

 included:
+  - Arrow/Package.swift
   - Arrow/Sources
   - Arrow/Tests
+  - ArrowFlight/Package.swift
   - ArrowFlight/Sources
   - ArrowFlight/Tests
+  - CDataWGo/Package.swift
+  - CDataWGo/Sources/go-swift
 excluded:
   - Arrow/Sources/Arrow/File_generated.swift
   - Arrow/Sources/Arrow/Message_generated.swift
diff --git a/swift/Arrow/Package.swift b/swift/Arrow/Package.swift
index 946eb999c798a..6f19136fd4292 100644
--- a/swift/Arrow/Package.swift
+++ b/swift/Arrow/Package.swift
@@ -26,28 +26,34 @@ let package = Package(
         .macOS(.v10_14)
     ],
     products: [
-        // Products define the executables and libraries a package produces, and make them visible to other packages.
         .library(
             name: "Arrow",
-            targets: ["Arrow"]),
+            targets: ["Arrow"])
     ],
     dependencies: [
         // The latest version of flatbuffers v23.5.26 was built in May 26, 2023
         // and therefore doesn't include the unaligned buffer swift changes.
         // This can be changed back to using the tag once a new version of
         // flatbuffers has been released.
-        .package(url: "https://github.com/google/flatbuffers.git", branch: "master")
+        .package(url: "https://github.com/google/flatbuffers.git", branch: "master"),
+        .package(
+            url: "https://github.com/apple/swift-atomics.git",
+            .upToNextMajor(from: "1.2.0") // or `.upToNextMinor
+        )
     ],
     targets: [
-        // Targets are the basic building blocks of a package. A target can define a module or a test suite.
+ .target( + name: "ArrowC", + path: "Sources/ArrowC" + ), .target( name: "Arrow", - dependencies: [ - .product(name: "FlatBuffers", package: "flatbuffers") + dependencies: ["ArrowC", + .product(name: "FlatBuffers", package: "flatbuffers"), + .product(name: "Atomics", package: "swift-atomics") ]), .testTarget( name: "ArrowTests", - dependencies: ["Arrow"]), + dependencies: ["Arrow", "ArrowC"]) ] ) diff --git a/swift/Arrow/Sources/Arrow/ArrowArray.swift b/swift/Arrow/Sources/Arrow/ArrowArray.swift index 88b43e63a92b7..32b6ba1704511 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArray.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArray.swift @@ -17,16 +17,29 @@ import Foundation -public class ArrowArrayHolder { +public protocol ArrowArrayHolder { + var type: ArrowType {get} + var length: UInt {get} + var nullCount: UInt {get} + var array: Any {get} + var data: ArrowData {get} + var getBufferData: () -> [Data] {get} + var getBufferDataSizes: () -> [Int] {get} + var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {get} +} + +public class ArrowArrayHolderImpl: ArrowArrayHolder { + public let array: Any + public let data: ArrowData public let type: ArrowType public let length: UInt public let nullCount: UInt - public let array: Any public let getBufferData: () -> [Data] public let getBufferDataSizes: () -> [Int] - private let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn + public let getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn public init(_ arrowArray: ArrowArray) { self.array = arrowArray + self.data = arrowArray.arrowData self.length = arrowArray.length self.type = arrowArray.arrowData.type self.nullCount = arrowArray.nullCount @@ -60,19 +73,9 @@ public class ArrowArrayHolder { return ArrowColumn(field, chunked: ChunkedArrayHolder(try ChunkedArray(arrays))) } } - - public static func makeArrowColumn(_ field: ArrowField, - holders: [ArrowArrayHolder] - ) -> Result { - do { - return .success(try holders[0].getArrowColumn(field, holders)) - } catch { - return .failure(.runtimeError("\(error)")) - } - } } -public class ArrowArray: AsString { +public class ArrowArray: AsString, AnyArray { public typealias ItemType = T public let arrowData: ArrowData public var nullCount: UInt {return self.arrowData.nullCount} @@ -101,6 +104,14 @@ public class ArrowArray: AsString { return "\(self[index]!)" } + + public func asAny(_ index: UInt) -> Any? { + if self[index] == nil { + return nil + } + + return self[index]! 
+ } } public class FixedArray: ArrowArray { diff --git a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift index 4ac4eb93c91db..1ff53cd7dd5d9 100644 --- a/swift/Arrow/Sources/Arrow/ArrowBuffer.swift +++ b/swift/Arrow/Sources/Arrow/ArrowBuffer.swift @@ -22,16 +22,20 @@ public class ArrowBuffer { static let maxLength = UInt.max fileprivate(set) var length: UInt let capacity: UInt - let rawPointer: UnsafeMutableRawPointer + public let rawPointer: UnsafeMutableRawPointer + let isMemoryOwner: Bool - init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer) { + init(length: UInt, capacity: UInt, rawPointer: UnsafeMutableRawPointer, isMemoryOwner: Bool = true) { self.length = length self.capacity = capacity self.rawPointer = rawPointer + self.isMemoryOwner = isMemoryOwner } deinit { - self.rawPointer.deallocate() + if isMemoryOwner { + self.rawPointer.deallocate() + } } func append(to data: inout Data) { @@ -39,6 +43,13 @@ public class ArrowBuffer { data.append(ptr, count: Int(capacity)) } + static func createEmptyBuffer() -> ArrowBuffer { + return ArrowBuffer( + length: 0, + capacity: 0, + rawPointer: UnsafeMutableRawPointer.allocate(byteCount: 0, alignment: .zero)) + } + static func createBuffer(_ data: [UInt8], length: UInt) -> ArrowBuffer { let byteCount = UInt(data.count) let capacity = alignTo64(byteCount) diff --git a/swift/Arrow/Sources/Arrow/ArrowCExporter.swift b/swift/Arrow/Sources/Arrow/ArrowCExporter.swift new file mode 100644 index 0000000000000..aa93f0cb7e389 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowCExporter.swift @@ -0,0 +1,135 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import Foundation +import ArrowC +import Atomics + +// The memory used by UnsafeAtomic is not automatically +// reclaimed. Since this value is initialized once +// and used until the program/app is closed it's +// memory will be released on program/app exit +let exportDataCounter: UnsafeAtomic = .create(0) + +public class ArrowCExporter { + private class ExportData { + let id: Int + init() { + id = exportDataCounter.loadThenWrappingIncrement(ordering: .relaxed) + ArrowCExporter.exportedData[id] = self + } + } + + private class ExportSchema: ExportData { + public let arrowTypeNameCstr: UnsafePointer + public let nameCstr: UnsafePointer + private let arrowType: ArrowType + private let name: String + private let arrowTypeName: String + init(_ arrowType: ArrowType, name: String = "") throws { + self.arrowType = arrowType + // keeping the name str to ensure the cstring buffer remains valid + self.name = name + self.arrowTypeName = try arrowType.cDataFormatId + self.nameCstr = (self.name as NSString).utf8String! + self.arrowTypeNameCstr = (self.arrowTypeName as NSString).utf8String! 
+ super.init() + } + } + + private class ExportArray: ExportData { + private let arrowData: ArrowData + private(set) var data = [UnsafeRawPointer?]() + private(set) var buffers: UnsafeMutablePointer + init(_ arrowData: ArrowData) { + // keep a reference to the ArrowData + // obj so the memory doesn't get + // deallocated + self.arrowData = arrowData + for arrowBuffer in arrowData.buffers { + data.append(arrowBuffer.rawPointer) + } + + self.buffers = UnsafeMutablePointer(mutating: data) + super.init() + } + } + + private static var exportedData = [Int: ExportData]() + public init() {} + + public func exportType(_ cSchema: inout ArrowC.ArrowSchema, arrowType: ArrowType, name: String = "") -> + Result { + do { + let exportSchema = try ExportSchema(arrowType, name: name) + cSchema.format = exportSchema.arrowTypeNameCstr + cSchema.name = exportSchema.nameCstr + cSchema.private_data = + UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportSchema.id)) + cSchema.release = {(data: UnsafeMutablePointer?) in + let arraySchema = data!.pointee + let exportId = Int(bitPattern: arraySchema.private_data) + guard ArrowCExporter.exportedData[exportId] != nil else { + fatalError("Export schema not found with id \(exportId)") + } + + // the data associated with this exportSchema object + // which includes the C strings for the format and name + // be deallocated upon removal + ArrowCExporter.exportedData.removeValue(forKey: exportId) + ArrowC.ArrowSwiftClearReleaseSchema(data) + } + } catch { + return .failure(.unknownError("\(error)")) + } + return .success(true) + } + + public func exportField(_ schema: inout ArrowC.ArrowSchema, field: ArrowField) -> + Result { + return exportType(&schema, arrowType: field.type, name: field.name) + } + + public func exportArray(_ cArray: inout ArrowC.ArrowArray, arrowData: ArrowData) { + let exportArray = ExportArray(arrowData) + cArray.buffers = exportArray.buffers + cArray.length = Int64(arrowData.length) + cArray.null_count = Int64(arrowData.nullCount) + cArray.n_buffers = Int64(arrowData.buffers.count) + // Swift Arrow does not currently support children or dictionaries + // This will need to be updated once support has been added + cArray.n_children = 0 + cArray.children = nil + cArray.dictionary = nil + cArray.private_data = + UnsafeMutableRawPointer(mutating: UnsafeRawPointer(bitPattern: exportArray.id)) + cArray.release = {(data: UnsafeMutablePointer?) in + let arrayData = data!.pointee + let exportId = Int(bitPattern: arrayData.private_data) + guard ArrowCExporter.exportedData[exportId] != nil else { + fatalError("Export data not found with id \(exportId)") + } + + // the data associated with this exportArray object + // which includes the entire arrowData object + // and the buffers UnsafeMutablePointer[] will + // be deallocated upon removal + ArrowCExporter.exportedData.removeValue(forKey: exportId) + ArrowC.ArrowSwiftClearReleaseArray(data) + } + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowCImporter.swift b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift new file mode 100644 index 0000000000000..8a4cf47fc0b43 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowCImporter.swift @@ -0,0 +1,179 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import Foundation +import ArrowC + +public class ImportArrayHolder: ArrowArrayHolder { + let cArrayPtr: UnsafePointer + public var type: ArrowType {self.holder.type} + public var length: UInt {self.holder.length} + public var nullCount: UInt {self.holder.nullCount} + public var array: Any {self.holder.array} + public var data: ArrowData {self.holder.data} + public var getBufferData: () -> [Data] {self.holder.getBufferData} + public var getBufferDataSizes: () -> [Int] {self.holder.getBufferDataSizes} + public var getArrowColumn: (ArrowField, [ArrowArrayHolder]) throws -> ArrowColumn {self.holder.getArrowColumn} + private let holder: ArrowArrayHolder + init(_ holder: ArrowArrayHolder, cArrayPtr: UnsafePointer) { + self.cArrayPtr = cArrayPtr + self.holder = holder + } + + deinit { + if self.cArrayPtr.pointee.release != nil { + ArrowCImporter.release(self.cArrayPtr) + } + } +} + +public class ArrowCImporter { + private func appendToBuffer( + _ cBuffer: UnsafeRawPointer?, + arrowBuffers: inout [ArrowBuffer], + length: UInt) { + if cBuffer == nil { + arrowBuffers.append(ArrowBuffer.createEmptyBuffer()) + return + } + + let pointer = UnsafeMutableRawPointer(mutating: cBuffer)! 
+        arrowBuffers.append(
+            ArrowBuffer(length: length, capacity: length, rawPointer: pointer, isMemoryOwner: false))
+    }
+
+    public init() {}
+
+    public func importType(_ cArrow: String, name: String = "") ->
+        Result<ArrowField, ArrowError> {
+        do {
+            let type = try ArrowType.fromCDataFormatId(cArrow)
+            return .success(ArrowField(name, type: ArrowType(type.info), isNullable: true))
+        } catch {
+            return .failure(.invalid("Error occurred while attempting to import type: \(error)"))
+        }
+    }
+
+    public func importField(_ cSchema: ArrowC.ArrowSchema) ->
+        Result<ArrowField, ArrowError> {
+        if cSchema.n_children > 0 {
+            ArrowCImporter.release(cSchema)
+            return .failure(.invalid("Children currently not supported"))
+        } else if cSchema.dictionary != nil {
+            ArrowCImporter.release(cSchema)
+            return .failure(.invalid("Dictionary types currently not supported"))
+        }
+
+        switch importType(
+            String(cString: cSchema.format), name: String(cString: cSchema.name)) {
+        case .success(let field):
+            ArrowCImporter.release(cSchema)
+            return .success(field)
+        case .failure(let err):
+            ArrowCImporter.release(cSchema)
+            return .failure(err)
+        }
+    }
+
+    public func importArray(
+        _ cArray: UnsafePointer<ArrowC.ArrowArray>,
+        arrowType: ArrowType,
+        isNullable: Bool = false
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let arrowField = ArrowField("", type: arrowType, isNullable: isNullable)
+        return importArray(cArray, arrowField: arrowField)
+    }
+
+    public func importArray( // swiftlint:disable:this cyclomatic_complexity function_body_length
+        _ cArrayPtr: UnsafePointer<ArrowC.ArrowArray>,
+        arrowField: ArrowField
+    ) -> Result<ArrowArrayHolder, ArrowError> {
+        let cArray = cArrayPtr.pointee
+        if cArray.null_count < 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Uncomputed null count is not supported"))
+        } else if cArray.n_children > 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Children currently not supported"))
+        } else if cArray.dictionary != nil {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Dictionary types currently not supported"))
+        } else if cArray.offset != 0 {
+            ArrowCImporter.release(cArrayPtr)
+            return .failure(.invalid("Offset of 0 is required but found offset: \(cArray.offset)"))
+        }
+
+        let arrowType = arrowField.type
+        let length = UInt(cArray.length)
+        let nullCount = UInt(cArray.null_count)
+        var arrowBuffers = [ArrowBuffer]()
+
+        if cArray.n_buffers > 0 {
+            if cArray.buffers == nil {
+                ArrowCImporter.release(cArrayPtr)
+                return .failure(.invalid("C array buffers is nil"))
+            }
+
+            switch arrowType.info {
+            case .variableInfo:
+                if cArray.n_buffers != 3 {
+                    ArrowCImporter.release(cArrayPtr)
+                    return .failure(
+                        .invalid("Variable buffer count expected 3 but found \(cArray.n_buffers)"))
+                }
+
+                appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, length: UInt(ceil(Double(length) / 8)))
+                appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length)
+                let lastOffsetLength = cArray.buffers[1]!
+ .advanced(by: Int(length) * MemoryLayout.stride) + .load(as: Int32.self) + appendToBuffer(cArray.buffers[2], arrowBuffers: &arrowBuffers, length: UInt(lastOffsetLength)) + default: + if cArray.n_buffers != 2 { + ArrowCImporter.release(cArrayPtr) + return .failure(.invalid("Expected buffer count 2 but found \(cArray.n_buffers)")) + } + + appendToBuffer(cArray.buffers[0], arrowBuffers: &arrowBuffers, length: UInt(ceil(Double(length) / 8))) + appendToBuffer(cArray.buffers[1], arrowBuffers: &arrowBuffers, length: length) + } + } + + switch makeArrayHolder(arrowField, buffers: arrowBuffers, nullCount: nullCount) { + case .success(let holder): + return .success(ImportArrayHolder(holder, cArrayPtr: cArrayPtr)) + case .failure(let err): + ArrowCImporter.release(cArrayPtr) + return .failure(err) + } + } + + public static func release(_ cArrayPtr: UnsafePointer) { + if cArrayPtr.pointee.release != nil { + let cSchemaMutablePtr = UnsafeMutablePointer(mutating: cArrayPtr) + cArrayPtr.pointee.release(cSchemaMutablePtr) + } + } + + public static func release(_ cSchema: ArrowC.ArrowSchema) { + if cSchema.release != nil { + let cSchemaPtr = UnsafeMutablePointer.allocate(capacity: 1) + cSchemaPtr.initialize(to: cSchema) + cSchema.release(cSchemaPtr) + } + } +} diff --git a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift index fb4a13b766f10..c701653ecb2c9 100644 --- a/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift +++ b/swift/Arrow/Sources/Arrow/ArrowReaderHelper.swift @@ -23,7 +23,7 @@ private func makeBinaryHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBinary) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(BinaryArray(arrowData))) + return .success(ArrowArrayHolderImpl(BinaryArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -36,7 +36,7 @@ private func makeStringHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowString) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(StringArray(arrowData))) + return .success(ArrowArrayHolderImpl(StringArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -51,11 +51,11 @@ private func makeDateHolder(_ field: ArrowField, do { if field.type.id == .date32 { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(Date32Array(arrowData))) + return .success(ArrowArrayHolderImpl(Date32Array(arrowData))) } let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(Date64Array(arrowData))) + return .success(ArrowArrayHolderImpl(Date64Array(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -71,7 +71,7 @@ private func makeTimeHolder(_ field: ArrowField, if field.type.id == .time32 { if let arrowType = field.type as? ArrowTypeTime32 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -79,7 +79,7 @@ private func makeTimeHolder(_ field: ArrowField, if let arrowType = field.type as? 
ArrowTypeTime64 { let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) } else { return .failure(.invalid("Incorrect field type for time: \(field.type)")) } @@ -95,7 +95,7 @@ private func makeBoolHolder(_ buffers: [ArrowBuffer], do { let arrowType = ArrowType(ArrowType.ArrowBool) let arrowData = try ArrowData(arrowType, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(BoolArray(arrowData))) + return .success(ArrowArrayHolderImpl(BoolArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { @@ -109,7 +109,7 @@ private func makeFixedHolder( ) -> Result { do { let arrowData = try ArrowData(field.type, buffers: buffers, nullCount: nullCount) - return .success(ArrowArrayHolder(FixedArray(arrowData))) + return .success(ArrowArrayHolderImpl(FixedArray(arrowData))) } catch let error as ArrowError { return .failure(error) } catch { diff --git a/swift/Arrow/Sources/Arrow/ArrowSchema.swift b/swift/Arrow/Sources/Arrow/ArrowSchema.swift index 45f13a1551c3d..65c506d51cdd6 100644 --- a/swift/Arrow/Sources/Arrow/ArrowSchema.swift +++ b/swift/Arrow/Sources/Arrow/ArrowSchema.swift @@ -17,9 +17,9 @@ import Foundation public class ArrowField { - let type: ArrowType - let name: String - let isNullable: Bool + public let type: ArrowType + public let name: String + public let isNullable: Bool init(_ name: String, type: ArrowType, isNullable: Bool) { self.name = name diff --git a/swift/Arrow/Sources/Arrow/ArrowTable.swift b/swift/Arrow/Sources/Arrow/ArrowTable.swift index 7677fb4f33a19..b9d15154c4f94 100644 --- a/swift/Arrow/Sources/Arrow/ArrowTable.swift +++ b/swift/Arrow/Sources/Arrow/ArrowTable.swift @@ -64,7 +64,7 @@ public class ArrowTable { let builder = ArrowTable.Builder() for index in 0.. Result { + do { + return .success(try holders[0].getArrowColumn(field, holders)) + } catch { + return .failure(.runtimeError("\(error)")) + } + } + public class Builder { let schemaBuilder = ArrowSchema.Builder() var columns = [ArrowColumn]() @@ -172,6 +183,11 @@ public class RecordBatch { return (arrayHolder.array as! ArrowArray) // swiftlint:disable:this force_cast } + public func anyData(for columnIndex: Int) -> AnyArray { + let arrayHolder = column(columnIndex) + return (arrayHolder.array as! 
AnyArray) // swiftlint:disable:this force_cast + } + public func column(_ index: Int) -> ArrowArrayHolder { return self.columns[index] } diff --git a/swift/Arrow/Sources/Arrow/ArrowType.swift b/swift/Arrow/Sources/Arrow/ArrowType.swift index f5a869f7cdaff..e1ada4b9734ea 100644 --- a/swift/Arrow/Sources/Arrow/ArrowType.swift +++ b/swift/Arrow/Sources/Arrow/ArrowType.swift @@ -90,6 +90,17 @@ public class ArrowTypeTime32: ArrowType { self.unit = unit super.init(ArrowType.ArrowTime32) } + + public override var cDataFormatId: String { + get throws { + switch self.unit { + case .milliseconds: + return "ttm" + case .seconds: + return "tts" + } + } + } } public class ArrowTypeTime64: ArrowType { @@ -98,6 +109,17 @@ public class ArrowTypeTime64: ArrowType { self.unit = unit super.init(ArrowType.ArrowTime64) } + + public override var cDataFormatId: String { + get throws { + switch self.unit { + case .microseconds: + return "ttu" + case .nanoseconds: + return "ttn" + } + } + } } public class ArrowType { @@ -209,6 +231,100 @@ public class ArrowType { fatalError("Stride requested for unknown type: \(self)") } } + + public var cDataFormatId: String { + get throws { + switch self.id { + case ArrowTypeId.int8: + return "c" + case ArrowTypeId.int16: + return "s" + case ArrowTypeId.int32: + return "i" + case ArrowTypeId.int64: + return "l" + case ArrowTypeId.uint8: + return "C" + case ArrowTypeId.uint16: + return "S" + case ArrowTypeId.uint32: + return "I" + case ArrowTypeId.uint64: + return "L" + case ArrowTypeId.float: + return "f" + case ArrowTypeId.double: + return "g" + case ArrowTypeId.boolean: + return "b" + case ArrowTypeId.date32: + return "tdD" + case ArrowTypeId.date64: + return "tdm" + case ArrowTypeId.time32: + if let time32 = self as? ArrowTypeTime32 { + return try time32.cDataFormatId + } + return "tts" + case ArrowTypeId.time64: + if let time64 = self as? 
ArrowTypeTime64 { + return try time64.cDataFormatId + } + return "ttu" + case ArrowTypeId.binary: + return "z" + case ArrowTypeId.string: + return "u" + default: + throw ArrowError.notImplemented + } + } + } + + public static func fromCDataFormatId( // swiftlint:disable:this cyclomatic_complexity + _ from: String) throws -> ArrowType { + if from == "c" { + return ArrowType(ArrowType.ArrowInt8) + } else if from == "s" { + return ArrowType(ArrowType.ArrowInt16) + } else if from == "i" { + return ArrowType(ArrowType.ArrowInt32) + } else if from == "l" { + return ArrowType(ArrowType.ArrowInt64) + } else if from == "C" { + return ArrowType(ArrowType.ArrowUInt8) + } else if from == "S" { + return ArrowType(ArrowType.ArrowUInt16) + } else if from == "I" { + return ArrowType(ArrowType.ArrowUInt32) + } else if from == "L" { + return ArrowType(ArrowType.ArrowUInt64) + } else if from == "f" { + return ArrowType(ArrowType.ArrowFloat) + } else if from == "g" { + return ArrowType(ArrowType.ArrowDouble) + } else if from == "b" { + return ArrowType(ArrowType.ArrowBool) + } else if from == "tdD" { + return ArrowType(ArrowType.ArrowDate32) + } else if from == "tdm" { + return ArrowType(ArrowType.ArrowDate64) + } else if from == "tts" { + return ArrowTypeTime32(.seconds) + } else if from == "ttm" { + return ArrowTypeTime32(.milliseconds) + } else if from == "ttu" { + return ArrowTypeTime64(.microseconds) + } else if from == "ttn" { + return ArrowTypeTime64(.nanoseconds) + } else if from == "z" { + return ArrowType(ArrowType.ArrowBinary) + } else if from == "u" { + return ArrowType(ArrowType.ArrowString) + } + + throw ArrowError.notImplemented + } } extension ArrowType.Info: Equatable { diff --git a/swift/Arrow/Sources/Arrow/ChunkedArray.swift b/swift/Arrow/Sources/Arrow/ChunkedArray.swift index 3a06aa46550df..c5ccfe4aec0e6 100644 --- a/swift/Arrow/Sources/Arrow/ChunkedArray.swift +++ b/swift/Arrow/Sources/Arrow/ChunkedArray.swift @@ -17,6 +17,11 @@ import Foundation +public protocol AnyArray { + func asAny(_ index: UInt) -> Any? + var length: UInt {get} +} + public protocol AsString { func asString(_ index: UInt) -> String } diff --git a/swift/Arrow/Sources/ArrowC/ArrowCData.c b/swift/Arrow/Sources/ArrowC/ArrowCData.c new file mode 100644 index 0000000000000..ac366febdaed8 --- /dev/null +++ b/swift/Arrow/Sources/ArrowC/ArrowCData.c @@ -0,0 +1,30 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "include/ArrowCData.h" + +void ArrowSwiftClearReleaseSchema(struct ArrowSchema* arrowSchema) { + if(arrowSchema) { + arrowSchema->release = NULL; + } +} + +void ArrowSwiftClearReleaseArray(struct ArrowArray* arrowArray) { + if(arrowArray) { + arrowArray->release = NULL; + } +} diff --git a/swift/Arrow/Sources/ArrowC/include/ArrowCData.h b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h new file mode 100644 index 0000000000000..9df8992114be3 --- /dev/null +++ b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +#include +#include +#include // Must have this! + + +#ifdef __cplusplus +extern "C" { +#endif + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +// Not able to set the release on the schema +// to NULL in Swift. nil in Swift is not +// equivalent to NULL. +void ArrowSwiftClearReleaseSchema(struct ArrowSchema*); + +// Not able to set the release on the array +// to NULL in Swift. nil in Swift is not +// equivalent to NULL. +void ArrowSwiftClearReleaseArray(struct ArrowArray*); + +#ifdef __cplusplus +} +#endif + +#endif // ARROW_C_DATA_INTERFACE diff --git a/swift/Arrow/Tests/ArrowTests/CDataTests.swift b/swift/Arrow/Tests/ArrowTests/CDataTests.swift new file mode 100644 index 0000000000000..2344b234745a2 --- /dev/null +++ b/swift/Arrow/Tests/ArrowTests/CDataTests.swift @@ -0,0 +1,125 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import Foundation +import XCTest +@testable import Arrow +import ArrowC + +final class CDataTests: XCTestCase { + func makeSchema() -> Arrow.ArrowSchema { + let schemaBuilder = ArrowSchema.Builder() + return schemaBuilder + .addField("colBool", type: ArrowType(ArrowType.ArrowBool), isNullable: false) + .addField("colUInt8", type: ArrowType(ArrowType.ArrowUInt8), isNullable: true) + .addField("colUInt16", type: ArrowType(ArrowType.ArrowUInt16), isNullable: true) + .addField("colUInt32", type: ArrowType(ArrowType.ArrowUInt32), isNullable: true) + .addField("colUInt64", type: ArrowType(ArrowType.ArrowUInt64), isNullable: true) + .addField("colInt8", type: ArrowType(ArrowType.ArrowInt8), isNullable: false) + .addField("colInt16", type: ArrowType(ArrowType.ArrowInt16), isNullable: false) + .addField("colInt32", type: ArrowType(ArrowType.ArrowInt32), isNullable: false) + .addField("colInt64", type: ArrowType(ArrowType.ArrowInt64), isNullable: false) + .addField("colString", type: ArrowType(ArrowType.ArrowString), isNullable: false) + .addField("colBinary", type: ArrowType(ArrowType.ArrowBinary), isNullable: false) + .addField("colDate32", type: ArrowType(ArrowType.ArrowDate32), isNullable: false) + .addField("colDate64", type: ArrowType(ArrowType.ArrowDate64), isNullable: false) + .addField("colTime32", type: ArrowType(ArrowType.ArrowTime32), isNullable: false) + .addField("colTime32s", type: ArrowTypeTime32(.seconds), isNullable: false) + .addField("colTime32m", type: ArrowTypeTime32(.milliseconds), isNullable: false) + .addField("colTime64", type: ArrowType(ArrowType.ArrowTime64), isNullable: false) + .addField("colTime64u", type: ArrowTypeTime64(.microseconds), isNullable: false) + .addField("colTime64n", type: ArrowTypeTime64(.nanoseconds), isNullable: false) + .addField("colTime64", type: ArrowType(ArrowType.ArrowTime64), isNullable: false) + .addField("colFloat", type: ArrowType(ArrowType.ArrowFloat), isNullable: false) + .addField("colDouble", type: ArrowType(ArrowType.ArrowDouble), isNullable: false) + .finish() + } + + func checkImportField(_ cSchema: ArrowC.ArrowSchema, name: String, type: ArrowType.Info) throws { + let importer = ArrowCImporter() + switch importer.importField(cSchema) { + case .success(let arrowField): + XCTAssertEqual(arrowField.type.info, type) + XCTAssertEqual(arrowField.name, name) + case .failure(let error): + throw error + } + } + + func testImportExportSchema() throws { + let schema = makeSchema() + let exporter = ArrowCExporter() + for arrowField in schema.fields { + var cSchema = ArrowC.ArrowSchema() + switch exporter.exportField(&cSchema, field: arrowField) { + case .success: + try checkImportField(cSchema, name: arrowField.name, type: arrowField.type.info) + case .failure(let error): + throw error + } + } + } + + func testImportExportArray() throws { + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + for index in 0..<100 { + if index % 10 == 9 { + stringBuilder.append(nil) + } else { + stringBuilder.append("test" + String(index)) + } + } + + XCTAssertEqual(stringBuilder.nullCount, 10) + 
XCTAssertEqual(stringBuilder.length, 100) + XCTAssertEqual(stringBuilder.capacity, 648) + let stringArray = try stringBuilder.finish() + let exporter = ArrowCExporter() + var cArray = ArrowC.ArrowArray() + exporter.exportArray(&cArray, arrowData: stringArray.arrowData) + let cArrayMutPtr = UnsafeMutablePointer.allocate(capacity: 1) + cArrayMutPtr.pointee = cArray + defer { + cArrayMutPtr.deallocate() + } + + let importer = ArrowCImporter() + switch importer.importArray(UnsafePointer(cArrayMutPtr), arrowType: ArrowType(ArrowType.ArrowString)) { + case .success(let holder): + let builder = RecordBatch.Builder() + switch builder + .addColumn("test", arrowArray: holder) + .finish() { + case .success(let rb): + XCTAssertEqual(rb.columnCount, 1) + XCTAssertEqual(rb.length, 100) + let col1: Arrow.ArrowArray = rb.data(for: 0) + for index in 0.. RecordBatch { floatBuilder.append(433.334) floatBuilder.append(544.445) - let uint8Holder = ArrowArrayHolder(try uint8Builder.finish()) - let stringHolder = ArrowArrayHolder(try stringBuilder.finish()) - let date32Holder = ArrowArrayHolder(try date32Builder.finish()) - let int32Holder = ArrowArrayHolder(try int32Builder.finish()) - let floatHolder = ArrowArrayHolder(try floatBuilder.finish()) + let uint8Holder = ArrowArrayHolderImpl(try uint8Builder.finish()) + let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish()) + let date32Holder = ArrowArrayHolderImpl(try date32Builder.finish()) + let int32Holder = ArrowArrayHolderImpl(try int32Builder.finish()) + let floatHolder = ArrowArrayHolderImpl(try floatBuilder.finish()) let result = RecordBatch.Builder() .addColumn("col1", arrowArray: uint8Holder) .addColumn("col2", arrowArray: stringHolder) @@ -279,7 +279,7 @@ final class IPCFileReaderTests: XCTestCase { binaryBuilder.append("test33".data(using: .utf8)) binaryBuilder.append("test44".data(using: .utf8)) - let binaryHolder = ArrowArrayHolder(try binaryBuilder.finish()) + let binaryHolder = ArrowArrayHolderImpl(try binaryBuilder.finish()) let result = RecordBatch.Builder() .addColumn("binary", arrowArray: binaryHolder) .finish() @@ -307,8 +307,8 @@ final class IPCFileReaderTests: XCTestCase { time32Builder.append(2) time32Builder.append(nil) time32Builder.append(3) - let time64Holder = ArrowArrayHolder(try time64Builder.finish()) - let time32Holder = ArrowArrayHolder(try time32Builder.finish()) + let time64Holder = ArrowArrayHolderImpl(try time64Builder.finish()) + let time32Holder = ArrowArrayHolderImpl(try time32Builder.finish()) let result = RecordBatch.Builder() .addColumn("time64", arrowArray: time64Holder) .addColumn("time32", arrowArray: time32Holder) diff --git a/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift b/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift index 8820f1cdb1a91..9961781f30833 100644 --- a/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift +++ b/swift/Arrow/Tests/ArrowTests/RecordBatchTests.swift @@ -29,8 +29,8 @@ final class RecordBatchTests: XCTestCase { stringBuilder.append("test22") stringBuilder.append("test33") - let intHolder = ArrowArrayHolder(try uint8Builder.finish()) - let stringHolder = ArrowArrayHolder(try stringBuilder.finish()) + let intHolder = ArrowArrayHolderImpl(try uint8Builder.finish()) + let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish()) let result = RecordBatch.Builder() .addColumn("col1", arrowArray: intHolder) .addColumn("col2", arrowArray: stringHolder) diff --git a/swift/Arrow/Tests/ArrowTests/TableTests.swift b/swift/Arrow/Tests/ArrowTests/TableTests.swift index 
a82a07979345c..8e958ccbf9f9f 100644 --- a/swift/Arrow/Tests/ArrowTests/TableTests.swift +++ b/swift/Arrow/Tests/ArrowTests/TableTests.swift @@ -132,8 +132,8 @@ final class TableTests: XCTestCase { let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() stringBuilder.append("test10") stringBuilder.append("test22") - let intHolder = ArrowArrayHolder(try uint8Builder.finish()) - let stringHolder = ArrowArrayHolder(try stringBuilder.finish()) + let intHolder = ArrowArrayHolderImpl(try uint8Builder.finish()) + let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish()) let result = RecordBatch.Builder() .addColumn("col1", arrowArray: intHolder) .addColumn("col2", arrowArray: stringHolder) diff --git a/swift/ArrowFlight/Package.swift b/swift/ArrowFlight/Package.swift index f3caa83486764..629b830a6e0da 100644 --- a/swift/ArrowFlight/Package.swift +++ b/swift/ArrowFlight/Package.swift @@ -29,7 +29,7 @@ let package = Package( // Products define the executables and libraries a package produces, making them visible to other packages. .library( name: "ArrowFlight", - targets: ["ArrowFlight"]), + targets: ["ArrowFlight"]) ], dependencies: [ .package(url: "https://github.com/grpc/grpc-swift.git", from: "1.15.0"), @@ -48,6 +48,6 @@ let package = Package( ]), .testTarget( name: "ArrowFlightTests", - dependencies: ["ArrowFlight"]), + dependencies: ["ArrowFlight"]) ] ) diff --git a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift index 8097388c7fde1..f7bc3c1ccb0c3 100644 --- a/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift +++ b/swift/ArrowFlight/Tests/ArrowFlightTests/FlightTest.swift @@ -51,9 +51,9 @@ func makeRecordBatch() throws -> RecordBatch { date32Builder.append(date2) date32Builder.append(date1) date32Builder.append(date2) - let doubleHolder = ArrowArrayHolder(try doubleBuilder.finish()) - let stringHolder = ArrowArrayHolder(try stringBuilder.finish()) - let date32Holder = ArrowArrayHolder(try date32Builder.finish()) + let doubleHolder = ArrowArrayHolderImpl(try doubleBuilder.finish()) + let stringHolder = ArrowArrayHolderImpl(try stringBuilder.finish()) + let date32Holder = ArrowArrayHolderImpl(try date32Builder.finish()) let result = RecordBatch.Builder() .addColumn("col1", arrowArray: doubleHolder) .addColumn("col2", arrowArray: stringHolder) diff --git a/swift/CDataWGo/.gitignore b/swift/CDataWGo/.gitignore new file mode 100644 index 0000000000000..0023a53406379 --- /dev/null +++ b/swift/CDataWGo/.gitignore @@ -0,0 +1,8 @@ +.DS_Store +/.build +/Packages +xcuserdata/ +DerivedData/ +.swiftpm/configuration/registries.json +.swiftpm/xcode/package.xcworkspace/contents.xcworkspacedata +.netrc diff --git a/swift/CDataWGo/Package.swift b/swift/CDataWGo/Package.swift new file mode 100644 index 0000000000000..64d29aec6b845 --- /dev/null +++ b/swift/CDataWGo/Package.swift @@ -0,0 +1,43 @@ +// swift-tools-version: 5.9 +// The swift-tools-version declares the minimum version of Swift required to build this package. + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import PackageDescription + +let package = Package( + name: "go-swift", + platforms: [ + .macOS(.v10_14) + ], + products: [ + .library( + name: "go-swift", + type: .static, + targets: ["go-swift"]) + ], + dependencies: [ + .package(path: "../Arrow") // 👈 Reference to a Local Package + ], + targets: [ + .target( + name: "go-swift", + dependencies: [ + .product(name: "Arrow", package: "Arrow") + ]) + ] +) diff --git a/swift/CDataWGo/Sources/go-swift/CDataTest.swift b/swift/CDataWGo/Sources/go-swift/CDataTest.swift new file mode 100644 index 0000000000000..b38ca7240ab60 --- /dev/null +++ b/swift/CDataWGo/Sources/go-swift/CDataTest.swift @@ -0,0 +1,132 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
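
The file that follows is the Swift half of a Swift-to-Go round trip: each function below is exported with `@_cdecl` so the Go driver can call it through cgo. The excerpt never shows `go_swift.h`, the bridging header the Go side includes, but judging from the `@_cdecl` names and signatures it plausibly amounts to plain C prototypes over the two interface structs. A sketch under that assumption, not the actual file:

```c
/* Hypothetical reconstruction of go_swift.h, inferred from the
 * @_cdecl exports below; the real header is not part of this excerpt. */
#ifndef GO_SWIFT_H
#define GO_SWIFT_H

#include "ArrowCData.h"  /* struct ArrowSchema / struct ArrowArray */

/* Swift populates the struct; Go imports and verifies it. */
void stringTypeFromSwift(struct ArrowSchema* schema);
void arrayIntFromSwift(struct ArrowArray* array);
void arrayStringFromSwift(struct ArrowArray* array);

/* Go populates the struct; Swift imports, verifies, and releases it. */
void stringTypeToSwift(struct ArrowSchema* schema);
void arrayIntToSwift(struct ArrowArray* array);
void arrayStringToSwift(struct ArrowArray* array);

#endif /* GO_SWIFT_H */
```
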
+ +import Arrow +import ArrowC + +@_cdecl("stringTypeFromSwift") +func stringTypeFromSwift(cSchema: UnsafePointer) { + let unsafePointer = UnsafeMutablePointer(mutating: cSchema) + let exporter = ArrowCExporter() + switch exporter.exportType(&unsafePointer.pointee, arrowType: ArrowType(ArrowType.ArrowString), name: "col1") { + case .success: + return + case .failure(let err): + fatalError("Error exporting string type from swift: \(err)") + } +} + +@_cdecl("stringTypeToSwift") +func stringTypeToSwift(cSchema: UnsafePointer) { + let importer = ArrowCImporter() + switch importer.importField(cSchema.pointee) { + case .success(let field): + if field.name != "col1" { + fatalError("Field name was incorrect expected: col1 but found: \(field.name)") + } + + if field.type.id != ArrowTypeId.string { + fatalError("Field type was incorrect expected: string but found: \(field.type.id)") + } + case .failure(let err): + fatalError("Error importing string type to swift: \(err)") + } +} + +@_cdecl("arrayIntFromSwift") +func arrayIntFromSwift(cArray: UnsafePointer) { + do { + let unsafePointer = UnsafeMutablePointer(mutating: cArray) + let arrayBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + for index in 0..<100 { + arrayBuilder.append(Int32(index)) + } + + let array = try arrayBuilder.finish() + let exporter = ArrowCExporter() + exporter.exportArray(&unsafePointer.pointee, arrowData: array.arrowData) + } catch let err { + fatalError("Error exporting array from swift \(err)") + } +} + +@_cdecl("arrayStringFromSwift") +func arrayStringFromSwift(cArray: UnsafePointer) { + do { + let unsafePointer = UnsafeMutablePointer(mutating: cArray) + let arrayBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + for index in 0..<100 { + arrayBuilder.append("test" + String(index)) + } + + let array = try arrayBuilder.finish() + let exporter = ArrowCExporter() + exporter.exportArray(&unsafePointer.pointee, arrowData: array.arrowData) + } catch let err { + fatalError("Error exporting array from swift \(err)") + } +} + +@_cdecl("arrayIntToSwift") +func arrayIntToSwift(cArray: UnsafePointer) { + let importer = ArrowCImporter() + switch importer.importArray(cArray, arrowType: ArrowType(ArrowType.ArrowInt32)) { + case .success(let int32Holder): + let result = RecordBatch.Builder() + .addColumn("col1", arrowArray: int32Holder) + .finish() + switch result { + case .success(let recordBatch): + let col1: Arrow.ArrowArray = recordBatch.data(for: 0) + for index in 0..) { + let importer = ArrowCImporter() + switch importer.importArray(cArray, arrowType: ArrowType(ArrowType.ArrowString)) { + case .success(let dataHolder): + let result = RecordBatch.Builder() + .addColumn("col1", arrowArray: dataHolder) + .finish() + switch result { + case .success(let recordBatch): + let col1: Arrow.ArrowArray = recordBatch.data(for: 0) + for index in 0.. 
+#include "go_swift.h" +*/ +import "C" +import ( + "strconv" + "unsafe" + + "github.com/apache/arrow/go/v16/arrow" + "github.com/apache/arrow/go/v16/arrow/array" + "github.com/apache/arrow/go/v16/arrow/cdata" + "github.com/apache/arrow/go/v16/arrow/memory" +) + +func stringTypeFromSwift() { + arrowSchema := &cdata.CArrowSchema{} + swSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(arrowSchema)) + C.stringTypeFromSwift(swSchema) + gofield, _ := cdata.ImportCArrowField(arrowSchema) + if gofield.Name != "col1" { + panic("Imported type has incorrect name") + } +} + +func stringTypeToSwift() { + arrowSchema := &cdata.CArrowSchema{} + swSchema := (*C.struct_ArrowSchema)(unsafe.Pointer(arrowSchema)) + C.stringTypeFromSwift(swSchema) + gofield, _ := cdata.ImportCArrowField(arrowSchema) + if gofield.Name != "col1" { + panic("Imported type has incorrect name") + } +} + +func arrayStringFromSwift() { + arrowArray := &cdata.CArrowArray{} + swarray := (*C.struct_ArrowArray)(unsafe.Pointer(arrowArray)) + C.arrayStringFromSwift(swarray) + arr, _ := cdata.ImportCArrayWithType(arrowArray, arrow.BinaryTypes.String) + if arr.Len() != 100 { + panic("Array length is incorrect") + } + + for i := 0; i < 100; i++ { + if arr.ValueStr(i) != ("test" + strconv.Itoa(i)) { + panic("Array value is incorrect") + } + } +} + +func arrayIntFromSwift() { + arrowArray := &cdata.CArrowArray{} + swarray := (*C.struct_ArrowArray)(unsafe.Pointer(arrowArray)) + C.arrayIntFromSwift(swarray) + arr, _ := cdata.ImportCArrayWithType(arrowArray, arrow.PrimitiveTypes.Int32) + if arr.Len() != 100 { + panic("Array length is incorrect") + } + + vals := arr.(*array.Int32).Int32Values() + // and that the values are correct + for i, v := range vals { + if v != int32(i) { + panic("Array value is incorrect") + } + } +} + +func arrayIntToSwift() { + bld := array.NewUint32Builder(memory.DefaultAllocator) + defer bld.Release() + bld.AppendValues([]uint32{1, 2, 3, 4}, []bool{true, true, true, true}) + goarray := bld.NewUint32Array() + var carray cdata.CArrowArray + cdata.ExportArrowArray(goarray, &carray, nil) + swarray := (*C.struct_ArrowArray)(unsafe.Pointer(&carray)) + C.arrayIntToSwift(swarray) + + if swarray.release != nil { + panic("Release was not called by swift to deallocate C array") + } +} + +func arrayStringToSwift() { + bld := array.NewStringBuilder(memory.DefaultAllocator) + defer bld.Release() + bld.AppendValues([]string{"test0", "test1", "test2", "test3"}, []bool{true, true, true, true}) + goarray := bld.NewStringArray() + var carray cdata.CArrowArray + cdata.ExportArrowArray(goarray, &carray, nil) + swarray := (*C.struct_ArrowArray)(unsafe.Pointer(&carray)) + C.arrayStringToSwift(swarray) + + if swarray.release != nil { + panic("Release was not called by swift to deallocate C array") + } +} + +func main() { + stringTypeFromSwift() + stringTypeToSwift() + arrayStringFromSwift() + arrayIntFromSwift() + arrayIntToSwift() + arrayStringToSwift() +} From 9f5899019d23b2b1eae2fedb9f6be8827885d843 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Wed, 29 May 2024 14:14:17 +0900 Subject: [PATCH 193/261] GH-41679: [Release][Packaging][deb] Update package name in 01-preparesh too (#41859) ### Rationale for this change It's needed when we publish minor release. For example: ```console $ dev/release/01-prepare.sh 16.0.0 17.0.0 # Release 16.0.0 ... $ dev/release/post-11-bump-versions.sh 16.0.0 17.0.0 # Released 16.0.0 ... $ dev/release/01-prepare.sh 16.1.0 17.0.0 # Release 16.1.0: This is effected ... 
$ dev/release/post-11-bump-versions.sh 16.1.0 17.0.0 # Released 16.1.0 ``` We can't detect minor release in `post-11-bump-versions.sh`. ### What changes are included in this PR? Share update codes via `utils-prepare.sh` and use the same logic in `01-prepare.sh` too. Linux packages related update code are also shared but it's not related to this change. Sorry. ### Are these changes tested? Yes. ### Are there any user-facing changes? Yes. * GitHub Issue: #41679 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- dev/release/01-prepare-test.rb | 45 ++++++++++++++-- dev/release/01-prepare.sh | 15 +++--- dev/release/post-11-bump-versions-test.rb | 8 +-- dev/release/post-11-bump-versions.sh | 50 ++---------------- dev/release/test-helper.rb | 29 +++++++++-- dev/release/utils-prepare.sh | 63 ++++++++++++++++++++++- 6 files changed, 142 insertions(+), 68 deletions(-) diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 11e75612818ac..bf6cfede15c81 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -51,6 +51,45 @@ def prepare(*targets) sh(env, "dev/release/01-prepare.sh", @release_version, @next_version, "0") end + data(:release_type, [nil, :major, :minor, :patch]) + def test_deb_package_names + omit_on_release_branch + current_commit = git_current_commit + stdout = prepare("DEB_PACKAGE_NAMES") + changes = parse_patch(git("log", "-p", "#{current_commit}..")) + sampled_changes = changes.collect do |change| + first_hunk = change[:hunks][0] + first_removed_line = first_hunk.find { |line| line.start_with?("-") } + first_added_line = first_hunk.find { |line| line.start_with?("+") } + { + sampled_diff: [first_removed_line, first_added_line], + path: change[:path], + } + end + case release_type + when :major, :minor + expected_changes = [ + { + sampled_diff: [ + "-Package: libarrow#{@snapshot_so_version}", + "+Package: libarrow#{@so_version}", + ], + path: "dev/tasks/linux-packages/apache-arrow/debian/control.in", + }, + { + sampled_diff: [ + "- - libarrow-acero#{@snapshot_so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + "+ - libarrow-acero#{@so_version}-dbgsym_{no_rc_version}-1_[a-z0-9]+.d?deb", + ], + path: "dev/tasks/tasks.yml", + }, + ] + else + expected_changes = [] + end + assert_equal(expected_changes, sampled_changes, "Output:\n#{stdout}") + end + def test_linux_packages user = "Arrow Developers" email = "dev@arrow.apache.org" @@ -96,7 +135,7 @@ def test_linux_packages assert_equal(expected_changes, sampled_changes, "Output:\n#{stdout}") end - data(:release_type, [:major, :minor, :patch]) + data(:next_release_type, [:major, :minor, :patch]) def test_version_pre_tag omit_on_release_branch @@ -158,7 +197,7 @@ def test_version_pre_tag ], }, ] - unless release_type == :patch + unless next_release_type == :patch expected_changes += [ { path: "docs/source/_static/versions.json", @@ -236,7 +275,7 @@ def test_version_pre_tag ], }, ] - if release_type == :major + if next_release_type == :major expected_changes += [ { path: "r/pkgdown/assets/versions.json", diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh index 01fa2f3d80345..e4c62e6323c23 100755 --- a/dev/release/01-prepare.sh +++ b/dev/release/01-prepare.sh @@ -39,6 +39,7 @@ release_candidate_branch="release-${version}-rc${rc_number}" : ${PREPARE_DEFAULT:=1} : ${PREPARE_CHANGELOG:=${PREPARE_DEFAULT}} +: ${PREPARE_DEB_PACKAGE_NAMES:=${PREPARE_DEFAULT}} : ${PREPARE_LINUX_PACKAGES:=${PREPARE_DEFAULT}} : ${PREPARE_VERSION_PRE_TAG:=${PREPARE_DEFAULT}} : 
${PREPARE_BRANCH:=${PREPARE_DEFAULT}} @@ -78,16 +79,12 @@ if [ ${PREPARE_CHANGELOG} -gt 0 ]; then git commit -m "MINOR: [Release] Update CHANGELOG.md for $version" fi +if [ ${PREPARE_DEB_PACKAGE_NAMES} -gt 0 ]; then + update_deb_package_names "$(current_version)" "${version}" +fi + if [ ${PREPARE_LINUX_PACKAGES} -gt 0 ]; then - echo "Updating .deb/.rpm changelogs for $version" - cd $SOURCE_DIR/../tasks/linux-packages - rake \ - version:update \ - ARROW_RELEASE_TIME="$(date +%Y-%m-%dT%H:%M:%S%z)" \ - ARROW_VERSION=${version} - git add */debian*/changelog */yum/*.spec.in - git commit -m "MINOR: [Release] Update .deb/.rpm changelogs for $version" - cd - + update_linux_packages "${version}" "$(date +%Y-%m-%dT%H:%M:%S%z)" fi if [ ${PREPARE_VERSION_PRE_TAG} -gt 0 ]; then diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 5706b1303667a..966c723f70adf 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -74,7 +74,7 @@ def bump_versions(*targets) end end - data(:release_type, [:major, :minor, :patch]) + data(:next_release_type, [:major, :minor, :patch]) def test_version_post_tag omit_on_release_branch @@ -136,7 +136,7 @@ def test_version_post_tag ], }, ] - unless release_type == :patch + unless next_release_type == :patch expected_changes += [ { path: "docs/source/_static/versions.json", @@ -202,7 +202,7 @@ def test_version_post_tag ], }, ] - if release_type == :major + if next_release_type == :major expected_changes += [ { path: "c_glib/tool/generate-version-header.py", @@ -276,7 +276,7 @@ def test_version_post_tag import_path = "github.com/apache/arrow/go/v#{@snapshot_major_version}" hunks = [] - if release_type == :major + if next_release_type == :major lines = File.readlines(path, chomp: true) target_lines = lines.each_with_index.select do |line, i| line.include?(import_path) diff --git a/dev/release/post-11-bump-versions.sh b/dev/release/post-11-bump-versions.sh index 93eb15e0921c8..422821a66bde5 100755 --- a/dev/release/post-11-bump-versions.sh +++ b/dev/release/post-11-bump-versions.sh @@ -41,10 +41,6 @@ version=$1 next_version=$2 next_version_snapshot="${next_version}-SNAPSHOT" -current_version=$(grep ARROW_VERSION "${SOURCE_DIR}/../../cpp/CMakeLists.txt" | \ - head -n1 | \ - grep -E -o '([0-9]+\.[0-9]+\.[0-9]+)') - case "${version}" in *.0.0) is_major_release=1 @@ -68,52 +64,12 @@ if [ ${BUMP_VERSION_POST_TAG} -gt 0 ]; then fi if [ ${BUMP_DEB_PACKAGE_NAMES} -gt 0 ] && \ - [ "${next_version}" != "${current_version}" ]; then - echo "Updating .deb package names for ${next_version}" - so_version() { - local version=$1 - local major_version=$(echo $version | sed -E -e 's/^([0-9]+)\.[0-9]+\.[0-9]+$/\1/') - local minor_version=$(echo $version | sed -E -e 's/^[0-9]+\.([0-9]+)\.[0-9]+$/\1/') - expr ${major_version} \* 100 + ${minor_version} - } - deb_lib_suffix=$(so_version $version) - next_deb_lib_suffix=$(so_version $next_version) - if [ "${deb_lib_suffix}" != "${next_deb_lib_suffix}" ]; then - cd $SOURCE_DIR/../tasks/linux-packages/apache-arrow - for target in debian*/lib*${deb_lib_suffix}.install; do - git mv \ - ${target} \ - $(echo $target | sed -e "s/${deb_lib_suffix}/${next_deb_lib_suffix}/") - done - deb_lib_suffix_substitute_pattern="s/(lib(arrow|gandiva|parquet)[-a-z]*)${deb_lib_suffix}/\\1${next_deb_lib_suffix}/g" - sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" debian*/control* - rm -f debian*/control*.bak - git add debian*/control* - cd - - cd $SOURCE_DIR/../tasks/ - sed 
-i.bak -E -e "${deb_lib_suffix_substitute_pattern}" tasks.yml - rm -f tasks.yml.bak - git add tasks.yml - cd - - cd $SOURCE_DIR - sed -i.bak -E -e "${deb_lib_suffix_substitute_pattern}" rat_exclude_files.txt - rm -f rat_exclude_files.txt.bak - git add rat_exclude_files.txt - git commit -m "MINOR: [Release] Update .deb package names for $next_version" - cd - - fi + [ "${next_version}" != "$(current_version)" ]; then + update_deb_package_names "${version}" "${next_version}" fi if [ ${BUMP_LINUX_PACKAGES} -gt 0 ]; then - echo "Updating .deb/.rpm changelogs for $version" - cd $SOURCE_DIR/../tasks/linux-packages - rake \ - version:update \ - ARROW_RELEASE_TIME="$(git log -n1 --format=%aI apache-arrow-${version})" \ - ARROW_VERSION=${version} - git add */debian*/changelog */yum/*.spec.in - git commit -m "MINOR: [Release] Update .deb/.rpm changelogs for $version" - cd - + update_linux_packages "${version}" "$(git log -n1 --format=%aI apache-arrow-${version})" fi if [ ${BUMP_PUSH} -gt 0 ]; then diff --git a/dev/release/test-helper.rb b/dev/release/test-helper.rb index 3b2c3aa6e5874..82400bae2793b 100644 --- a/dev/release/test-helper.rb +++ b/dev/release/test-helper.rb @@ -96,7 +96,11 @@ def parse_patch(patch) module VersionDetectable def release_type - (data || {})[:release_type] || :major + (data || {})[:release_type] + end + + def next_release_type + (data || {})[:next_release_type] || :major end def detect_versions @@ -104,19 +108,36 @@ def detect_versions cpp_cmake_lists = top_dir + "cpp" + "CMakeLists.txt" @snapshot_version = cpp_cmake_lists.read[/ARROW_VERSION "(.+?)"/, 1] @snapshot_major_version = @snapshot_version.split(".")[0] - @release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "") + @snapshot_so_version = compute_so_version(@snapshot_version.split("-")[0]) + release_version = @snapshot_version.gsub(/-SNAPSHOT\z/, "") + release_version_components = release_version.split(".") + case release_type + when nil + when :major + release_version_components[0].succ! + when :minor + release_version_components[1].succ! + when :patch + release_version_components[2].succ! + else + raise "unknown release type: #{release_type.inspect}" + end + @release_version = release_version_components.join(".") @release_compatible_version = @release_version.split(".")[0, 2].join(".") @so_version = compute_so_version(@release_version) next_version_components = @release_version.split(".") - case release_type + case next_release_type when :major next_version_components[0].succ! + next_version_components[1] = 0 + next_version_components[2] = 0 when :minor next_version_components[1].succ! + next_version_components[2] = 0 when :patch next_version_components[2].succ! 
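
Two pieces of arithmetic drive these release-script changes: the `.deb` library suffix is `major * 100 + minor` (the `so_version()` shell helper removed above), and the next version is derived by bumping one component and zeroing the ones below it (the Ruby `succ!` logic around this point). The suffix rule, restated compactly in C:

```c
#include <stdio.h>

/* .deb suffix rule from the removed so_version() shell helper:
 * major * 100 + minor. A minor release changes the suffix and so
 * renames every versioned .deb package -- the case 01-prepare.sh
 * previously missed. */
static int so_version(int major, int minor) {
    return major * 100 + minor;
}

int main(void) {
    printf("16.0.0 -> libarrow%d\n", so_version(16, 0)); /* libarrow1600 */
    printf("16.1.0 -> libarrow%d\n", so_version(16, 1)); /* libarrow1601 */
    printf("17.0.0 -> libarrow%d\n", so_version(17, 0)); /* libarrow1700 */
    return 0;
}
```
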
else - raise "unknown release type: #{release_type.inspect}" + raise "unknown next release type: #{next_release_type.inspect}" end @next_version = next_version_components.join(".") @next_major_version = @next_version.split(".")[0] diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 015f7109cd251..dfe9b052b09fa 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -143,7 +143,7 @@ update_versions() { DESCRIPTION rm -f DESCRIPTION.bak git add DESCRIPTION - + # Replace dev version with release version sed -i.bak -E -e \ "/^ completion counter From da0eb7e9fc90190b616aa85635d561d03e1ffe67 Mon Sep 17 00:00:00 2001 From: abandy Date: Wed, 29 May 2024 16:55:13 -0400 Subject: [PATCH 197/261] MINOR: [Swift] cleanup some go and C++ artifacts (#41878) ### Rationale for this change Follow up changes requested in the #41342 ### What changes are included in this PR? Update includes header file and go dependencies changes related to the C Data interface changes. Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/ArrowC/ArrowCData.c | 1 + .../Arrow/Sources/ArrowC/include/ArrowCData.h | 5 +- swift/CDataWGo/go.mod | 28 ++--- swift/CDataWGo/go.sum | 106 ++++++------------ 4 files changed, 49 insertions(+), 91 deletions(-) diff --git a/swift/Arrow/Sources/ArrowC/ArrowCData.c b/swift/Arrow/Sources/ArrowC/ArrowCData.c index ac366febdaed8..fe0f80899719b 100644 --- a/swift/Arrow/Sources/ArrowC/ArrowCData.c +++ b/swift/Arrow/Sources/ArrowC/ArrowCData.c @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include "include/ArrowCData.h" void ArrowSwiftClearReleaseSchema(struct ArrowSchema* arrowSchema) { diff --git a/swift/Arrow/Sources/ArrowC/include/ArrowCData.h b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h index 9df8992114be3..4b2f35efcb961 100644 --- a/swift/Arrow/Sources/ArrowC/include/ArrowCData.h +++ b/swift/Arrow/Sources/ArrowC/include/ArrowCData.h @@ -22,10 +22,7 @@ #define ARROW_FLAG_NULLABLE 2 #define ARROW_FLAG_MAP_KEYS_SORTED 4 -#include -#include -#include // Must have this! 
- +#include // For int64_t #ifdef __cplusplus extern "C" { diff --git a/swift/CDataWGo/go.mod b/swift/CDataWGo/go.mod index 631dd58e74bf7..323b5daac2a1e 100644 --- a/swift/CDataWGo/go.mod +++ b/swift/CDataWGo/go.mod @@ -18,24 +18,18 @@ module go-swift go 1.21 +require github.com/apache/arrow/go/v16 v16.1.0 + require ( - github.com/andybalholm/brotli v1.0.5 // indirect - github.com/apache/arrow/go/v12 v12.0.1 // indirect - github.com/apache/arrow/go/v16 v16.0.0-20240203105949-22f2cfd1e1eb // indirect - github.com/apache/thrift v0.17.0 // indirect github.com/goccy/go-json v0.10.2 // indirect - github.com/golang/snappy v0.0.4 // indirect - github.com/google/flatbuffers v23.5.26+incompatible // indirect - github.com/klauspost/asmfmt v1.3.2 // indirect - github.com/klauspost/compress v1.16.7 // indirect - github.com/klauspost/cpuid/v2 v2.2.5 // indirect - github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect - github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect - github.com/pierrec/lz4/v4 v4.1.18 // indirect + github.com/google/flatbuffers v24.3.25+incompatible // indirect + github.com/klauspost/compress v1.17.7 // indirect + github.com/klauspost/cpuid/v2 v2.2.7 // indirect + github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect - golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect - golang.org/x/mod v0.13.0 // indirect - golang.org/x/sys v0.13.0 // indirect - golang.org/x/tools v0.14.0 // indirect - golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect + golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 // indirect + golang.org/x/mod v0.16.0 // indirect + golang.org/x/sys v0.18.0 // indirect + golang.org/x/tools v0.19.0 // indirect + golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect ) diff --git a/swift/CDataWGo/go.sum b/swift/CDataWGo/go.sum index 983c3cecaa521..e0c7e6f2747c0 100644 --- a/swift/CDataWGo/go.sum +++ b/swift/CDataWGo/go.sum @@ -1,75 +1,41 @@ -github.com/andybalholm/brotli v1.0.4 h1:V7DdXeJtZscaqfNuAdSRuRFzuiKlHSC/Zh3zl9qY3JY= -github.com/andybalholm/brotli v1.0.4/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/andybalholm/brotli v1.0.5 h1:8uQZIdzKmjc/iuPu7O2ioW48L81FgatrcpfFmiq/cCs= -github.com/andybalholm/brotli v1.0.5/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= -github.com/apache/arrow/go/v12 v12.0.1 h1:JsR2+hzYYjgSUkBSaahpqCetqZMr76djX80fF/DiJbg= -github.com/apache/arrow/go/v12 v12.0.1/go.mod h1:weuTY7JvTG/HDPtMQxEUp7pU73vkLWMLpY67QwZ/WWw= -github.com/apache/arrow/go/v16 v16.0.0-20240202201540-0fb00fdea7a9 h1:/hG2vtror6DZi7vLBjasliWY+/3GQPnt5FrrLt+boN0= -github.com/apache/arrow/go/v16 v16.0.0-20240202201540-0fb00fdea7a9/go.mod h1:+HkSDKotr3KDBxj7gTVgj8Egy18Y1ECzQdnY5XsXwlQ= -github.com/apache/arrow/go/v16 v16.0.0-20240203105949-22f2cfd1e1eb h1:ox9Zl3OSD9yFHffSUw+mEYVnlC13je3+CGxaR5wIZmA= -github.com/apache/arrow/go/v16 v16.0.0-20240203105949-22f2cfd1e1eb/go.mod h1:+HkSDKotr3KDBxj7gTVgj8Egy18Y1ECzQdnY5XsXwlQ= -github.com/apache/thrift v0.16.0 h1:qEy6UW60iVOlUy+b9ZR0d5WzUWYGOo4HfopoyBaNmoY= -github.com/apache/thrift v0.16.0/go.mod h1:PHK3hniurgQaNMZYaCLEqXKsYK8upmhPbmdP2FXSqgU= -github.com/apache/thrift v0.17.0 h1:cMd2aj52n+8VoAtvSvLn4kDC3aZ6IAkBuqWQ2IDu7wo= -github.com/apache/thrift v0.17.0/go.mod h1:OLxhMRJxomX+1I/KUw03qoV3mMz16BwaKI+d4fPBx7Q= -github.com/goccy/go-json v0.9.11 h1:/pAaQDLHEoCq/5FFmSKBswWmK6H0e8g4159Kc/X/nqk= -github.com/goccy/go-json v0.9.11/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 
+github.com/apache/arrow/go/v16 v16.1.0 h1:dwgfOya6s03CzH9JrjCBx6bkVb4yPD4ma3haj9p7FXI= +github.com/apache/arrow/go/v16 v16.1.0/go.mod h1:9wnc9mn6vEDTRIm4+27pEjQpRKuTvBaessPoEXQzxWA= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/golang/mock v1.5.0/go.mod h1:CWnOUgYIOo4TcNZ0wHX3YZCqsaM1I1Jvs6v3mP3KVu8= -github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= -github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= -github.com/google/flatbuffers v2.0.8+incompatible h1:ivUb1cGomAB101ZM1T0nOiWz9pSrTMoa9+EiY7igmkM= -github.com/google/flatbuffers v2.0.8+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/google/flatbuffers v23.5.26+incompatible h1:M9dgRyhJemaM4Sw8+66GHBu8ioaQmyPLg1b8VwK5WJg= -github.com/google/flatbuffers v23.5.26+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= -github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= -github.com/klauspost/asmfmt v1.3.2/go.mod h1:AG8TuvYojzulgDAMCnYn50l/5QV3Bs/tp6j0HLHbNSE= -github.com/klauspost/compress v1.15.9 h1:wKRjX6JRtDdrE9qwa4b/Cip7ACOshUI4smpCQanqjSY= -github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU= -github.com/klauspost/compress v1.16.7 h1:2mk3MPGNzKyxErAw8YaohYh69+pa4sIQSC0fPGCFR9I= -github.com/klauspost/compress v1.16.7/go.mod h1:ntbaceVETuRiXiv4DpjP66DpAtAGkEQskQzEyD//IeE= -github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= -github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= -github.com/klauspost/cpuid/v2 v2.2.5 h1:0E5MSMDEoAulmXNFquVs//DdoomxaoTY1kUhbc/qbZg= -github.com/klauspost/cpuid/v2 v2.2.5/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= -github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpspGNG7Z948v4n35fFGB3RR3G/ry4FWs= -github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= -github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= -github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= -github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= -github.com/pierrec/lz4/v4 v4.1.18 h1:xaKrnTkyoqfh1YItXl56+6KJNVYWlEEPuAQW9xsplYQ= -github.com/pierrec/lz4/v4 v4.1.18/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/google/flatbuffers v24.3.25+incompatible h1:CX395cjN9Kke9mmalRoL3d81AtFUxJM+yDthflgJGkI= +github.com/google/flatbuffers v24.3.25+incompatible/go.mod h1:1AeVuKshWv4vARoZatz6mlQ0JxURH0Kv5+zNeJKJCa8= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/klauspost/compress v1.17.7 h1:ehO88t2UGzQK66LMdE8tibEd1ErmzZjNEqWkjLAKQQg= +github.com/klauspost/compress v1.17.7/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= +github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 
+github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= +github.com/pierrec/lz4/v4 v4.1.21 h1:yOVMLb6qSIDP67pl/5F7RepeKYu/VmTyEXvuMI5d9mQ= +github.com/pierrec/lz4/v4 v4.1.21/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= +github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= -golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.8.0 h1:LUYupSeNrTNCGzR/hVBk2NHZO4hXcVaW1k4Qx7rjPx8= -golang.org/x/mod v0.8.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= -golang.org/x/mod v0.13.0 h1:I/DsJXRlw/8l/0c24sM9yb0T4z9liZTduXvdAWYiysY= -golang.org/x/mod v0.13.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= +golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= +golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic= +golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/sync v0.6.0 h1:5BMeUDZ7vkXGfEr1x9B4bRcTH4lpkTkpdh0T/J+qjbQ= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE= -golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.6.0 h1:BOw41kyTf3PuCW1pVQf8+Cyg8pMlkYB1oo9iJ6D/lKM= -golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.14.0 
h1:jvNa2pY0M4r62jkRQ6RwEZZyPcymeL9XZMLBbV7U2nc= -golang.org/x/tools v0.14.0/go.mod h1:uYBEerGOWcJyEORxN+Ek8+TT266gXkNlHdJBwexUsBg= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f h1:uF6paiQQebLeSXkrTqHqz0MXhXXS1KgF41eUdBNvxK0= -golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 h1:H2TDz8ibqkAF6YGhCdN3jS9O0/s90v0rJh3X/OLHEUk= -golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2/go.mod h1:K8+ghG5WaK9qNqU5K3HdILfMLy1f3aNYFI/wnl100a8= +golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4= +golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw= +golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= +golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= +gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= +gonum.org/v1/gonum v0.15.0/go.mod h1:xzZVBJBtS+Mz4q0Yl2LJTk+OxOg4jiXZ7qBoM0uISGo= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= From 6800be9331d88024bf550c77865a06c592a22699 Mon Sep 17 00:00:00 2001 From: Neal Richardson Date: Wed, 29 May 2024 21:30:49 -0400 Subject: [PATCH 198/261] MINOR: [R] Remove writing_bindings from _pkgdown.yml (#41877) ### Rationale for this change Missed this in #41576 ### Are these changes tested? We should make sure. ### Are there any user-facing changes? No. --- r/_pkgdown.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/r/_pkgdown.yml b/r/_pkgdown.yml index 1ce35d2a546ca..ceb68d773bdb4 100644 --- a/r/_pkgdown.yml +++ b/r/_pkgdown.yml @@ -137,7 +137,6 @@ articles: - developers/workflow - developers/debugging - developers/docker - - developers/writing_bindings - developers/install_details - developers/data_object_layout From 6c15eb8e8453b57b44c2a6975b81314978bebd8b Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 30 May 2024 16:59:38 -0700 Subject: [PATCH 199/261] MINOR: [Java] Update develocity access key environment variable (#41880) ### Rationale for this change It was not mentioned in the migration document but `GRADLE_ENTERPRISE_ACCESS_KEY` environment variable is being deprecated and replaced with `DEVELOCITY_ACCESS_KEY` ### What changes are included in this PR? Changing github java workflows referencing the legacy variable to use the new environment variable ### Are these changes tested? No ### Are there any user-facing changes? 
No Authored-by: Laurent Goujon Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/java.yml | 12 ++++++------ .github/workflows/java_jni.yml | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/java.yml b/.github/workflows/java.yml index e92d3f4fc5877..e31f7a4fc4d27 100644 --- a/.github/workflows/java.yml +++ b/.github/workflows/java.yml @@ -86,11 +86,11 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: | archery docker run \ -e CI=true \ - -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \ + -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ ${{ matrix.image }} - name: Docker Push if: >- @@ -127,12 +127,12 @@ jobs: - name: Build shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - name: Test shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_test.sh $(pwd) $(pwd)/build windows: @@ -158,10 +158,10 @@ jobs: - name: Build shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_build.sh $(pwd) $(pwd)/build - name: Test shell: bash env: - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: ci/scripts/java_test.sh $(pwd) $(pwd)/build diff --git a/.github/workflows/java_jni.yml b/.github/workflows/java_jni.yml index 958216ac7669d..059a7430a38ce 100644 --- a/.github/workflows/java_jni.yml +++ b/.github/workflows/java_jni.yml @@ -120,11 +120,11 @@ jobs: env: ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }} ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }} - GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} + DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} run: | archery docker run \ -e CI=true \ - -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \ + -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \ conda-python-java-integration - name: Docker Push if: >- From 706b3e09e1c7db3c7383ecb6426b86462d7abd16 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Fri, 31 May 2024 07:34:46 +0530 Subject: [PATCH 200/261] GH-40932: [Java] Implement TransferPair functionality for StringView (#41861) ### Rationale for this change StringView implementation requires transferPair functionality which is required for C Data interface implementation as well. ### What changes are included in this PR? Adding transferPair functionality and corresponding test cases. ### Are these changes tested? Yes ### Are there any user-facing changes? 
No * GitHub Issue: #40932 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../vector/BaseVariableWidthViewVector.java | 166 +++++- .../arrow/vector/ViewVarCharVector.java | 52 +- .../arrow/vector/TestSplitAndTransfer.java | 250 ++++++++- .../arrow/vector/TestVarCharViewVector.java | 498 ++++++++++++++++++ 4 files changed, 940 insertions(+), 26 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index aaa8098b690fd..dffb4a39a9cd6 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -814,7 +814,20 @@ public TransferPair getTransferPair(BufferAllocator allocator) { * @param target destination vector for transfer */ public void transferTo(BaseVariableWidthViewVector target) { - throw new UnsupportedOperationException("trasferTo function not supported!"); + compareTypes(target, "transferTo"); + target.clear(); + target.validityBuffer = transferBuffer(validityBuffer, target.allocator); + target.viewBuffer = transferBuffer(viewBuffer, target.allocator); + target.dataBuffers = new ArrayList<>(dataBuffers.size()); + for (int i = 0; i < dataBuffers.size(); i++) { + target.dataBuffers.add(transferBuffer(dataBuffers.get(i), target.allocator)); + } + + target.setLastSet(this.lastSet); + if (this.valueCount > 0) { + target.setValueCount(this.valueCount); + } + clear(); } /** @@ -826,7 +839,154 @@ public void transferTo(BaseVariableWidthViewVector target) { */ public void splitAndTransferTo(int startIndex, int length, BaseVariableWidthViewVector target) { - throw new UnsupportedOperationException("splitAndTransferTo function not supported!"); + Preconditions.checkArgument(startIndex >= 0 && length >= 0 && startIndex + length <= valueCount, + "Invalid parameters startIndex: %s, length: %s for valueCount: %s", startIndex, length, valueCount); + compareTypes(target, "splitAndTransferTo"); + target.clear(); + if (length > 0) { + splitAndTransferValidityBuffer(startIndex, length, target); + splitAndTransferViewBufferAndDataBuffer(startIndex, length, target); + target.setLastSet(length - 1); + target.setValueCount(length); + } + } + + /* allocate validity buffer */ + private void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + initValidityBuffer(); + } + + /* + * Transfer the validity. + */ + private void splitAndTransferValidityBuffer(int startIndex, int length, + BaseVariableWidthViewVector target) { + if (length <= 0) { + return; + } + + final int firstByteSource = BitVectorHelper.byteIndex(startIndex); + final int lastByteSource = BitVectorHelper.byteIndex(valueCount - 1); + final int byteSizeTarget = getValidityBufferSizeFromCount(length); + final int offset = startIndex % 8; + + if (offset == 0) { + // slice + if (target.validityBuffer != null) { + target.validityBuffer.getReferenceManager().release(); + } + final ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); + return; + } + + /* Copy data + * When the first bit starts from the middle of a byte (offset != 0), + * copy data from src BitVector. + * Each byte in the target is composed by a part in i-th byte, + * another part in (i+1)-th byte. 
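
The copy described in this comment is ordinary shift-and-or arithmetic over Arrow's LSB-first validity bitmap. A self-contained C version of the same stitching, including a boundary check that mirrors the Java `lastByteSource` handling (a sketch, not the library's code):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Copy `count` bits starting at bit `start` of `src` into `dst`
 * (dst starts at bit 0), over an LSB-first bitmap. When the slice is
 * not byte aligned, each output byte stitches the high bits of source
 * byte i together with the low bits of source byte i + 1. */
static void copy_bits(const uint8_t* src, int64_t start, int64_t count,
                      uint8_t* dst) {
    if (count <= 0) return;
    int64_t first = start / 8;
    int64_t last = (start + count - 1) / 8; /* last byte we may read */
    int offset = (int)(start % 8);
    int64_t out_bytes = (count + 7) / 8;
    if (offset == 0) {             /* aligned: the Java code just slices */
        memcpy(dst, src + first, (size_t)out_bytes);
        return;
    }
    for (int64_t i = 0; i < out_bytes; i++) {
        uint8_t b = (uint8_t)(src[first + i] >> offset);
        if (first + i + 1 <= last) {  /* mirrors the lastByteSource check */
            b |= (uint8_t)(src[first + i + 1] << (8 - offset));
        }
        dst[i] = b;
    }
}

int main(void) {
    const uint8_t src[] = {0xF0, 0x0F};  /* bits 4..11 set, LSB-first */
    uint8_t dst[1] = {0};
    copy_bits(src, 4, 8, dst);           /* slice bits 4..11 */
    printf("0x%02X\n", dst[0]);          /* prints 0xFF */
    return 0;
}
```
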
+ */ + target.allocateValidityBuffer(byteSizeTarget); + + for (int i = 0; i < byteSizeTarget - 1; i++) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, firstByteSource + i, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, firstByteSource + i + 1, offset); + + target.validityBuffer.setByte(i, (b1 + b2)); + } + /* Copying the last piece is done in the following manner: + * if the source vector has 1 or more bytes remaining, we copy + * the last piece as a byte formed by shifting data + * from the current byte and the next byte. + * + * if the source vector has no more bytes remaining + * (we are at the last byte), we copy the last piece as a byte + * by shifting data from the current byte. + */ + if ((firstByteSource + byteSizeTarget - 1) < lastByteSource) { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + byte b2 = BitVectorHelper.getBitsFromNextByte(this.validityBuffer, + firstByteSource + byteSizeTarget, offset); + + target.validityBuffer.setByte(byteSizeTarget - 1, b1 + b2); + } else { + byte b1 = BitVectorHelper.getBitsFromCurrentByte(this.validityBuffer, + firstByteSource + byteSizeTarget - 1, offset); + target.validityBuffer.setByte(byteSizeTarget - 1, b1); + } + } + + /** + * In split and transfer, the view buffer and the data buffer will be allocated. + * Then the values will be copied from the source vector to the target vector. + * Allocation and setting are preferred over transfer + * since the buf index and buf offset needs to be overwritten + * when large strings are added. + * @param startIndex starting index + * @param length number of elements to be copied + * @param target target vector + */ + private void splitAndTransferViewBufferAndDataBuffer(int startIndex, int length, + BaseVariableWidthViewVector target) { + if (length == 0) { + return; + } + + if (target.viewBuffer != null) { + target.viewBuffer.getReferenceManager().release(); + } + + // allocate target view buffer + target.viewBuffer = target.allocator.buffer(length * ELEMENT_SIZE); + + for (int i = startIndex; i < startIndex + length; i++) { + final int stringLength = getValueLength(i); + + // keeping track of writing index in the target view buffer + int writePosition = (i - startIndex) * ELEMENT_SIZE; + // keeping track of reading index in the source view buffer + int readPosition = i * ELEMENT_SIZE; + + // set length + target.viewBuffer.setInt(writePosition, stringLength); + + if (stringLength <= INLINE_SIZE) { + // handle inline buffer + writePosition += LENGTH_WIDTH; + readPosition += LENGTH_WIDTH; + // set data by copying the required portion from the source buffer + target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, stringLength); + } else { + // handle non-inline buffer + final int readBufIndex = viewBuffer.getInt(((long) i * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH); + final int readBufOffset = viewBuffer.getInt(((long) i * ELEMENT_SIZE) + + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH); + final ArrowBuf dataBuf = dataBuffers.get(readBufIndex); + + // allocate data buffer + ArrowBuf currentDataBuf = target.allocateOrGetLastDataBuffer(stringLength); + final long currentOffSet = currentDataBuf.writerIndex(); + + writePosition += LENGTH_WIDTH; + readPosition += LENGTH_WIDTH; + // set prefix + target.viewBuffer.setBytes(writePosition, viewBuffer, readPosition, PREFIX_WIDTH); + writePosition += PREFIX_WIDTH; + // set buf id + 
target.viewBuffer.setInt(writePosition, target.dataBuffers.size() - 1); + writePosition += BUF_INDEX_WIDTH; + // set offset + target.viewBuffer.setInt(writePosition, (int) currentOffSet); + + currentDataBuf.setBytes(currentOffSet, dataBuf, readBufOffset, stringLength); + currentDataBuf.writerIndex(currentOffSet + stringLength); + } + } } /*----------------------------------------------------------------* @@ -972,7 +1132,7 @@ public void setValueLengthSafe(int index, int length) { } /** - * Get the variable length element at specified index as Text. + * Get the length of the element at specified index. * * @param index position of an element to get * @return greater than length 0 for a non-null element, 0 otherwise diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java index d35bf9e4b513b..400f8cb1fc2e0 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarCharVector.java @@ -257,9 +257,7 @@ public void validateScalars() { */ @Override public TransferPair getTransferPair(String ref, BufferAllocator allocator) { - // TODO: https://github.com/apache/arrow/issues/40932 - throw new UnsupportedOperationException( - "ViewVarCharVector does not support getTransferPair(String, BufferAllocator)"); + return new TransferImpl(ref, allocator); } /** @@ -271,21 +269,53 @@ public TransferPair getTransferPair(String ref, BufferAllocator allocator) { */ @Override public TransferPair getTransferPair(Field field, BufferAllocator allocator) { - // TODO: https://github.com/apache/arrow/issues/40932 - throw new UnsupportedOperationException( - "ViewVarCharVector does not support getTransferPair(Field, BufferAllocator)"); + return new TransferImpl(field, allocator); } /** * Construct a TransferPair with a desired target vector of the same type. 
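
The constants behind the split-and-transfer loop above (`ELEMENT_SIZE` 16, `LENGTH_WIDTH`/`PREFIX_WIDTH`/`BUF_INDEX_WIDTH` 4 each, `INLINE_SIZE` 12) are the Utf8View element layout from the Arrow format: every view is 16 bytes, holding the value inline when it is at most 12 bytes and otherwise a 4-byte prefix plus a data-buffer index and offset. That indirection is also why the loop rewrites views instead of transferring the view buffer as-is: a copied buffer index is only meaningful relative to the target vector's own list of data buffers. A C restatement of the write path (a sketch; little-endian assumed, as in the Arrow spec):

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Byte layout of one 16-byte view element, matching the Java constants. */
enum { LENGTH_WIDTH = 4, PREFIX_WIDTH = 4, BUF_INDEX_WIDTH = 4,
       INLINE_SIZE = 12, ELEMENT_SIZE = 16 };

/* Write view element i. Short values are stored inline after the
 * length; longer values store a 4-byte prefix, the index of the data
 * buffer holding the body, and the body's offset in that buffer. */
static void write_view(uint8_t* view_buf, int64_t i,
                       const uint8_t* s, int32_t len,
                       uint8_t* data_buf, int32_t buf_index,
                       int32_t* data_writer) {
    uint8_t* e = view_buf + i * ELEMENT_SIZE;
    memcpy(e, &len, LENGTH_WIDTH);                     /* length */
    if (len <= INLINE_SIZE) {
        memcpy(e + LENGTH_WIDTH, s, (size_t)len);      /* inline body */
    } else {
        int32_t off = *data_writer;
        memcpy(e + LENGTH_WIDTH, s, PREFIX_WIDTH);     /* prefix */
        memcpy(e + LENGTH_WIDTH + PREFIX_WIDTH,
               &buf_index, BUF_INDEX_WIDTH);           /* buffer index */
        memcpy(e + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH,
               &off, 4);                               /* offset */
        memcpy(data_buf + off, s, (size_t)len);        /* body */
        *data_writer += len;
    }
}

int main(void) {
    uint8_t views[2 * ELEMENT_SIZE], data[64];
    int32_t writer = 0;
    const char* s2 = "a string over twelve bytes";
    write_view(views, 0, (const uint8_t*)"hello", 5, data, 0, &writer);
    write_view(views, 1, (const uint8_t*)s2, (int32_t)strlen(s2),
               data, 0, &writer);
    int32_t len1;
    memcpy(&len1, views + ELEMENT_SIZE, 4);
    printf("element 1: length %d, body in data buffer 0\n", len1);
    return 0;
}
```
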
* - * @param target the target for the transfer + * @param to the target for the transfer * @return {@link TransferPair} (UnsupportedOperationException) */ @Override - public TransferPair makeTransferPair(ValueVector target) { - // TODO: https://github.com/apache/arrow/issues/40932 - throw new UnsupportedOperationException( - "ViewVarCharVector does not support makeTransferPair(ValueVector)"); + public TransferPair makeTransferPair(ValueVector to) { + return new TransferImpl((ViewVarCharVector) to); + } + + private class TransferImpl implements TransferPair { + ViewVarCharVector to; + + public TransferImpl(String ref, BufferAllocator allocator) { + to = new ViewVarCharVector(ref, field.getFieldType(), allocator); + } + + public TransferImpl(Field field, BufferAllocator allocator) { + to = new ViewVarCharVector(field, allocator); + } + + public TransferImpl(ViewVarCharVector to) { + this.to = to; + } + + @Override + public ViewVarCharVector getTo() { + return to; + } + + @Override + public void transfer() { + transferTo(to); + } + + @Override + public void splitAndTransfer(int startIndex, int length) { + splitAndTransferTo(startIndex, length, to); + } + + @Override + public void copyValueSafe(int fromIndex, int toIndex) { + to.copyFromSafe(fromIndex, toIndex, ViewVarCharVector.this); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index 396f5665e0382..d2c03930ca37a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -17,10 +17,11 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.HashMap; @@ -38,20 +39,19 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Struct; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestSplitAndTransfer { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -67,6 +67,17 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S vector.setValueCount(valueCount); } + private void populateViewVarcharVector(final ViewVarCharVector vector, int valueCount, String[] compareArray) { + for (int i = 0; i < valueCount; i += 3) { + final String s = String.format("%010d", i); + vector.set(i, s.getBytes(StandardCharsets.UTF_8)); + if (compareArray != null) { + compareArray[i] = s; + } + } + vector.setValueCount(valueCount); + } + private void populateIntVector(final IntVector vector, int valueCount) { for (int i 
= 0; i < valueCount; i++) { vector.set(i, i); @@ -109,6 +120,11 @@ public void testWithEmptyVector() { transferPair = varCharVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); + // BaseVariableWidthViewVector + ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator); + transferPair = viewVarCharVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); // BaseLargeVariableWidthVector LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator); transferPair = largeVarCharVector.getTransferPair(allocator); @@ -209,6 +225,39 @@ public void test() throws Exception { } } + @Test + public void testView() throws Exception { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(10000, 1000); + + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + + populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + + final TransferPair tp = viewVarCharVector.getTransferPair(allocator); + final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; + + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + for (int i = 0; i < length; i++) { + final boolean expectedSet = ((start + i) % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); + assertFalse(newViewVarCharVector.isNull(i)); + assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); + } else { + assertTrue(newViewVarCharVector.isNull(i)); + } + } + newViewVarCharVector.clear(); + } + } + } + @Test public void testMemoryConstrainedTransfer() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -233,6 +282,38 @@ public void testMemoryConstrainedTransfer() { } } + @Test + public void testMemoryConstrainedTransferInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + // Here we have the target vector being transferred with a long string + // hence, the data buffer will be allocated. 
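
Capacity for a view vector splits into two independent budgets, which is what this test pins down with its allocator limit: the view buffer always costs 16 bytes per value, while data-buffer bytes are consumed only by values longer than the 12-byte inline limit (plus a validity bitmap of one bit per value). The `%010d` strings these tests write are 10 bytes, so they stay inline; with 1000 values the view buffer alone is 16,000 bytes, which lines up with the `allocateNew(16000, 1000)` calls above. Note that, as the comment here says, a default data buffer may still be reserved even when no data bytes end up used. A rough back-of-the-envelope model, under those assumptions:

```c
#include <stdint.h>
#include <stdio.h>

/* Rough capacity model for a variable-width view vector: 16 bytes per
 * value in the view buffer; data-buffer bytes only for values longer
 * than the 12-byte inline limit. Validity bitmap ((n+7)/8) omitted. */
static int64_t view_buffer_bytes(int64_t n_values) {
    return n_values * 16;
}

static int64_t data_buffer_bytes(const int32_t* lengths, int64_t n) {
    int64_t total = 0;
    for (int64_t i = 0; i < n; i++) {
        if (lengths[i] > 12) total += lengths[i];
    }
    return total;
}

int main(void) {
    int32_t lengths[1000];
    for (int i = 0; i < 1000; i++) lengths[i] = 10;  /* "%010d" strings */
    printf("view buffer : %lld bytes\n",
           (long long)view_buffer_bytes(1000));           /* 16000 */
    printf("data buffers: %lld bytes\n",
           (long long)data_buffer_bytes(lengths, 1000));  /*     0 */
    return 0;
}
```
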
+ // The default data buffer allocation takes + // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + // BaseVariableWidthViewVector.ELEMENT_SIZE + final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + BaseVariableWidthViewVector.ELEMENT_SIZE; + allocator.setLimit(setLimit); + + viewVarCharVector.allocateNew(16000, 1000); + + final int valueCount = 1000; + + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.getTransferPair(allocator); + final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + final int[][] startLengths = {{0, 700}, {700, 299}}; + + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + newViewVarCharVector.clear(); + } + } + } + @Test public void testTransfer() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -264,6 +345,37 @@ public void testTransfer() { } } + @Test + public void testTransferInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(16000, 1000); + + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + + final TransferPair tp = viewVarCharVector.getTransferPair(allocator); + final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + tp.transfer(); + + assertEquals(0, viewVarCharVector.valueCount); + assertEquals(valueCount, newViewVarCharVector.valueCount); + + for (int i = 0; i < valueCount; i++) { + final boolean expectedSet = (i % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); + assertFalse(newViewVarCharVector.isNull(i)); + assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); + } else { + assertTrue(newViewVarCharVector.isNull(i)); + } + } + + newViewVarCharVector.clear(); + } + } + @Test public void testCopyValueSafe() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -312,6 +424,24 @@ public void testSplitAndTransferNon() { } } + @Test + public void testSplitAndTransferNonInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + + viewVarCharVector.allocateNew(16000, 1000); + final int valueCount = 500; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.getTransferPair(allocator); + ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + + tp.splitAndTransfer(0, 0); + assertEquals(0, newViewVarCharVector.getValueCount()); + + newViewVarCharVector.clear(); + } + } + @Test public void testSplitAndTransferAll() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator)) { @@ -330,6 +460,24 @@ public void testSplitAndTransferAll() { } } + @Test + public void testSplitAndTransferAllInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + + viewVarCharVector.allocateNew(16000, 1000); + final int valueCount = 500; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = 
viewVarCharVector.getTransferPair(allocator); + ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + + tp.splitAndTransfer(0, valueCount); + assertEquals(valueCount, newViewVarCharVector.getValueCount()); + + newViewVarCharVector.clear(); + } + } + @Test public void testInvalidStartIndex() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -341,7 +489,7 @@ public void testInvalidStartIndex() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(valueCount, 10)); @@ -351,6 +499,27 @@ public void testInvalidStartIndex() { } } + @Test + public void testInvalidStartIndexInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + + viewVarCharVector.allocateNew(16000, 1000); + final int valueCount = 500; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(valueCount, 10)); + + assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); + + newViewVarCharVector.clear(); + } + } + @Test public void testInvalidLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -362,7 +531,7 @@ public void testInvalidLength() { final TransferPair tp = varCharVector.makeTransferPair(newVarCharVector); - IllegalArgumentException e = Assertions.assertThrows( + IllegalArgumentException e = assertThrows( IllegalArgumentException.class, () -> tp.splitAndTransfer(0, valueCount * 2)); @@ -372,6 +541,27 @@ public void testInvalidLength() { } } + @Test + public void testInvalidLengthInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + + viewVarCharVector.allocateNew(16000, 1000); + final int valueCount = 500; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(0, valueCount * 2)); + + assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); + + newViewVarCharVector.clear(); + } + } + @Test public void testZeroStartIndexAndLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -390,6 +580,24 @@ public void testZeroStartIndexAndLength() { } } + @Test + public void testZeroStartIndexAndLengthInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + + viewVarCharVector.allocateNew(0, 0); + final int valueCount = 0; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newViewVarCharVector.getValueCount()); + + 
newViewVarCharVector.clear(); + } + } + @Test public void testZeroLength() { try (final VarCharVector varCharVector = new VarCharVector("myvector", allocator); @@ -408,6 +616,24 @@ public void testZeroLength() { } } + @Test + public void testZeroLengthInViews() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + + viewVarCharVector.allocateNew(16000, 1000); + final int valueCount = 500; + populateViewVarcharVector(viewVarCharVector, valueCount, null); + + final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + + tp.splitAndTransfer(500, 0); + assertEquals(0, newViewVarCharVector.getValueCount()); + + newViewVarCharVector.clear(); + } + } + @Test public void testUnionVectorZeroStartIndexAndLength() { try (final UnionVector unionVector = UnionVector.empty("myvector", allocator); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index 17bc08c7d398c..1ba3bc3576fb2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -42,6 +42,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.memory.rounding.DefaultRoundingPolicy; import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.CommonUtil; import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; @@ -51,6 +52,7 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; +import org.apache.arrow.vector.util.TransferPair; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -1714,6 +1716,502 @@ public void testCopyFromSafeWithNulls(Function= 4096); + + /* populate the vector */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + vector.set(i, STR1); + } else { + vector.set(i, STR2); + } + } + + /* Check the vector output */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(STR1, vector.get(i)); + } else { + assertArrayEquals(STR2, vector.get(i)); + } + } + + /* trigger first realloc */ + vector.setSafe(valueCapacity, STR2, 0, STR2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, STR1); + } else { + vector.set(i, STR2); + } + } + + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(STR1, vector.get(i)); + } else { + assertArrayEquals(STR2, vector.get(i)); + } + } + + /* trigger second realloc */ + vector.setSafe(valueCapacity + bytesPerRecord, STR2, 0, STR2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + 
+ /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, STR1); + } else { + vector.set(i, STR2); + } + } + + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(STR1, vector.get(i)); + } else { + assertArrayEquals(STR2, vector.get(i)); + } + } + + /* We are potentially working with 4x the size of vector buffer + * that we initially started with. + * Now let's transfer the vector. + */ + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.transfer(); + ViewVarCharVector toVector = (ViewVarCharVector) transferPair.getTo(); + valueCapacity = toVector.getValueCapacity(); + + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(STR1, toVector.get(i)); + } else { + assertArrayEquals(STR2, toVector.get(i)); + } + } + + toVector.close(); + } + } + + /** + * ARROW-7831: + * ensures that data is transferred from one allocator to another in case of 0-index + * start special cases. + * With long strings and multiple data buffers. + * Check multi-data buffer source copying + */ + @Test + public void testSplitAndTransfer9() { + try (final ViewVarCharVector targetVector = new ViewVarCharVector("target", allocator)) { + String str4 = generateRandomString(35); + try (final ViewVarCharVector sourceVector = new ViewVarCharVector("source", allocator)) { + sourceVector.allocateNew(48, 4); + + sourceVector.set(0, STR1); + sourceVector.set(1, STR2); + sourceVector.set(2, STR3); + sourceVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); + sourceVector.setValueCount(4); + + // we should have multiple data buffers + assertTrue(sourceVector.getDataBuffers().size() > 1); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + // split and transfer with slice starting at the beginning: + // this should not allocate anything new + sourceVector.splitAndTransferTo(1, 3, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + + // the refcnts of each buffer for this test should be the same as what + // the source allocator ended up with. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + assertArrayEquals(STR1, sourceVector.get(0)); + assertArrayEquals(STR2, sourceVector.get(1)); + assertArrayEquals(STR3, sourceVector.get(2)); + assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), sourceVector.get(3)); + } + assertArrayEquals(STR2, targetVector.get(0)); + assertArrayEquals(STR3, targetVector.get(1)); + assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), targetVector.get(2)); + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); From 052c330de128951e2bb8202c95b5927916be66d0 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 31 May 2024 11:54:45 +0800 Subject: [PATCH 201/261] GH-41836: [Java] Fix an undefined symbol error when ARROW_S3=OFF (#41837) ### Rationale for this change Fix undefined symbol error reported at runtime after ARROW_S3=OFF is used in compiling arrow Java. 
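For context, the shape of the fix is a compile-time guard: the JNI entry point stays defined in every build so the JVM can always resolve the symbol, while the S3-specific body is only compiled in when the C++ library was built with S3 support. A condensed sketch of the fixed entry point from the diff below (the `ARROW_S3` preprocessor define mirroring the CMake option is taken from that diff):

```cpp
// Keep the exported symbol defined unconditionally; compile the
// S3-specific call only when the feature is available.
JNIEXPORT void JNICALL
Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized(JNIEnv* env, jobject) {
  JNI_METHOD_START
#ifdef ARROW_S3
  JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized());
#endif
  JNI_METHOD_END()
}
```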
### What changes are included in this PR?

### Are these changes tested?

Tested locally. Not sure whether we need to add a test.

### Are there any user-facing changes?

No.

* GitHub Issue: #41836

Authored-by: PHILO-HE
Signed-off-by: Sutou Kouhei
---
 java/dataset/src/main/cpp/jni_wrapper.cc | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/java/dataset/src/main/cpp/jni_wrapper.cc b/java/dataset/src/main/cpp/jni_wrapper.cc
index 19a43c8d2fa41..79efbeb74fc54 100644
--- a/java/dataset/src/main/cpp/jni_wrapper.cc
+++ b/java/dataset/src/main/cpp/jni_wrapper.cc
@@ -25,9 +25,8 @@
 #include "arrow/c/helpers.h"
 #include "arrow/dataset/api.h"
 #include "arrow/dataset/file_base.h"
-#include "arrow/filesystem/localfs.h"
+#include "arrow/filesystem/api.h"
 #include "arrow/filesystem/path_util.h"
-#include "arrow/filesystem/s3fs.h"
 #include "arrow/engine/substrait/util.h"
 #include "arrow/engine/substrait/serde.h"
 #include "arrow/engine/substrait/relation.h"
@@ -660,7 +659,9 @@ JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_releaseBuffe
 JNIEXPORT void JNICALL Java_org_apache_arrow_dataset_jni_JniWrapper_ensureS3Finalized(
     JNIEnv* env, jobject) {
   JNI_METHOD_START
+#ifdef ARROW_S3
   JniAssertOkOrThrow(arrow::fs::EnsureS3Finalized());
+#endif
   JNI_METHOD_END()
 }

From 31fe24dd3345d387ba52d46c2915a909a5667813 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Fri, 31 May 2024 09:48:54 +0200
Subject: [PATCH 202/261] GH-41126: [Python] Basic bindings for Device and MemoryManager classes (#41685)

### Rationale for this change

Add bindings for the C++ `arrow::Device` and `arrow::MemoryManager` classes.

### What changes are included in this PR?

Basic bindings by adding the `pyarrow.Device` and `pyarrow.MemoryManager` classes, and just tested for CPU. What is not included here are additional methods on the `MemoryManager` class (eg to allocate or copy buffers), and this is also not yet tested for CUDA. Planning to do this as follow-ups, and first doing those basic bindings should enable further enhancements to be done in parallel.

### Are these changes tested?

Yes, for the CPU device only.

* GitHub Issue: #41126

Authored-by: Joris Van den Bossche
Signed-off-by: Joris Van den Bossche
---
 python/pyarrow/__init__.py           |   3 +
 python/pyarrow/device.pxi            | 162 +++++++++++++++++++++++++++
 python/pyarrow/includes/libarrow.pxd |  35 ++++++
 python/pyarrow/io.pxi                |  33 ++++++
 python/pyarrow/lib.pxd               |  20 ++++
 python/pyarrow/lib.pyx               |   3 +
 python/pyarrow/tests/test_device.py  |  43 +++++++
 python/pyarrow/tests/test_misc.py    |   2 +
 8 files changed, 301 insertions(+)
 create mode 100644 python/pyarrow/device.pxi
 create mode 100644 python/pyarrow/tests/test_device.py

diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py
index 936f4736977c8..e52e0d242bee5 100644
--- a/python/pyarrow/__init__.py
+++ b/python/pyarrow/__init__.py
@@ -236,6 +236,9 @@ def print_entry(label, value):
                          RunEndEncodedScalar, ExtensionScalar)

 # Buffers, allocation
+from pyarrow.lib import (DeviceAllocationType, Device, MemoryManager,
+                         default_cpu_memory_manager)
+
 from pyarrow.lib import (Buffer, ResizableBuffer, foreign_buffer, py_buffer,
                          Codec, compress, decompress, allocate_buffer)

diff --git a/python/pyarrow/device.pxi b/python/pyarrow/device.pxi
new file mode 100644
index 0000000000000..6e6034752085a
--- /dev/null
+++ b/python/pyarrow/device.pxi
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True + + +cpdef enum DeviceAllocationType: + CPU = CDeviceAllocationType_kCPU + CUDA = CDeviceAllocationType_kCUDA + CUDA_HOST = CDeviceAllocationType_kCUDA_HOST + OPENCL = CDeviceAllocationType_kOPENCL + VULKAN = CDeviceAllocationType_kVULKAN + METAL = CDeviceAllocationType_kMETAL + VPI = CDeviceAllocationType_kVPI + ROCM = CDeviceAllocationType_kROCM + ROCM_HOST = CDeviceAllocationType_kROCM_HOST + EXT_DEV = CDeviceAllocationType_kEXT_DEV + CUDA_MANAGED = CDeviceAllocationType_kCUDA_MANAGED + ONEAPI = CDeviceAllocationType_kONEAPI + WEBGPU = CDeviceAllocationType_kWEBGPU + HEXAGON = CDeviceAllocationType_kHEXAGON + + +cdef object _wrap_device_allocation_type(CDeviceAllocationType device_type): + return DeviceAllocationType( device_type) + + +cdef class Device(_Weakrefable): + """ + Abstract interface for hardware devices + + This object represents a device with access to some memory spaces. + When handling a Buffer or raw memory address, it allows deciding in which + context the raw memory address should be interpreted + (e.g. CPU-accessible memory, or embedded memory on some particular GPU). + """ + + def __init__(self): + raise TypeError("Do not call Device's constructor directly, " + "use the device attribute of the MemoryManager instead.") + + cdef void init(self, const shared_ptr[CDevice]& device): + self.device = device + + @staticmethod + cdef wrap(const shared_ptr[CDevice]& device): + cdef Device self = Device.__new__(Device) + self.init(device) + return self + + def __eq__(self, other): + if not isinstance(other, Device): + return False + return self.device.get().Equals(deref((other).device.get())) + + def __repr__(self): + return "".format(frombytes(self.device.get().ToString())) + + @property + def type_name(self): + """ + A shorthand for this device's type. + """ + return frombytes(self.device.get().type_name()) + + @property + def device_id(self): + """ + A device ID to identify this device if there are multiple of this type. + + If there is no "device_id" equivalent (such as for the main CPU device on + non-numa systems) returns -1. + """ + return self.device.get().device_id() + + @property + def is_cpu(self): + """ + Whether this device is the main CPU device. + + This shorthand method is very useful when deciding whether a memory address + is CPU-accessible. + """ + return self.device.get().is_cpu() + + @property + def device_type(self): + """ + Return the DeviceAllocationType of this device. + """ + return _wrap_device_allocation_type(self.device.get().device_type()) + + +cdef class MemoryManager(_Weakrefable): + """ + An object that provides memory management primitives. + + A MemoryManager is always tied to a particular Device instance. 
+ It can also have additional parameters (such as a MemoryPool to + allocate CPU memory). + + """ + + def __init__(self): + raise TypeError("Do not call MemoryManager's constructor directly, " + "use pyarrow.default_cpu_memory_manager() instead.") + + cdef void init(self, const shared_ptr[CMemoryManager]& mm): + self.memory_manager = mm + + @staticmethod + cdef wrap(const shared_ptr[CMemoryManager]& mm): + cdef MemoryManager self = MemoryManager.__new__(MemoryManager) + self.init(mm) + return self + + def __repr__(self): + return "".format( + frombytes(self.memory_manager.get().device().get().ToString()) + ) + + @property + def device(self): + """ + The device this MemoryManager is tied to. + """ + return Device.wrap(self.memory_manager.get().device()) + + @property + def is_cpu(self): + """ + Whether this MemoryManager is tied to the main CPU device. + + This shorthand method is very useful when deciding whether a memory + address is CPU-accessible. + """ + return self.memory_manager.get().is_cpu() + + +def default_cpu_memory_manager(): + """ + Return the default CPU MemoryManager instance. + + The returned singleton instance uses the default MemoryPool. + """ + return MemoryManager.wrap(c_default_cpu_memory_manager()) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 8bfc31edc747d..a66f584b83f5b 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -316,6 +316,38 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: cdef cppclass CProxyMemoryPool" arrow::ProxyMemoryPool"(CMemoryPool): CProxyMemoryPool(CMemoryPool*) + ctypedef enum CDeviceAllocationType "arrow::DeviceAllocationType": + CDeviceAllocationType_kCPU "arrow::DeviceAllocationType::kCPU" + CDeviceAllocationType_kCUDA "arrow::DeviceAllocationType::kCUDA" + CDeviceAllocationType_kCUDA_HOST "arrow::DeviceAllocationType::kCUDA_HOST" + CDeviceAllocationType_kOPENCL "arrow::DeviceAllocationType::kOPENCL" + CDeviceAllocationType_kVULKAN "arrow::DeviceAllocationType::kVULKAN" + CDeviceAllocationType_kMETAL "arrow::DeviceAllocationType::kMETAL" + CDeviceAllocationType_kVPI "arrow::DeviceAllocationType::kVPI" + CDeviceAllocationType_kROCM "arrow::DeviceAllocationType::kROCM" + CDeviceAllocationType_kROCM_HOST "arrow::DeviceAllocationType::kROCM_HOST" + CDeviceAllocationType_kEXT_DEV "arrow::DeviceAllocationType::kEXT_DEV" + CDeviceAllocationType_kCUDA_MANAGED "arrow::DeviceAllocationType::kCUDA_MANAGED" + CDeviceAllocationType_kONEAPI "arrow::DeviceAllocationType::kONEAPI" + CDeviceAllocationType_kWEBGPU "arrow::DeviceAllocationType::kWEBGPU" + CDeviceAllocationType_kHEXAGON "arrow::DeviceAllocationType::kHEXAGON" + + cdef cppclass CDevice" arrow::Device": + const char* type_name() + c_string ToString() + c_bool Equals(const CDevice& other) + int64_t device_id() + c_bool is_cpu() const + shared_ptr[CMemoryManager] default_memory_manager() + CDeviceAllocationType device_type() + + cdef cppclass CMemoryManager" arrow::MemoryManager": + const shared_ptr[CDevice] device() + c_bool is_cpu() const + + shared_ptr[CMemoryManager] c_default_cpu_memory_manager \ + " arrow::default_cpu_memory_manager"() + cdef cppclass CBuffer" arrow::Buffer": CBuffer(const uint8_t* data, int64_t size) const uint8_t* data() @@ -328,6 +360,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_bool is_mutable() const c_string ToHexString() c_bool Equals(const CBuffer& other) + shared_ptr[CDevice] device() + const shared_ptr[CMemoryManager] memory_manager() + 
CDeviceAllocationType device_type() CResult[shared_ptr[CBuffer]] SliceBufferSafe( const shared_ptr[CBuffer]& buffer, int64_t offset) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index 9e8026deb435c..48b7934209c3a 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1327,6 +1327,39 @@ cdef class Buffer(_Weakrefable): """ return self.buffer.get().is_cpu() + @property + def device(self): + """ + The device where the buffer resides. + + Returns + ------- + Device + """ + return Device.wrap(self.buffer.get().device()) + + @property + def memory_manager(self): + """ + The memory manager associated with the buffer. + + Returns + ------- + MemoryManager + """ + return MemoryManager.wrap(self.buffer.get().memory_manager()) + + @property + def device_type(self): + """ + The device type where the buffer resides. + + Returns + ------- + DeviceAllocationType + """ + return _wrap_device_allocation_type(self.buffer.get().device_type()) + @property def parent(self): cdef shared_ptr[CBuffer] parent_buf = self.buffer.get().parent() diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index bfd266a807c40..1bc639cc8d2ba 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -524,6 +524,26 @@ cdef class RecordBatch(_Tabular): cdef void init(self, const shared_ptr[CRecordBatch]& table) +cdef class Device(_Weakrefable): + cdef: + shared_ptr[CDevice] device + + cdef void init(self, const shared_ptr[CDevice]& device) + + @staticmethod + cdef wrap(const shared_ptr[CDevice]& device) + + +cdef class MemoryManager(_Weakrefable): + cdef: + shared_ptr[CMemoryManager] memory_manager + + cdef void init(self, const shared_ptr[CMemoryManager]& memory_manager) + + @staticmethod + cdef wrap(const shared_ptr[CMemoryManager]& mm) + + cdef class Buffer(_Weakrefable): cdef: shared_ptr[CBuffer] buffer diff --git a/python/pyarrow/lib.pyx b/python/pyarrow/lib.pyx index 3245e50f0fe69..904e018ffddcc 100644 --- a/python/pyarrow/lib.pyx +++ b/python/pyarrow/lib.pyx @@ -162,6 +162,9 @@ include "pandas-shim.pxi" # Memory pools and allocation include "memory.pxi" +# Device type and memory manager +include "device.pxi" + # DataType, Field, Schema include "types.pxi" diff --git a/python/pyarrow/tests/test_device.py b/python/pyarrow/tests/test_device.py new file mode 100644 index 0000000000000..6bdb015be1a95 --- /dev/null +++ b/python/pyarrow/tests/test_device.py @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+
+import pyarrow as pa
+
+
+def test_device_memory_manager():
+    mm = pa.default_cpu_memory_manager()
+    assert mm.is_cpu
+    device = mm.device
+    assert device.is_cpu
+    assert device.device_id == -1
+    assert device.device_type == pa.DeviceAllocationType.CPU
+    assert device.type_name == "arrow::CPUDevice"
+    assert device == device
+    assert repr(device) == ""
+    assert repr(mm) == ""
+
+
+def test_buffer_device():
+    arr = pa.array([0, 1, 2])
+    buf = arr.buffers()[1]
+    assert buf.device_type == pa.DeviceAllocationType.CPU
+    assert isinstance(buf.device, pa.Device)
+    assert isinstance(buf.memory_manager, pa.MemoryManager)
+    assert buf.is_cpu
+    assert buf.device.is_cpu
+    assert buf.device == pa.default_cpu_memory_manager().device
+    assert buf.memory_manager.is_cpu
diff --git a/python/pyarrow/tests/test_misc.py b/python/pyarrow/tests/test_misc.py
index 39dac4eb81dfb..308c37fd0de1e 100644
--- a/python/pyarrow/tests/test_misc.py
+++ b/python/pyarrow/tests/test_misc.py
@@ -242,6 +242,8 @@ def test_set_timezone_db_path_non_windows():
     pa.MemoryPool,
     pa.LoggingMemoryPool,
     pa.ProxyMemoryPool,
+    pa.Device,
+    pa.MemoryManager,
 ])
 def test_extension_type_constructor_errors(klass):
     # ARROW-2638: prevent calling extension class constructors directly

From 255dbf990c3d3e5fb1270a2a11efe0af2be195ab Mon Sep 17 00:00:00 2001
From: Alenka Frim
Date: Fri, 31 May 2024 10:09:54 +0200
Subject: [PATCH 203/261] GH-41684: [C++][Python] Add optional null_bitmap to MapArray::FromArrays (#41757)

### Rationale for this change

When constructing a `MapArray` with `FromArrays` one cannot supply a `null_bitmap`.

### What changes are included in this PR?

Optional `null_bitmap` argument is added to `MapArray::FromArrays`; a usage sketch follows below.

### Are these changes tested?

TODO (have them locally, need to clean them up and commit).

### Are there any user-facing changes?

No.
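As an illustration of the new option, mirroring the Python tests added below (the `keys`/`items` values here are made up for the sketch):

```python
import pyarrow as pa

keys = pa.array(["a", "b", "c", "d", "e", "f"])
items = pa.array([1, 2, 3, 4, 5, 6], type=pa.int32())

# Three map entries; the middle one is marked null via the mask instead of
# encoding the null in the offsets.
arr = pa.MapArray.from_arrays(
    [0, 2, 2, 6], keys, items,
    mask=pa.array([False, True, False], type=pa.bool_()))
```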
* GitHub Issue: #41684 Authored-by: AlenkaF Signed-off-by: Joris Van den Bossche --- cpp/src/arrow/array/array_list_test.cc | 17 ++++++++++ cpp/src/arrow/array/array_nested.cc | 45 ++++++++++++++++++-------- cpp/src/arrow/array/array_nested.h | 9 ++++-- python/pyarrow/array.pxi | 11 +++++-- python/pyarrow/includes/libarrow.pxd | 8 +++-- python/pyarrow/tests/test_array.py | 34 +++++++++++++++++++ 6 files changed, 102 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index e79ce6fe172b2..55f91dc34167b 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1368,6 +1368,23 @@ TEST_F(TestMapArray, FromArrays) { ASSERT_EQ(keys_with_null->length(), tmp_items->length()); ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets1, keys_with_null, tmp_items, pool_)); + + // With null_bitmap + ASSERT_OK_AND_ASSIGN(auto map7, MapArray::FromArrays(offsets1, keys, items, pool_, + offsets3->data()->buffers[0])); + ASSERT_OK(map7->Validate()); + MapArray expected7(map_type, length, offsets1->data()->buffers[1], keys, items, + offsets3->data()->buffers[0], 1); + AssertArraysEqual(expected7, *map7); + + // Null bitmap and offset with null + ASSERT_RAISES(Invalid, MapArray::FromArrays(offsets3, keys, items, pool_, + offsets3->data()->buffers[0])); + + // Null bitmap and offset with offset + ASSERT_RAISES(NotImplemented, + MapArray::FromArrays(offsets3->Slice(2), keys, items, pool_, + offsets3->data()->buffers[0])); } TEST_F(TestMapArray, FromArraysEquality) { diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index 67a499c2b8277..bb5c6bf018006 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -807,7 +807,7 @@ MapArray::MapArray(const std::shared_ptr& type, int64_t length, Result> MapArray::FromArraysInternal( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool) { + MemoryPool* pool, const std::shared_ptr& null_bitmap) { using offset_type = typename MapType::offset_type; using OffsetArrowType = typename CTypeTraits::ArrowType; @@ -827,6 +827,15 @@ Result> MapArray::FromArraysInternal( return Status::Invalid("Map key and item arrays must be equal length"); } + if (null_bitmap != nullptr && offsets->null_count() > 0) { + return Status::Invalid( + "Ambiguous to specify both validity map and offsets with nulls"); + } + + if (null_bitmap != nullptr && offsets->offset() != 0) { + return Status::NotImplemented("Null bitmap with offsets slice not supported."); + } + if (offsets->null_count() > 0) { ARROW_ASSIGN_OR_RAISE(auto buffers, CleanListOffsets(NULLPTR, *offsets, pool)); @@ -836,24 +845,32 @@ Result> MapArray::FromArraysInternal( using OffsetArrayType = typename TypeTraits::ArrayType; const auto& typed_offsets = checked_cast(*offsets); - auto buffers = BufferVector({nullptr, typed_offsets.values()}); + + BufferVector buffers; + int64_t null_count; + if (null_bitmap != nullptr) { + buffers = BufferVector({std::move(null_bitmap), typed_offsets.values()}); + null_count = null_bitmap->size(); + } else { + buffers = BufferVector({null_bitmap, typed_offsets.values()}); + null_count = 0; + } return std::make_shared(type, offsets->length() - 1, std::move(buffers), keys, - items, /*null_count=*/0, offsets->offset()); + items, /*null_count=*/null_count, offsets->offset()); } -Result> MapArray::FromArrays(const std::shared_ptr& offsets, - const 
std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + const std::shared_ptr& offsets, const std::shared_ptr& keys, + const std::shared_ptr& items, MemoryPool* pool, + const std::shared_ptr& null_bitmap) { return FromArraysInternal(std::make_shared(keys->type(), items->type()), - offsets, keys, items, pool); + offsets, keys, items, pool, null_bitmap); } -Result> MapArray::FromArrays(std::shared_ptr type, - const std::shared_ptr& offsets, - const std::shared_ptr& keys, - const std::shared_ptr& items, - MemoryPool* pool) { +Result> MapArray::FromArrays( + std::shared_ptr type, const std::shared_ptr& offsets, + const std::shared_ptr& keys, const std::shared_ptr& items, + MemoryPool* pool, const std::shared_ptr& null_bitmap) { if (type->id() != Type::MAP) { return Status::TypeError("Expected map type, got ", type->ToString()); } @@ -864,7 +881,7 @@ Result> MapArray::FromArrays(std::shared_ptr ty if (!map_type.item_type()->Equals(items->type())) { return Status::TypeError("Mismatching map items type"); } - return FromArraysInternal(std::move(type), offsets, keys, items, pool); + return FromArraysInternal(std::move(type), offsets, keys, items, pool, null_bitmap); } Status MapArray::ValidateChildData( diff --git a/cpp/src/arrow/array/array_nested.h b/cpp/src/arrow/array/array_nested.h index 5744f5fcadf05..f96b6bd3b1346 100644 --- a/cpp/src/arrow/array/array_nested.h +++ b/cpp/src/arrow/array/array_nested.h @@ -532,15 +532,18 @@ class ARROW_EXPORT MapArray : public ListArray { /// \param[in] keys Array containing key values /// \param[in] items Array containing item values /// \param[in] pool MemoryPool in case new offsets array needs to be + /// \param[in] null_bitmap Optional validity bitmap /// allocated because of null values static Result> FromArrays( const std::shared_ptr& offsets, const std::shared_ptr& keys, - const std::shared_ptr& items, MemoryPool* pool = default_memory_pool()); + const std::shared_ptr& items, MemoryPool* pool = default_memory_pool(), + const std::shared_ptr& null_bitmap = NULLPTR); static Result> FromArrays( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool = default_memory_pool()); + MemoryPool* pool = default_memory_pool(), + const std::shared_ptr& null_bitmap = NULLPTR); const MapType* map_type() const { return map_type_; } @@ -560,7 +563,7 @@ class ARROW_EXPORT MapArray : public ListArray { static Result> FromArraysInternal( std::shared_ptr type, const std::shared_ptr& offsets, const std::shared_ptr& keys, const std::shared_ptr& items, - MemoryPool* pool); + MemoryPool* pool, const std::shared_ptr& null_bitmap = NULLPTR); private: const MapType* map_type_; diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 406830ad4dd69..3c26e85887466 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3060,7 +3060,7 @@ cdef class MapArray(ListArray): """ @staticmethod - def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None): + def from_arrays(offsets, keys, items, DataType type=None, MemoryPool pool=None, mask=None): """ Construct MapArray from arrays of int32 offsets and key, item arrays. @@ -3072,6 +3072,8 @@ cdef class MapArray(ListArray): type : DataType, optional If not specified, a default MapArray with the keys' and items' type is used. pool : MemoryPool + mask : Array (boolean type), optional + Indicate which values are null (True) or not null (False). 
Returns ------- @@ -3153,24 +3155,27 @@ cdef class MapArray(ListArray): cdef: Array _offsets, _keys, _items shared_ptr[CArray] out + shared_ptr[CBuffer] c_mask cdef CMemoryPool* cpool = maybe_unbox_memory_pool(pool) _offsets = asarray(offsets, type='int32') _keys = asarray(keys) _items = asarray(items) + c_mask = c_mask_inverted_from_obj(mask, pool) + if type is not None: with nogil: out = GetResultValue( CMapArray.FromArraysAndType( type.sp_type, _offsets.sp_array, - _keys.sp_array, _items.sp_array, cpool)) + _keys.sp_array, _items.sp_array, cpool, c_mask)) else: with nogil: out = GetResultValue( CMapArray.FromArrays(_offsets.sp_array, _keys.sp_array, - _items.sp_array, cpool)) + _items.sp_array, cpool, c_mask)) cdef Array result = pyarrow_wrap_array(out) result.validate() return result diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index a66f584b83f5b..0d63ec6be38d8 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -823,7 +823,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const shared_ptr[CBuffer] null_bitmap, + ) @staticmethod CResult[shared_ptr[CArray]] FromArraysAndType" FromArrays"( @@ -831,7 +833,9 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: const shared_ptr[CArray]& offsets, const shared_ptr[CArray]& keys, const shared_ptr[CArray]& items, - CMemoryPool* pool) + CMemoryPool* pool, + const shared_ptr[CBuffer] null_bitmap, + ) shared_ptr[CArray] keys() shared_ptr[CArray] items() diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index b89e0ace157af..49a00517fca9f 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1079,6 +1079,40 @@ def test_map_from_arrays(): pa.int64() )) + # pass in null bitmap with type + result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + assert result.equals(expected) + + # pass in null bitmap without the type + result = pa.MapArray.from_arrays([0, 2, 2, 6], keys, items, + mask=pa.array([False, True, False], + type=pa.bool_()) + ) + assert result.equals(expected) + + # error if null bitmap and offsets with nulls passed + msg1 = 'Ambiguous to specify both validity map and offsets with nulls' + with pytest.raises(pa.ArrowInvalid, match=msg1): + pa.MapArray.from_arrays(offsets, keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + + # error if null bitmap passed to sliced offset + msg2 = 'Null bitmap with offsets slice not supported.' + offsets = pa.array(offsets, pa.int32()) + with pytest.raises(pa.ArrowNotImplementedError, match=msg2): + pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_( + keys.type, + items.type), + mask=pa.array([False, True, False], type=pa.bool_()) + ) + # check invalid usage offsets = [0, 1, 3, 5] keys = np.arange(5) From 0d9d699250ad98cdcc5c99735e1379b1e79bc195 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Sun, 2 Jun 2024 06:51:56 +0900 Subject: [PATCH 204/261] GH-41920: [CI][JS] Add missing build directory argument (#41921) ### Rationale for this change This is a follow-up of GH-41455. ### What changes are included in this PR? Add missing build directory argument. ### Are these changes tested? Yes. 
### Are there any user-facing changes? No. * GitHub Issue: #41920 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/js.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/js.yml b/.github/workflows/js.yml index c9b7d7b742d88..e03d0c2dadce0 100644 --- a/.github/workflows/js.yml +++ b/.github/workflows/js.yml @@ -106,10 +106,10 @@ jobs: node-version: ${{ matrix.node }} - name: Build shell: bash - run: ci/scripts/js_build.sh $(pwd) + run: ci/scripts/js_build.sh $(pwd) build - name: Test shell: bash - run: ci/scripts/js_test.sh $(pwd) + run: ci/scripts/js_test.sh $(pwd) build windows: name: AMD64 Windows NodeJS ${{ matrix.node }} @@ -136,7 +136,7 @@ jobs: node-version: ${{ matrix.node }} - name: Build shell: bash - run: ci/scripts/js_build.sh $(pwd) + run: ci/scripts/js_build.sh $(pwd) build - name: Test shell: bash - run: ci/scripts/js_test.sh $(pwd) + run: ci/scripts/js_test.sh $(pwd) build From 44070eb99f0db907e1cf7a445bf7c6fea78e17fe Mon Sep 17 00:00:00 2001 From: Adam Reeve Date: Mon, 3 Jun 2024 08:53:34 +1200 Subject: [PATCH 205/261] GH-41806: [GLib][CI] Use vcpkg for C++ dependencies when building GLib libraries with MSVC (#41839) ### Rationale for this change This is more consistent as vcpkg is now used for both the Arrow C++ dependencies and GLib library dependencies, and also fixes building arrow-flight-glib and arrow-flight-sql-glib which previously failed due to a missing zlib dependency. ### What changes are included in this PR? * Configure the MSVC GLib build to use VCPKG as the dependency source for the C++ build * Configure GitHub packages based vcpkg caching (see https://learn.microsoft.com/en-us/vcpkg/users/binarycaching#quickstart-github) * Enable building Flight and Flight SQL ### Are these changes tested? NA ### Are there any user-facing changes? No Because GitHub packages is being used for caching, this will start creating GitHub packages that are visible in the main page of the Arrow GitHub repository by default. Eg. you can see them in the right panel on the crossbow repo which also runs tasks that use GitHub packages for vcpkg caching: https://github.com/ursacomputing/crossbow. The Arrow repository doesn't currently publish any packages, so it would probably be best to hide these by default to avoid confusing users and adding unnecessary noise. Someone with admin privileges on the Arrow GitHub repository would need to click the cog on the home page and hide this section: ![hide_packages](https://github.com/apache/arrow/assets/626438/69e1be77-65e7-4a17-8e9e-6d7cf90834b5) If this isn't desirable I could look into other ways of caching instead, eg. using a file based cache combined with GitHub actions cache action, or the experimental GitHub actions cache in vcpkg. 
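For reference, the caching setup boils down to registering a GitHub Packages NuGet feed as a vcpkg binary source. A condensed shell sketch of the workflow steps in the diff below (assuming `GITHUB_REPOSITORY_OWNER` is set and a token with `packages: write` is available in `GITHUB_TOKEN`; the workflow also runs a `setapikey` step not shown here):

```sh
# Locate the nuget.exe bundled with vcpkg, then register the GitHub
# Packages feed that serves as the vcpkg binary cache.
nuget="$(vcpkg/vcpkg.exe fetch nuget | tail -n 1)"
"$nuget" sources add \
  -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \
  -storepasswordincleartext \
  -name "GitHub" \
  -username "$GITHUB_REPOSITORY_OWNER" \
  -password "$GITHUB_TOKEN"
# Tell vcpkg to read and write binary packages through that feed.
export VCPKG_BINARY_SOURCES="clear;nuget,GitHub,readwrite"
```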
* GitHub Issue: #41806 Lead-authored-by: Adam Reeve Co-authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- .github/workflows/ruby.yml | 35 +++++++++++++++++++++++++------- ci/scripts/cpp_build.sh | 1 + cpp/cmake_modules/Usevcpkg.cmake | 3 +++ 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 04f944f56c665..35c4460d47bc6 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -313,15 +313,17 @@ jobs: strategy: fail-fast: false env: + ARROW_ACERO: ON ARROW_BOOST_USE_SHARED: OFF ARROW_BUILD_BENCHMARKS: OFF ARROW_BUILD_SHARED: ON ARROW_BUILD_STATIC: OFF ARROW_BUILD_TESTS: OFF - ARROW_ACERO: ON ARROW_DATASET: ON - ARROW_FLIGHT: OFF - ARROW_FLIGHT_SQL: OFF + ARROW_DEPENDENCY_SOURCE: VCPKG + ARROW_DEPENDENCY_USE_SHARED: OFF + ARROW_FLIGHT: ON + ARROW_FLIGHT_SQL: ON ARROW_GANDIVA: OFF ARROW_HDFS: OFF ARROW_HOME: "${{ github.workspace }}/dist" @@ -337,13 +339,16 @@ jobs: ARROW_WITH_LZ4: OFF ARROW_WITH_OPENTELEMETRY: OFF ARROW_WITH_SNAPPY: ON - ARROW_WITH_ZLIB: OFF + ARROW_WITH_ZLIB: ON ARROW_WITH_ZSTD: ON - BOOST_SOURCE: BUNDLED CMAKE_CXX_STANDARD: "17" CMAKE_GENERATOR: Ninja CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist" CMAKE_UNITY_BUILD: ON + VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite' + VCPKG_ROOT: "${{ github.workspace }}/vcpkg" + permissions: + packages: write steps: - name: Disable Crash Dialogs run: | @@ -361,7 +366,7 @@ jobs: - name: Install vcpkg shell: bash run: | - ci/scripts/install_vcpkg.sh ./vcpkg + ci/scripts/install_vcpkg.sh "${VCPKG_ROOT}" - name: Install meson run: | python -m pip install meson @@ -387,6 +392,22 @@ jobs: env: # We can invalidate the current cache by updating this. CACHE_VERSION: "2024-05-09" + - name: Setup NuGet credentials for vcpkg caching + shell: bash + run: | + $(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \ + sources add \ + -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \ + -storepasswordincleartext \ + -name "GitHub" \ + -username "$GITHUB_REPOSITORY_OWNER" \ + -password "${{ secrets.GITHUB_TOKEN }}" + $(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \ + setapikey "${{ secrets.GITHUB_TOKEN }}" \ + -source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" + - name: Build C++ vcpkg dependencies + run: | + vcpkg\vcpkg.exe install --triplet x64-windows --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed - name: Build C++ shell: cmd run: | @@ -396,4 +417,4 @@ jobs: shell: cmd run: | call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 - bash -c "VCPKG_ROOT=\"$(pwd)/vcpkg\" ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" + bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index 6a3a53f2533cd..3ee7fbd9d19cd 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -137,6 +137,7 @@ else -DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \ -DARROW_DATASET=${ARROW_DATASET:-OFF} \ -DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \ + -DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED:-ON} \ -DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \ -DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \ -DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \ diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake index b6192468da342..37a732f4b85a0 100644 --- a/cpp/cmake_modules/Usevcpkg.cmake +++ 
b/cpp/cmake_modules/Usevcpkg.cmake
@@ -237,6 +237,9 @@ set(LZ4_ROOT
     CACHE STRING "")

 if(CMAKE_HOST_WIN32)
+  set(utf8proc_MSVC_STATIC_LIB_SUFFIX
+      ""
+      CACHE STRING "")
   set(LZ4_MSVC_LIB_PREFIX
       ""
       CACHE STRING "")

From 02585cd11249116c00fdfe0822df4ad7da790c27 Mon Sep 17 00:00:00 2001
From: Thomas Newton
Date: Mon, 3 Jun 2024 02:37:51 +0100
Subject: [PATCH 206/261] GH-39345: [C++][FS][Azure] Add support for environment credential (#41715)

### Rationale for this change

It may be useful to support an explicit environment credential (currently the environment credential can be used as part of the Azure default credential flow).

### What changes are included in this PR?

### Are these changes tested?

There are new unit tests, but no integration tests that verify we can actually authenticate successfully. We are relying on the Azure C++ SDK to abstract that away.

### Are there any user-facing changes?

Yes, the environment credential is now available.

* GitHub Issue: #39345

Authored-by: Thomas Newton
Signed-off-by: Sutou Kouhei
---
 cpp/src/arrow/filesystem/azurefs.cc      | 14 ++++++++++++++
 cpp/src/arrow/filesystem/azurefs.h       | 10 +++++++---
 cpp/src/arrow/filesystem/azurefs_test.cc | 19 +++++++++++++++++++
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc
index 7462827d80f1e..f367bbdd3ed90 100644
--- a/cpp/src/arrow/filesystem/azurefs.cc
+++ b/cpp/src/arrow/filesystem/azurefs.cc
@@ -119,6 +119,8 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) {
       credential_kind = CredentialKind::kAnonymous;
     } else if (kv.second == "workload_identity") {
       credential_kind = CredentialKind::kWorkloadIdentity;
+    } else if (kv.second == "environment") {
+      credential_kind = CredentialKind::kEnvironment;
     } else {
       // Other credential kinds should be inferred from the given
       // parameters automatically.
@@ -171,6 +173,9 @@ Status AzureOptions::ExtractFromUriQuery(const Uri& uri) { case CredentialKind::kWorkloadIdentity: RETURN_NOT_OK(ConfigureWorkloadIdentityCredential()); break; + case CredentialKind::kEnvironment: + RETURN_NOT_OK(ConfigureEnvironmentCredential()); + break; default: // Default credential break; @@ -252,6 +257,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const { case CredentialKind::kClientSecret: case CredentialKind::kManagedIdentity: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return token_credential_->GetCredentialName() == other.token_credential_->GetCredentialName(); } @@ -337,6 +343,12 @@ Status AzureOptions::ConfigureWorkloadIdentityCredential() { return Status::OK(); } +Status AzureOptions::ConfigureEnvironmentCredential() { + credential_kind_ = CredentialKind::kEnvironment; + token_credential_ = std::make_shared(); + return Status::OK(); +} + Result> AzureOptions::MakeBlobServiceClient() const { if (account_name.empty()) { @@ -353,6 +365,7 @@ Result> AzureOptions::MakeBlobServiceC case CredentialKind::kClientSecret: case CredentialKind::kManagedIdentity: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return std::make_unique(AccountBlobUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKey: @@ -379,6 +392,7 @@ AzureOptions::MakeDataLakeServiceClient() const { case CredentialKind::kClientSecret: case CredentialKind::kManagedIdentity: case CredentialKind::kWorkloadIdentity: + case CredentialKind::kEnvironment: return std::make_unique( AccountDfsUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKey: diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index b71a5ae73b2e9..5d100bbcb4a8a 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -120,6 +120,7 @@ struct ARROW_EXPORT AzureOptions { kClientSecret, kManagedIdentity, kWorkloadIdentity, + kEnvironment, } credential_kind_ = CredentialKind::kDefault; std::shared_ptr @@ -160,11 +161,13 @@ struct ARROW_EXPORT AzureOptions { /// * dfs_storage_authority: Set AzureOptions::dfs_storage_authority /// * enable_tls: If it's "false" or "0", HTTP not HTTPS is used. /// * credential_kind: One of "default", "anonymous", - /// "workload_identity". If "default" is specified, it's just - /// ignored. If "anonymous" is specified, + /// "workload_identity" or "environment". If "default" is specified, it's + /// just ignored. If "anonymous" is specified, /// AzureOptions::ConfigureAnonymousCredential() is called. If /// "workload_identity" is specified, - /// AzureOptions::ConfigureWorkloadIdentityCredential() is called. + /// AzureOptions::ConfigureWorkloadIdentityCredential() is called, If + /// "environment" is specified, + /// AzureOptions::ConfigureEnvironmentCredential() is called. /// * tenant_id: You must specify "client_id" and "client_secret" /// too. AzureOptions::ConfigureClientSecretCredential() is called. 
/// * client_id: If you don't specify "tenant_id" and
@@ -188,6 +191,7 @@ struct ARROW_EXPORT AzureOptions {
                                          const std::string& client_secret);
   Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string());
   Status ConfigureWorkloadIdentityCredential();
+  Status ConfigureEnvironmentCredential();

   bool Equals(const AzureOptions& other) const;

diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc
index ed09bfc2fadd7..6075cbf0c0c91 100644
--- a/cpp/src/arrow/filesystem/azurefs_test.cc
+++ b/cpp/src/arrow/filesystem/azurefs_test.cc
@@ -528,6 +528,13 @@ TEST(AzureFileSystem, InitializeWithWorkloadIdentityCredential) {
   EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
 }

+TEST(AzureFileSystem, InitializeWithEnvironmentCredential) {
+  AzureOptions options;
+  options.account_name = "dummy-account-name";
+  ARROW_EXPECT_OK(options.ConfigureEnvironmentCredential());
+  EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options));
+}
+
 TEST(AzureFileSystem, OptionsCompare) {
   AzureOptions options;
   EXPECT_TRUE(options.Equals(options));
@@ -669,6 +676,15 @@ class TestAzureOptions : public ::testing::Test {
     ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kWorkloadIdentity);
   }

+  void TestFromUriCredentialEnvironment() {
+    ASSERT_OK_AND_ASSIGN(
+        auto options,
+        AzureOptions::FromUri("abfs://account.blob.core.windows.net/container/dir/blob?"
+                              "credential_kind=environment",
+                              nullptr));
+    ASSERT_EQ(options.credential_kind_, AzureOptions::CredentialKind::kEnvironment);
+  }
+
   void TestFromUriCredentialInvalid() {
     ASSERT_RAISES(Invalid, AzureOptions::FromUri(
                                "abfs://file_system@account.dfs.core.windows.net/dir/file?"
@@ -720,6 +736,9 @@ TEST_F(TestAzureOptions, FromUriCredentialManagedIdentity) {
 TEST_F(TestAzureOptions, FromUriCredentialWorkloadIdentity) {
   TestFromUriCredentialWorkloadIdentity();
 }
+TEST_F(TestAzureOptions, FromUriCredentialEnvironment) {
+  TestFromUriCredentialEnvironment();
+}
 TEST_F(TestAzureOptions, FromUriCredentialInvalid) { TestFromUriCredentialInvalid(); }
 TEST_F(TestAzureOptions, FromUriBlobStorageAuthority) {
   TestFromUriBlobStorageAuthority();

From 54bece3d4cf28df3e7bd92ed27f62d705a5cac96 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Mon, 3 Jun 2024 16:55:54 +0530
Subject: [PATCH 207/261] GH-41648: [Java] Memory Leak about splitAndTransfer (#41898)

### Rationale for this change

This PR fixes an inconsistency in the validity buffer split-and-transfer functionality in `BaseFixedWidthVector`.

### What changes are included in this PR?

- [X] Change to handle the validity buffer transfer
- [X] User-provided test case which reproduced the error
- [X] Upgrading to JUnit 5

### Are these changes tested?

Mainly by existing test cases and a newly added one.

### Are there any user-facing changes?
No * GitHub Issue: #41648 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../arrow/vector/BaseFixedWidthVector.java | 4 +- .../apache/arrow/vector/TestValueVector.java | 197 +++++++++++------- 2 files changed, 122 insertions(+), 79 deletions(-) diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java index c456c625389ba..a6e1a71dc36bd 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BaseFixedWidthVector.java @@ -674,8 +674,8 @@ private void splitAndTransferValidityBuffer(int startIndex, int length, if (target.validityBuffer != null) { target.validityBuffer.getReferenceManager().release(); } - target.validityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); - target.validityBuffer.getReferenceManager().retain(1); + ArrowBuf slicedValidityBuffer = validityBuffer.slice(firstByteSource, byteSizeTarget); + target.validityBuffer = transferBuffer(slicedValidityBuffer, target.allocator); target.refreshValueCapacity(); } else { /* Copy data diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index 3e53512f7338f..fda14b24a4c8b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -21,12 +21,13 @@ import static org.apache.arrow.vector.TestUtils.newVarCharVector; import static org.apache.arrow.vector.TestUtils.newVector; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.ByteBuffer; import java.nio.charset.Charset; @@ -69,9 +70,9 @@ import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValueVector { @@ -79,7 +80,7 @@ public class TestValueVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -95,7 +96,7 @@ public void init() { (int) (Integer.getInteger("arrow.vector.max_allocation_bytes", Integer.MAX_VALUE) / 7); private static final int MAX_VALUE_COUNT_8BYTE = (int) (MAX_VALUE_COUNT / 2); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -188,7 +189,7 @@ public void testFixedType1() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { // TODO: test 
vector.get(i) is 0 after unsafe get added - assertEquals("non-zero data not expected at index: " + i, true, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -276,7 +277,7 @@ public void testFixedType2() { /* check vector contents */ j = 1; for (int i = 0; i < initialCapacity; i += 2) { - assertEquals("unexpected value at index: " + i, j, intVector.get(i)); + assertEquals(j, intVector.get(i), "unexpected value at index: " + i); j++; } @@ -298,7 +299,7 @@ public void testFixedType2() { /* vector data should still be intact after realloc */ j = 1; for (int i = 0; i <= initialCapacity; i += 2) { - assertEquals("unexpected value at index: " + i, j, intVector.get(i)); + assertEquals(j, intVector.get(i), "unexpected value at index: " + i); j++; } @@ -311,7 +312,7 @@ public void testFixedType2() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeRealloc; i++) { - assertEquals("non-zero data not expected at index: " + i, true, intVector.isNull(i)); + assertTrue(intVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -427,7 +428,7 @@ public void testFixedFloat2() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -526,7 +527,7 @@ public void testFixedFloat2WithPossibleTruncate() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-zero data not expected at index: " + i, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -626,7 +627,7 @@ public void testFixedType3() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -724,7 +725,7 @@ public void testFixedType4() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertEquals("non-zero data not expected at index: " + i, true, floatVector.isNull(i)); + assertTrue(floatVector.isNull(i), "non-zero data not expected at index: " + i); } } } @@ -821,7 +822,7 @@ public void testNullableFixedType1() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -920,7 +921,7 @@ public void testNullableFixedType2() { /* vector data should be zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -949,10 +950,10 @@ public void testNullableFixedType3() { int j = 1; for (int i = 0; i <= 1023; i++) { if ((i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } else { - assertFalse("null data not expected at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, j, vector.get(i)); + assertFalse(vector.isNull(i), "null data not expected at index: " + i); + 
assertEquals(j, vector.get(i), "unexpected value at index: " + i); j++; } } @@ -987,10 +988,10 @@ public void testNullableFixedType3() { j = 1; for (int i = 0; i < (initialCapacity * 2); i++) { if ((i > 1023 && i != initialCapacity) || (i >= 2 && i <= 99) || (i >= 101 && i <= 1021)) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } else { - assertFalse("null data not expected at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, j, vector.get(i)); + assertFalse(vector.isNull(i), "null data not expected at index: " + i); + assertEquals(j, vector.get(i), "unexpected value at index: " + i); j++; } } @@ -1004,13 +1005,13 @@ public void testNullableFixedType3() { /* vector data should have been zeroed out */ for (int i = 0; i < capacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } vector.allocateNew(initialCapacity * 4); // vector has been erased for (int i = 0; i < initialCapacity * 4; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -1036,10 +1037,10 @@ public void testNullableFixedType4() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1048,15 +1049,15 @@ public void testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == valueCapacity) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, 20000000, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(20000000, vector.get(i), "unexpected value at index: " + i); } else if (i < valueCapacity) { if ((i & 1) == 1) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1068,10 +1069,10 @@ public void testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i % 2 == 0) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, (baseValue + i), vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals((baseValue + i), vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1081,13 +1082,13 @@ public void 
testNullableFixedType4() { for (int i = 0; i < vector.getValueCapacity(); i++) { if (i == (valueCapacityBeforeRealloc + 1000)) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, 400000000, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(400000000, vector.get(i), "unexpected value at index: " + i); } else if (i < valueCapacityBeforeRealloc && (i % 2) == 0) { - assertFalse("unexpected null value at index: " + i, vector.isNull(i)); - assertEquals("unexpected value at index: " + i, baseValue + i, vector.get(i)); + assertFalse(vector.isNull(i), "unexpected null value at index: " + i); + assertEquals(baseValue + i, vector.get(i), "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1100,7 +1101,7 @@ public void testNullableFixedType4() { /* vector data should be zeroed out */ for (int i = 0; i < valueCapacityBeforeReset; i++) { - assertTrue("non-null data not expected at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "non-null data not expected at index: " + i); } } } @@ -1347,23 +1348,24 @@ public void testNullableVarType2() { } } - @Test(expected = OversizedAllocationException.class) + @Test public void testReallocateCheckSuccess() { + assertThrows(OversizedAllocationException.class, () -> { + // Create a new value vector for 1024 integers. + try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + vector.allocateNew(1024 * 10, 1024); - // Create a new value vector for 1024 integers. - try (final VarBinaryVector vector = newVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { - vector.allocateNew(1024 * 10, 1024); + vector.set(0, STR1); + // Check the sample strings. + assertArrayEquals(STR1, vector.get(0)); - vector.set(0, STR1); - // Check the sample strings. 
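The hunks around here are dominated by the JUnit 4 to JUnit 5 migration that this patch bundles with the leak fix. Two mechanical rewrites recur: `@Test(expected = ...)` becomes an explicit `assertThrows`, and the assertion message moves from the first argument to the last. A self-contained sketch of both patterns follows; the class name and values are illustrative, not taken from the patch:

```java
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.junit.jupiter.api.Test;

class JUnit5MigrationSketchTest {
  // JUnit 4 wrote: @Test(expected = IllegalStateException.class).
  // JUnit 5 asserts the exception explicitly, which also pins down
  // exactly which statement is expected to throw.
  @Test
  void expectedExceptionBecomesAssertThrows() {
    IllegalStateException e =
        assertThrows(IllegalStateException.class, () -> {
          throw new IllegalStateException("boom");
        });
    assertEquals("boom", e.getMessage());
  }

  // JUnit 4 wrote: assertEquals(message, expected, actual).
  // JUnit 5 moves the message to the last argument position.
  @Test
  void messageArgumentMovesLast() {
    int actual = 2 + 2;
    assertEquals(4, actual, "unexpected sum");
  }
}
```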
- assertArrayEquals(STR1, vector.get(0)); + // update the index offset to a larger one + ArrowBuf offsetBuf = vector.getOffsetBuffer(); + offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); - // update the index offset to a larger one - ArrowBuf offsetBuf = vector.getOffsetBuffer(); - offsetBuf.setInt(VarBinaryVector.OFFSET_WIDTH, Integer.MAX_VALUE - 5); - - vector.setValueLengthSafe(1, 6); - } + vector.setValueLengthSafe(1, 6); + } + }); } @Test @@ -1551,9 +1553,9 @@ public void testReallocAfterVectorTransfer2() { /* check toVector contents before realloc */ for (int i = 0; i < toVector.getValueCapacity(); i++) { - assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); double value = toVector.get(i); - assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0); + assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); } /* now let's realloc the toVector and check contents again */ @@ -1562,11 +1564,11 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < toVector.getValueCapacity(); i++) { if (i < capacityAfterRealloc2) { - assertFalse("unexpected null value at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected null value at index: " + i); double value = toVector.get(i); - assertEquals("unexpected value at index: " + i, baseValue + (double) i, value, 0); + assertEquals(baseValue + (double) i, value, 0, "unexpected value at index: " + i); } else { - assertTrue("unexpected non-null value at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected non-null value at index: " + i); } } @@ -1921,7 +1923,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -1935,7 +1937,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -1948,7 +1950,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -1982,7 +1984,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -2000,7 +2002,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -2013,7 +2015,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - 
assertEquals("unexpected value at index: " + i, Integer.toString(i), vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -3021,16 +3023,18 @@ public void testUnionVectorEquals() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testEqualsWithIndexOutOfRange() { - try (final IntVector vector1 = new IntVector("int", allocator); - final IntVector vector2 = new IntVector("int", allocator)) { + assertThrows(IllegalArgumentException.class, () -> { + try (final IntVector vector1 = new IntVector("int", allocator); + final IntVector vector2 = new IntVector("int", allocator)) { - setVector(vector1, 1, 2); - setVector(vector2, 1, 2); + setVector(vector1, 1, 2); + setVector(vector2, 1, 2); - assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1))); - } + assertTrue(new RangeEqualsVisitor(vector1, vector2).rangeEquals(new Range(2, 3, 1))); + } + }); } @Test @@ -3398,4 +3402,43 @@ public void testSetGetUInt4() { assertEquals(expected, vector.getValueAsLong(1)); } } + + @Test + public void testSplitAndTransferFixedWithVector1() { + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { + try (IntVector vector = new IntVector("vector", child)) { + vector.setSafe(0, 1); + vector.setSafe(1, 2); + vector.setSafe(2, 3); + vector.setValueCount(3); + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 1); + try (IntVector target = (IntVector) transferPair.getTo()) { + // no-op try-with-resource + assertEquals(1, target.get(0)); + } + } + } + } + + @Test + public void testSplitAndTransferFixedWithVector2() { + IntVector target; + try (BufferAllocator child = allocator.newChildAllocator("child", 0, Long.MAX_VALUE)) { + try (IntVector vector = new IntVector("source", child)) { + vector.setSafe(0, 1); + vector.setSafe(1, 2); + vector.setSafe(2, 3); + vector.setValueCount(3); + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 1); + target = (IntVector) transferPair.getTo(); + assertEquals(1, target.get(0)); + } + } + target.close(); + } } From 99014abd193c84aee7490a36fd7914389b84cfcd Mon Sep 17 00:00:00 2001 From: Tom Scott-Coombes <62209801+tscottcoombes1@users.noreply.github.com> Date: Mon, 3 Jun 2024 16:36:29 +0100 Subject: [PATCH 208/261] GH-41887: [Go] Run linter via pre-commit (#41888) ### Rationale for this change Add and run pre-commit ### What changes are included in this PR? Update pre-commit file Run pre-commit ### Are these changes tested? N/A ### Are there any user-facing changes? 
No GitHub Issue: https://github.com/apache/arrow/issues/41887 * GitHub Issue: #41887 Lead-authored-by: Tom Scott-Coombes Co-authored-by: Tom Scott-Coombes <62209801+tscottcoombes1@users.noreply.github.com> Co-authored-by: Sutou Kouhei Signed-off-by: Matt Topol --- .gitignore | 5 + .golangci.yaml | 29 + .pre-commit-config.yaml | 14 + go/arrow/array/bufferbuilder_numeric_test.go | 2 +- go/arrow/array/numeric.gen.go | 2 +- go/arrow/array/numeric_test.go | 12 +- go/arrow/array/numericbuilder.gen_test.go | 8 +- go/arrow/array/record_test.go | 2 +- go/arrow/datatype_nested_test.go | 16 +- go/arrow/flight/gen/flight/Flight.pb.go | 5 +- go/arrow/flight/gen/flight/FlightSql.pb.go | 5 +- go/arrow/flight/gen/flight/Flight_grpc.pb.go | 1 + go/arrow/float16/float16.go | 2 +- go/arrow/gen-flatbuffers.go | 1 + go/arrow/internal/debug/assert_off.go | 1 + go/arrow/internal/debug/assert_on.go | 1 + go/arrow/internal/debug/doc.go | 6 +- go/arrow/internal/debug/log_off.go | 1 + go/arrow/internal/debug/log_on.go | 1 + go/arrow/internal/debug/util.go | 1 + go/arrow/internal/flatbuf/Binary.go | 2 +- go/arrow/internal/flatbuf/BinaryView.go | 14 +- go/arrow/internal/flatbuf/Block.go | 19 +- go/arrow/internal/flatbuf/BodyCompression.go | 18 +- .../internal/flatbuf/BodyCompressionMethod.go | 6 +- go/arrow/internal/flatbuf/Buffer.go | 34 +- go/arrow/internal/flatbuf/Date.go | 12 +- go/arrow/internal/flatbuf/Decimal.go | 24 +- go/arrow/internal/flatbuf/DictionaryBatch.go | 24 +- .../internal/flatbuf/DictionaryEncoding.go | 48 +- go/arrow/internal/flatbuf/DictionaryKind.go | 10 +- go/arrow/internal/flatbuf/Endianness.go | 4 +- go/arrow/internal/flatbuf/Feature.go | 38 +- go/arrow/internal/flatbuf/Field.go | 34 +- go/arrow/internal/flatbuf/FieldNode.go | 40 +- go/arrow/internal/flatbuf/FixedSizeBinary.go | 4 +- go/arrow/internal/flatbuf/FixedSizeList.go | 4 +- go/arrow/internal/flatbuf/Footer.go | 10 +- go/arrow/internal/flatbuf/KeyValue.go | 6 +- go/arrow/internal/flatbuf/LargeBinary.go | 4 +- go/arrow/internal/flatbuf/LargeList.go | 4 +- go/arrow/internal/flatbuf/LargeListView.go | 4 +- go/arrow/internal/flatbuf/LargeUtf8.go | 4 +- go/arrow/internal/flatbuf/ListView.go | 6 +- go/arrow/internal/flatbuf/Map.go | 54 +- go/arrow/internal/flatbuf/MessageHeader.go | 16 +- go/arrow/internal/flatbuf/Null.go | 2 +- go/arrow/internal/flatbuf/RecordBatch.go | 102 +- go/arrow/internal/flatbuf/RunEndEncoded.go | 10 +- go/arrow/internal/flatbuf/Schema.go | 20 +- .../internal/flatbuf/SparseMatrixIndexCSR.go | 134 +- .../internal/flatbuf/SparseMatrixIndexCSX.go | 142 +- go/arrow/internal/flatbuf/SparseTensor.go | 28 +- .../internal/flatbuf/SparseTensorIndexCOO.go | 100 +- .../internal/flatbuf/SparseTensorIndexCSF.go | 254 +- go/arrow/internal/flatbuf/Struct_.go | 6 +- go/arrow/internal/flatbuf/Tensor.go | 24 +- go/arrow/internal/flatbuf/TensorDim.go | 14 +- go/arrow/internal/flatbuf/Time.go | 28 +- go/arrow/internal/flatbuf/Timestamp.go | 250 +- go/arrow/internal/flatbuf/Type.go | 6 +- go/arrow/internal/flatbuf/Union.go | 8 +- go/arrow/internal/flatbuf/Utf8.go | 2 +- go/arrow/internal/flatbuf/Utf8View.go | 14 +- .../internal/flight_integration/scenario.go | 2 +- go/arrow/ipc/cmd/arrow-cat/main.go | 66 +- go/arrow/ipc/cmd/arrow-ls/main.go | 62 +- go/arrow/math/math_amd64.go | 1 + go/arrow/math/math_arm64.go | 5 +- go/arrow/math/math_noasm.go | 1 + go/arrow/math/math_ppc64le.go | 1 + go/arrow/math/math_s390x.go | 1 + go/arrow/memory/cgo_allocator.go | 4 +- go/arrow/memory/cgo_allocator_defaults.go | 5 +- 
go/arrow/memory/cgo_allocator_logging.go | 5 +- go/arrow/memory/cgo_allocator_test.go | 4 +- go/arrow/memory/memory_amd64.go | 1 + go/arrow/memory/memory_arm64.go | 1 + go/arrow/memory/memory_avx2_amd64.go | 1 + go/arrow/memory/memory_js_wasm.go | 1 + go/arrow/memory/memory_neon_arm64.go | 1 + go/arrow/memory/memory_noasm.go | 1 + go/arrow/memory/memory_sse4_amd64.go | 1 + go/internal/utils/min_max_arm64.go | 3 +- go/parquet/doc.go | 15 +- go/parquet/internal/bmi/bitmap_bmi2_amd64.go | 1 + go/parquet/internal/bmi/bitmap_bmi2_noasm.go | 1 + .../internal/bmi/bitmap_bmi2_ppc64le.go | 1 + go/parquet/internal/bmi/bitmap_bmi2_s390x.go | 1 + go/parquet/internal/bmi/bmi_amd64.go | 1 + go/parquet/internal/debug/assert_off.go | 1 + go/parquet/internal/debug/assert_on.go | 1 + go/parquet/internal/debug/doc.go | 2 +- go/parquet/internal/debug/log_off.go | 1 + go/parquet/internal/debug/log_on.go | 1 + .../encoding/delta_byte_array_test.go | 3 +- .../gen-go/parquet/GoUnusedProtection__.go | 3 +- .../internal/gen-go/parquet/parquet-consts.go | 8 +- go/parquet/internal/gen-go/parquet/parquet.go | 18925 +++++++++------- .../internal/utils/bit_packing_avx2_amd64.go | 1 + .../internal/utils/bit_packing_neon_arm64.go | 1 + .../internal/utils/unpack_bool_amd64.go | 1 + .../internal/utils/unpack_bool_arm64.go | 6 +- .../internal/utils/unpack_bool_avx2_amd64.go | 1 + .../internal/utils/unpack_bool_neon_arm64.go | 1 + .../internal/utils/unpack_bool_noasm.go | 1 + .../internal/utils/unpack_bool_sse4_amd64.go | 1 + go/parquet/metadata/app_version.go | 3 +- go/parquet/schema/reflection.go | 4 +- go/parquet/tools.go | 1 + swift/data-generator/swift-datagen/main.go | 13 +- 111 files changed, 11252 insertions(+), 9616 deletions(-) create mode 100644 .golangci.yaml diff --git a/.gitignore b/.gitignore index a482f5503c2b9..3192069d1ac7a 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,9 @@ __debug_bin .envrc # Develocity +.mvn/.gradle-enterprise/ .mvn/.develocity/ + +# rat +filtered_rat.txt +rat.txt diff --git a/.golangci.yaml b/.golangci.yaml new file mode 100644 index 0000000000000..7d486a9e85a0a --- /dev/null +++ b/.golangci.yaml @@ -0,0 +1,29 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +linters: + # Disable all linters. 
+ # Default: false + disable-all: true + # Enable specific linter + # https://golangci-lint.run/usage/linters/#enabled-by-default + enable: + - gofmt + - goimports + +issues: + fix: true \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index e0b8009b03184..05bf8e54f9cdb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -141,3 +141,17 @@ repos: '--disable', 'dangling-hyphen,line-too-long', ] + - repo: https://github.com/golangci/golangci-lint + rev: v1.59.0 + hooks: + # no built-in support for multiple go.mod + # https://github.com/golangci/golangci-lint/issues/828 + - id: golangci-lint-full + name: golangci-lint-full-arrow + entry: bash -c 'cd go/arrow && golangci-lint run' + - id: golangci-lint-full + name: golangci-lint-full-parquet + entry: bash -c 'cd go/parquet && golangci-lint run' + - id: golangci-lint-full + name: golangci-lint-full-internal + entry: bash -c 'cd go/internal && golangci-lint run' diff --git a/go/arrow/array/bufferbuilder_numeric_test.go b/go/arrow/array/bufferbuilder_numeric_test.go index df48dcff2e70f..372ba6976269d 100644 --- a/go/arrow/array/bufferbuilder_numeric_test.go +++ b/go/arrow/array/bufferbuilder_numeric_test.go @@ -20,8 +20,8 @@ import ( "testing" "unsafe" - "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/arrow/endian" + "github.com/apache/arrow/go/v17/arrow/memory" "github.com/stretchr/testify/assert" ) diff --git a/go/arrow/array/numeric.gen.go b/go/arrow/array/numeric.gen.go index 1d65657c5fae8..b962cda40b8b3 100644 --- a/go/arrow/array/numeric.gen.go +++ b/go/arrow/array/numeric.gen.go @@ -307,7 +307,7 @@ func (a *Float64) MarshalJSON() ([]byte, error) { default: vals[i] = f } - + } return json.Marshal(vals) diff --git a/go/arrow/array/numeric_test.go b/go/arrow/array/numeric_test.go index f775035c66652..3013d45acbb2b 100644 --- a/go/arrow/array/numeric_test.go +++ b/go/arrow/array/numeric_test.go @@ -16,7 +16,7 @@ package array_test -import ( +import ( "math" "reflect" "testing" @@ -144,7 +144,7 @@ func TestFloat16MarshalJSON(t *testing.T) { bldr := array.NewFloat16Builder(pool) defer bldr.Release() - + jsonstr := `[0, 1, 2, 3, "NaN", "NaN", 4, 5, "+Inf", "-Inf"]` bldr.Append(float16.New(0)) @@ -158,7 +158,6 @@ func TestFloat16MarshalJSON(t *testing.T) { bldr.Append(float16.Inf()) bldr.Append(float16.Inf().Negate()) - expected := bldr.NewFloat16Array() defer expected.Release() expected_json, err := expected.MarshalJSON() @@ -172,7 +171,7 @@ func TestFloat32MarshalJSON(t *testing.T) { bldr := array.NewFloat32Builder(pool) defer bldr.Release() - + jsonstr := `[0, 1, "+Inf", 2, 3, "NaN", "NaN", 4, 5, "-Inf"]` bldr.Append(0) @@ -186,10 +185,9 @@ func TestFloat32MarshalJSON(t *testing.T) { bldr.Append(5) bldr.Append(float32(math.Inf(-1))) - expected := bldr.NewFloat32Array() defer expected.Release() - + expected_json, err := expected.MarshalJSON() assert.NoError(t, err) @@ -223,7 +221,7 @@ func TestFloat64MarshalJSON(t *testing.T) { assert.NoError(t, err) assert.JSONEq(t, jsonstr, string(expected_json)) - + } func TestUnmarshalSpecialFloat(t *testing.T) { diff --git a/go/arrow/array/numericbuilder.gen_test.go b/go/arrow/array/numericbuilder.gen_test.go index 43b14c1868666..b43aa7f807090 100644 --- a/go/arrow/array/numericbuilder.gen_test.go +++ b/go/arrow/array/numericbuilder.gen_test.go @@ -648,9 +648,9 @@ func TestFloat64BuilderUnmarshalJSON(t *testing.T) { arr := bldr.NewFloat64Array() defer arr.Release() - + assert.NotNil(t, arr) - + assert.False(t, 
math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) @@ -1276,9 +1276,9 @@ func TestFloat32BuilderUnmarshalJSON(t *testing.T) { arr := bldr.NewFloat32Array() defer arr.Release() - + assert.NotNil(t, arr) - + assert.False(t, math.IsInf(float64(arr.Value(0)), 0), arr.Value(0)) assert.True(t, math.IsInf(float64(arr.Value(2)), 1), arr.Value(2)) assert.True(t, math.IsNaN(float64(arr.Value(5))), arr.Value(5)) diff --git a/go/arrow/array/record_test.go b/go/arrow/array/record_test.go index 36bb0eaa4c511..be6a26eb1a6ba 100644 --- a/go/arrow/array/record_test.go +++ b/go/arrow/array/record_test.go @@ -94,7 +94,7 @@ func TestRecord(t *testing.T) { if _, err := rec.SetColumn(0, col2_1); err == nil { t.Fatalf("expected an error") } - newRec, err := rec.SetColumn(1, col2_1); + newRec, err := rec.SetColumn(1, col2_1) if err != nil { t.Fatalf("unexpected error: %v", err) } diff --git a/go/arrow/datatype_nested_test.go b/go/arrow/datatype_nested_test.go index a1daa8e58df31..fc4c672c6b768 100644 --- a/go/arrow/datatype_nested_test.go +++ b/go/arrow/datatype_nested_test.go @@ -94,14 +94,14 @@ func TestStructOf(t *testing.T) { fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}}, want: &StructType{ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32}}, - index: map[string][]int{"f1": []int{0}}, + index: map[string][]int{"f1": {0}}, }, }, { fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}}, want: &StructType{ fields: []Field{{Name: "f1", Type: PrimitiveTypes.Int32, Nullable: true}}, - index: map[string][]int{"f1": []int{0}}, + index: map[string][]int{"f1": {0}}, }, }, { @@ -114,7 +114,7 @@ func TestStructOf(t *testing.T) { {Name: "f1", Type: PrimitiveTypes.Int32}, {Name: "", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0}, "": []int{1}}, + index: map[string][]int{"f1": {0}, "": {1}}, }, }, { @@ -127,7 +127,7 @@ func TestStructOf(t *testing.T) { {Name: "f1", Type: PrimitiveTypes.Int32}, {Name: "f2", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}}, + index: map[string][]int{"f1": {0}, "f2": {1}}, }, }, { @@ -142,7 +142,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(PrimitiveTypes.Float64)}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -157,7 +157,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(ListOf(PrimitiveTypes.Float64))}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -172,7 +172,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f3", Type: ListOf(ListOf(StructOf(Field{Name: "f1", Type: PrimitiveTypes.Float64})))}, }, - index: map[string][]int{"f1": []int{0}, "f2": []int{1}, "f3": []int{2}}, + index: map[string][]int{"f1": {0}, "f2": {1}, "f3": {2}}, }, }, { @@ -187,7 +187,7 @@ func TestStructOf(t *testing.T) { {Name: "f2", Type: PrimitiveTypes.Int64}, {Name: "f1", Type: PrimitiveTypes.Int64}, }, - index: map[string][]int{"f1": []int{0, 2}, "f2": []int{1}}, + index: map[string][]int{"f1": {0, 2}, "f2": {1}}, }, }, } { diff --git a/go/arrow/flight/gen/flight/Flight.pb.go b/go/arrow/flight/gen/flight/Flight.pb.go index 
d9477ee062fa8..ea35f469116ab 100644 --- a/go/arrow/flight/gen/flight/Flight.pb.go +++ b/go/arrow/flight/gen/flight/Flight.pb.go @@ -24,11 +24,12 @@ package flight import ( + reflect "reflect" + sync "sync" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" timestamppb "google.golang.org/protobuf/types/known/timestamppb" - reflect "reflect" - sync "sync" ) const ( diff --git a/go/arrow/flight/gen/flight/FlightSql.pb.go b/go/arrow/flight/gen/flight/FlightSql.pb.go index 196c1d6b33643..f8f5e17d76bd2 100644 --- a/go/arrow/flight/gen/flight/FlightSql.pb.go +++ b/go/arrow/flight/gen/flight/FlightSql.pb.go @@ -24,11 +24,12 @@ package flight import ( + reflect "reflect" + sync "sync" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" protoimpl "google.golang.org/protobuf/runtime/protoimpl" descriptorpb "google.golang.org/protobuf/types/descriptorpb" - reflect "reflect" - sync "sync" ) const ( diff --git a/go/arrow/flight/gen/flight/Flight_grpc.pb.go b/go/arrow/flight/gen/flight/Flight_grpc.pb.go index 11bbb00131ddb..da5601b46ab95 100644 --- a/go/arrow/flight/gen/flight/Flight_grpc.pb.go +++ b/go/arrow/flight/gen/flight/Flight_grpc.pb.go @@ -8,6 +8,7 @@ package flight import ( context "context" + grpc "google.golang.org/grpc" codes "google.golang.org/grpc/codes" status "google.golang.org/grpc/status" diff --git a/go/arrow/float16/float16.go b/go/arrow/float16/float16.go index ecf5c9ddce9db..f61db40ef498c 100644 --- a/go/arrow/float16/float16.go +++ b/go/arrow/float16/float16.go @@ -175,7 +175,7 @@ func (n Num) Signbit() bool { return (n.bits & 0x8000) != 0 } func (n Num) IsNaN() bool { return (n.bits & 0x7fff) > 0x7c00 } -func (n Num) IsInf() bool {return (n.bits & 0x7c00) == 0x7c00 } +func (n Num) IsInf() bool { return (n.bits & 0x7c00) == 0x7c00 } func (n Num) IsZero() bool { return (n.bits & 0x7fff) == 0 } diff --git a/go/arrow/gen-flatbuffers.go b/go/arrow/gen-flatbuffers.go index 5c8eba4a24757..720016e0bf168 100644 --- a/go/arrow/gen-flatbuffers.go +++ b/go/arrow/gen-flatbuffers.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build ignore // +build ignore package main diff --git a/go/arrow/internal/debug/assert_off.go b/go/arrow/internal/debug/assert_off.go index 52b9a233169d2..1450ecc98a26e 100644 --- a/go/arrow/internal/debug/assert_off.go +++ b/go/arrow/internal/debug/assert_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !assert // +build !assert package debug diff --git a/go/arrow/internal/debug/assert_on.go b/go/arrow/internal/debug/assert_on.go index 2aa5d6ace4cf0..4a57169b31358 100644 --- a/go/arrow/internal/debug/assert_on.go +++ b/go/arrow/internal/debug/assert_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build assert // +build assert package debug diff --git a/go/arrow/internal/debug/doc.go b/go/arrow/internal/debug/doc.go index 3ee1783ca4bda..094e427a22e09 100644 --- a/go/arrow/internal/debug/doc.go +++ b/go/arrow/internal/debug/doc.go @@ -17,14 +17,12 @@ /* Package debug provides APIs for conditional runtime assertions and debug logging. - -Using Assert +# Using Assert To enable runtime assertions, build with the assert tag. When the assert tag is omitted, the code for the assertion will be omitted from the binary. 
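A recurring change in the files above is a `//go:build` line appearing next to the legacy `// +build` comment. `//go:build` has been the canonical build-constraint syntax since Go 1.17, and `gofmt` adds and synchronizes it automatically when it sees the old form, which is what this lint run did. A minimal sketch of a tag-constrained file; the `assert` tag comes from the patch, while the function body is only illustrative:

```go
//go:build assert
// +build assert

package debug

import "fmt"

// Assert is compiled only with `go build -tags assert`; a sibling file
// constrained by `//go:build !assert` supplies a no-op variant so the
// check (and its cost) disappears from normal builds.
func Assert(cond bool, msg string) {
	if !cond {
		panic(fmt.Sprintf("assertion failed: %s", msg))
	}
}
```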
- -Using Log +# Using Log To enable runtime debug logs, build with the debug tag. When the debug tag is omitted, the code for logging will be omitted from the binary. diff --git a/go/arrow/internal/debug/log_off.go b/go/arrow/internal/debug/log_off.go index 48da8e1ee94c7..760a5cdc0dc01 100644 --- a/go/arrow/internal/debug/log_off.go +++ b/go/arrow/internal/debug/log_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !debug // +build !debug package debug diff --git a/go/arrow/internal/debug/log_on.go b/go/arrow/internal/debug/log_on.go index 99d0c8ae33fef..2588e7d1069f0 100644 --- a/go/arrow/internal/debug/log_on.go +++ b/go/arrow/internal/debug/log_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug // +build debug package debug diff --git a/go/arrow/internal/debug/util.go b/go/arrow/internal/debug/util.go index 7bd3d5389e669..ea4eba7fb5cb8 100644 --- a/go/arrow/internal/debug/util.go +++ b/go/arrow/internal/debug/util.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build debug || assert // +build debug assert package debug diff --git a/go/arrow/internal/flatbuf/Binary.go b/go/arrow/internal/flatbuf/Binary.go index e8018e74c4151..95e015595b548 100644 --- a/go/arrow/internal/flatbuf/Binary.go +++ b/go/arrow/internal/flatbuf/Binary.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Opaque binary data +// / Opaque binary data type Binary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/BinaryView.go b/go/arrow/internal/flatbuf/BinaryView.go index 09ca5e7db9601..f6906674bdbc7 100644 --- a/go/arrow/internal/flatbuf/BinaryView.go +++ b/go/arrow/internal/flatbuf/BinaryView.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Binary, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Binary, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. 
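The wholesale comment rewrite in these generated flatbuffers files, `///` turning into `// /`, also comes from `gofmt`: Go has no triple-slash doc-comment form, so `/// text` parses as a `//` comment whose body begins with `/`, and the Go 1.19+ doc-comment formatter inserts the standard space after `//`. The meaning of the comments is unchanged. A tiny illustration, with an invented type name:

```go
package flatbuf

// What flatc emits, mirroring the C++ style:
//
//	/// Opaque binary data
//
// What gofmt rewrites it to, since "///" is read as "//" plus "/ ...":
//
//	// / Opaque binary data
type Example struct{}
```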
type BinaryView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Block.go b/go/arrow/internal/flatbuf/Block.go index 57a697b196883..8e33d3e641543 100644 --- a/go/arrow/internal/flatbuf/Block.go +++ b/go/arrow/internal/flatbuf/Block.go @@ -35,31 +35,34 @@ func (rcv *Block) Table() flatbuffers.Table { return rcv._tab.Table } -/// Index to the start of the RecordBlock (note this is past the Message header) +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// Index to the start of the RecordBlock (note this is past the Message header) + +// / Index to the start of the RecordBlock (note this is past the Message header) func (rcv *Block) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// Length of the metadata +// / Length of the metadata func (rcv *Block) MetaDataLength() int32 { return rcv._tab.GetInt32(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// Length of the metadata + +// / Length of the metadata func (rcv *Block) MutateMetaDataLength(n int32) bool { return rcv._tab.MutateInt32(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). func (rcv *Block) BodyLength() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(16)) } -/// Length of the data (this is aligned so there can be a gap between this and -/// the metadata). + +// / Length of the data (this is aligned so there can be a gap between this and +// / the metadata). func (rcv *Block) MutateBodyLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(16), n) } diff --git a/go/arrow/internal/flatbuf/BodyCompression.go b/go/arrow/internal/flatbuf/BodyCompression.go index 6468e23135254..c23c29190216b 100644 --- a/go/arrow/internal/flatbuf/BodyCompression.go +++ b/go/arrow/internal/flatbuf/BodyCompression.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Optional compression for the memory buffers constituting IPC message -/// bodies. Intended for use with RecordBatch but could be used for other -/// message types +// / Optional compression for the memory buffers constituting IPC message +// / bodies. Intended for use with RecordBatch but could be used for other +// / message types type BodyCompression struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *BodyCompression) Table() flatbuffers.Table { return rcv._tab } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. func (rcv *BodyCompression) Codec() CompressionType { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *BodyCompression) Codec() CompressionType { return 0 } -/// Compressor library. -/// For LZ4_FRAME, each compressed buffer must consist of a single frame. +// / Compressor library. +// / For LZ4_FRAME, each compressed buffer must consist of a single frame. 
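Every generated table accessor above follows the same shape, worth spelling out once: `Offset(slot)` consults the table's vtable and returns 0 when the writer omitted the field, in which case the accessor returns the schema default instead of touching the buffer. Below is an annotated paraphrase of the `Codec` accessor shown above; the method name is invented and only the comments are new:

```go
func (rcv *BodyCompression) codecAnnotated() CompressionType {
	// Look up vtable slot 4; 0 means the writer left the field at its
	// default and never stored it in the buffer.
	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
	if o != 0 {
		// Field present: read the int8 at the resolved table position.
		return CompressionType(rcv._tab.GetInt8(o + rcv._tab.Pos))
	}
	// Field absent: fall back to the schema default (0).
	return 0
}
```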
func (rcv *BodyCompression) MutateCodec(n CompressionType) bool { return rcv._tab.MutateInt8Slot(4, int8(n)) } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) Method() BodyCompressionMethod { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -70,7 +70,7 @@ func (rcv *BodyCompression) Method() BodyCompressionMethod { return 0 } -/// Indicates the way the record batch body was compressed +// / Indicates the way the record batch body was compressed func (rcv *BodyCompression) MutateMethod(n BodyCompressionMethod) bool { return rcv._tab.MutateInt8Slot(6, int8(n)) } diff --git a/go/arrow/internal/flatbuf/BodyCompressionMethod.go b/go/arrow/internal/flatbuf/BodyCompressionMethod.go index 108ab3e07fba6..bb7234b3989b5 100644 --- a/go/arrow/internal/flatbuf/BodyCompressionMethod.go +++ b/go/arrow/internal/flatbuf/BodyCompressionMethod.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// Provided for forward compatibility in case we need to support different -/// strategies for compressing the IPC message body (like whole-body -/// compression rather than buffer-level) in the future +// / Provided for forward compatibility in case we need to support different +// / strategies for compressing the IPC message body (like whole-body +// / compression rather than buffer-level) in the future type BodyCompressionMethod int8 const ( diff --git a/go/arrow/internal/flatbuf/Buffer.go b/go/arrow/internal/flatbuf/Buffer.go index eba8d99b28e9b..e650e06a57026 100644 --- a/go/arrow/internal/flatbuf/Buffer.go +++ b/go/arrow/internal/flatbuf/Buffer.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Buffer represents a single contiguous memory segment +// / ---------------------------------------------------------------------- +// / A Buffer represents a single contiguous memory segment type Buffer struct { _tab flatbuffers.Struct } @@ -37,30 +37,32 @@ func (rcv *Buffer) Table() flatbuffers.Table { return rcv._tab.Table } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) Offset() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The relative offset into the shared memory page where the bytes for this -/// buffer starts + +// / The relative offset into the shared memory page where the bytes for this +// / buffer starts func (rcv *Buffer) MutateOffset(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. 
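As the comments above say, a `Buffer` carries no bytes itself; it names a span of the IPC message body. A sketch of how a reader would resolve one, with an invented helper name:

```go
// bufferBytes returns the slice of the raw IPC message body described by
// a flatbuf.Buffer: Offset is relative to the start of the body and
// Length is the exact byte count, excluding any trailing alignment
// padding written after the buffer.
func bufferBytes(body []byte, buf *flatbuf.Buffer) []byte {
	start := buf.Offset()
	// Half-open range: offset inclusive, offset+length exclusive.
	return body[start : start+buf.Length()]
}
```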
func (rcv *Buffer) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The absolute length (in bytes) of the memory buffer. The memory is found -/// from offset (inclusive) to offset + length (non-inclusive). When building -/// messages using the encapsulated IPC message, padding bytes may be written -/// after a buffer, but such padding bytes do not need to be accounted for in -/// the size here. + +// / The absolute length (in bytes) of the memory buffer. The memory is found +// / from offset (inclusive) to offset + length (non-inclusive). When building +// / messages using the encapsulated IPC message, padding bytes may be written +// / after a buffer, but such padding bytes do not need to be accounted for in +// / the size here. func (rcv *Buffer) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/Date.go b/go/arrow/internal/flatbuf/Date.go index 32983ec54ccc2..985a8f79955a4 100644 --- a/go/arrow/internal/flatbuf/Date.go +++ b/go/arrow/internal/flatbuf/Date.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Date is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since UNIX epoch (1970-01-01), stored in either of two units: -/// -/// * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no -/// leap seconds), where the values are evenly divisible by 86400000 -/// * Days (32 bits) since the UNIX epoch +// / Date is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since UNIX epoch (1970-01-01), stored in either of two units: +// / +// / * Milliseconds (64 bits) indicating UNIX time elapsed since the epoch (no +// / leap seconds), where the values are evenly divisible by 86400000 +// / * Days (32 bits) since the UNIX epoch type Date struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Decimal.go b/go/arrow/internal/flatbuf/Decimal.go index c9de254d1dcbd..2fc9d5ad6586c 100644 --- a/go/arrow/internal/flatbuf/Decimal.go +++ b/go/arrow/internal/flatbuf/Decimal.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Exact decimal value represented as an integer value in two's -/// complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers -/// are used. The representation uses the endianness indicated -/// in the Schema. +// / Exact decimal value represented as an integer value in two's +// / complement. Currently only 128-bit (16-byte) and 256-bit (32-byte) integers +// / are used. The representation uses the endianness indicated +// / in the Schema. type Decimal struct { _tab flatbuffers.Table } @@ -46,7 +46,7 @@ func (rcv *Decimal) Table() flatbuffers.Table { return rcv._tab } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) Precision() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,12 +55,12 @@ func (rcv *Decimal) Precision() int32 { return 0 } -/// Total number of decimal digits +// / Total number of decimal digits func (rcv *Decimal) MutatePrecision(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } -/// Number of digits after the decimal point "." +// / Number of digits after the decimal point "." func (rcv *Decimal) Scale() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -69,13 +69,13 @@ func (rcv *Decimal) Scale() int32 { return 0 } -/// Number of digits after the decimal point "." 
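To make the `precision` and `scale` fields above concrete: the stored value is an unscaled two's-complement integer, and the logical value is `unscaled * 10^-scale`. A worked one-liner; the numbers are illustrative, and a 64-bit int stands in for the real 128- or 256-bit storage only to show the scaling:

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// scale=2: the on-wire unscaled integer 12345 represents 123.45.
	unscaled := int64(12345)
	scale := 2
	fmt.Println(float64(unscaled) / math.Pow10(scale)) // 123.45 (float used only for display)
}
```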
+// / Number of digits after the decimal point "." func (rcv *Decimal) MutateScale(n int32) bool { return rcv._tab.MutateInt32Slot(6, n) } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) BitWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,8 +84,8 @@ func (rcv *Decimal) BitWidth() int32 { return 128 } -/// Number of bits per value. The only accepted widths are 128 and 256. -/// We use bitWidth for consistency with Int::bitWidth. +// / Number of bits per value. The only accepted widths are 128 and 256. +// / We use bitWidth for consistency with Int::bitWidth. func (rcv *Decimal) MutateBitWidth(n int32) bool { return rcv._tab.MutateInt32Slot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryBatch.go b/go/arrow/internal/flatbuf/DictionaryBatch.go index 25b5384e46a5c..999c5fda46384 100644 --- a/go/arrow/internal/flatbuf/DictionaryBatch.go +++ b/go/arrow/internal/flatbuf/DictionaryBatch.go @@ -22,12 +22,12 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// For sending dictionary encoding information. Any Field can be -/// dictionary-encoded, but in this case none of its children may be -/// dictionary-encoded. -/// There is one vector / column per dictionary, but that vector / column -/// may be spread across multiple dictionary batches by using the isDelta -/// flag +// / For sending dictionary encoding information. Any Field can be +// / dictionary-encoded, but in this case none of its children may be +// / dictionary-encoded. +// / There is one vector / column per dictionary, but that vector / column +// / may be spread across multiple dictionary batches by using the isDelta +// / flag type DictionaryBatch struct { _tab flatbuffers.Table } @@ -73,9 +73,9 @@ func (rcv *DictionaryBatch) Data(obj *RecordBatch) *RecordBatch { return nil } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) IsDelta() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -84,9 +84,9 @@ func (rcv *DictionaryBatch) IsDelta() bool { return false } -/// If isDelta is true the values in the dictionary are to be appended to a -/// dictionary with the indicated id. If isDelta is false this dictionary -/// should replace the existing dictionary. +// / If isDelta is true the values in the dictionary are to be appended to a +// / dictionary with the indicated id. If isDelta is false this dictionary +// / should replace the existing dictionary. func (rcv *DictionaryBatch) MutateIsDelta(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryEncoding.go b/go/arrow/internal/flatbuf/DictionaryEncoding.go index a9b09530b2a52..44c3874219f1c 100644 --- a/go/arrow/internal/flatbuf/DictionaryEncoding.go +++ b/go/arrow/internal/flatbuf/DictionaryEncoding.go @@ -42,9 +42,9 @@ func (rcv *DictionaryEncoding) Table() flatbuffers.Table { return rcv._tab } -/// The known dictionary id in the application where this data is used. 
In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) Id() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -53,18 +53,18 @@ func (rcv *DictionaryEncoding) Id() int64 { return 0 } -/// The known dictionary id in the application where this data is used. In -/// the file or streaming formats, the dictionary ids are found in the -/// DictionaryBatch messages +// / The known dictionary id in the application where this data is used. In +// / the file or streaming formats, the dictionary ids are found in the +// / DictionaryBatch messages func (rcv *DictionaryEncoding) MutateId(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -78,15 +78,15 @@ func (rcv *DictionaryEncoding) IndexType(obj *Int) *Int { return nil } -/// The dictionary indices are constrained to be non-negative integers. If -/// this field is null, the indices must be signed int32. To maximize -/// cross-language compatibility and performance, implementations are -/// recommended to prefer signed integer types over unsigned integer types -/// and to avoid uint64 indices unless they are required by an application. -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / The dictionary indices are constrained to be non-negative integers. If +// / this field is null, the indices must be signed int32. To maximize +// / cross-language compatibility and performance, implementations are +// / recommended to prefer signed integer types over unsigned integer types +// / and to avoid uint64 indices unless they are required by an application. +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical, applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) IsOrdered() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -95,10 +95,10 @@ func (rcv *DictionaryEncoding) IsOrdered() bool { return false } -/// By default, dictionaries are not ordered, or the order does not have -/// semantic meaning. 
In some statistical, applications, dictionary-encoding -/// is used to represent ordered categorical data, and we provide a way to -/// preserve that metadata here +// / By default, dictionaries are not ordered, or the order does not have +// / semantic meaning. In some statistical, applications, dictionary-encoding +// / is used to represent ordered categorical data, and we provide a way to +// / preserve that metadata here func (rcv *DictionaryEncoding) MutateIsOrdered(n bool) bool { return rcv._tab.MutateBoolSlot(8, n) } diff --git a/go/arrow/internal/flatbuf/DictionaryKind.go b/go/arrow/internal/flatbuf/DictionaryKind.go index 126ba5f7f6bb0..6825100515612 100644 --- a/go/arrow/internal/flatbuf/DictionaryKind.go +++ b/go/arrow/internal/flatbuf/DictionaryKind.go @@ -20,11 +20,11 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Dictionary encoding metadata -/// Maintained for forwards compatibility, in the future -/// Dictionaries might be explicit maps between integers and values -/// allowing for non-contiguous index values +// / ---------------------------------------------------------------------- +// / Dictionary encoding metadata +// / Maintained for forwards compatibility, in the future +// / Dictionaries might be explicit maps between integers and values +// / allowing for non-contiguous index values type DictionaryKind int16 const ( diff --git a/go/arrow/internal/flatbuf/Endianness.go b/go/arrow/internal/flatbuf/Endianness.go index cefa2ff9c06ed..c9619b7b0d978 100644 --- a/go/arrow/internal/flatbuf/Endianness.go +++ b/go/arrow/internal/flatbuf/Endianness.go @@ -20,8 +20,8 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Endianness of the platform producing the data +// / ---------------------------------------------------------------------- +// / Endianness of the platform producing the data type Endianness int16 const ( diff --git a/go/arrow/internal/flatbuf/Feature.go b/go/arrow/internal/flatbuf/Feature.go index ae5a0398b607d..2204c440ed4fe 100644 --- a/go/arrow/internal/flatbuf/Feature.go +++ b/go/arrow/internal/flatbuf/Feature.go @@ -20,35 +20,35 @@ package flatbuf import "strconv" -/// Represents Arrow Features that might not have full support -/// within implementations. This is intended to be used in -/// two scenarios: -/// 1. A mechanism for readers of Arrow Streams -/// and files to understand that the stream or file makes -/// use of a feature that isn't supported or unknown to -/// the implementation (and therefore can meet the Arrow -/// forward compatibility guarantees). -/// 2. A means of negotiating between a client and server -/// what features a stream is allowed to use. The enums -/// values here are intented to represent higher level -/// features, additional details maybe negotiated -/// with key-value pairs specific to the protocol. -/// -/// Enums added to this list should be assigned power-of-two values -/// to facilitate exchanging and comparing bitmaps for supported -/// features. +// / Represents Arrow Features that might not have full support +// / within implementations. This is intended to be used in +// / two scenarios: +// / 1. A mechanism for readers of Arrow Streams +// / and files to understand that the stream or file makes +// / use of a feature that isn't supported or unknown to +// / the implementation (and therefore can meet the Arrow +// / forward compatibility guarantees). +// / 2. 
A means of negotiating between a client and server +// / what features a stream is allowed to use. The enums +// / values here are intented to represent higher level +// / features, additional details maybe negotiated +// / with key-value pairs specific to the protocol. +// / +// / Enums added to this list should be assigned power-of-two values +// / to facilitate exchanging and comparing bitmaps for supported +// / features. type Feature int64 const ( /// Needed to make flatbuffers happy. - FeatureUNUSED Feature = 0 + FeatureUNUSED Feature = 0 /// The stream makes use of multiple full dictionaries with the /// same ID and assumes clients implement dictionary replacement /// correctly. FeatureDICTIONARY_REPLACEMENT Feature = 1 /// The stream makes use of compressed bodies as described /// in Message.fbs. - FeatureCOMPRESSED_BODY Feature = 2 + FeatureCOMPRESSED_BODY Feature = 2 ) var EnumNamesFeature = map[Feature]string{ diff --git a/go/arrow/internal/flatbuf/Field.go b/go/arrow/internal/flatbuf/Field.go index c03cf2f878b6f..8aed29bc48137 100644 --- a/go/arrow/internal/flatbuf/Field.go +++ b/go/arrow/internal/flatbuf/Field.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A field represents a named column in a record / row batch or child of a -/// nested type. +// / ---------------------------------------------------------------------- +// / A field represents a named column in a record / row batch or child of a +// / nested type. type Field struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *Field) Table() flatbuffers.Table { return rcv._tab } -/// Name is not required, in i.e. a List +// / Name is not required, in i.e. a List func (rcv *Field) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,8 +54,8 @@ func (rcv *Field) Name() []byte { return nil } -/// Name is not required, in i.e. a List -/// Whether or not this field can contain nulls. Should be true in general. +// / Name is not required, in i.e. a List +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) Nullable() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -64,7 +64,7 @@ func (rcv *Field) Nullable() bool { return false } -/// Whether or not this field can contain nulls. Should be true in general. +// / Whether or not this field can contain nulls. Should be true in general. func (rcv *Field) MutateNullable(n bool) bool { return rcv._tab.MutateBoolSlot(6, n) } @@ -81,7 +81,7 @@ func (rcv *Field) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(8, byte(n)) } -/// This is the type of the decoded value if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. func (rcv *Field) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -91,8 +91,8 @@ func (rcv *Field) Type(obj *flatbuffers.Table) bool { return false } -/// This is the type of the decoded value if the field is dictionary encoded. -/// Present only if the field is dictionary encoded. +// / This is the type of the decoded value if the field is dictionary encoded. +// / Present only if the field is dictionary encoded. 
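For readers new to the `dictionary` field above: dictionary encoding stores each distinct value once and replaces the column data with small integer indices into that dictionary, and `isOrdered` says whether the dictionary's order carries meaning. A minimal sketch with invented data:

```go
package main

import "fmt"

func main() {
	// The dictionary holds each distinct value exactly once.
	dictionary := []string{"apple", "banana", "cherry"}
	// The column stores non-negative indices into the dictionary;
	// per the comment above, signed int32 is the default index type.
	indices := []int32{0, 2, 2, 1, 0}

	for _, ix := range indices {
		fmt.Print(dictionary[ix], " ") // apple cherry cherry banana apple
	}
	fmt.Println()
}
```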
func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -106,9 +106,9 @@ func (rcv *Field) Dictionary(obj *DictionaryEncoding) *DictionaryEncoding { return nil } -/// Present only if the field is dictionary encoded. -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. +// / Present only if the field is dictionary encoded. +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. func (rcv *Field) Children(obj *Field, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(14)) if o != 0 { @@ -129,9 +129,9 @@ func (rcv *Field) ChildrenLength() int { return 0 } -/// children apply only to nested data types like Struct, List and Union. For -/// primitive types children will have length 0. -/// User-defined metadata +// / children apply only to nested data types like Struct, List and Union. For +// / primitive types children will have length 0. +// / User-defined metadata func (rcv *Field) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(16)) if o != 0 { @@ -152,7 +152,7 @@ func (rcv *Field) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FieldStart(builder *flatbuffers.Builder) { builder.StartObject(7) } diff --git a/go/arrow/internal/flatbuf/FieldNode.go b/go/arrow/internal/flatbuf/FieldNode.go index 606b30bfebbd2..0e258a3d2cde8 100644 --- a/go/arrow/internal/flatbuf/FieldNode.go +++ b/go/arrow/internal/flatbuf/FieldNode.go @@ -22,15 +22,15 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for describing a table row batch (a collection of -/// equal-length Arrow arrays) -/// Metadata about a field at some level of a nested type tree (but not -/// its children). -/// -/// For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` -/// would have {length: 5, null_count: 2} for its List node, and {length: 6, -/// null_count: 0} for its Int16 node, as separate FieldNode structs +// / ---------------------------------------------------------------------- +// / Data structures for describing a table row batch (a collection of +// / equal-length Arrow arrays) +// / Metadata about a field at some level of a nested type tree (but not +// / its children). +// / +// / For example, a List with values `[[1, 2, 3], null, [4], [5, 6], null]` +// / would have {length: 5, null_count: 2} for its List node, and {length: 6, +// / null_count: 0} for its Int16 node, as separate FieldNode structs type FieldNode struct { _tab flatbuffers.Struct } @@ -44,26 +44,28 @@ func (rcv *FieldNode) Table() flatbuffers.Table { return rcv._tab.Table } -/// The number of value slots in the Arrow array at this level of a nested -/// tree +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) Length() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(0)) } -/// The number of value slots in the Arrow array at this level of a nested -/// tree + +// / The number of value slots in the Arrow array at this level of a nested +// / tree func (rcv *FieldNode) MutateLength(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(0), n) } -/// The number of observed nulls. 
Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) NullCount() int64 { return rcv._tab.GetInt64(rcv._tab.Pos + flatbuffers.UOffsetT(8)) } -/// The number of observed nulls. Fields with null_count == 0 may choose not -/// to write their physical validity bitmap out as a materialized buffer, -/// instead setting the length of the bitmap buffer to 0. + +// / The number of observed nulls. Fields with null_count == 0 may choose not +// / to write their physical validity bitmap out as a materialized buffer, +// / instead setting the length of the bitmap buffer to 0. func (rcv *FieldNode) MutateNullCount(n int64) bool { return rcv._tab.MutateInt64(rcv._tab.Pos+flatbuffers.UOffsetT(8), n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeBinary.go b/go/arrow/internal/flatbuf/FixedSizeBinary.go index 4e660d5077f71..2725dfb90b966 100644 --- a/go/arrow/internal/flatbuf/FixedSizeBinary.go +++ b/go/arrow/internal/flatbuf/FixedSizeBinary.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeBinary) Table() flatbuffers.Table { return rcv._tab } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) ByteWidth() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeBinary) ByteWidth() int32 { return 0 } -/// Number of bytes per value +// / Number of bytes per value func (rcv *FixedSizeBinary) MutateByteWidth(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/FixedSizeList.go b/go/arrow/internal/flatbuf/FixedSizeList.go index dabf5cc8581da..534ca27f2fe21 100644 --- a/go/arrow/internal/flatbuf/FixedSizeList.go +++ b/go/arrow/internal/flatbuf/FixedSizeList.go @@ -42,7 +42,7 @@ func (rcv *FixedSizeList) Table() flatbuffers.Table { return rcv._tab } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) ListSize() int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -51,7 +51,7 @@ func (rcv *FixedSizeList) ListSize() int32 { return 0 } -/// Number of list items per value +// / Number of list items per value func (rcv *FixedSizeList) MutateListSize(n int32) bool { return rcv._tab.MutateInt32Slot(4, n) } diff --git a/go/arrow/internal/flatbuf/Footer.go b/go/arrow/internal/flatbuf/Footer.go index 65b0ff0954614..d65af41e7f62e 100644 --- a/go/arrow/internal/flatbuf/Footer.go +++ b/go/arrow/internal/flatbuf/Footer.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Arrow File metadata -/// +// / ---------------------------------------------------------------------- +// / Arrow File metadata +// / type Footer struct { _tab flatbuffers.Table } @@ -108,7 +108,7 @@ func (rcv *Footer) RecordBatchesLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func (rcv *Footer) CustomMetadata(obj *KeyValue, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -129,7 +129,7 @@ func (rcv *Footer) CustomMetadataLength() int { return 0 } -/// User-defined metadata +// / User-defined metadata func FooterStart(builder *flatbuffers.Builder) { 
builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/KeyValue.go b/go/arrow/internal/flatbuf/KeyValue.go index c1b85318ecd5f..0cd5dc62923e3 100644 --- a/go/arrow/internal/flatbuf/KeyValue.go +++ b/go/arrow/internal/flatbuf/KeyValue.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// user defined key value pairs to add custom metadata to arrow -/// key namespacing is the responsibility of the user +// / ---------------------------------------------------------------------- +// / user defined key value pairs to add custom metadata to arrow +// / key namespacing is the responsibility of the user type KeyValue struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeBinary.go b/go/arrow/internal/flatbuf/LargeBinary.go index 2c3befcc16fb9..b25ecc41aff51 100644 --- a/go/arrow/internal/flatbuf/LargeBinary.go +++ b/go/arrow/internal/flatbuf/LargeBinary.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Binary, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Binary, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeBinary struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeList.go b/go/arrow/internal/flatbuf/LargeList.go index 92f2284587445..d8bfb9c07df76 100644 --- a/go/arrow/internal/flatbuf/LargeList.go +++ b/go/arrow/internal/flatbuf/LargeList.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as List, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as List, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeList struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeListView.go b/go/arrow/internal/flatbuf/LargeListView.go index 5b1df149cd1e2..4608c1dec53d8 100644 --- a/go/arrow/internal/flatbuf/LargeListView.go +++ b/go/arrow/internal/flatbuf/LargeListView.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent -/// extremely large data values. +// / Same as ListView, but with 64-bit offsets and sizes, allowing to represent +// / extremely large data values. type LargeListView struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/LargeUtf8.go b/go/arrow/internal/flatbuf/LargeUtf8.go index e78b33e110066..4478fed856e6d 100644 --- a/go/arrow/internal/flatbuf/LargeUtf8.go +++ b/go/arrow/internal/flatbuf/LargeUtf8.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Same as Utf8, but with 64-bit offsets, allowing to represent -/// extremely large data values. +// / Same as Utf8, but with 64-bit offsets, allowing to represent +// / extremely large data values. type LargeUtf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/ListView.go b/go/arrow/internal/flatbuf/ListView.go index 46b1e0b3cbf2f..cde43cf5b6893 100644 --- a/go/arrow/internal/flatbuf/ListView.go +++ b/go/arrow/internal/flatbuf/ListView.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Represents the same logical types that List can, but contains offsets and -/// sizes allowing for writes in any order and sharing of child values among -/// list values. 
+// / Represents the same logical types that List can, but contains offsets and
+// / sizes allowing for writes in any order and sharing of child values among
+// / list values.
 type ListView struct {
 	_tab flatbuffers.Table
 }
diff --git a/go/arrow/internal/flatbuf/Map.go b/go/arrow/internal/flatbuf/Map.go
index 8802aba1ebd39..d4871e558199f 100644
--- a/go/arrow/internal/flatbuf/Map.go
+++ b/go/arrow/internal/flatbuf/Map.go
@@ -22,31 +22,31 @@ import (
 	flatbuffers "github.com/google/flatbuffers/go"
 )

-/// A Map is a logical nested type that is represented as
-///
-/// List>
-///
-/// In this layout, the keys and values are each respectively contiguous. We do
-/// not constrain the key and value types, so the application is responsible
-/// for ensuring that the keys are hashable and unique. Whether the keys are sorted
-/// may be set in the metadata for this field.
-///
-/// In a field with Map type, the field has a child Struct field, which then
-/// has two children: key type and the second the value type. The names of the
-/// child fields may be respectively "entries", "key", and "value", but this is
-/// not enforced.
-///
-/// Map
-/// ```text
-/// - child[0] entries: Struct
-/// - child[0] key: K
-/// - child[1] value: V
-/// ```
-/// Neither the "entries" field nor the "key" field may be nullable.
-///
-/// The metadata is structured so that Arrow systems without special handling
-/// for Map can make Map an alias for List. The "layout" attribute for the Map
-/// field must have the same contents as a List.
+// / A Map is a logical nested type that is represented as
+// /
+// / List>
+// /
+// / In this layout, the keys and values are each respectively contiguous. We do
+// / not constrain the key and value types, so the application is responsible
+// / for ensuring that the keys are hashable and unique. Whether the keys are sorted
+// / may be set in the metadata for this field.
+// /
+// / In a field with Map type, the field has a child Struct field, which then
+// / has two children: the first the key type and the second the value type. The
+// / names of the child fields may be respectively "entries", "key", and "value",
+// / but this is not enforced.
+// /
+// / Map
+// / ```text
+// / - child[0] entries: Struct
+// / - child[0] key: K
+// / - child[1] value: V
+// / ```
+// / Neither the "entries" field nor the "key" field may be nullable.
+// /
+// / The metadata is structured so that Arrow systems without special handling
+// / for Map can make Map an alias for List. The "layout" attribute for the Map
+// / field must have the same contents as a List.
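To make the layout described above concrete, here is a minimal plain-Go sketch (not the Arrow Go API) of a Map column stored as a List of key/value entry structs, with contiguous keys and values delimited by list offsets:

```go
package main

import "fmt"

// mapColumn is a hand-rolled stand-in for the physical layout above:
// offsets delimit each map's entries, while keys and values are each
// stored contiguously across all maps.
type mapColumn struct {
	offsets []int32  // len = number of maps + 1
	keys    []string // child "key", contiguous
	values  []int32  // child "value", contiguous
}

// get linearly scans the entries of map i for key k; nothing in the
// format guarantees sorted keys unless keysSorted is set.
func (m mapColumn) get(i int, k string) (int32, bool) {
	for j := m.offsets[i]; j < m.offsets[i+1]; j++ {
		if m.keys[j] == k {
			return m.values[j], true
		}
	}
	return 0, false
}

func main() {
	// Two maps: {"a": 1, "b": 2} and {"c": 3}.
	col := mapColumn{
		offsets: []int32{0, 2, 3},
		keys:    []string{"a", "b", "c"},
		values:  []int32{1, 2, 3},
	}
	v, ok := col.get(0, "b")
	fmt.Println(v, ok) // 2 true
}
```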
type Map struct { _tab flatbuffers.Table } @@ -67,7 +67,7 @@ func (rcv *Map) Table() flatbuffers.Table { return rcv._tab } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) KeysSorted() bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -76,7 +76,7 @@ func (rcv *Map) KeysSorted() bool { return false } -/// Set to true if the keys within each value are sorted +// / Set to true if the keys within each value are sorted func (rcv *Map) MutateKeysSorted(n bool) bool { return rcv._tab.MutateBoolSlot(4, n) } diff --git a/go/arrow/internal/flatbuf/MessageHeader.go b/go/arrow/internal/flatbuf/MessageHeader.go index c12fc1058119d..d7f9907c7a7a2 100644 --- a/go/arrow/internal/flatbuf/MessageHeader.go +++ b/go/arrow/internal/flatbuf/MessageHeader.go @@ -20,14 +20,14 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// The root Message type -/// This union enables us to easily send different message types without -/// redundant storage, and in the future we can easily add new message types. -/// -/// Arrow implementations do not need to implement all of the message types, -/// which may include experimental metadata types. For maximum compatibility, -/// it is best to send data using RecordBatch +// / ---------------------------------------------------------------------- +// / The root Message type +// / This union enables us to easily send different message types without +// / redundant storage, and in the future we can easily add new message types. +// / +// / Arrow implementations do not need to implement all of the message types, +// / which may include experimental metadata types. For maximum compatibility, +// / it is best to send data using RecordBatch type MessageHeader byte const ( diff --git a/go/arrow/internal/flatbuf/Null.go b/go/arrow/internal/flatbuf/Null.go index 3c3eb4bda3619..3b93a1b6ee965 100644 --- a/go/arrow/internal/flatbuf/Null.go +++ b/go/arrow/internal/flatbuf/Null.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// These are stored in the flatbuffer in the Type union below +// / These are stored in the flatbuffer in the Type union below type Null struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/RecordBatch.go b/go/arrow/internal/flatbuf/RecordBatch.go index c50f4a6e868ea..52c72a8a20ae4 100644 --- a/go/arrow/internal/flatbuf/RecordBatch.go +++ b/go/arrow/internal/flatbuf/RecordBatch.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A data header describing the shared memory layout of a "record" or "row" -/// batch. Some systems call this a "row batch" internally and others a "record -/// batch". +// / A data header describing the shared memory layout of a "record" or "row" +// / batch. Some systems call this a "row batch" internally and others a "record +// / batch". type RecordBatch struct { _tab flatbuffers.Table } @@ -45,8 +45,8 @@ func (rcv *RecordBatch) Table() flatbuffers.Table { return rcv._tab } -/// number of records / rows. The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) Length() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,13 +55,13 @@ func (rcv *RecordBatch) Length() int64 { return 0 } -/// number of records / rows. 
The arrays in the batch should all have this -/// length +// / number of records / rows. The arrays in the batch should all have this +// / length func (rcv *RecordBatch) MutateLength(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Nodes correspond to the pre-ordered flattened logical schema +// / Nodes correspond to the pre-ordered flattened logical schema func (rcv *RecordBatch) Nodes(obj *FieldNode, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -81,13 +81,13 @@ func (rcv *RecordBatch) NodesLength() int { return 0 } -/// Nodes correspond to the pre-ordered flattened logical schema -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap +// / Nodes correspond to the pre-ordered flattened logical schema +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap func (rcv *RecordBatch) Buffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -107,13 +107,13 @@ func (rcv *RecordBatch) BuffersLength() int { return 0 } -/// Buffers correspond to the pre-ordered flattened buffer tree -/// -/// The number of buffers appended to this list depends on the schema. For -/// example, most primitive arrays will have 2 buffers, 1 for the validity -/// bitmap and 1 for the values. For struct arrays, there will only be a -/// single buffer for the validity (nulls) bitmap -/// Optional compression of the message body +// / Buffers correspond to the pre-ordered flattened buffer tree +// / +// / The number of buffers appended to this list depends on the schema. For +// / example, most primitive arrays will have 2 buffers, 1 for the validity +// / bitmap and 1 for the values. For struct arrays, there will only be a +// / single buffer for the validity (nulls) bitmap +// / Optional compression of the message body func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -127,21 +127,21 @@ func (rcv *RecordBatch) Compression(obj *BodyCompression) *BodyCompression { return nil } -/// Optional compression of the message body -/// Some types such as Utf8View are represented using a variable number of buffers. -/// For each such Field in the pre-ordered flattened logical schema, there will be -/// an entry in variadicBufferCounts to indicate the number of number of variadic -/// buffers which belong to that Field in the current RecordBatch. -/// -/// For example, the schema -/// col1: Struct -/// col2: Utf8View -/// contains two Fields with variadic buffers so variadicBufferCounts will have -/// two entries, the first counting the variadic buffers of `col1.beta` and the -/// second counting `col2`'s. -/// -/// This field may be omitted if and only if the schema contains no Fields with -/// a variable number of buffers, such as BinaryView and Utf8View. +// / Optional compression of the message body +// / Some types such as Utf8View are represented using a variable number of buffers. 
+// / For each such Field in the pre-ordered flattened logical schema, there will be
+// / an entry in variadicBufferCounts to indicate the number of variadic
+// / buffers which belong to that Field in the current RecordBatch.
+// /
+// / For example, the schema
+// / col1: Struct
+// / col2: Utf8View
+// / contains two Fields with variadic buffers so variadicBufferCounts will have
+// / two entries, the first counting the variadic buffers of `col1.beta` and the
+// / second counting `col2`'s.
+// /
+// / This field may be omitted if and only if the schema contains no Fields with
+// / a variable number of buffers, such as BinaryView and Utf8View.
 func (rcv *RecordBatch) VariadicBufferCounts(j int) int64 {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
 	if o != 0 {
@@ -159,20 +159,20 @@ func (rcv *RecordBatch) VariadicBufferCountsLength() int {
 	return 0
 }

-/// Some types such as Utf8View are represented using a variable number of buffers.
-/// For each such Field in the pre-ordered flattened logical schema, there will be
-/// an entry in variadicBufferCounts to indicate the number of number of variadic
-/// buffers which belong to that Field in the current RecordBatch.
-///
-/// For example, the schema
-/// col1: Struct
-/// col2: Utf8View
-/// contains two Fields with variadic buffers so variadicBufferCounts will have
-/// two entries, the first counting the variadic buffers of `col1.beta` and the
-/// second counting `col2`'s.
-///
-/// This field may be omitted if and only if the schema contains no Fields with
-/// a variable number of buffers, such as BinaryView and Utf8View.
+// / Some types such as Utf8View are represented using a variable number of buffers.
+// / For each such Field in the pre-ordered flattened logical schema, there will be
+// / an entry in variadicBufferCounts to indicate the number of variadic
+// / buffers which belong to that Field in the current RecordBatch.
+// /
+// / For example, the schema
+// / col1: Struct
+// / col2: Utf8View
+// / contains two Fields with variadic buffers so variadicBufferCounts will have
+// / two entries, the first counting the variadic buffers of `col1.beta` and the
+// / second counting `col2`'s.
+// /
+// / This field may be omitted if and only if the schema contains no Fields with
+// / a variable number of buffers, such as BinaryView and Utf8View.
 func (rcv *RecordBatch) MutateVariadicBufferCounts(j int, n int64) bool {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(12))
 	if o != 0 {
diff --git a/go/arrow/internal/flatbuf/RunEndEncoded.go b/go/arrow/internal/flatbuf/RunEndEncoded.go
index fa414c1bf0eed..b88460b2e22bc 100644
--- a/go/arrow/internal/flatbuf/RunEndEncoded.go
+++ b/go/arrow/internal/flatbuf/RunEndEncoded.go
@@ -22,11 +22,11 @@ import (
 	flatbuffers "github.com/google/flatbuffers/go"
 )

-/// Contains two child arrays, run_ends and values.
-/// The run_ends child array must be a 16/32/64-bit integer array
-/// which encodes the indices at which the run with the value in
-/// each corresponding index in the values child array ends.
-/// Like list/struct types, the value array can be of any type.
+// / Contains two child arrays, run_ends and values.
+// / The run_ends child array must be a 16/32/64-bit integer array
+// / which encodes the indices at which the run with the value in
+// / each corresponding index in the values child array ends.
+// / Like list/struct types, the value array can be of any type.
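A short plain-Go sketch of how the run_ends/values pair decodes (assuming 32-bit run ends; the helper name here is illustrative, not part of the generated bindings):

```go
package main

import "fmt"

// decodeREE expands a run-end-encoded array: runEnds[i] is the exclusive
// logical index at which run i ends, so run i covers positions
// runEnds[i-1] (or 0 for the first run) up to runEnds[i]-1.
func decodeREE(runEnds []int32, values []string) []string {
	if len(runEnds) == 0 {
		return nil
	}
	out := make([]string, 0, runEnds[len(runEnds)-1])
	var start int32
	for i, end := range runEnds {
		for pos := start; pos < end; pos++ {
			out = append(out, values[i])
		}
		start = end
	}
	return out
}

func main() {
	// run_ends [2, 5, 6] with values [a, b, c] decodes to [a a b b b c].
	fmt.Println(decodeREE([]int32{2, 5, 6}, []string{"a", "b", "c"}))
}
```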
type RunEndEncoded struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Schema.go b/go/arrow/internal/flatbuf/Schema.go index 4ee5ecc9e5e40..ae5b248a766e3 100644 --- a/go/arrow/internal/flatbuf/Schema.go +++ b/go/arrow/internal/flatbuf/Schema.go @@ -22,8 +22,8 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// A Schema describes the columns in a row batch +// / ---------------------------------------------------------------------- +// / A Schema describes the columns in a row batch type Schema struct { _tab flatbuffers.Table } @@ -44,9 +44,9 @@ func (rcv *Schema) Table() flatbuffers.Table { return rcv._tab } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) Endianness() Endianness { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -55,9 +55,9 @@ func (rcv *Schema) Endianness() Endianness { return 0 } -/// endianness of the buffer -/// it is Little Endian by default -/// if endianness doesn't match the underlying system then the vectors need to be converted +// / endianness of the buffer +// / it is Little Endian by default +// / if endianness doesn't match the underlying system then the vectors need to be converted func (rcv *Schema) MutateEndianness(n Endianness) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } @@ -102,7 +102,7 @@ func (rcv *Schema) CustomMetadataLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) Features(j int) Feature { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -120,7 +120,7 @@ func (rcv *Schema) FeaturesLength() int { return 0 } -/// Features used in the stream/file. +// / Features used in the stream/file. func (rcv *Schema) MutateFeatures(j int, n Feature) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go index de8217650b281..2477af100355c 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSR.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse Row format, that is matrix-specific. +// / Compressed Sparse Row format, that is matrix-specific. type SparseMatrixIndexCSR struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSR) Table() flatbuffers.Table { return rcv._tab } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -57,29 +57,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from indptr[i] to indptr[i+1] in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. 
-///
-/// For example, let X be the following 6x4 matrix:
-///
-/// X := [[0, 1, 2, 0],
-///       [0, 0, 3, 0],
-///       [0, 4, 0, 5],
-///       [0, 0, 0, 0],
-///       [6, 0, 7, 8],
-///       [0, 9, 0, 0]].
-///
-/// The array of non-zero values in X is:
-///
-/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-///
-/// And the indptr of X is:
-///
-/// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
+// / The type of values in indptrBuffer
+// / indptrBuffer stores the location and size of the indptr array that
+// / represents the range of the rows.
+// / The i-th row spans from indptr[i] to indptr[i+1] in the data.
+// / The length of this array is 1 + (the number of rows), and the type
+// / of index value is long.
+// /
+// / For example, let X be the following 6x4 matrix:
+// /
+// / X := [[0, 1, 2, 0],
+// /       [0, 0, 3, 0],
+// /       [0, 4, 0, 5],
+// /       [0, 0, 0, 0],
+// /       [6, 0, 7, 8],
+// /       [0, 9, 0, 0]].
+// /
+// / The array of non-zero values in X is:
+// /
+// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+// /
+// / And the indptr of X is:
+// /
+// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
 func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
 	if o != 0 {
@@ -93,29 +93,29 @@ func (rcv *SparseMatrixIndexCSR) IndptrBuffer(obj *Buffer) *Buffer {
 	return nil
 }

-/// indptrBuffer stores the location and size of indptr array that
-/// represents the range of the rows.
-/// The i-th row spans from indptr[i] to indptr[i+1] in the data.
-/// The length of this array is 1 + (the number of rows), and the type
-/// of index value is long.
-///
-/// For example, let X be the following 6x4 matrix:
-///
-/// X := [[0, 1, 2, 0],
-///       [0, 0, 3, 0],
-///       [0, 4, 0, 5],
-///       [0, 0, 0, 0],
-///       [6, 0, 7, 8],
-///       [0, 9, 0, 0]].
-///
-/// The array of non-zero values in X is:
-///
-/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-///
-/// And the indptr of X is:
-///
-/// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-/// The type of values in indicesBuffer
+// / indptrBuffer stores the location and size of the indptr array that
+// / represents the range of the rows.
+// / The i-th row spans from indptr[i] to indptr[i+1] in the data.
+// / The length of this array is 1 + (the number of rows), and the type
+// / of index value is long.
+// /
+// / For example, let X be the following 6x4 matrix:
+// /
+// / X := [[0, 1, 2, 0],
+// /       [0, 0, 3, 0],
+// /       [0, 4, 0, 5],
+// /       [0, 0, 0, 0],
+// /       [6, 0, 7, 8],
+// /       [0, 9, 0, 0]].
+// /
+// / The array of non-zero values in X is:
+// /
+// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+// /
+// / And the indptr of X is:
+// /
+// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+// / The type of values in indicesBuffer
 func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
 	if o != 0 {
@@ -129,16 +129,16 @@ func (rcv *SparseMatrixIndexCSR) IndicesType(obj *Int) *Int {
 	return nil
 }

-/// The type of values in indicesBuffer
-/// indicesBuffer stores the location and size of the array that
-/// contains the column indices of the corresponding non-zero values.
-/// The type of index value is long.
+// / The type of values in indicesBuffer
+// / indicesBuffer stores the location and size of the array that
+// / contains the column indices of the corresponding non-zero values.
+// / The type of index value is long.
+// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -152,15 +152,15 @@ func (rcv *SparseMatrixIndexCSR) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSRStart(builder *flatbuffers.Builder) { builder.StartObject(4) } diff --git a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go index c28cc5d082fac..7f262deedbfc1 100644 --- a/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go +++ b/go/arrow/internal/flatbuf/SparseMatrixIndexCSX.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Compressed Sparse format, that is matrix-specific. +// / Compressed Sparse format, that is matrix-specific. type SparseMatrixIndexCSX struct { _tab flatbuffers.Table } @@ -43,7 +43,7 @@ func (rcv *SparseMatrixIndexCSX) Table() flatbuffers.Table { return rcv._tab } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -52,12 +52,12 @@ func (rcv *SparseMatrixIndexCSX) CompressedAxis() SparseMatrixCompressedAxis { return 0 } -/// Which axis, row or column, is compressed +// / Which axis, row or column, is compressed func (rcv *SparseMatrixIndexCSX) MutateCompressedAxis(n SparseMatrixCompressedAxis) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The type of values in indptrBuffer +// / The type of values in indptrBuffer func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -71,30 +71,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrType(obj *Int) *Int { return nil } -/// The type of values in indptrBuffer -/// indptrBuffer stores the location and size of indptr array that -/// represents the range of the rows. -/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data. -/// The length of this array is 1 + (the number of rows), and the type -/// of index value is long. -/// -/// For example, let X be the following 6x4 matrix: -/// ```text -/// X := [[0, 1, 2, 0], -/// [0, 0, 3, 0], -/// [0, 4, 0, 5], -/// [0, 0, 0, 0], -/// [6, 0, 7, 8], -/// [0, 9, 0, 0]]. -/// ``` -/// The array of non-zero values in X is: -/// ```text -/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9]. -/// ``` -/// And the indptr of X is: -/// ```text -/// indptr(X) = [0, 2, 3, 5, 5, 8, 10]. 
-/// ```
+// / The type of values in indptrBuffer
+// / indptrBuffer stores the location and size of the indptr array that
+// / represents the range of the rows.
+// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+// / The length of this array is 1 + (the number of rows), and the type
+// / of index value is long.
+// /
+// / For example, let X be the following 6x4 matrix:
+// / ```text
+// / X := [[0, 1, 2, 0],
+// /       [0, 0, 3, 0],
+// /       [0, 4, 0, 5],
+// /       [0, 0, 0, 0],
+// /       [6, 0, 7, 8],
+// /       [0, 9, 0, 0]].
+// / ```
+// / The array of non-zero values in X is:
+// / ```text
+// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+// / ```
+// / And the indptr of X is:
+// / ```text
+// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+// / ```
 func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
 	if o != 0 {
@@ -108,30 +108,30 @@ func (rcv *SparseMatrixIndexCSX) IndptrBuffer(obj *Buffer) *Buffer {
 	return nil
 }

-/// indptrBuffer stores the location and size of indptr array that
-/// represents the range of the rows.
-/// The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
-/// The length of this array is 1 + (the number of rows), and the type
-/// of index value is long.
-///
-/// For example, let X be the following 6x4 matrix:
-/// ```text
-/// X := [[0, 1, 2, 0],
-///       [0, 0, 3, 0],
-///       [0, 4, 0, 5],
-///       [0, 0, 0, 0],
-///       [6, 0, 7, 8],
-///       [0, 9, 0, 0]].
-/// ```
-/// The array of non-zero values in X is:
-/// ```text
-/// values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
-/// ```
-/// And the indptr of X is:
-/// ```text
-/// indptr(X) = [0, 2, 3, 5, 5, 8, 10].
-/// ```
-/// The type of values in indicesBuffer
+// / indptrBuffer stores the location and size of the indptr array that
+// / represents the range of the rows.
+// / The i-th row spans from `indptr[i]` to `indptr[i+1]` in the data.
+// / The length of this array is 1 + (the number of rows), and the type
+// / of index value is long.
+// /
+// / For example, let X be the following 6x4 matrix:
+// / ```text
+// / X := [[0, 1, 2, 0],
+// /       [0, 0, 3, 0],
+// /       [0, 4, 0, 5],
+// /       [0, 0, 0, 0],
+// /       [6, 0, 7, 8],
+// /       [0, 9, 0, 0]].
+// / ```
+// / The array of non-zero values in X is:
+// / ```text
+// / values(X) = [1, 2, 3, 4, 5, 6, 7, 8, 9].
+// / ```
+// / And the indptr of X is:
+// / ```text
+// / indptr(X) = [0, 2, 3, 5, 5, 8, 9].
+// / ```
+// / The type of values in indicesBuffer
 func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
 	if o != 0 {
@@ -145,16 +145,16 @@ func (rcv *SparseMatrixIndexCSX) IndicesType(obj *Int) *Int {
 	return nil
 }

-/// The type of values in indicesBuffer
-/// indicesBuffer stores the location and size of the array that
-/// contains the column indices of the corresponding non-zero values.
-/// The type of index value is long.
-///
-/// For example, the indices of the above X is:
-/// ```text
-/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
-/// ```
-/// Note that the indices are sorted in lexicographical order for each row.
+// / The type of values in indicesBuffer
+// / indicesBuffer stores the location and size of the array that
+// / contains the column indices of the corresponding non-zero values.
+// / The type of index value is long.
+// /
+// / For example, the indices of the above X is:
+// / ```text
+// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1].
+// / ```
+// / Note that the indices are sorted in lexicographical order for each row.
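To make the indptr/indices relationship concrete, a small plain-Go sketch that walks the example matrix X above row by row (note that indptr has one entry per row plus one, and its final entry equals the number of non-zero values, 9 here):

```go
package main

import "fmt"

func main() {
	// The 6x4 example matrix X from the comments above, in compressed
	// sparse row form.
	indptr := []int64{0, 2, 3, 5, 5, 8, 9}
	indices := []int64{1, 2, 2, 1, 3, 0, 2, 3, 1}
	values := []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}

	// Row i owns the half-open range indptr[i] .. indptr[i+1] of
	// indices/values; an empty range (row 3) is an all-zero row.
	for i := 0; i+1 < len(indptr); i++ {
		for k := indptr[i]; k < indptr[i+1]; k++ {
			fmt.Printf("X[%d, %d] = %d\n", i, indices[k], values[k])
		}
	}
}
```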
func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -168,15 +168,15 @@ func (rcv *SparseMatrixIndexCSX) IndicesBuffer(obj *Buffer) *Buffer { return nil } -/// indicesBuffer stores the location and size of the array that -/// contains the column indices of the corresponding non-zero values. -/// The type of index value is long. -/// -/// For example, the indices of the above X is: -/// ```text -/// indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. -/// ``` -/// Note that the indices are sorted in lexicographical order for each row. +// / indicesBuffer stores the location and size of the array that +// / contains the column indices of the corresponding non-zero values. +// / The type of index value is long. +// / +// / For example, the indices of the above X is: +// / ```text +// / indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. +// / ``` +// / Note that the indices are sorted in lexicographical order for each row. func SparseMatrixIndexCSXStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/SparseTensor.go b/go/arrow/internal/flatbuf/SparseTensor.go index 6f3f55797d755..8f67e1fc08b84 100644 --- a/go/arrow/internal/flatbuf/SparseTensor.go +++ b/go/arrow/internal/flatbuf/SparseTensor.go @@ -54,9 +54,9 @@ func (rcv *SparseTensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -66,10 +66,10 @@ func (rcv *SparseTensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. -/// Currently only fixed-width value types are supported, -/// no strings or nested types. -/// The dimensions of the tensor, optionally named. +// / The type of data contained in a value cell. +// / Currently only fixed-width value types are supported, +// / no strings or nested types. +// / The dimensions of the tensor, optionally named. func (rcv *SparseTensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -90,8 +90,8 @@ func (rcv *SparseTensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named. -/// The number of non-zero values in a sparse tensor. +// / The dimensions of the tensor, optionally named. +// / The number of non-zero values in a sparse tensor. func (rcv *SparseTensor) NonZeroLength() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -100,7 +100,7 @@ func (rcv *SparseTensor) NonZeroLength() int64 { return 0 } -/// The number of non-zero values in a sparse tensor. +// / The number of non-zero values in a sparse tensor. 
 func (rcv *SparseTensor) MutateNonZeroLength(n int64) bool {
 	return rcv._tab.MutateInt64Slot(10, n)
 }
@@ -117,7 +117,7 @@ func (rcv *SparseTensor) MutateSparseIndexType(n SparseTensorIndex) bool {
 	return rcv._tab.MutateByteSlot(12, byte(n))
 }

-/// Sparse tensor index
+// / Sparse tensor index
 func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(14))
 	if o != 0 {
@@ -127,8 +127,8 @@ func (rcv *SparseTensor) SparseIndex(obj *flatbuffers.Table) bool {
 	return false
 }

-/// Sparse tensor index
-/// The location and size of the tensor's data
+// / Sparse tensor index
+// / The location and size of the tensor's data
 func (rcv *SparseTensor) Data(obj *Buffer) *Buffer {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(16))
 	if o != 0 {
@@ -142,7 +142,7 @@ func (rcv *SparseTensor) Data(obj *Buffer) *Buffer {
 	return nil
 }

-/// The location and size of the tensor's data
+// / The location and size of the tensor's data
 func SparseTensorStart(builder *flatbuffers.Builder) {
 	builder.StartObject(7)
 }
diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
index f8eee99fa691e..bf1c218e2e415 100644
--- a/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
+++ b/go/arrow/internal/flatbuf/SparseTensorIndexCOO.go
@@ -22,38 +22,38 @@ import (
 	flatbuffers "github.com/google/flatbuffers/go"
 )

-/// ----------------------------------------------------------------------
-/// EXPERIMENTAL: Data structures for sparse tensors
-/// Coordinate (COO) format of sparse tensor index.
-///
-/// COO's index list are represented as a NxM matrix,
-/// where N is the number of non-zero values,
-/// and M is the number of dimensions of a sparse tensor.
-///
-/// indicesBuffer stores the location and size of the data of this indices
-/// matrix. The value type and the stride of the indices matrix is
-/// specified in indicesType and indicesStrides fields.
-///
-/// For example, let X be a 2x3x4x5 tensor, and it has the following
-/// 6 non-zero values:
-/// ```text
-/// X[0, 1, 2, 0] := 1
-/// X[1, 1, 2, 3] := 2
-/// X[0, 2, 1, 0] := 3
-/// X[0, 1, 3, 0] := 4
-/// X[0, 1, 2, 1] := 5
-/// X[1, 2, 0, 4] := 6
-/// ```
-/// In COO format, the index matrix of X is the following 4x6 matrix:
-/// ```text
-/// [[0, 0, 0, 0, 1, 1],
-///  [1, 1, 1, 2, 1, 2],
-///  [2, 2, 3, 1, 2, 0],
-///  [0, 1, 0, 0, 3, 4]]
-/// ```
-/// When isCanonical is true, the indices is sorted in lexicographical order
-/// (row-major order), and it does not have duplicated entries. Otherwise,
-/// the indices may not be sorted, or may have duplicated entries.
+// / ----------------------------------------------------------------------
+// / EXPERIMENTAL: Data structures for sparse tensors
+// / Coordinate (COO) format of sparse tensor index.
+// /
+// / COO's index list is represented as an NxM matrix,
+// / where N is the number of non-zero values,
+// / and M is the number of dimensions of a sparse tensor.
+// /
+// / indicesBuffer stores the location and size of the data of this indices
+// / matrix. The value type and the stride of the indices matrix is
+// / specified in indicesType and indicesStrides fields.
+// /
+// / For example, let X be a 2x3x4x5 tensor, and it has the following
+// / 6 non-zero values:
+// / ```text
+// / X[0, 1, 2, 0] := 1
+// / X[1, 1, 2, 3] := 2
+// / X[0, 2, 1, 0] := 3
+// / X[0, 1, 3, 0] := 4
+// / X[0, 1, 2, 1] := 5
+// / X[1, 2, 0, 4] := 6
+// / ```
+// / In COO format, the index matrix of X is the following 4x6 matrix:
+// / ```text
+// / [[0, 0, 0, 0, 1, 1],
+// /  [1, 1, 1, 2, 1, 2],
+// /  [2, 2, 3, 1, 2, 0],
+// /  [0, 1, 0, 0, 3, 4]]
+// / ```
+// / When isCanonical is true, the indices are sorted in lexicographical order
+// / (row-major order) and do not have duplicated entries. Otherwise,
+// / the indices may not be sorted, or may have duplicated entries.
 type SparseTensorIndexCOO struct {
 	_tab flatbuffers.Table
 }
@@ -74,7 +74,7 @@ func (rcv *SparseTensorIndexCOO) Table() flatbuffers.Table {
 	return rcv._tab
 }

-/// The type of values in indicesBuffer
+// / The type of values in indicesBuffer
 func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
 	if o != 0 {
@@ -88,9 +88,9 @@ func (rcv *SparseTensorIndexCOO) IndicesType(obj *Int) *Int {
 	return nil
 }

-/// The type of values in indicesBuffer
-/// Non-negative byte offsets to advance one value cell along each dimension
-/// If omitted, default to row-major order (C-like).
+// / The type of values in indicesBuffer
+// / Non-negative byte offsets to advance one value cell along each dimension
+// / If omitted, default to row-major order (C-like).
 func (rcv *SparseTensorIndexCOO) IndicesStrides(j int) int64 {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
 	if o != 0 {
@@ -108,8 +108,8 @@ func (rcv *SparseTensorIndexCOO) IndicesStridesLength() int {
 	return 0
 }

-/// Non-negative byte offsets to advance one value cell along each dimension
-/// If omitted, default to row-major order (C-like).
+// / Non-negative byte offsets to advance one value cell along each dimension
+// / If omitted, default to row-major order (C-like).
 func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(6))
 	if o != 0 {
@@ -119,7 +119,7 @@ func (rcv *SparseTensorIndexCOO) MutateIndicesStrides(j int, n int64) bool {
 	return false
 }

-/// The location and size of the indices matrix's data
+// / The location and size of the indices matrix's data
 func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(8))
 	if o != 0 {
@@ -133,12 +133,12 @@ func (rcv *SparseTensorIndexCOO) IndicesBuffer(obj *Buffer) *Buffer {
 	return nil
 }

-/// The location and size of the indices matrix's data
-/// This flag is true if and only if the indices matrix is sorted in
-/// row-major order, and does not have duplicated entries.
-/// This sort order is the same as of Tensorflow's SparseTensor,
-/// but it is inverse order of SciPy's canonical coo_matrix
-/// (SciPy employs column-major order for its coo_matrix).
+// / The location and size of the indices matrix's data
+// / This flag is true if and only if the indices matrix is sorted in
+// / row-major order, and does not have duplicated entries.
+// / This sort order is the same as that of Tensorflow's SparseTensor,
+// / but it is the inverse order of SciPy's canonical coo_matrix
+// / (SciPy employs column-major order for its coo_matrix).
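The isCanonical flag below is easy to state in code: the coordinates must be strictly increasing in lexicographical (row-major) order. A plain-Go sketch, storing one row per non-zero value and one column per dimension (the transpose of the 4x6 matrix printed above):

```go
package main

import "fmt"

// lexLess reports whether coordinate a sorts strictly before b.
func lexLess(a, b []int64) bool {
	for d := range a {
		if a[d] != b[d] {
			return a[d] < b[d]
		}
	}
	return false // equal coordinates are duplicates, not less
}

// isCanonical is true when the index rows are sorted row-major with no
// duplicated entries, mirroring the flag described above.
func isCanonical(coords [][]int64) bool {
	for i := 1; i < len(coords); i++ {
		if !lexLess(coords[i-1], coords[i]) {
			return false
		}
	}
	return true
}

func main() {
	// The six non-zero coordinates of the example tensor X, already in
	// canonical order.
	coords := [][]int64{
		{0, 1, 2, 0}, {0, 1, 2, 1}, {0, 1, 3, 0},
		{0, 2, 1, 0}, {1, 1, 2, 3}, {1, 2, 0, 4},
	}
	fmt.Println(isCanonical(coords)) // true
}
```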
 func (rcv *SparseTensorIndexCOO) IsCanonical() bool {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(10))
 	if o != 0 {
@@ -147,11 +147,11 @@ func (rcv *SparseTensorIndexCOO) IsCanonical() bool {
 	return false
 }

-/// This flag is true if and only if the indices matrix is sorted in
-/// row-major order, and does not have duplicated entries.
-/// This sort order is the same as of Tensorflow's SparseTensor,
-/// but it is inverse order of SciPy's canonical coo_matrix
-/// (SciPy employs column-major order for its coo_matrix).
+// / This flag is true if and only if the indices matrix is sorted in
+// / row-major order, and does not have duplicated entries.
+// / This sort order is the same as that of Tensorflow's SparseTensor,
+// / but it is the inverse order of SciPy's canonical coo_matrix
+// / (SciPy employs column-major order for its coo_matrix).
 func (rcv *SparseTensorIndexCOO) MutateIsCanonical(n bool) bool {
 	return rcv._tab.MutateBoolSlot(10, n)
 }
diff --git a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
index a824c84ebfe2e..66226e0412c21 100644
--- a/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
+++ b/go/arrow/internal/flatbuf/SparseTensorIndexCSF.go
@@ -22,7 +22,7 @@ import (
 	flatbuffers "github.com/google/flatbuffers/go"
 )

-/// Compressed Sparse Fiber (CSF) sparse tensor index.
+// / Compressed Sparse Fiber (CSF) sparse tensor index.
 type SparseTensorIndexCSF struct {
 	_tab flatbuffers.Table
 }
@@ -43,37 +43,37 @@ func (rcv *SparseTensorIndexCSF) Table() flatbuffers.Table {
 	return rcv._tab
 }

-/// CSF is a generalization of compressed sparse row (CSR) index.
-/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-///
-/// CSF index recursively compresses each dimension of a tensor into a set
-/// of prefix trees. Each path from a root to leaf forms one tensor
-/// non-zero index. CSF is implemented with two arrays of buffers and one
-/// arrays of integers.
-///
-/// For example, let X be a 2x3x4x5 tensor and let it have the following
-/// 8 non-zero values:
-/// ```text
-/// X[0, 0, 0, 1] := 1
-/// X[0, 0, 0, 2] := 2
-/// X[0, 1, 0, 0] := 3
-/// X[0, 1, 0, 2] := 4
-/// X[0, 1, 1, 0] := 5
-/// X[1, 1, 1, 0] := 6
-/// X[1, 1, 1, 1] := 7
-/// X[1, 1, 1, 2] := 8
-/// ```
-/// As a prefix tree this would be represented as:
-/// ```text
-///         0          1
-///        / \         |
-///       0   1        1
-///      /   / \       |
-///     0   0   1      1
-///    /|  /|   |     /| |
-///   1 2 0 2   0    0 1 2
-/// ```
-/// The type of values in indptrBuffers
+// / CSF is a generalization of compressed sparse row (CSR) index.
+// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+// /
+// / CSF index recursively compresses each dimension of a tensor into a set
+// / of prefix trees. Each path from a root to leaf forms one tensor
+// / non-zero index. CSF is implemented with two arrays of buffers and one
+// / array of integers.
+// /
+// / For example, let X be a 2x3x4x5 tensor and let it have the following
+// / 8 non-zero values:
+// / ```text
+// / X[0, 0, 0, 1] := 1
+// / X[0, 0, 0, 2] := 2
+// / X[0, 1, 0, 0] := 3
+// / X[0, 1, 0, 2] := 4
+// / X[0, 1, 1, 0] := 5
+// / X[1, 1, 1, 0] := 6
+// / X[1, 1, 1, 1] := 7
+// / X[1, 1, 1, 2] := 8
+// / ```
+// / As a prefix tree this would be represented as:
+// / ```text
+// /         0          1
+// /        / \         |
+// /       0   1        1
+// /      /   / \       |
+// /     0   0   1      1
+// /    /|  /|   |     /| |
+// /   1 2 0 2   0    0 1 2
+// / ```
+// / The type of values in indptrBuffers
 func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int {
 	o := flatbuffers.UOffsetT(rcv._tab.Offset(4))
 	if o != 0 {
@@ -87,51 +87,51 @@ func (rcv *SparseTensorIndexCSF) IndptrType(obj *Int) *Int {
 	return nil
 }

-/// CSF is a generalization of compressed sparse row (CSR) index.
-/// See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
-///
-/// CSF index recursively compresses each dimension of a tensor into a set
-/// of prefix trees. Each path from a root to leaf forms one tensor
-/// non-zero index. CSF is implemented with two arrays of buffers and one
-/// arrays of integers.
-///
-/// For example, let X be a 2x3x4x5 tensor and let it have the following
-/// 8 non-zero values:
-/// ```text
-/// X[0, 0, 0, 1] := 1
-/// X[0, 0, 0, 2] := 2
-/// X[0, 1, 0, 0] := 3
-/// X[0, 1, 0, 2] := 4
-/// X[0, 1, 1, 0] := 5
-/// X[1, 1, 1, 0] := 6
-/// X[1, 1, 1, 1] := 7
-/// X[1, 1, 1, 2] := 8
-/// ```
-/// As a prefix tree this would be represented as:
-/// ```text
-///         0          1
-///        / \         |
-///       0   1        1
-///      /   / \       |
-///     0   0   1      1
-///    /|  /|   |     /| |
-///   1 2 0 2   0    0 1 2
-/// ```
-/// The type of values in indptrBuffers
-/// indptrBuffers stores the sparsity structure.
-/// Each two consecutive dimensions in a tensor correspond to a buffer in
-/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
-/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in
-/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
-///
-/// For example, the indptrBuffers for the above X is:
-/// ```text
-/// indptrBuffer(X) = [
-///   [0, 2, 3],
-///   [0, 1, 3, 4],
-///   [0, 2, 4, 5, 8]
-/// ].
-/// ```
+// / CSF is a generalization of compressed sparse row (CSR) index.
+// / See [smith2017knl](http://shaden.io/pub-files/smith2017knl.pdf)
+// /
+// / CSF index recursively compresses each dimension of a tensor into a set
+// / of prefix trees. Each path from a root to leaf forms one tensor
+// / non-zero index. CSF is implemented with two arrays of buffers and one
+// / array of integers.
+// /
+// / For example, let X be a 2x3x4x5 tensor and let it have the following
+// / 8 non-zero values:
+// / ```text
+// / X[0, 0, 0, 1] := 1
+// / X[0, 0, 0, 2] := 2
+// / X[0, 1, 0, 0] := 3
+// / X[0, 1, 0, 2] := 4
+// / X[0, 1, 1, 0] := 5
+// / X[1, 1, 1, 0] := 6
+// / X[1, 1, 1, 1] := 7
+// / X[1, 1, 1, 2] := 8
+// / ```
+// / As a prefix tree this would be represented as:
+// / ```text
+// /         0          1
+// /        / \         |
+// /       0   1        1
+// /      /   / \       |
+// /     0   0   1      1
+// /    /|  /|   |     /| |
+// /   1 2 0 2   0    0 1 2
+// / ```
+// / The type of values in indptrBuffers
+// / indptrBuffers stores the sparsity structure.
+// / Each two consecutive dimensions in a tensor correspond to a buffer in
+// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]`
+// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in
+// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node.
+// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndptrBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -151,21 +151,21 @@ func (rcv *SparseTensorIndexCSF) IndptrBuffersLength() int { return 0 } -/// indptrBuffers stores the sparsity structure. -/// Each two consecutive dimensions in a tensor correspond to a buffer in -/// indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` -/// and `indptrBuffers[dim][i + 1]` signify a range of nodes in -/// `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. -/// -/// For example, the indptrBuffers for the above X is: -/// ```text -/// indptrBuffer(X) = [ -/// [0, 2, 3], -/// [0, 1, 3, 4], -/// [0, 2, 4, 5, 8] -/// ]. -/// ``` -/// The type of values in indicesBuffers +// / indptrBuffers stores the sparsity structure. +// / Each two consecutive dimensions in a tensor correspond to a buffer in +// / indptrBuffers. A pair of consecutive values at `indptrBuffers[dim][i]` +// / and `indptrBuffers[dim][i + 1]` signify a range of nodes in +// / `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. +// / +// / For example, the indptrBuffers for the above X is: +// / ```text +// / indptrBuffer(X) = [ +// / [0, 2, 3], +// / [0, 1, 3, 4], +// / [0, 2, 4, 5, 8] +// / ]. +// / ``` +// / The type of values in indicesBuffers func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -179,18 +179,18 @@ func (rcv *SparseTensorIndexCSF) IndicesType(obj *Int) *Int { return nil } -/// The type of values in indicesBuffers -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` +// / The type of values in indicesBuffers +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. +// / ``` func (rcv *SparseTensorIndexCSF) IndicesBuffers(obj *Buffer, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -210,23 +210,23 @@ func (rcv *SparseTensorIndexCSF) IndicesBuffersLength() int { return 0 } -/// indicesBuffers stores values of nodes. -/// Each tensor dimension corresponds to a buffer in indicesBuffers. -/// For example, the indicesBuffers for the above X is: -/// ```text -/// indicesBuffer(X) = [ -/// [0, 1], -/// [0, 1, 1], -/// [0, 0, 1, 1], -/// [1, 2, 0, 2, 0, 0, 1, 2] -/// ]. -/// ``` -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / indicesBuffers stores values of nodes. +// / Each tensor dimension corresponds to a buffer in indicesBuffers. +// / For example, the indicesBuffers for the above X is: +// / ```text +// / indicesBuffer(X) = [ +// / [0, 1], +// / [0, 1, 1], +// / [0, 0, 1, 1], +// / [1, 2, 0, 2, 0, 0, 1, 2] +// / ]. 
+// / ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) AxisOrder(j int) int32 { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -244,12 +244,12 @@ func (rcv *SparseTensorIndexCSF) AxisOrderLength() int { return 0 } -/// axisOrder stores the sequence in which dimensions were traversed to -/// produce the prefix tree. -/// For example, the axisOrder for the above X is: -/// ```text -/// axisOrder(X) = [0, 1, 2, 3]. -/// ``` +// / axisOrder stores the sequence in which dimensions were traversed to +// / produce the prefix tree. +// / For example, the axisOrder for the above X is: +// / ```text +// / axisOrder(X) = [0, 1, 2, 3]. +// / ``` func (rcv *SparseTensorIndexCSF) MutateAxisOrder(j int, n int32) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { diff --git a/go/arrow/internal/flatbuf/Struct_.go b/go/arrow/internal/flatbuf/Struct_.go index 427e7060382af..73752a17e00fa 100644 --- a/go/arrow/internal/flatbuf/Struct_.go +++ b/go/arrow/internal/flatbuf/Struct_.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A Struct_ in the flatbuffer metadata is the same as an Arrow Struct -/// (according to the physical memory layout). We used Struct_ here as -/// Struct is a reserved word in Flatbuffers +// / A Struct_ in the flatbuffer metadata is the same as an Arrow Struct +// / (according to the physical memory layout). We used Struct_ here as +// / Struct is a reserved word in Flatbuffers type Struct_ struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Tensor.go b/go/arrow/internal/flatbuf/Tensor.go index 39d70e351e3d6..47bfe8067b57b 100644 --- a/go/arrow/internal/flatbuf/Tensor.go +++ b/go/arrow/internal/flatbuf/Tensor.go @@ -54,8 +54,8 @@ func (rcv *Tensor) MutateTypeType(n Type) bool { return rcv._tab.MutateByteSlot(4, byte(n)) } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -65,9 +65,9 @@ func (rcv *Tensor) Type(obj *flatbuffers.Table) bool { return false } -/// The type of data contained in a value cell. Currently only fixed-width -/// value types are supported, no strings or nested types -/// The dimensions of the tensor, optionally named +// / The type of data contained in a value cell. Currently only fixed-width +// / value types are supported, no strings or nested types +// / The dimensions of the tensor, optionally named func (rcv *Tensor) Shape(obj *TensorDim, j int) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(8)) if o != 0 { @@ -88,9 +88,9 @@ func (rcv *Tensor) ShapeLength() int { return 0 } -/// The dimensions of the tensor, optionally named -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / The dimensions of the tensor, optionally named +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). 
func (rcv *Tensor) Strides(j int) int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -108,8 +108,8 @@ func (rcv *Tensor) StridesLength() int { return 0 } -/// Non-negative byte offsets to advance one value cell along each dimension -/// If omitted, default to row-major order (C-like). +// / Non-negative byte offsets to advance one value cell along each dimension +// / If omitted, default to row-major order (C-like). func (rcv *Tensor) MutateStrides(j int, n int64) bool { o := flatbuffers.UOffsetT(rcv._tab.Offset(10)) if o != 0 { @@ -119,7 +119,7 @@ func (rcv *Tensor) MutateStrides(j int, n int64) bool { return false } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func (rcv *Tensor) Data(obj *Buffer) *Buffer { o := flatbuffers.UOffsetT(rcv._tab.Offset(12)) if o != 0 { @@ -133,7 +133,7 @@ func (rcv *Tensor) Data(obj *Buffer) *Buffer { return nil } -/// The location and size of the tensor's data +// / The location and size of the tensor's data func TensorStart(builder *flatbuffers.Builder) { builder.StartObject(5) } diff --git a/go/arrow/internal/flatbuf/TensorDim.go b/go/arrow/internal/flatbuf/TensorDim.go index 14b82120887e9..c6413b6a8c0bd 100644 --- a/go/arrow/internal/flatbuf/TensorDim.go +++ b/go/arrow/internal/flatbuf/TensorDim.go @@ -22,9 +22,9 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// ---------------------------------------------------------------------- -/// Data structures for dense tensors -/// Shape data for a single axis in a tensor +// / ---------------------------------------------------------------------- +// / Data structures for dense tensors +// / Shape data for a single axis in a tensor type TensorDim struct { _tab flatbuffers.Table } @@ -45,7 +45,7 @@ func (rcv *TensorDim) Table() flatbuffers.Table { return rcv._tab } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) Size() int64 { o := flatbuffers.UOffsetT(rcv._tab.Offset(4)) if o != 0 { @@ -54,12 +54,12 @@ func (rcv *TensorDim) Size() int64 { return 0 } -/// Length of dimension +// / Length of dimension func (rcv *TensorDim) MutateSize(n int64) bool { return rcv._tab.MutateInt64Slot(4, n) } -/// Name of the dimension, optional +// / Name of the dimension, optional func (rcv *TensorDim) Name() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -68,7 +68,7 @@ func (rcv *TensorDim) Name() []byte { return nil } -/// Name of the dimension, optional +// / Name of the dimension, optional func TensorDimStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Time.go b/go/arrow/internal/flatbuf/Time.go index 2fb6e4c110e0a..13038a6e33280 100644 --- a/go/arrow/internal/flatbuf/Time.go +++ b/go/arrow/internal/flatbuf/Time.go @@ -22,20 +22,20 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Time is either a 32-bit or 64-bit signed integer type representing an -/// elapsed time since midnight, stored in either of four units: seconds, -/// milliseconds, microseconds or nanoseconds. -/// -/// The integer `bitWidth` depends on the `unit` and must be one of the following: -/// * SECOND and MILLISECOND: 32 bits -/// * MICROSECOND and NANOSECOND: 64 bits -/// -/// The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds -/// (exclusive), adjusted for the time unit (for example, up to 86400000 -/// exclusive for the MILLISECOND unit). -/// This definition doesn't allow for leap seconds. 
Time values from -/// measurements with leap seconds will need to be corrected when ingesting -/// into Arrow (for example by replacing the value 86400 with 86399). +// / Time is either a 32-bit or 64-bit signed integer type representing an +// / elapsed time since midnight, stored in either of four units: seconds, +// / milliseconds, microseconds or nanoseconds. +// / +// / The integer `bitWidth` depends on the `unit` and must be one of the following: +// / * SECOND and MILLISECOND: 32 bits +// / * MICROSECOND and NANOSECOND: 64 bits +// / +// / The allowed values are between 0 (inclusive) and 86400 (=24*60*60) seconds +// / (exclusive), adjusted for the time unit (for example, up to 86400000 +// / exclusive for the MILLISECOND unit). +// / This definition doesn't allow for leap seconds. Time values from +// / measurements with leap seconds will need to be corrected when ingesting +// / into Arrow (for example by replacing the value 86400 with 86399). type Time struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Timestamp.go b/go/arrow/internal/flatbuf/Timestamp.go index d0058e13e6545..ce172bacdd3c3 100644 --- a/go/arrow/internal/flatbuf/Timestamp.go +++ b/go/arrow/internal/flatbuf/Timestamp.go @@ -22,111 +22,111 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Timestamp is a 64-bit signed integer representing an elapsed time since a -/// fixed epoch, stored in either of four units: seconds, milliseconds, -/// microseconds or nanoseconds, and is optionally annotated with a timezone. -/// -/// Timestamp values do not include any leap seconds (in other words, all -/// days are considered 86400 seconds long). -/// -/// Timestamps with a non-empty timezone -/// ------------------------------------ -/// -/// If a Timestamp column has a non-empty timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone -/// (the Unix epoch), regardless of the Timestamp's own timezone. -/// -/// Therefore, timestamp values with a non-empty timezone correspond to -/// physical points in time together with some additional information about -/// how the data was obtained and/or how to display it (the timezone). -/// -/// For example, the timestamp value 0 with the timezone string "Europe/Paris" -/// corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the -/// application may prefer to display it as "January 1st 1970, 01h00" in -/// the Europe/Paris timezone (which is the same physical point in time). -/// -/// One consequence is that timestamp values with a non-empty timezone -/// can be compared and ordered directly, since they all share the same -/// well-known point of reference (the Unix epoch). -/// -/// Timestamps with an unset / empty timezone -/// ----------------------------------------- -/// -/// If a Timestamp column has no timezone value, its epoch is -/// 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. -/// -/// Therefore, timestamp values without a timezone cannot be meaningfully -/// interpreted as physical points in time, but only as calendar / clock -/// indications ("wall clock time") in an unspecified timezone. -/// -/// For example, the timestamp value 0 with an empty timezone string -/// corresponds to "January 1st 1970, 00h00" in an unknown timezone: there -/// is not enough information to interpret it as a well-defined physical -/// point in time. 
-/// -/// One consequence is that timestamp values without a timezone cannot -/// be reliably compared or ordered, since they may have different points of -/// reference. In particular, it is *not* possible to interpret an unset -/// or empty timezone as the same as "UTC". -/// -/// Conversion between timezones -/// ---------------------------- -/// -/// If a Timestamp column has a non-empty timezone, changing the timezone -/// to a different non-empty value is a metadata-only operation: -/// the timestamp values need not change as their point of reference remains -/// the same (the Unix epoch). -/// -/// However, if a Timestamp column has no timezone value, changing it to a -/// non-empty value requires to think about the desired semantics. -/// One possibility is to assume that the original timestamp values are -/// relative to the epoch of the timezone being set; timestamp values should -/// then adjusted to the Unix epoch (for example, changing the timezone from -/// empty to "Europe/Paris" would require converting the timestamp values -/// from "Europe/Paris" to "UTC", which seems counter-intuitive but is -/// nevertheless correct). -/// -/// Guidelines for encoding data from external libraries -/// ---------------------------------------------------- -/// -/// Date & time libraries often have multiple different data types for temporal -/// data. In order to ease interoperability between different implementations the -/// Arrow project has some recommendations for encoding these types into a Timestamp -/// column. -/// -/// An "instant" represents a physical point in time that has no relevant timezone -/// (for example, astronomical data). To encode an instant, use a Timestamp with -/// the timezone string set to "UTC", and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// A "zoned date-time" represents a physical point in time annotated with an -/// informative timezone (for example, the timezone in which the data was -/// recorded). To encode a zoned date-time, use a Timestamp with the timezone -/// string set to the name of the timezone, and make sure the Timestamp values -/// are relative to the UTC epoch (January 1st 1970, midnight). -/// -/// (There is some ambiguity between an instant and a zoned date-time with the -/// UTC timezone. Both of these are stored the same in Arrow. Typically, -/// this distinction does not matter. If it does, then an application should -/// use custom metadata or an extension type to distinguish between the two cases.) -/// -/// An "offset date-time" represents a physical point in time combined with an -/// explicit offset from UTC. To encode an offset date-time, use a Timestamp -/// with the timezone string set to the numeric timezone offset string -/// (e.g. "+03:00"), and make sure the Timestamp values are relative to -/// the UTC epoch (January 1st 1970, midnight). -/// -/// A "naive date-time" (also called "local date-time" in some libraries) -/// represents a wall clock time combined with a calendar date, but with -/// no indication of how to map this information to a physical point in time. -/// Naive date-times must be handled with care because of this missing -/// information, and also because daylight saving time (DST) may make -/// some values ambiguous or nonexistent. A naive date-time may be -/// stored as a struct with Date and Time fields. However, it may also be -/// encoded into a Timestamp column with an empty timezone. 
The timestamp -/// values should be computed "as if" the timezone of the date-time values -/// was UTC; for example, the naive date-time "January 1st 1970, 00h00" would -/// be encoded as timestamp value 0. +// / Timestamp is a 64-bit signed integer representing an elapsed time since a +// / fixed epoch, stored in either of four units: seconds, milliseconds, +// / microseconds or nanoseconds, and is optionally annotated with a timezone. +// / +// / Timestamp values do not include any leap seconds (in other words, all +// / days are considered 86400 seconds long). +// / +// / Timestamps with a non-empty timezone +// / ------------------------------------ +// / +// / If a Timestamp column has a non-empty timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in the *UTC* timezone +// / (the Unix epoch), regardless of the Timestamp's own timezone. +// / +// / Therefore, timestamp values with a non-empty timezone correspond to +// / physical points in time together with some additional information about +// / how the data was obtained and/or how to display it (the timezone). +// / +// / For example, the timestamp value 0 with the timezone string "Europe/Paris" +// / corresponds to "January 1st 1970, 00h00" in the UTC timezone, but the +// / application may prefer to display it as "January 1st 1970, 01h00" in +// / the Europe/Paris timezone (which is the same physical point in time). +// / +// / One consequence is that timestamp values with a non-empty timezone +// / can be compared and ordered directly, since they all share the same +// / well-known point of reference (the Unix epoch). +// / +// / Timestamps with an unset / empty timezone +// / ----------------------------------------- +// / +// / If a Timestamp column has no timezone value, its epoch is +// / 1970-01-01 00:00:00 (January 1st 1970, midnight) in an *unknown* timezone. +// / +// / Therefore, timestamp values without a timezone cannot be meaningfully +// / interpreted as physical points in time, but only as calendar / clock +// / indications ("wall clock time") in an unspecified timezone. +// / +// / For example, the timestamp value 0 with an empty timezone string +// / corresponds to "January 1st 1970, 00h00" in an unknown timezone: there +// / is not enough information to interpret it as a well-defined physical +// / point in time. +// / +// / One consequence is that timestamp values without a timezone cannot +// / be reliably compared or ordered, since they may have different points of +// / reference. In particular, it is *not* possible to interpret an unset +// / or empty timezone as the same as "UTC". +// / +// / Conversion between timezones +// / ---------------------------- +// / +// / If a Timestamp column has a non-empty timezone, changing the timezone +// / to a different non-empty value is a metadata-only operation: +// / the timestamp values need not change as their point of reference remains +// / the same (the Unix epoch). +// / +// / However, if a Timestamp column has no timezone value, changing it to a +// / non-empty value requires to think about the desired semantics. +// / One possibility is to assume that the original timestamp values are +// / relative to the epoch of the timezone being set; timestamp values should +// / then adjusted to the Unix epoch (for example, changing the timezone from +// / empty to "Europe/Paris" would require converting the timestamp values +// / from "Europe/Paris" to "UTC", which seems counter-intuitive but is +// / nevertheless correct). 
+// / +// / Guidelines for encoding data from external libraries +// / ---------------------------------------------------- +// / +// / Date & time libraries often have multiple different data types for temporal +// / data. In order to ease interoperability between different implementations the +// / Arrow project has some recommendations for encoding these types into a Timestamp +// / column. +// / +// / An "instant" represents a physical point in time that has no relevant timezone +// / (for example, astronomical data). To encode an instant, use a Timestamp with +// / the timezone string set to "UTC", and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / A "zoned date-time" represents a physical point in time annotated with an +// / informative timezone (for example, the timezone in which the data was +// / recorded). To encode a zoned date-time, use a Timestamp with the timezone +// / string set to the name of the timezone, and make sure the Timestamp values +// / are relative to the UTC epoch (January 1st 1970, midnight). +// / +// / (There is some ambiguity between an instant and a zoned date-time with the +// / UTC timezone. Both of these are stored the same in Arrow. Typically, +// / this distinction does not matter. If it does, then an application should +// / use custom metadata or an extension type to distinguish between the two cases.) +// / +// / An "offset date-time" represents a physical point in time combined with an +// / explicit offset from UTC. To encode an offset date-time, use a Timestamp +// / with the timezone string set to the numeric timezone offset string +// / (e.g. "+03:00"), and make sure the Timestamp values are relative to +// / the UTC epoch (January 1st 1970, midnight). +// / +// / A "naive date-time" (also called "local date-time" in some libraries) +// / represents a wall clock time combined with a calendar date, but with +// / no indication of how to map this information to a physical point in time. +// / Naive date-times must be handled with care because of this missing +// / information, and also because daylight saving time (DST) may make +// / some values ambiguous or nonexistent. A naive date-time may be +// / stored as a struct with Date and Time fields. However, it may also be +// / encoded into a Timestamp column with an empty timezone. The timestamp +// / values should be computed "as if" the timezone of the date-time values +// / was UTC; for example, the naive date-time "January 1st 1970, 00h00" would +// / be encoded as timestamp value 0. type Timestamp struct { _tab flatbuffers.Table } @@ -159,16 +159,16 @@ func (rcv *Timestamp) MutateUnit(n TimeUnit) bool { return rcv._tab.MutateInt16Slot(4, int16(n)) } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". 
+// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func (rcv *Timestamp) Timezone() []byte { o := flatbuffers.UOffsetT(rcv._tab.Offset(6)) if o != 0 { @@ -177,16 +177,16 @@ func (rcv *Timestamp) Timezone() []byte { return nil } -/// The timezone is an optional string indicating the name of a timezone, -/// one of: -/// -/// * As used in the Olson timezone database (the "tz database" or -/// "tzdata"), such as "America/New_York". -/// * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", -/// such as "+07:30". -/// -/// Whether a timezone string is present indicates different semantics about -/// the data (see above). +// / The timezone is an optional string indicating the name of a timezone, +// / one of: +// / +// / * As used in the Olson timezone database (the "tz database" or +// / "tzdata"), such as "America/New_York". +// / * An absolute timezone offset of the form "+XX:XX" or "-XX:XX", +// / such as "+07:30". +// / +// / Whether a timezone string is present indicates different semantics about +// / the data (see above). func TimestampStart(builder *flatbuffers.Builder) { builder.StartObject(2) } diff --git a/go/arrow/internal/flatbuf/Type.go b/go/arrow/internal/flatbuf/Type.go index ab2bce9c63657..df8ba8650e1cd 100644 --- a/go/arrow/internal/flatbuf/Type.go +++ b/go/arrow/internal/flatbuf/Type.go @@ -20,9 +20,9 @@ package flatbuf import "strconv" -/// ---------------------------------------------------------------------- -/// Top-level Type value, enabling extensible type-specific metadata. We can -/// add new logical types to Type without breaking backwards compatibility +// / ---------------------------------------------------------------------- +// / Top-level Type value, enabling extensible type-specific metadata. 
We can +// / add new logical types to Type without breaking backwards compatibility type Type byte const ( diff --git a/go/arrow/internal/flatbuf/Union.go b/go/arrow/internal/flatbuf/Union.go index e34121d4757f2..0367fb3c1fb94 100644 --- a/go/arrow/internal/flatbuf/Union.go +++ b/go/arrow/internal/flatbuf/Union.go @@ -22,10 +22,10 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// A union is a complex type with children in Field -/// By default ids in the type vector refer to the offsets in the children -/// optionally typeIds provides an indirection between the child offset and the type id -/// for each child `typeIds[offset]` is the id used in the type vector +// / A union is a complex type with children in Field +// / By default ids in the type vector refer to the offsets in the children +// / optionally typeIds provides an indirection between the child offset and the type id +// / for each child `typeIds[offset]` is the id used in the type vector type Union struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8.go b/go/arrow/internal/flatbuf/Utf8.go index 4ff365a37504a..cab4ce7743ca9 100644 --- a/go/arrow/internal/flatbuf/Utf8.go +++ b/go/arrow/internal/flatbuf/Utf8.go @@ -22,7 +22,7 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Unicode with UTF-8 encoding +// / Unicode with UTF-8 encoding type Utf8 struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flatbuf/Utf8View.go b/go/arrow/internal/flatbuf/Utf8View.go index 9cf821490198f..f294126a618b6 100644 --- a/go/arrow/internal/flatbuf/Utf8View.go +++ b/go/arrow/internal/flatbuf/Utf8View.go @@ -22,13 +22,13 @@ import ( flatbuffers "github.com/google/flatbuffers/go" ) -/// Logically the same as Utf8, but the internal representation uses a view -/// struct that contains the string length and either the string's entire data -/// inline (for small strings) or an inlined prefix, an index of another buffer, -/// and an offset pointing to a slice in that buffer (for non-small strings). -/// -/// Since it uses a variable number of data buffers, each Field with this type -/// must have a corresponding entry in `variadicBufferCounts`. +// / Logically the same as Utf8, but the internal representation uses a view +// / struct that contains the string length and either the string's entire data +// / inline (for small strings) or an inlined prefix, an index of another buffer, +// / and an offset pointing to a slice in that buffer (for non-small strings). +// / +// / Since it uses a variable number of data buffers, each Field with this type +// / must have a corresponding entry in `variadicBufferCounts`. 
type Utf8View struct { _tab flatbuffers.Table } diff --git a/go/arrow/internal/flight_integration/scenario.go b/go/arrow/internal/flight_integration/scenario.go index cde0fff522ec5..ccfc7a0ed45a3 100644 --- a/go/arrow/internal/flight_integration/scenario.go +++ b/go/arrow/internal/flight_integration/scenario.go @@ -2134,7 +2134,7 @@ func (m *flightSqlScenarioTester) ClosePreparedStatement(_ context.Context, requ return nil } -func (m *flightSqlScenarioTester) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) ([]byte, error){ +func (m *flightSqlScenarioTester) DoPutPreparedStatementQuery(_ context.Context, cmd flightsql.PreparedStatementQuery, rdr flight.MessageReader, _ flight.MetadataWriter) ([]byte, error) { switch string(cmd.GetPreparedStatementHandle()) { case "SELECT PREPARED STATEMENT HANDLE", "SELECT PREPARED STATEMENT WITH TXN HANDLE", diff --git a/go/arrow/ipc/cmd/arrow-cat/main.go b/go/arrow/ipc/cmd/arrow-cat/main.go index 080401e56a83e..4faaabb05ddc1 100644 --- a/go/arrow/ipc/cmd/arrow-cat/main.go +++ b/go/arrow/ipc/cmd/arrow-cat/main.go @@ -18,40 +18,40 @@ // // Examples: // -// $> arrow-cat ./testdata/primitives.data -// version: V4 -// record 1/3... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2/3... -// col[0] "bools": [true (null) (null) false true] -// [...] +// $> arrow-cat ./testdata/primitives.data +// version: V4 +// record 1/3... +// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2/3... +// col[0] "bools": [true (null) (null) false true] +// [...] // -// $> gen-arrow-stream | arrow-cat -// record 1... -// col[0] "bools": [true (null) (null) false true] -// col[1] "int8s": [-1 (null) (null) -4 -5] -// col[2] "int16s": [-1 (null) (null) -4 -5] -// col[3] "int32s": [-1 (null) (null) -4 -5] -// col[4] "int64s": [-1 (null) (null) -4 -5] -// col[5] "uint8s": [1 (null) (null) 4 5] -// col[6] "uint16s": [1 (null) (null) 4 5] -// col[7] "uint32s": [1 (null) (null) 4 5] -// col[8] "uint64s": [1 (null) (null) 4 5] -// col[9] "float32s": [1 (null) (null) 4 5] -// col[10] "float64s": [1 (null) (null) 4 5] -// record 2... -// col[0] "bools": [true (null) (null) false true] -// [...] +// $> gen-arrow-stream | arrow-cat +// record 1... 
+// col[0] "bools": [true (null) (null) false true] +// col[1] "int8s": [-1 (null) (null) -4 -5] +// col[2] "int16s": [-1 (null) (null) -4 -5] +// col[3] "int32s": [-1 (null) (null) -4 -5] +// col[4] "int64s": [-1 (null) (null) -4 -5] +// col[5] "uint8s": [1 (null) (null) 4 5] +// col[6] "uint16s": [1 (null) (null) 4 5] +// col[7] "uint32s": [1 (null) (null) 4 5] +// col[8] "uint64s": [1 (null) (null) 4 5] +// col[9] "float32s": [1 (null) (null) 4 5] +// col[10] "float64s": [1 (null) (null) 4 5] +// record 2... +// col[0] "bools": [true (null) (null) false true] +// [...] package main import ( diff --git a/go/arrow/ipc/cmd/arrow-ls/main.go b/go/arrow/ipc/cmd/arrow-ls/main.go index 2be1d076e45f0..2f54744c4068d 100644 --- a/go/arrow/ipc/cmd/arrow-ls/main.go +++ b/go/arrow/ipc/cmd/arrow-ls/main.go @@ -18,38 +18,38 @@ // // Examples: // -// $> arrow-ls ./testdata/primitives.data -// version: V4 -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> arrow-ls ./testdata/primitives.data +// version: V4 +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 // -// $> gen-arrow-stream | arrow-ls -// schema: -// fields: 11 -// - bools: type=bool, nullable -// - int8s: type=int8, nullable -// - int16s: type=int16, nullable -// - int32s: type=int32, nullable -// - int64s: type=int64, nullable -// - uint8s: type=uint8, nullable -// - uint16s: type=uint16, nullable -// - uint32s: type=uint32, nullable -// - uint64s: type=uint64, nullable -// - float32s: type=float32, nullable -// - float64s: type=float64, nullable -// records: 3 +// $> gen-arrow-stream | arrow-ls +// schema: +// fields: 11 +// - bools: type=bool, nullable +// - int8s: type=int8, nullable +// - int16s: type=int16, nullable +// - int32s: type=int32, nullable +// - int64s: type=int64, nullable +// - uint8s: type=uint8, nullable +// - uint16s: type=uint16, nullable +// - uint32s: type=uint32, nullable +// - uint64s: type=uint64, nullable +// - float32s: type=float32, nullable +// - float64s: type=float64, nullable +// records: 3 package main import ( diff --git a/go/arrow/math/math_amd64.go b/go/arrow/math/math_amd64.go index 44301dc2415a5..2397eef718df9 100644 --- a/go/arrow/math/math_amd64.go +++ b/go/arrow/math/math_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_arm64.go b/go/arrow/math/math_arm64.go index 014664b046308..b150eb061f9f5 100644 --- a/go/arrow/math/math_arm64.go +++ b/go/arrow/math/math_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package math @@ -25,8 +26,8 @@ import ( func init() { if cpu.ARM64.HasASIMD { initNEON() - } else { - initGo() + } else { + initGo() } } diff --git a/go/arrow/math/math_noasm.go b/go/arrow/math/math_noasm.go index 0fa924d90aa88..5527ebf801891 100644 --- a/go/arrow/math/math_noasm.go +++ b/go/arrow/math/math_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package math diff --git a/go/arrow/math/math_ppc64le.go b/go/arrow/math/math_ppc64le.go index 3daeac7efaff8..85c8f2fe2e758 100644 --- a/go/arrow/math/math_ppc64le.go +++ b/go/arrow/math/math_ppc64le.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/math/math_s390x.go b/go/arrow/math/math_s390x.go index 3daeac7efaff8..85c8f2fe2e758 100644 --- a/go/arrow/math/math_s390x.go +++ b/go/arrow/math/math_s390x.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package math diff --git a/go/arrow/memory/cgo_allocator.go b/go/arrow/memory/cgo_allocator.go index ffc6b2cb88050..5eb66ade9d861 100644 --- a/go/arrow/memory/cgo_allocator.go +++ b/go/arrow/memory/cgo_allocator.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc +//go:build cgo && ccalloc +// +build cgo,ccalloc package memory diff --git a/go/arrow/memory/cgo_allocator_defaults.go b/go/arrow/memory/cgo_allocator_defaults.go index 501431a0e1eb2..0a2e9a342d37c 100644 --- a/go/arrow/memory/cgo_allocator_defaults.go +++ b/go/arrow/memory/cgo_allocator_defaults.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build !cclog +//go:build cgo && ccalloc && !cclog +// +build cgo,ccalloc,!cclog package memory diff --git a/go/arrow/memory/cgo_allocator_logging.go b/go/arrow/memory/cgo_allocator_logging.go index 01ad6b394807d..fe2e3a940ce21 100644 --- a/go/arrow/memory/cgo_allocator_logging.go +++ b/go/arrow/memory/cgo_allocator_logging.go @@ -14,9 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc -// +build cclog +//go:build cgo && ccalloc && cclog +// +build cgo,ccalloc,cclog package memory diff --git a/go/arrow/memory/cgo_allocator_test.go b/go/arrow/memory/cgo_allocator_test.go index e7a03767fc89a..4c07cc326c87f 100644 --- a/go/arrow/memory/cgo_allocator_test.go +++ b/go/arrow/memory/cgo_allocator_test.go @@ -14,8 +14,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -// +build cgo -// +build ccalloc +//go:build cgo && ccalloc +// +build cgo,ccalloc package memory diff --git a/go/arrow/memory/memory_amd64.go b/go/arrow/memory/memory_amd64.go index 58356d6482558..895ddc07cf81f 100644 --- a/go/arrow/memory/memory_amd64.go +++ b/go/arrow/memory/memory_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_arm64.go b/go/arrow/memory/memory_arm64.go index 3db5d11013164..5260334958526 100755 --- a/go/arrow/memory/memory_arm64.go +++ b/go/arrow/memory/memory_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_avx2_amd64.go b/go/arrow/memory/memory_avx2_amd64.go index 2bd851ea53275..39fb3a5f7692f 100644 --- a/go/arrow/memory/memory_avx2_amd64.go +++ b/go/arrow/memory/memory_avx2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_js_wasm.go b/go/arrow/memory/memory_js_wasm.go index 9b94d99ff33ca..5cc0c84d39ee7 100644 --- a/go/arrow/memory/memory_js_wasm.go +++ b/go/arrow/memory/memory_js_wasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build wasm // +build wasm package memory diff --git a/go/arrow/memory/memory_neon_arm64.go b/go/arrow/memory/memory_neon_arm64.go index 6cb0400c9c597..806ca575f22dd 100755 --- a/go/arrow/memory/memory_neon_arm64.go +++ b/go/arrow/memory/memory_neon_arm64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/arrow/memory/memory_noasm.go b/go/arrow/memory/memory_noasm.go index bf8846fa2e059..44f19c091c7e0 100644 --- a/go/arrow/memory/memory_noasm.go +++ b/go/arrow/memory/memory_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package memory diff --git a/go/arrow/memory/memory_sse4_amd64.go b/go/arrow/memory/memory_sse4_amd64.go index 716c0d2704a88..1711a1ee3eaf7 100644 --- a/go/arrow/memory/memory_sse4_amd64.go +++ b/go/arrow/memory/memory_sse4_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package memory diff --git a/go/internal/utils/min_max_arm64.go b/go/internal/utils/min_max_arm64.go index 7404e95d963e3..d02849665df56 100644 --- a/go/internal/utils/min_max_arm64.go +++ b/go/internal/utils/min_max_arm64.go @@ -21,8 +21,9 @@ package utils import ( "os" "strings" + + "golang.org/x/sys/cpu" ) -import "golang.org/x/sys/cpu" func init() { // Added ability to enable extension via environment: diff --git a/go/parquet/doc.go b/go/parquet/doc.go index ff42be6498d8c..6ab08f83f063f 100644 --- a/go/parquet/doc.go +++ b/go/parquet/doc.go @@ -26,14 +26,15 @@ // This implementation is a native go implementation for reading and writing the // parquet file format. 
// -// Install +// # Install // // You can download the library and cli utilities via: -// go get -u github.com/apache/arrow/go/v17/parquet -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest -// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest // -// Modules +// go get -u github.com/apache/arrow/go/v17/parquet +// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_reader@latest +// go install github.com/apache/arrow/go/v17/parquet/cmd/parquet_schema@latest +// +// # Modules // // This top level parquet package contains the basic common types and reader/writer // properties along with some utilities that are used throughout the other modules. @@ -50,13 +51,13 @@ // The schema module contains the types for manipulating / inspecting / creating // parquet file schemas. // -// Primitive Types +// # Primitive Types // // The Parquet Primitive Types and their corresponding Go types are Boolean (bool), // Int32 (int32), Int64 (int64), Int96 (parquet.Int96), Float (float32), Double (float64), // ByteArray (parquet.ByteArray) and FixedLenByteArray (parquet.FixedLenByteArray). // -// Encodings +// # Encodings // // The encoding types supported in this package are: // Plain, Plain/RLE Dictionary, Delta Binary Packed (only integer types), Delta Byte Array diff --git a/go/parquet/internal/bmi/bitmap_bmi2_amd64.go b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go index ab6dcec40b02b..7fe5a1654911e 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_amd64.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_noasm.go b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go index 6dc4a39a60e5a..03be648e011a7 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_noasm.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_noasm.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build noasm // +build noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go index 498d5452e17ad..60f898f6bd557 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_ppc64le.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bitmap_bmi2_s390x.go b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go index 498d5452e17ad..60f898f6bd557 100644 --- a/go/parquet/internal/bmi/bitmap_bmi2_s390x.go +++ b/go/parquet/internal/bmi/bitmap_bmi2_s390x.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/bmi/bmi_amd64.go b/go/parquet/internal/bmi/bmi_amd64.go index 600ef024f69a8..f894b160d4c8b 100644 --- a/go/parquet/internal/bmi/bmi_amd64.go +++ b/go/parquet/internal/bmi/bmi_amd64.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build !noasm // +build !noasm package bmi diff --git a/go/parquet/internal/debug/assert_off.go b/go/parquet/internal/debug/assert_off.go index 52b9a233169d2..1450ecc98a26e 100644 --- a/go/parquet/internal/debug/assert_off.go +++ b/go/parquet/internal/debug/assert_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !assert // +build !assert package debug diff --git a/go/parquet/internal/debug/assert_on.go b/go/parquet/internal/debug/assert_on.go index 188e683120466..1a47460fd542a 100644 --- a/go/parquet/internal/debug/assert_on.go +++ b/go/parquet/internal/debug/assert_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build assert // +build assert package debug diff --git a/go/parquet/internal/debug/doc.go b/go/parquet/internal/debug/doc.go index 61684d625380d..d3965793a0825 100644 --- a/go/parquet/internal/debug/doc.go +++ b/go/parquet/internal/debug/doc.go @@ -16,7 +16,7 @@ // Package debug provides APIs for conditional runtime assertions and debug logging. // -// Using Assert +// # Using Assert // // To enable runtime assertions, build with the assert tag. When the assert tag is omitted, // the code for the assertion will be omitted from the binary. diff --git a/go/parquet/internal/debug/log_off.go b/go/parquet/internal/debug/log_off.go index 23dcccd810ce4..09f0e09a5ed1d 100644 --- a/go/parquet/internal/debug/log_off.go +++ b/go/parquet/internal/debug/log_off.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +//go:build !debug // +build !debug package debug diff --git a/go/parquet/internal/debug/log_on.go b/go/parquet/internal/debug/log_on.go index 8d6106099f6f0..0067e442d3693 100644 --- a/go/parquet/internal/debug/log_on.go +++ b/go/parquet/internal/debug/log_on.go @@ -14,6 +14,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+//go:build debug // +build debug package debug diff --git a/go/parquet/internal/encoding/delta_byte_array_test.go b/go/parquet/internal/encoding/delta_byte_array_test.go index 1e5e6b2d676ef..c2e4e6849396e 100644 --- a/go/parquet/internal/encoding/delta_byte_array_test.go +++ b/go/parquet/internal/encoding/delta_byte_array_test.go @@ -18,10 +18,11 @@ package encoding import ( "fmt" + "testing" + "github.com/apache/arrow/go/v17/arrow/memory" "github.com/apache/arrow/go/v17/parquet" "github.com/stretchr/testify/assert" - "testing" ) func TestDeltaByteArrayDecoder_SetData(t *testing.T) { diff --git a/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go index 01f1eb5aa99e3..c2a8e5415ed64 100644 --- a/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go +++ b/go/parquet/internal/gen-go/parquet/GoUnusedProtection__.go @@ -2,5 +2,4 @@ package parquet -var GoUnusedProtection__ int; - +var GoUnusedProtection__ int diff --git a/go/parquet/internal/gen-go/parquet/parquet-consts.go b/go/parquet/internal/gen-go/parquet/parquet-consts.go index ab0a73c596e7d..f83e0be7640ff 100644 --- a/go/parquet/internal/gen-go/parquet/parquet-consts.go +++ b/go/parquet/internal/gen-go/parquet/parquet-consts.go @@ -7,10 +7,11 @@ import ( "context" "errors" "fmt" + "regexp" + "strings" "time" + thrift "github.com/apache/thrift/lib/go/thrift" - "strings" - "regexp" ) // (needed to ensure safety because of naive import list construction.) @@ -20,11 +21,10 @@ var _ = errors.New var _ = context.Background var _ = time.Now var _ = bytes.Equal + // (needed by validator.) var _ = strings.Contains var _ = regexp.MatchString - func init() { } - diff --git a/go/parquet/internal/gen-go/parquet/parquet.go b/go/parquet/internal/gen-go/parquet/parquet.go index 9dcedae8888d3..5b616d1335150 100644 --- a/go/parquet/internal/gen-go/parquet/parquet.go +++ b/go/parquet/internal/gen-go/parquet/parquet.go @@ -8,10 +8,11 @@ import ( "database/sql/driver" "errors" "fmt" + "regexp" + "strings" "time" + thrift "github.com/apache/thrift/lib/go/thrift" - "strings" - "regexp" ) // (needed to ensure safety because of naive import list construction.) @@ -21,1336 +22,1547 @@ var _ = errors.New var _ = context.Background var _ = time.Now var _ = bytes.Equal + // (needed by validator.) var _ = strings.Contains var _ = regexp.MatchString -//Types supported by Parquet. These types are intended to be used in combination -//with the encodings to control the on disk storage format. -//For example INT16 is not included as a type since a good encoding of INT32 -//would handle this. +// Types supported by Parquet. These types are intended to be used in combination +// with the encodings to control the on disk storage format. +// For example INT16 is not included as a type since a good encoding of INT32 +// would handle this. 
type Type int64 + const ( - Type_BOOLEAN Type = 0 - Type_INT32 Type = 1 - Type_INT64 Type = 2 - Type_INT96 Type = 3 - Type_FLOAT Type = 4 - Type_DOUBLE Type = 5 - Type_BYTE_ARRAY Type = 6 - Type_FIXED_LEN_BYTE_ARRAY Type = 7 + Type_BOOLEAN Type = 0 + Type_INT32 Type = 1 + Type_INT64 Type = 2 + Type_INT96 Type = 3 + Type_FLOAT Type = 4 + Type_DOUBLE Type = 5 + Type_BYTE_ARRAY Type = 6 + Type_FIXED_LEN_BYTE_ARRAY Type = 7 ) func (p Type) String() string { - switch p { - case Type_BOOLEAN: return "BOOLEAN" - case Type_INT32: return "INT32" - case Type_INT64: return "INT64" - case Type_INT96: return "INT96" - case Type_FLOAT: return "FLOAT" - case Type_DOUBLE: return "DOUBLE" - case Type_BYTE_ARRAY: return "BYTE_ARRAY" - case Type_FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY" - } - return "" + switch p { + case Type_BOOLEAN: + return "BOOLEAN" + case Type_INT32: + return "INT32" + case Type_INT64: + return "INT64" + case Type_INT96: + return "INT96" + case Type_FLOAT: + return "FLOAT" + case Type_DOUBLE: + return "DOUBLE" + case Type_BYTE_ARRAY: + return "BYTE_ARRAY" + case Type_FIXED_LEN_BYTE_ARRAY: + return "FIXED_LEN_BYTE_ARRAY" + } + return "" } func TypeFromString(s string) (Type, error) { - switch s { - case "BOOLEAN": return Type_BOOLEAN, nil - case "INT32": return Type_INT32, nil - case "INT64": return Type_INT64, nil - case "INT96": return Type_INT96, nil - case "FLOAT": return Type_FLOAT, nil - case "DOUBLE": return Type_DOUBLE, nil - case "BYTE_ARRAY": return Type_BYTE_ARRAY, nil - case "FIXED_LEN_BYTE_ARRAY": return Type_FIXED_LEN_BYTE_ARRAY, nil - } - return Type(0), fmt.Errorf("not a valid Type string") + switch s { + case "BOOLEAN": + return Type_BOOLEAN, nil + case "INT32": + return Type_INT32, nil + case "INT64": + return Type_INT64, nil + case "INT96": + return Type_INT96, nil + case "FLOAT": + return Type_FLOAT, nil + case "DOUBLE": + return Type_DOUBLE, nil + case "BYTE_ARRAY": + return Type_BYTE_ARRAY, nil + case "FIXED_LEN_BYTE_ARRAY": + return Type_FIXED_LEN_BYTE_ARRAY, nil + } + return Type(0), fmt.Errorf("not a valid Type string") } - func TypePtr(v Type) *Type { return &v } func (p Type) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *Type) UnmarshalText(text []byte) error { -q, err := TypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := TypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *Type) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = Type(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Type(v) + return nil } -func (p * Type) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *Type) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. -//ConvertedType is superseded by LogicalType. This enum should not be extended. + +// DEPRECATED: Common types used by frameworks(e.g. hive, pig) using parquet. +// ConvertedType is superseded by LogicalType. This enum should not be extended. // -//See LogicalTypes.md for conversion between ConvertedType and LogicalType. +// See LogicalTypes.md for conversion between ConvertedType and LogicalType. 
type ConvertedType int64 + const ( - ConvertedType_UTF8 ConvertedType = 0 - ConvertedType_MAP ConvertedType = 1 - ConvertedType_MAP_KEY_VALUE ConvertedType = 2 - ConvertedType_LIST ConvertedType = 3 - ConvertedType_ENUM ConvertedType = 4 - ConvertedType_DECIMAL ConvertedType = 5 - ConvertedType_DATE ConvertedType = 6 - ConvertedType_TIME_MILLIS ConvertedType = 7 - ConvertedType_TIME_MICROS ConvertedType = 8 - ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 - ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 - ConvertedType_UINT_8 ConvertedType = 11 - ConvertedType_UINT_16 ConvertedType = 12 - ConvertedType_UINT_32 ConvertedType = 13 - ConvertedType_UINT_64 ConvertedType = 14 - ConvertedType_INT_8 ConvertedType = 15 - ConvertedType_INT_16 ConvertedType = 16 - ConvertedType_INT_32 ConvertedType = 17 - ConvertedType_INT_64 ConvertedType = 18 - ConvertedType_JSON ConvertedType = 19 - ConvertedType_BSON ConvertedType = 20 - ConvertedType_INTERVAL ConvertedType = 21 + ConvertedType_UTF8 ConvertedType = 0 + ConvertedType_MAP ConvertedType = 1 + ConvertedType_MAP_KEY_VALUE ConvertedType = 2 + ConvertedType_LIST ConvertedType = 3 + ConvertedType_ENUM ConvertedType = 4 + ConvertedType_DECIMAL ConvertedType = 5 + ConvertedType_DATE ConvertedType = 6 + ConvertedType_TIME_MILLIS ConvertedType = 7 + ConvertedType_TIME_MICROS ConvertedType = 8 + ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 + ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 + ConvertedType_UINT_8 ConvertedType = 11 + ConvertedType_UINT_16 ConvertedType = 12 + ConvertedType_UINT_32 ConvertedType = 13 + ConvertedType_UINT_64 ConvertedType = 14 + ConvertedType_INT_8 ConvertedType = 15 + ConvertedType_INT_16 ConvertedType = 16 + ConvertedType_INT_32 ConvertedType = 17 + ConvertedType_INT_64 ConvertedType = 18 + ConvertedType_JSON ConvertedType = 19 + ConvertedType_BSON ConvertedType = 20 + ConvertedType_INTERVAL ConvertedType = 21 ) func (p ConvertedType) String() string { - switch p { - case ConvertedType_UTF8: return "UTF8" - case ConvertedType_MAP: return "MAP" - case ConvertedType_MAP_KEY_VALUE: return "MAP_KEY_VALUE" - case ConvertedType_LIST: return "LIST" - case ConvertedType_ENUM: return "ENUM" - case ConvertedType_DECIMAL: return "DECIMAL" - case ConvertedType_DATE: return "DATE" - case ConvertedType_TIME_MILLIS: return "TIME_MILLIS" - case ConvertedType_TIME_MICROS: return "TIME_MICROS" - case ConvertedType_TIMESTAMP_MILLIS: return "TIMESTAMP_MILLIS" - case ConvertedType_TIMESTAMP_MICROS: return "TIMESTAMP_MICROS" - case ConvertedType_UINT_8: return "UINT_8" - case ConvertedType_UINT_16: return "UINT_16" - case ConvertedType_UINT_32: return "UINT_32" - case ConvertedType_UINT_64: return "UINT_64" - case ConvertedType_INT_8: return "INT_8" - case ConvertedType_INT_16: return "INT_16" - case ConvertedType_INT_32: return "INT_32" - case ConvertedType_INT_64: return "INT_64" - case ConvertedType_JSON: return "JSON" - case ConvertedType_BSON: return "BSON" - case ConvertedType_INTERVAL: return "INTERVAL" - } - return "" + switch p { + case ConvertedType_UTF8: + return "UTF8" + case ConvertedType_MAP: + return "MAP" + case ConvertedType_MAP_KEY_VALUE: + return "MAP_KEY_VALUE" + case ConvertedType_LIST: + return "LIST" + case ConvertedType_ENUM: + return "ENUM" + case ConvertedType_DECIMAL: + return "DECIMAL" + case ConvertedType_DATE: + return "DATE" + case ConvertedType_TIME_MILLIS: + return "TIME_MILLIS" + case ConvertedType_TIME_MICROS: + return "TIME_MICROS" + case ConvertedType_TIMESTAMP_MILLIS: + return "TIMESTAMP_MILLIS" + 
case ConvertedType_TIMESTAMP_MICROS: + return "TIMESTAMP_MICROS" + case ConvertedType_UINT_8: + return "UINT_8" + case ConvertedType_UINT_16: + return "UINT_16" + case ConvertedType_UINT_32: + return "UINT_32" + case ConvertedType_UINT_64: + return "UINT_64" + case ConvertedType_INT_8: + return "INT_8" + case ConvertedType_INT_16: + return "INT_16" + case ConvertedType_INT_32: + return "INT_32" + case ConvertedType_INT_64: + return "INT_64" + case ConvertedType_JSON: + return "JSON" + case ConvertedType_BSON: + return "BSON" + case ConvertedType_INTERVAL: + return "INTERVAL" + } + return "" } func ConvertedTypeFromString(s string) (ConvertedType, error) { - switch s { - case "UTF8": return ConvertedType_UTF8, nil - case "MAP": return ConvertedType_MAP, nil - case "MAP_KEY_VALUE": return ConvertedType_MAP_KEY_VALUE, nil - case "LIST": return ConvertedType_LIST, nil - case "ENUM": return ConvertedType_ENUM, nil - case "DECIMAL": return ConvertedType_DECIMAL, nil - case "DATE": return ConvertedType_DATE, nil - case "TIME_MILLIS": return ConvertedType_TIME_MILLIS, nil - case "TIME_MICROS": return ConvertedType_TIME_MICROS, nil - case "TIMESTAMP_MILLIS": return ConvertedType_TIMESTAMP_MILLIS, nil - case "TIMESTAMP_MICROS": return ConvertedType_TIMESTAMP_MICROS, nil - case "UINT_8": return ConvertedType_UINT_8, nil - case "UINT_16": return ConvertedType_UINT_16, nil - case "UINT_32": return ConvertedType_UINT_32, nil - case "UINT_64": return ConvertedType_UINT_64, nil - case "INT_8": return ConvertedType_INT_8, nil - case "INT_16": return ConvertedType_INT_16, nil - case "INT_32": return ConvertedType_INT_32, nil - case "INT_64": return ConvertedType_INT_64, nil - case "JSON": return ConvertedType_JSON, nil - case "BSON": return ConvertedType_BSON, nil - case "INTERVAL": return ConvertedType_INTERVAL, nil - } - return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") + switch s { + case "UTF8": + return ConvertedType_UTF8, nil + case "MAP": + return ConvertedType_MAP, nil + case "MAP_KEY_VALUE": + return ConvertedType_MAP_KEY_VALUE, nil + case "LIST": + return ConvertedType_LIST, nil + case "ENUM": + return ConvertedType_ENUM, nil + case "DECIMAL": + return ConvertedType_DECIMAL, nil + case "DATE": + return ConvertedType_DATE, nil + case "TIME_MILLIS": + return ConvertedType_TIME_MILLIS, nil + case "TIME_MICROS": + return ConvertedType_TIME_MICROS, nil + case "TIMESTAMP_MILLIS": + return ConvertedType_TIMESTAMP_MILLIS, nil + case "TIMESTAMP_MICROS": + return ConvertedType_TIMESTAMP_MICROS, nil + case "UINT_8": + return ConvertedType_UINT_8, nil + case "UINT_16": + return ConvertedType_UINT_16, nil + case "UINT_32": + return ConvertedType_UINT_32, nil + case "UINT_64": + return ConvertedType_UINT_64, nil + case "INT_8": + return ConvertedType_INT_8, nil + case "INT_16": + return ConvertedType_INT_16, nil + case "INT_32": + return ConvertedType_INT_32, nil + case "INT_64": + return ConvertedType_INT_64, nil + case "JSON": + return ConvertedType_JSON, nil + case "BSON": + return ConvertedType_BSON, nil + case "INTERVAL": + return ConvertedType_INTERVAL, nil + } + return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") } - func ConvertedTypePtr(v ConvertedType) *ConvertedType { return &v } func (p ConvertedType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *ConvertedType) UnmarshalText(text []byte) error { -q, err := ConvertedTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil 
+ q, err := ConvertedTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *ConvertedType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = ConvertedType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = ConvertedType(v) + return nil } -func (p * ConvertedType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *ConvertedType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Representation of Schemas + +// Representation of Schemas type FieldRepetitionType int64 + const ( - FieldRepetitionType_REQUIRED FieldRepetitionType = 0 - FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 - FieldRepetitionType_REPEATED FieldRepetitionType = 2 + FieldRepetitionType_REQUIRED FieldRepetitionType = 0 + FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 + FieldRepetitionType_REPEATED FieldRepetitionType = 2 ) func (p FieldRepetitionType) String() string { - switch p { - case FieldRepetitionType_REQUIRED: return "REQUIRED" - case FieldRepetitionType_OPTIONAL: return "OPTIONAL" - case FieldRepetitionType_REPEATED: return "REPEATED" - } - return "" + switch p { + case FieldRepetitionType_REQUIRED: + return "REQUIRED" + case FieldRepetitionType_OPTIONAL: + return "OPTIONAL" + case FieldRepetitionType_REPEATED: + return "REPEATED" + } + return "" } func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error) { - switch s { - case "REQUIRED": return FieldRepetitionType_REQUIRED, nil - case "OPTIONAL": return FieldRepetitionType_OPTIONAL, nil - case "REPEATED": return FieldRepetitionType_REPEATED, nil - } - return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") + switch s { + case "REQUIRED": + return FieldRepetitionType_REQUIRED, nil + case "OPTIONAL": + return FieldRepetitionType_OPTIONAL, nil + case "REPEATED": + return FieldRepetitionType_REPEATED, nil + } + return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") } - func FieldRepetitionTypePtr(v FieldRepetitionType) *FieldRepetitionType { return &v } func (p FieldRepetitionType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *FieldRepetitionType) UnmarshalText(text []byte) error { -q, err := FieldRepetitionTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := FieldRepetitionTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *FieldRepetitionType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = FieldRepetitionType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = FieldRepetitionType(v) + return nil } -func (p * FieldRepetitionType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *FieldRepetitionType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Encodings supported by Parquet. Not all encodings are valid for all types. These -//enums are also used to specify the encoding of definition and repetition levels. -//See the accompanying doc for the details of the more complicated encodings. + +// Encodings supported by Parquet. 
Not all encodings are valid for all types. These +// enums are also used to specify the encoding of definition and repetition levels. +// See the accompanying doc for the details of the more complicated encodings. type Encoding int64 + const ( - Encoding_PLAIN Encoding = 0 - Encoding_PLAIN_DICTIONARY Encoding = 2 - Encoding_RLE Encoding = 3 - Encoding_BIT_PACKED Encoding = 4 - Encoding_DELTA_BINARY_PACKED Encoding = 5 - Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 - Encoding_DELTA_BYTE_ARRAY Encoding = 7 - Encoding_RLE_DICTIONARY Encoding = 8 - Encoding_BYTE_STREAM_SPLIT Encoding = 9 + Encoding_PLAIN Encoding = 0 + Encoding_PLAIN_DICTIONARY Encoding = 2 + Encoding_RLE Encoding = 3 + Encoding_BIT_PACKED Encoding = 4 + Encoding_DELTA_BINARY_PACKED Encoding = 5 + Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 + Encoding_DELTA_BYTE_ARRAY Encoding = 7 + Encoding_RLE_DICTIONARY Encoding = 8 + Encoding_BYTE_STREAM_SPLIT Encoding = 9 ) func (p Encoding) String() string { - switch p { - case Encoding_PLAIN: return "PLAIN" - case Encoding_PLAIN_DICTIONARY: return "PLAIN_DICTIONARY" - case Encoding_RLE: return "RLE" - case Encoding_BIT_PACKED: return "BIT_PACKED" - case Encoding_DELTA_BINARY_PACKED: return "DELTA_BINARY_PACKED" - case Encoding_DELTA_LENGTH_BYTE_ARRAY: return "DELTA_LENGTH_BYTE_ARRAY" - case Encoding_DELTA_BYTE_ARRAY: return "DELTA_BYTE_ARRAY" - case Encoding_RLE_DICTIONARY: return "RLE_DICTIONARY" - case Encoding_BYTE_STREAM_SPLIT: return "BYTE_STREAM_SPLIT" - } - return "" + switch p { + case Encoding_PLAIN: + return "PLAIN" + case Encoding_PLAIN_DICTIONARY: + return "PLAIN_DICTIONARY" + case Encoding_RLE: + return "RLE" + case Encoding_BIT_PACKED: + return "BIT_PACKED" + case Encoding_DELTA_BINARY_PACKED: + return "DELTA_BINARY_PACKED" + case Encoding_DELTA_LENGTH_BYTE_ARRAY: + return "DELTA_LENGTH_BYTE_ARRAY" + case Encoding_DELTA_BYTE_ARRAY: + return "DELTA_BYTE_ARRAY" + case Encoding_RLE_DICTIONARY: + return "RLE_DICTIONARY" + case Encoding_BYTE_STREAM_SPLIT: + return "BYTE_STREAM_SPLIT" + } + return "" } func EncodingFromString(s string) (Encoding, error) { - switch s { - case "PLAIN": return Encoding_PLAIN, nil - case "PLAIN_DICTIONARY": return Encoding_PLAIN_DICTIONARY, nil - case "RLE": return Encoding_RLE, nil - case "BIT_PACKED": return Encoding_BIT_PACKED, nil - case "DELTA_BINARY_PACKED": return Encoding_DELTA_BINARY_PACKED, nil - case "DELTA_LENGTH_BYTE_ARRAY": return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil - case "DELTA_BYTE_ARRAY": return Encoding_DELTA_BYTE_ARRAY, nil - case "RLE_DICTIONARY": return Encoding_RLE_DICTIONARY, nil - case "BYTE_STREAM_SPLIT": return Encoding_BYTE_STREAM_SPLIT, nil - } - return Encoding(0), fmt.Errorf("not a valid Encoding string") + switch s { + case "PLAIN": + return Encoding_PLAIN, nil + case "PLAIN_DICTIONARY": + return Encoding_PLAIN_DICTIONARY, nil + case "RLE": + return Encoding_RLE, nil + case "BIT_PACKED": + return Encoding_BIT_PACKED, nil + case "DELTA_BINARY_PACKED": + return Encoding_DELTA_BINARY_PACKED, nil + case "DELTA_LENGTH_BYTE_ARRAY": + return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil + case "DELTA_BYTE_ARRAY": + return Encoding_DELTA_BYTE_ARRAY, nil + case "RLE_DICTIONARY": + return Encoding_RLE_DICTIONARY, nil + case "BYTE_STREAM_SPLIT": + return Encoding_BYTE_STREAM_SPLIT, nil + } + return Encoding(0), fmt.Errorf("not a valid Encoding string") } - func EncodingPtr(v Encoding) *Encoding { return &v } func (p Encoding) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p 
*Encoding) UnmarshalText(text []byte) error { -q, err := EncodingFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := EncodingFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *Encoding) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = Encoding(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Encoding(v) + return nil } -func (p * Encoding) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *Encoding) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Supported compression algorithms. + +// Supported compression algorithms. // -//Codecs added in format version X.Y can be read by readers based on X.Y and later. -//Codec support may vary between readers based on the format version and -//libraries available at runtime. +// Codecs added in format version X.Y can be read by readers based on X.Y and later. +// Codec support may vary between readers based on the format version and +// libraries available at runtime. // -//See Compression.md for a detailed specification of these algorithms. +// See Compression.md for a detailed specification of these algorithms. type CompressionCodec int64 + const ( - CompressionCodec_UNCOMPRESSED CompressionCodec = 0 - CompressionCodec_SNAPPY CompressionCodec = 1 - CompressionCodec_GZIP CompressionCodec = 2 - CompressionCodec_LZO CompressionCodec = 3 - CompressionCodec_BROTLI CompressionCodec = 4 - CompressionCodec_LZ4 CompressionCodec = 5 - CompressionCodec_ZSTD CompressionCodec = 6 - CompressionCodec_LZ4_RAW CompressionCodec = 7 + CompressionCodec_UNCOMPRESSED CompressionCodec = 0 + CompressionCodec_SNAPPY CompressionCodec = 1 + CompressionCodec_GZIP CompressionCodec = 2 + CompressionCodec_LZO CompressionCodec = 3 + CompressionCodec_BROTLI CompressionCodec = 4 + CompressionCodec_LZ4 CompressionCodec = 5 + CompressionCodec_ZSTD CompressionCodec = 6 + CompressionCodec_LZ4_RAW CompressionCodec = 7 ) func (p CompressionCodec) String() string { - switch p { - case CompressionCodec_UNCOMPRESSED: return "UNCOMPRESSED" - case CompressionCodec_SNAPPY: return "SNAPPY" - case CompressionCodec_GZIP: return "GZIP" - case CompressionCodec_LZO: return "LZO" - case CompressionCodec_BROTLI: return "BROTLI" - case CompressionCodec_LZ4: return "LZ4" - case CompressionCodec_ZSTD: return "ZSTD" - case CompressionCodec_LZ4_RAW: return "LZ4_RAW" - } - return "" + switch p { + case CompressionCodec_UNCOMPRESSED: + return "UNCOMPRESSED" + case CompressionCodec_SNAPPY: + return "SNAPPY" + case CompressionCodec_GZIP: + return "GZIP" + case CompressionCodec_LZO: + return "LZO" + case CompressionCodec_BROTLI: + return "BROTLI" + case CompressionCodec_LZ4: + return "LZ4" + case CompressionCodec_ZSTD: + return "ZSTD" + case CompressionCodec_LZ4_RAW: + return "LZ4_RAW" + } + return "" } func CompressionCodecFromString(s string) (CompressionCodec, error) { - switch s { - case "UNCOMPRESSED": return CompressionCodec_UNCOMPRESSED, nil - case "SNAPPY": return CompressionCodec_SNAPPY, nil - case "GZIP": return CompressionCodec_GZIP, nil - case "LZO": return CompressionCodec_LZO, nil - case "BROTLI": return CompressionCodec_BROTLI, nil - case "LZ4": return CompressionCodec_LZ4, nil - case "ZSTD": return CompressionCodec_ZSTD, nil - case "LZ4_RAW": return CompressionCodec_LZ4_RAW, nil - } - 
return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") + switch s { + case "UNCOMPRESSED": + return CompressionCodec_UNCOMPRESSED, nil + case "SNAPPY": + return CompressionCodec_SNAPPY, nil + case "GZIP": + return CompressionCodec_GZIP, nil + case "LZO": + return CompressionCodec_LZO, nil + case "BROTLI": + return CompressionCodec_BROTLI, nil + case "LZ4": + return CompressionCodec_LZ4, nil + case "ZSTD": + return CompressionCodec_ZSTD, nil + case "LZ4_RAW": + return CompressionCodec_LZ4_RAW, nil + } + return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") } - func CompressionCodecPtr(v CompressionCodec) *CompressionCodec { return &v } func (p CompressionCodec) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *CompressionCodec) UnmarshalText(text []byte) error { -q, err := CompressionCodecFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := CompressionCodecFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *CompressionCodec) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = CompressionCodec(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = CompressionCodec(v) + return nil } -func (p * CompressionCodec) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *CompressionCodec) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } + type PageType int64 + const ( - PageType_DATA_PAGE PageType = 0 - PageType_INDEX_PAGE PageType = 1 - PageType_DICTIONARY_PAGE PageType = 2 - PageType_DATA_PAGE_V2 PageType = 3 + PageType_DATA_PAGE PageType = 0 + PageType_INDEX_PAGE PageType = 1 + PageType_DICTIONARY_PAGE PageType = 2 + PageType_DATA_PAGE_V2 PageType = 3 ) func (p PageType) String() string { - switch p { - case PageType_DATA_PAGE: return "DATA_PAGE" - case PageType_INDEX_PAGE: return "INDEX_PAGE" - case PageType_DICTIONARY_PAGE: return "DICTIONARY_PAGE" - case PageType_DATA_PAGE_V2: return "DATA_PAGE_V2" - } - return "" + switch p { + case PageType_DATA_PAGE: + return "DATA_PAGE" + case PageType_INDEX_PAGE: + return "INDEX_PAGE" + case PageType_DICTIONARY_PAGE: + return "DICTIONARY_PAGE" + case PageType_DATA_PAGE_V2: + return "DATA_PAGE_V2" + } + return "" } func PageTypeFromString(s string) (PageType, error) { - switch s { - case "DATA_PAGE": return PageType_DATA_PAGE, nil - case "INDEX_PAGE": return PageType_INDEX_PAGE, nil - case "DICTIONARY_PAGE": return PageType_DICTIONARY_PAGE, nil - case "DATA_PAGE_V2": return PageType_DATA_PAGE_V2, nil - } - return PageType(0), fmt.Errorf("not a valid PageType string") + switch s { + case "DATA_PAGE": + return PageType_DATA_PAGE, nil + case "INDEX_PAGE": + return PageType_INDEX_PAGE, nil + case "DICTIONARY_PAGE": + return PageType_DICTIONARY_PAGE, nil + case "DATA_PAGE_V2": + return PageType_DATA_PAGE_V2, nil + } + return PageType(0), fmt.Errorf("not a valid PageType string") } - func PageTypePtr(v PageType) *PageType { return &v } func (p PageType) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *PageType) UnmarshalText(text []byte) error { -q, err := PageTypeFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := PageTypeFromString(string(text)) + if err != nil { + 
return err + } + *p = q + return nil } func (p *PageType) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = PageType(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = PageType(v) + return nil } -func (p * PageType) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *PageType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } -//Enum to annotate whether lists of min/max elements inside ColumnIndex -//are ordered and if so, in which direction. + +// Enum to annotate whether lists of min/max elements inside ColumnIndex +// are ordered and if so, in which direction. type BoundaryOrder int64 + const ( - BoundaryOrder_UNORDERED BoundaryOrder = 0 - BoundaryOrder_ASCENDING BoundaryOrder = 1 - BoundaryOrder_DESCENDING BoundaryOrder = 2 + BoundaryOrder_UNORDERED BoundaryOrder = 0 + BoundaryOrder_ASCENDING BoundaryOrder = 1 + BoundaryOrder_DESCENDING BoundaryOrder = 2 ) func (p BoundaryOrder) String() string { - switch p { - case BoundaryOrder_UNORDERED: return "UNORDERED" - case BoundaryOrder_ASCENDING: return "ASCENDING" - case BoundaryOrder_DESCENDING: return "DESCENDING" - } - return "" + switch p { + case BoundaryOrder_UNORDERED: + return "UNORDERED" + case BoundaryOrder_ASCENDING: + return "ASCENDING" + case BoundaryOrder_DESCENDING: + return "DESCENDING" + } + return "" } func BoundaryOrderFromString(s string) (BoundaryOrder, error) { - switch s { - case "UNORDERED": return BoundaryOrder_UNORDERED, nil - case "ASCENDING": return BoundaryOrder_ASCENDING, nil - case "DESCENDING": return BoundaryOrder_DESCENDING, nil - } - return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") + switch s { + case "UNORDERED": + return BoundaryOrder_UNORDERED, nil + case "ASCENDING": + return BoundaryOrder_ASCENDING, nil + case "DESCENDING": + return BoundaryOrder_DESCENDING, nil + } + return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") } - func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder { return &v } func (p BoundaryOrder) MarshalText() ([]byte, error) { -return []byte(p.String()), nil + return []byte(p.String()), nil } func (p *BoundaryOrder) UnmarshalText(text []byte) error { -q, err := BoundaryOrderFromString(string(text)) -if (err != nil) { -return err -} -*p = q -return nil + q, err := BoundaryOrderFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil } func (p *BoundaryOrder) Scan(value interface{}) error { -v, ok := value.(int64) -if !ok { -return errors.New("Scan value is not int64") -} -*p = BoundaryOrder(v) -return nil + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = BoundaryOrder(v) + return nil } -func (p * BoundaryOrder) Value() (driver.Value, error) { - if p == nil { - return nil, nil - } -return int64(*p), nil +func (p *BoundaryOrder) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil } + // Statistics per row group and per page // All fields are optional. -// +// // Attributes: -// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. -// +// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. +// // Values are encoded using PLAIN encoding, except that variable-length byte // arrays do not include a length prefix. 
-// +// // These fields encode min and max values determined by signed comparison // only. New files should use the correct order for a column's logical type // and store the values in the min_value and max_value fields. -// +// // To support older readers, these may be set when the column order is // signed. -// - Min -// - NullCount: count of null value in the column -// - DistinctCount: count of distinct values occurring -// - MaxValue: Min and max values for the column, determined by its ColumnOrder. -// +// - Min +// - NullCount: count of null value in the column +// - DistinctCount: count of distinct values occurring +// - MaxValue: Min and max values for the column, determined by its ColumnOrder. +// // Values are encoded using PLAIN encoding, except that variable-length byte // arrays do not include a length prefix. -// - MinValue +// - MinValue type Statistics struct { - Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` - Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` - NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` - DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` - MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` - MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` + Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` + Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` + NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` + DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` + MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` + MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` } func NewStatistics() *Statistics { - return &Statistics{} + return &Statistics{} } var Statistics_Max_DEFAULT []byte func (p *Statistics) GetMax() []byte { - return p.Max + return p.Max } + var Statistics_Min_DEFAULT []byte func (p *Statistics) GetMin() []byte { - return p.Min + return p.Min } + var Statistics_NullCount_DEFAULT int64 + func (p *Statistics) GetNullCount() int64 { - if !p.IsSetNullCount() { - return Statistics_NullCount_DEFAULT - } -return *p.NullCount + if !p.IsSetNullCount() { + return Statistics_NullCount_DEFAULT + } + return *p.NullCount } + var Statistics_DistinctCount_DEFAULT int64 + func (p *Statistics) GetDistinctCount() int64 { - if !p.IsSetDistinctCount() { - return Statistics_DistinctCount_DEFAULT - } -return *p.DistinctCount + if !p.IsSetDistinctCount() { + return Statistics_DistinctCount_DEFAULT + } + return *p.DistinctCount } + var Statistics_MaxValue_DEFAULT []byte func (p *Statistics) GetMaxValue() []byte { - return p.MaxValue + return p.MaxValue } + var Statistics_MinValue_DEFAULT []byte func (p *Statistics) GetMinValue() []byte { - return p.MinValue + return p.MinValue } func (p *Statistics) IsSetMax() bool { - return p.Max != nil + return p.Max != nil } func (p *Statistics) IsSetMin() bool { - return p.Min != nil + return p.Min != nil } func (p *Statistics) IsSetNullCount() bool { - return p.NullCount != nil + return p.NullCount != nil } func (p *Statistics) IsSetDistinctCount() bool { - return p.DistinctCount != nil + return p.DistinctCount != nil } func (p *Statistics) IsSetMaxValue() bool { - return p.MaxValue != nil + return p.MaxValue != nil } func (p *Statistics) IsSetMinValue() bool { - return p.MinValue != nil + return p.MinValue != nil } 
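The getters above follow the generated Thrift optional-field pattern: pointer-typed fields such as NullCount stay nil until assigned, IsSet* reports presence, and Get* falls back to a package-level *_DEFAULT value when the field is unset. A minimal sketch of how calling code consumes this pattern; the import path and sample values are assumptions for illustration, not part of this patch:

package main

import (
	"fmt"

	format "example.com/gen-go/parquet" // hypothetical import path for the generated package
)

func main() {
	stats := format.NewStatistics()
	nulls := int64(0) // hypothetical sample: a column containing no nulls
	stats.NullCount = &nulls

	if stats.IsSetNullCount() {
		fmt.Println("null_count:", stats.GetNullCount()) // null_count: 0
	}
	// DistinctCount was never set, so the getter returns
	// Statistics_DistinctCount_DEFAULT (the int64 zero value).
	fmt.Println("distinct_count:", stats.GetDistinctCount()) // distinct_count: 0
}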
func (p *Statistics) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I64 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.STRING { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRING { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Max = v -} - return nil -} - -func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Min = v -} - return nil -} - -func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NullCount = &v -} - return nil -} - -func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.DistinctCount = &v -} - return nil -} - -func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.MaxValue = v -} - return nil -} - -func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.MinValue = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } 
+ + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I64 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRING { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRING { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *Statistics) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Max = v + } + return nil +} + +func (p *Statistics) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Min = v + } + return nil +} + +func (p *Statistics) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NullCount = &v + } + return nil +} + +func (p *Statistics) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.DistinctCount = &v + } + return nil +} + +func (p *Statistics) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.MaxValue = v + } + return nil +} + +func (p *Statistics) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.MinValue = v + } + return nil } func (p *Statistics) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Statistics"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err 
:= p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Statistics"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Statistics) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMax() { - if err := oprot.WriteFieldBegin(ctx, "max", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) } - if err := oprot.WriteBinary(ctx, p.Max); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) } - } - return err + if p.IsSetMax() { + if err := oprot.WriteFieldBegin(ctx, "max", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.Max); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) + } + } + return err } func (p *Statistics) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMin() { - if err := oprot.WriteFieldBegin(ctx, "min", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) } - if err := oprot.WriteBinary(ctx, p.Min); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) } - } - return err + if p.IsSetMin() { + if err := oprot.WriteFieldBegin(ctx, "min", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.Min); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) + } + } + return err } 
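Each writeFieldN helper here (including the remaining ones below) guards on the corresponding IsSet* predicate, so optional fields that were never assigned are omitted from the wire encoding rather than written as zero values. A short sketch of serializing a Statistics into an in-memory Thrift buffer under that behavior; the import path is hypothetical, and the protocol constructor assumes Thrift >= 0.14 (older releases expose thrift.NewTCompactProtocol instead):

package main

import (
	"context"
	"fmt"

	"github.com/apache/thrift/lib/go/thrift"
	format "example.com/gen-go/parquet" // hypothetical import path for the generated package
)

func main() {
	stats := format.NewStatistics()
	stats.MinValue = []byte{0x01} // hypothetical sample values
	stats.MaxValue = []byte{0x7f}

	buf := thrift.NewTMemoryBuffer()
	oprot := thrift.NewTCompactProtocolConf(buf, nil)
	ctx := context.Background()
	if err := stats.Write(ctx, oprot); err != nil {
		panic(err)
	}
	if err := oprot.Flush(ctx); err != nil {
		panic(err)
	}
	// Only fields 5 (max_value) and 6 (min_value) plus the stop marker are
	// encoded; the four unset optional fields contribute no bytes at all.
	fmt.Println(buf.Len(), "bytes")
}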
func (p *Statistics) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNullCount() { - if err := oprot.WriteFieldBegin(ctx, "null_count", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.NullCount)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) } - } - return err + if p.IsSetNullCount() { + if err := oprot.WriteFieldBegin(ctx, "null_count", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.NullCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) + } + } + return err } func (p *Statistics) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDistinctCount() { - if err := oprot.WriteFieldBegin(ctx, "distinct_count", thrift.I64, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.DistinctCount)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) } - } - return err + if p.IsSetDistinctCount() { + if err := oprot.WriteFieldBegin(ctx, "distinct_count", thrift.I64, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.DistinctCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) + } + } + return err } func (p *Statistics) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMaxValue() { - if err := oprot.WriteFieldBegin(ctx, "max_value", thrift.STRING, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) } - if err := oprot.WriteBinary(ctx, p.MaxValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) } - } - return err + if p.IsSetMaxValue() { + if err := oprot.WriteFieldBegin(ctx, "max_value", thrift.STRING, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.MaxValue); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) + } + } + return err } func (p *Statistics) writeField6(ctx context.Context, 
oprot thrift.TProtocol) (err error) { - if p.IsSetMinValue() { - if err := oprot.WriteFieldBegin(ctx, "min_value", thrift.STRING, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) } - if err := oprot.WriteBinary(ctx, p.MinValue); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) } - } - return err + if p.IsSetMinValue() { + if err := oprot.WriteFieldBegin(ctx, "min_value", thrift.STRING, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.MinValue); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) + } + } + return err } func (p *Statistics) Equals(other *Statistics) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.Max, other.Max) != 0 { return false } - if bytes.Compare(p.Min, other.Min) != 0 { return false } - if p.NullCount != other.NullCount { - if p.NullCount == nil || other.NullCount == nil { - return false - } - if (*p.NullCount) != (*other.NullCount) { return false } - } - if p.DistinctCount != other.DistinctCount { - if p.DistinctCount == nil || other.DistinctCount == nil { - return false - } - if (*p.DistinctCount) != (*other.DistinctCount) { return false } - } - if bytes.Compare(p.MaxValue, other.MaxValue) != 0 { return false } - if bytes.Compare(p.MinValue, other.MinValue) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.Max, other.Max) != 0 { + return false + } + if bytes.Compare(p.Min, other.Min) != 0 { + return false + } + if p.NullCount != other.NullCount { + if p.NullCount == nil || other.NullCount == nil { + return false + } + if (*p.NullCount) != (*other.NullCount) { + return false + } + } + if p.DistinctCount != other.DistinctCount { + if p.DistinctCount == nil || other.DistinctCount == nil { + return false + } + if (*p.DistinctCount) != (*other.DistinctCount) { + return false + } + } + if bytes.Compare(p.MaxValue, other.MaxValue) != 0 { + return false + } + if bytes.Compare(p.MinValue, other.MinValue) != 0 { + return false + } + return true } func (p *Statistics) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Statistics(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Statistics(%+v)", *p) } func (p *Statistics) Validate() error { - return nil + return nil } + // Empty structs to use as logical type annotations type StringType struct { } func NewStringType() *StringType { - return &StringType{} + return &StringType{} } func (p *StringType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - 
if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *StringType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "StringType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "StringType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *StringType) Equals(other *StringType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *StringType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("StringType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("StringType(%+v)", *p) } func (p *StringType) Validate() error { - return nil + return nil } + type UUIDType struct { } func NewUUIDType() *UUIDType { - return &UUIDType{} + return &UUIDType{} } func (p *UUIDType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + 
return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *UUIDType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "UUIDType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "UUIDType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *UUIDType) Equals(other *UUIDType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *UUIDType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("UUIDType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("UUIDType(%+v)", *p) } func (p *UUIDType) Validate() error { - return nil + return nil } + type MapType struct { } func NewMapType() *MapType { - return &MapType{} + return &MapType{} } func (p *MapType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MapType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MapType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - 
return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MapType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *MapType) Equals(other *MapType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MapType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MapType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MapType(%+v)", *p) } func (p *MapType) Validate() error { - return nil + return nil } + type ListType struct { } func NewListType() *ListType { - return &ListType{} + return &ListType{} } func (p *ListType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *ListType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ListType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ListType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ListType) Equals(other *ListType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + 
if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *ListType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ListType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ListType(%+v)", *p) } func (p *ListType) Validate() error { - return nil + return nil } + type EnumType struct { } func NewEnumType() *EnumType { - return &EnumType{} + return &EnumType{} } func (p *EnumType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *EnumType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "EnumType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "EnumType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EnumType) Equals(other *EnumType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *EnumType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EnumType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("EnumType(%+v)", *p) } func (p *EnumType) Validate() error { - return nil + return nil } + type DateType struct { } func NewDateType() *DateType { - return &DateType{} + return &DateType{} } func (p *DateType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", 
p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *DateType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DateType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DateType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DateType) Equals(other *DateType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *DateType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DateType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DateType(%+v)", *p) } func (p *DateType) Validate() error { - return nil + return nil } + type Float16Type struct { } func NewFloat16Type() *Float16Type { - return &Float16Type{} + return &Float16Type{} } func (p *Float16Type) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read 
error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *Float16Type) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Float16Type"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Float16Type"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Float16Type) Equals(other *Float16Type) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *Float16Type) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Float16Type(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Float16Type(%+v)", *p) } func (p *Float16Type) Validate() error { - return nil + return nil } + // Logical type to annotate a column that is always null. -// +// // Sometimes when discovering the schema of existing data, values are always // null and the physical type can't be determined. This annotation signals // the case where the physical type was guessed from all null values. 
@@ -1358,8169 +1570,9269 @@ type NullType struct { } func NewNullType() *NullType { - return &NullType{} + return &NullType{} } func (p *NullType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *NullType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "NullType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "NullType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *NullType) Equals(other *NullType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *NullType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NullType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("NullType(%+v)", *p) } func (p *NullType) Validate() error { - return nil + return nil } + // Decimal logical type annotation -// +// // To maintain forward-compatibility in v1, implementations using this logical // type must also set scale and precision on the annotated SchemaElement. 
-// +// // Allowed for physical types: INT32, INT64, FIXED, and BINARY -// +// // Attributes: -// - Scale -// - Precision +// - Scale +// - Precision type DecimalType struct { - Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` - Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` + Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` + Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` } func NewDecimalType() *DecimalType { - return &DecimalType{} + return &DecimalType{} } - func (p *DecimalType) GetScale() int32 { - return p.Scale + return p.Scale } func (p *DecimalType) GetPrecision() int32 { - return p.Precision + return p.Precision } func (p *DecimalType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetScale bool = false; - var issetPrecision bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetScale = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetPrecision = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetScale{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")); - } - if !issetPrecision{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")); - } - return nil -} - -func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Scale = v -} - return nil -} - -func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Precision = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetScale bool = false + var issetPrecision bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetScale = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetPrecision = true + 
} else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetScale { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")) + } + if !issetPrecision { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")) + } + return nil +} + +func (p *DecimalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Scale = v + } + return nil +} + +func (p *DecimalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Precision = v + } + return nil } func (p *DecimalType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DecimalType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DecimalType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DecimalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Scale)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) + } + return err } func (p *DecimalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "precision", 
thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Precision)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Precision)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) + } + return err } func (p *DecimalType) Equals(other *DecimalType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Scale != other.Scale { return false } - if p.Precision != other.Precision { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Scale != other.Scale { + return false + } + if p.Precision != other.Precision { + return false + } + return true } func (p *DecimalType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DecimalType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DecimalType(%+v)", *p) } func (p *DecimalType) Validate() error { - return nil + return nil } + // Time units for logical types type MilliSeconds struct { } func NewMilliSeconds() *MilliSeconds { - return &MilliSeconds{} + return &MilliSeconds{} } func (p *MilliSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MilliSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MilliSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field 
stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MilliSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *MilliSeconds) Equals(other *MilliSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MilliSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MilliSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MilliSeconds(%+v)", *p) } func (p *MilliSeconds) Validate() error { - return nil + return nil } + type MicroSeconds struct { } func NewMicroSeconds() *MicroSeconds { - return &MicroSeconds{} + return &MicroSeconds{} } func (p *MicroSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *MicroSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "MicroSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "MicroSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p 
*MicroSeconds) Equals(other *MicroSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *MicroSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("MicroSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("MicroSeconds(%+v)", *p) } func (p *MicroSeconds) Validate() error { - return nil + return nil } + type NanoSeconds struct { } func NewNanoSeconds() *NanoSeconds { - return &NanoSeconds{} + return &NanoSeconds{} } func (p *NanoSeconds) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *NanoSeconds) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "NanoSeconds"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "NanoSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *NanoSeconds) Equals(other *NanoSeconds) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *NanoSeconds) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("NanoSeconds(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("NanoSeconds(%+v)", *p) } func (p *NanoSeconds) Validate() error { - return nil + return nil } + // Attributes: -// - MILLIS -// - MICROS -// - NANOS +// - MILLIS +// - 
MICROS +// - NANOS type TimeUnit struct { - MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` - MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` - NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` + MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` + MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` + NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` } func NewTimeUnit() *TimeUnit { - return &TimeUnit{} + return &TimeUnit{} } var TimeUnit_MILLIS_DEFAULT *MilliSeconds + func (p *TimeUnit) GetMILLIS() *MilliSeconds { - if !p.IsSetMILLIS() { - return TimeUnit_MILLIS_DEFAULT - } -return p.MILLIS + if !p.IsSetMILLIS() { + return TimeUnit_MILLIS_DEFAULT + } + return p.MILLIS } + var TimeUnit_MICROS_DEFAULT *MicroSeconds + func (p *TimeUnit) GetMICROS() *MicroSeconds { - if !p.IsSetMICROS() { - return TimeUnit_MICROS_DEFAULT - } -return p.MICROS + if !p.IsSetMICROS() { + return TimeUnit_MICROS_DEFAULT + } + return p.MICROS } + var TimeUnit_NANOS_DEFAULT *NanoSeconds + func (p *TimeUnit) GetNANOS() *NanoSeconds { - if !p.IsSetNANOS() { - return TimeUnit_NANOS_DEFAULT - } -return p.NANOS + if !p.IsSetNANOS() { + return TimeUnit_NANOS_DEFAULT + } + return p.NANOS } func (p *TimeUnit) CountSetFieldsTimeUnit() int { - count := 0 - if (p.IsSetMILLIS()) { - count++ - } - if (p.IsSetMICROS()) { - count++ - } - if (p.IsSetNANOS()) { - count++ - } - return count + count := 0 + if p.IsSetMILLIS() { + count++ + } + if p.IsSetMICROS() { + count++ + } + if p.IsSetNANOS() { + count++ + } + return count } func (p *TimeUnit) IsSetMILLIS() bool { - return p.MILLIS != nil + return p.MILLIS != nil } func (p *TimeUnit) IsSetMICROS() bool { - return p.MICROS != nil + return p.MICROS != nil } func (p *TimeUnit) IsSetNANOS() bool { - return p.NANOS != nil + return p.NANOS != nil } func (p *TimeUnit) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.MILLIS = &MilliSeconds{} - if err := p.MILLIS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) - } - 
return nil -} - -func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.MICROS = &MicroSeconds{} - if err := p.MICROS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) - } - return nil -} - -func (p *TimeUnit) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.NANOS = &NanoSeconds{} - if err := p.NANOS.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *TimeUnit) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.MILLIS = &MilliSeconds{} + if err := p.MILLIS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) + } + return nil +} + +func (p *TimeUnit) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.MICROS = &MicroSeconds{} + if err := p.MICROS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) + } + return nil +} + +func (p *TimeUnit) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + p.NANOS = &NanoSeconds{} + if err := p.NANOS.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) + } + return nil } func (p *TimeUnit) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsTimeUnit(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "TimeUnit"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsTimeUnit(); c != 1 { + return fmt.Errorf("%T write 
union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "TimeUnit"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimeUnit) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMILLIS() { - if err := oprot.WriteFieldBegin(ctx, "MILLIS", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) } - if err := p.MILLIS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) } - } - return err + if p.IsSetMILLIS() { + if err := oprot.WriteFieldBegin(ctx, "MILLIS", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) + } + if err := p.MILLIS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) + } + } + return err } func (p *TimeUnit) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMICROS() { - if err := oprot.WriteFieldBegin(ctx, "MICROS", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) } - if err := p.MICROS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) } - } - return err + if p.IsSetMICROS() { + if err := oprot.WriteFieldBegin(ctx, "MICROS", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) + } + if err := p.MICROS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) + } + } + return err } func (p *TimeUnit) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNANOS() { - if err := oprot.WriteFieldBegin(ctx, "NANOS", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) } - if err := p.NANOS.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) } - } - return err + if p.IsSetNANOS() { + if err := oprot.WriteFieldBegin(ctx, "NANOS", thrift.STRUCT, 3); err 
!= nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) + } + if err := p.NANOS.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) + } + } + return err } func (p *TimeUnit) Equals(other *TimeUnit) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.MILLIS.Equals(other.MILLIS) { return false } - if !p.MICROS.Equals(other.MICROS) { return false } - if !p.NANOS.Equals(other.NANOS) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.MILLIS.Equals(other.MILLIS) { + return false + } + if !p.MICROS.Equals(other.MICROS) { + return false + } + if !p.NANOS.Equals(other.NANOS) { + return false + } + return true } func (p *TimeUnit) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeUnit(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimeUnit(%+v)", *p) } func (p *TimeUnit) Validate() error { - return nil + return nil } + // Timestamp logical type annotation -// +// // Allowed for physical types: INT64 -// +// // Attributes: -// - IsAdjustedToUTC -// - Unit +// - IsAdjustedToUTC +// - Unit type TimestampType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` } func NewTimestampType() *TimestampType { - return &TimestampType{} + return &TimestampType{} } - func (p *TimestampType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC + return p.IsAdjustedToUTC } + var TimestampType_Unit_DEFAULT *TimeUnit + func (p *TimestampType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimestampType_Unit_DEFAULT - } -return p.Unit + if !p.IsSetUnit() { + return TimestampType_Unit_DEFAULT + } + return p.Unit } func (p *TimestampType) IsSetUnit() bool { - return p.Unit != nil + return p.Unit != nil } func (p *TimestampType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false; - var issetUnit bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUnit = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")); - } - if !issetUnit{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")); - } - return nil -} - -func (p *TimestampType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.IsAdjustedToUTC = v -} - return nil -} - -func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUnit = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimestampType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimestampType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil } func (p *TimestampType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TimestampType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil 
+ if err := oprot.WriteStructBegin(ctx, "TimestampType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimestampType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err } func (p *TimestampType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) } - if err := p.Unit.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if err := p.Unit.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err } func (p *TimestampType) Equals(other *TimestampType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false } - if !p.Unit.Equals(other.Unit) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.IsAdjustedToUTC != other.IsAdjustedToUTC { + return false + } + if !p.Unit.Equals(other.Unit) { + return false + } + return true } func (p *TimestampType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimestampType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimestampType(%+v)", *p) } func (p *TimestampType) Validate() error { - return nil + return nil } + // Time logical type 
annotation -// +// // Allowed for physical types: INT32 (millis), INT64 (micros, nanos) -// +// // Attributes: -// - IsAdjustedToUTC -// - Unit +// - IsAdjustedToUTC +// - Unit type TimeType struct { - IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` - Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` } func NewTimeType() *TimeType { - return &TimeType{} + return &TimeType{} } - func (p *TimeType) GetIsAdjustedToUTC() bool { - return p.IsAdjustedToUTC + return p.IsAdjustedToUTC } + var TimeType_Unit_DEFAULT *TimeUnit + func (p *TimeType) GetUnit() *TimeUnit { - if !p.IsSetUnit() { - return TimeType_Unit_DEFAULT - } -return p.Unit + if !p.IsSetUnit() { + return TimeType_Unit_DEFAULT + } + return p.Unit } func (p *TimeType) IsSetUnit() bool { - return p.Unit != nil + return p.Unit != nil } func (p *TimeType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetIsAdjustedToUTC bool = false; - var issetUnit bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetIsAdjustedToUTC = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUnit = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetIsAdjustedToUTC{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")); - } - if !issetUnit{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")); - } - return nil -} - -func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.IsAdjustedToUTC = v -} - return nil -} - -func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Unit = &TimeUnit{} - if err := p.Unit.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch 
fieldId { + case 1: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUnit = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimeType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimeType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil } func (p *TimeType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TimeType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "TimeType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TimeType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", 
p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err } func (p *TimeType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) } - if err := p.Unit.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if err := p.Unit.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err } func (p *TimeType) Equals(other *TimeType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.IsAdjustedToUTC != other.IsAdjustedToUTC { return false } - if !p.Unit.Equals(other.Unit) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.IsAdjustedToUTC != other.IsAdjustedToUTC { + return false + } + if !p.Unit.Equals(other.Unit) { + return false + } + return true } func (p *TimeType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TimeType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TimeType(%+v)", *p) } func (p *TimeType) Validate() error { - return nil + return nil } + // Integer logical type annotation -// +// // bitWidth must be 8, 16, 32, or 64. 
-// +// // Allowed for physical types: INT32, INT64 -// +// // Attributes: -// - BitWidth -// - IsSigned +// - BitWidth +// - IsSigned type IntType struct { - BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"` - IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"` + BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"` + IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"` } func NewIntType() *IntType { - return &IntType{} + return &IntType{} } - func (p *IntType) GetBitWidth() int8 { - return p.BitWidth + return p.BitWidth } func (p *IntType) GetIsSigned() bool { - return p.IsSigned + return p.IsSigned } func (p *IntType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetBitWidth bool = false; - var issetIsSigned bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.BYTE { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetBitWidth = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetIsSigned = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetBitWidth{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set")); - } - if !issetIsSigned{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set")); - } - return nil -} - -func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadByte(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := int8(v) - p.BitWidth = temp -} - return nil -} - -func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.IsSigned = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetBitWidth bool = false + var issetIsSigned bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.BYTE { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetBitWidth = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetIsSigned = true 
+ } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetBitWidth { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set")) + } + if !issetIsSigned { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set")) + } + return nil +} + +func (p *IntType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadByte(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := int8(v) + p.BitWidth = temp + } + return nil +} + +func (p *IntType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.IsSigned = v + } + return nil } func (p *IntType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "IntType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "IntType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *IntType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "bitWidth", thrift.BYTE, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) } - if err := oprot.WriteByte(ctx, int8(p.BitWidth)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "bitWidth", thrift.BYTE, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) + } + if err := oprot.WriteByte(ctx, int8(p.BitWidth)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:bitWidth: ", p), err) + } + return err } func (p *IntType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := 
oprot.WriteFieldBegin(ctx, "isSigned", thrift.BOOL, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsSigned)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "isSigned", thrift.BOOL, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsSigned)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) + } + return err } func (p *IntType) Equals(other *IntType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.BitWidth != other.BitWidth { return false } - if p.IsSigned != other.IsSigned { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.BitWidth != other.BitWidth { + return false + } + if p.IsSigned != other.IsSigned { + return false + } + return true } func (p *IntType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("IntType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("IntType(%+v)", *p) } func (p *IntType) Validate() error { - return nil + return nil } + // Embedded JSON logical type annotation -// +// // Allowed for physical types: BINARY type JsonType struct { } func NewJsonType() *JsonType { - return &JsonType{} + return &JsonType{} } func (p *JsonType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *JsonType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "JsonType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - 
return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "JsonType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *JsonType) Equals(other *JsonType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *JsonType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("JsonType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("JsonType(%+v)", *p) } func (p *JsonType) Validate() error { - return nil + return nil } + // Embedded BSON logical type annotation -// +// // Allowed for physical types: BINARY type BsonType struct { } func NewBsonType() *BsonType { - return &BsonType{} + return &BsonType{} } func (p *BsonType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *BsonType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "BsonType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "BsonType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return 
thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BsonType) Equals(other *BsonType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *BsonType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BsonType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BsonType(%+v)", *p) } func (p *BsonType) Validate() error { - return nil + return nil } + // LogicalType annotations to replace ConvertedType. -// +// // To maintain compatibility, implementations using LogicalType for a // SchemaElement must also set the corresponding ConvertedType (if any) // from the following table. -// +// // Attributes: -// - STRING -// - MAP -// - LIST -// - ENUM -// - DECIMAL -// - DATE -// - TIME -// - TIMESTAMP -// - INTEGER -// - UNKNOWN -// - JSON -// - BSON -// - UUID -// - FLOAT16 +// - STRING +// - MAP +// - LIST +// - ENUM +// - DECIMAL +// - DATE +// - TIME +// - TIMESTAMP +// - INTEGER +// - UNKNOWN +// - JSON +// - BSON +// - UUID +// - FLOAT16 type LogicalType struct { - STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"` - MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"` - LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"` - ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"` - DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"` - DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"` - TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"` - TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"` - // unused field # 9 - INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"` - UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"` - JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"` - BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"` - UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"` - FLOAT16 *Float16Type `thrift:"FLOAT16,15" db:"FLOAT16" json:"FLOAT16,omitempty"` + STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"` + MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"` + LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"` + ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"` + DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"` + DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"` + TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"` + TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"` + // unused field # 9 + INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"` + UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"` + JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"` + BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"` + UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"` + FLOAT16 *Float16Type `thrift:"FLOAT16,15" db:"FLOAT16" json:"FLOAT16,omitempty"` } func NewLogicalType() *LogicalType { - return &LogicalType{} + return &LogicalType{} } var LogicalType_STRING_DEFAULT *StringType + func (p *LogicalType) GetSTRING() *StringType { - if !p.IsSetSTRING() { - return LogicalType_STRING_DEFAULT - 
} -return p.STRING + if !p.IsSetSTRING() { + return LogicalType_STRING_DEFAULT + } + return p.STRING } + var LogicalType_MAP_DEFAULT *MapType + func (p *LogicalType) GetMAP() *MapType { - if !p.IsSetMAP() { - return LogicalType_MAP_DEFAULT - } -return p.MAP + if !p.IsSetMAP() { + return LogicalType_MAP_DEFAULT + } + return p.MAP } + var LogicalType_LIST_DEFAULT *ListType + func (p *LogicalType) GetLIST() *ListType { - if !p.IsSetLIST() { - return LogicalType_LIST_DEFAULT - } -return p.LIST + if !p.IsSetLIST() { + return LogicalType_LIST_DEFAULT + } + return p.LIST } + var LogicalType_ENUM_DEFAULT *EnumType + func (p *LogicalType) GetENUM() *EnumType { - if !p.IsSetENUM() { - return LogicalType_ENUM_DEFAULT - } -return p.ENUM + if !p.IsSetENUM() { + return LogicalType_ENUM_DEFAULT + } + return p.ENUM } + var LogicalType_DECIMAL_DEFAULT *DecimalType + func (p *LogicalType) GetDECIMAL() *DecimalType { - if !p.IsSetDECIMAL() { - return LogicalType_DECIMAL_DEFAULT - } -return p.DECIMAL + if !p.IsSetDECIMAL() { + return LogicalType_DECIMAL_DEFAULT + } + return p.DECIMAL } + var LogicalType_DATE_DEFAULT *DateType + func (p *LogicalType) GetDATE() *DateType { - if !p.IsSetDATE() { - return LogicalType_DATE_DEFAULT - } -return p.DATE + if !p.IsSetDATE() { + return LogicalType_DATE_DEFAULT + } + return p.DATE } + var LogicalType_TIME_DEFAULT *TimeType + func (p *LogicalType) GetTIME() *TimeType { - if !p.IsSetTIME() { - return LogicalType_TIME_DEFAULT - } -return p.TIME + if !p.IsSetTIME() { + return LogicalType_TIME_DEFAULT + } + return p.TIME } + var LogicalType_TIMESTAMP_DEFAULT *TimestampType + func (p *LogicalType) GetTIMESTAMP() *TimestampType { - if !p.IsSetTIMESTAMP() { - return LogicalType_TIMESTAMP_DEFAULT - } -return p.TIMESTAMP + if !p.IsSetTIMESTAMP() { + return LogicalType_TIMESTAMP_DEFAULT + } + return p.TIMESTAMP } + var LogicalType_INTEGER_DEFAULT *IntType + func (p *LogicalType) GetINTEGER() *IntType { - if !p.IsSetINTEGER() { - return LogicalType_INTEGER_DEFAULT - } -return p.INTEGER + if !p.IsSetINTEGER() { + return LogicalType_INTEGER_DEFAULT + } + return p.INTEGER } + var LogicalType_UNKNOWN_DEFAULT *NullType + func (p *LogicalType) GetUNKNOWN() *NullType { - if !p.IsSetUNKNOWN() { - return LogicalType_UNKNOWN_DEFAULT - } -return p.UNKNOWN + if !p.IsSetUNKNOWN() { + return LogicalType_UNKNOWN_DEFAULT + } + return p.UNKNOWN } + var LogicalType_JSON_DEFAULT *JsonType + func (p *LogicalType) GetJSON() *JsonType { - if !p.IsSetJSON() { - return LogicalType_JSON_DEFAULT - } -return p.JSON + if !p.IsSetJSON() { + return LogicalType_JSON_DEFAULT + } + return p.JSON } + var LogicalType_BSON_DEFAULT *BsonType + func (p *LogicalType) GetBSON() *BsonType { - if !p.IsSetBSON() { - return LogicalType_BSON_DEFAULT - } -return p.BSON + if !p.IsSetBSON() { + return LogicalType_BSON_DEFAULT + } + return p.BSON } + var LogicalType_UUID_DEFAULT *UUIDType + func (p *LogicalType) GetUUID() *UUIDType { - if !p.IsSetUUID() { - return LogicalType_UUID_DEFAULT - } -return p.UUID + if !p.IsSetUUID() { + return LogicalType_UUID_DEFAULT + } + return p.UUID } + var LogicalType_FLOAT16_DEFAULT *Float16Type + func (p *LogicalType) GetFLOAT16() *Float16Type { - if !p.IsSetFLOAT16() { - return LogicalType_FLOAT16_DEFAULT - } -return p.FLOAT16 + if !p.IsSetFLOAT16() { + return LogicalType_FLOAT16_DEFAULT + } + return p.FLOAT16 } func (p *LogicalType) CountSetFieldsLogicalType() int { - count := 0 - if (p.IsSetSTRING()) { - count++ - } - if (p.IsSetMAP()) { - count++ - } - if (p.IsSetLIST()) { - count++ - } - if 
(p.IsSetENUM()) { - count++ - } - if (p.IsSetDECIMAL()) { - count++ - } - if (p.IsSetDATE()) { - count++ - } - if (p.IsSetTIME()) { - count++ - } - if (p.IsSetTIMESTAMP()) { - count++ - } - if (p.IsSetINTEGER()) { - count++ - } - if (p.IsSetUNKNOWN()) { - count++ - } - if (p.IsSetJSON()) { - count++ - } - if (p.IsSetBSON()) { - count++ - } - if (p.IsSetUUID()) { - count++ - } - if (p.IsSetFLOAT16()) { - count++ - } - return count + count := 0 + if p.IsSetSTRING() { + count++ + } + if p.IsSetMAP() { + count++ + } + if p.IsSetLIST() { + count++ + } + if p.IsSetENUM() { + count++ + } + if p.IsSetDECIMAL() { + count++ + } + if p.IsSetDATE() { + count++ + } + if p.IsSetTIME() { + count++ + } + if p.IsSetTIMESTAMP() { + count++ + } + if p.IsSetINTEGER() { + count++ + } + if p.IsSetUNKNOWN() { + count++ + } + if p.IsSetJSON() { + count++ + } + if p.IsSetBSON() { + count++ + } + if p.IsSetUUID() { + count++ + } + if p.IsSetFLOAT16() { + count++ + } + return count } func (p *LogicalType) IsSetSTRING() bool { - return p.STRING != nil + return p.STRING != nil } func (p *LogicalType) IsSetMAP() bool { - return p.MAP != nil + return p.MAP != nil } func (p *LogicalType) IsSetLIST() bool { - return p.LIST != nil + return p.LIST != nil } func (p *LogicalType) IsSetENUM() bool { - return p.ENUM != nil + return p.ENUM != nil } func (p *LogicalType) IsSetDECIMAL() bool { - return p.DECIMAL != nil + return p.DECIMAL != nil } func (p *LogicalType) IsSetDATE() bool { - return p.DATE != nil + return p.DATE != nil } func (p *LogicalType) IsSetTIME() bool { - return p.TIME != nil + return p.TIME != nil } func (p *LogicalType) IsSetTIMESTAMP() bool { - return p.TIMESTAMP != nil + return p.TIMESTAMP != nil } func (p *LogicalType) IsSetINTEGER() bool { - return p.INTEGER != nil + return p.INTEGER != nil } func (p *LogicalType) IsSetUNKNOWN() bool { - return p.UNKNOWN != nil + return p.UNKNOWN != nil } func (p *LogicalType) IsSetJSON() bool { - return p.JSON != nil + return p.JSON != nil } func (p *LogicalType) IsSetBSON() bool { - return p.BSON != nil + return p.BSON != nil } func (p *LogicalType) IsSetUUID() bool { - return p.UUID != nil + return p.UUID != nil } func (p *LogicalType) IsSetFLOAT16() bool { - return p.FLOAT16 != nil + return p.FLOAT16 != nil } func (p *LogicalType) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId 
== thrift.STRUCT { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 10: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField10(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 11: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField11(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 12: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField12(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 13: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField13(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 14: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField14(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 15: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField15(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.STRING = &StringType{} - if err := p.STRING.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err) - } - return nil -} - -func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.MAP = &MapType{} - if err := p.MAP.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err) - } - return nil -} - -func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.LIST = &ListType{} - if err := p.LIST.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err) - } - return nil -} - -func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - p.ENUM = &EnumType{} - if err := p.ENUM.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err) - } - return nil -} - -func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - p.DECIMAL = &DecimalType{} - if err := p.DECIMAL.Read(ctx, iprot); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err) - } - return nil -} - -func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - p.DATE = &DateType{} - if err := p.DATE.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err) - } - return nil -} - -func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - p.TIME = &TimeType{} - if err := p.TIME.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err) - } - return nil -} - -func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.TIMESTAMP = &TimestampType{} - if err := p.TIMESTAMP.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err) - } - return nil -} - -func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { - p.INTEGER = &IntType{} - if err := p.INTEGER.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err) - } - return nil -} - -func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { - p.UNKNOWN = &NullType{} - if err := p.UNKNOWN.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err) - } - return nil -} - -func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { - p.JSON = &JsonType{} - if err := p.JSON.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err) - } - return nil -} - -func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { - p.BSON = &BsonType{} - if err := p.BSON.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil 
{ + return err + } + } + case 7: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 10: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField10(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 11: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField11(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 12: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField12(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 13: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField13(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 14: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField14(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 15: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField15(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *LogicalType) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.STRING = &StringType{} + if err := p.STRING.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err) + } + return nil +} + +func (p *LogicalType) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.MAP = &MapType{} + if err := p.MAP.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err) + } + return nil +} + +func (p *LogicalType) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + p.LIST = &ListType{} + if err := p.LIST.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err) + } + return nil +} + +func (p *LogicalType) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + p.ENUM = &EnumType{} + if err := p.ENUM.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err) + } + return nil +} + +func (p *LogicalType) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + p.DECIMAL = &DecimalType{} + if err := p.DECIMAL.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err) + } + return nil +} + +func (p *LogicalType) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + p.DATE = &DateType{} + if err := p.DATE.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err) + } + 
return nil +} + +func (p *LogicalType) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + p.TIME = &TimeType{} + if err := p.TIME.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err) + } + return nil +} + +func (p *LogicalType) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.TIMESTAMP = &TimestampType{} + if err := p.TIMESTAMP.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err) + } + return nil +} + +func (p *LogicalType) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { + p.INTEGER = &IntType{} + if err := p.INTEGER.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err) + } + return nil +} + +func (p *LogicalType) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { + p.UNKNOWN = &NullType{} + if err := p.UNKNOWN.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err) + } + return nil +} + +func (p *LogicalType) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { + p.JSON = &JsonType{} + if err := p.JSON.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err) + } + return nil +} + +func (p *LogicalType) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { + p.BSON = &BsonType{} + if err := p.BSON.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err) + } + return nil } -func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { - p.UUID = &UUIDType{} - if err := p.UUID.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err) - } - return nil +func (p *LogicalType) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { + p.UUID = &UUIDType{} + if err := p.UUID.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err) + } + return nil } -func (p *LogicalType) ReadField15(ctx context.Context, iprot thrift.TProtocol) error { - p.FLOAT16 = &Float16Type{} - if err := p.FLOAT16.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.FLOAT16), err) - } - return nil +func (p *LogicalType) ReadField15(ctx context.Context, iprot thrift.TProtocol) error { + p.FLOAT16 = &Float16Type{} + if err := p.FLOAT16.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.FLOAT16), err) + } + return nil } func (p *LogicalType) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsLogicalType(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "LogicalType"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil 
{ return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField10(ctx, oprot); err != nil { return err } - if err := p.writeField11(ctx, oprot); err != nil { return err } - if err := p.writeField12(ctx, oprot); err != nil { return err } - if err := p.writeField13(ctx, oprot); err != nil { return err } - if err := p.writeField14(ctx, oprot); err != nil { return err } - if err := p.writeField15(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsLogicalType(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "LogicalType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField10(ctx, oprot); err != nil { + return err + } + if err := p.writeField11(ctx, oprot); err != nil { + return err + } + if err := p.writeField12(ctx, oprot); err != nil { + return err + } + if err := p.writeField13(ctx, oprot); err != nil { + return err + } + if err := p.writeField14(ctx, oprot); err != nil { + return err + } + if err := p.writeField15(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *LogicalType) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSTRING() { - if err := oprot.WriteFieldBegin(ctx, "STRING", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) } - if err := p.STRING.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) } - } - return err + if p.IsSetSTRING() { + if err := oprot.WriteFieldBegin(ctx, "STRING", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) + } + if err := p.STRING.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) + } + } + return err } func (p *LogicalType) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMAP() { - if err := oprot.WriteFieldBegin(ctx, "MAP", thrift.STRUCT, 2); err != nil { - 
return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) } - if err := p.MAP.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) } - } - return err + if p.IsSetMAP() { + if err := oprot.WriteFieldBegin(ctx, "MAP", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) + } + if err := p.MAP.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) + } + } + return err } func (p *LogicalType) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetLIST() { - if err := oprot.WriteFieldBegin(ctx, "LIST", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) } - if err := p.LIST.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) } - } - return err + if p.IsSetLIST() { + if err := oprot.WriteFieldBegin(ctx, "LIST", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) + } + if err := p.LIST.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) + } + } + return err } func (p *LogicalType) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetENUM() { - if err := oprot.WriteFieldBegin(ctx, "ENUM", thrift.STRUCT, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) } - if err := p.ENUM.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) } - } - return err + if p.IsSetENUM() { + if err := oprot.WriteFieldBegin(ctx, "ENUM", thrift.STRUCT, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) + } + if err := p.ENUM.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) + } + } + return err } func (p *LogicalType) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDECIMAL() { - if err := oprot.WriteFieldBegin(ctx, "DECIMAL", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) } - if err := p.DECIMAL.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DECIMAL), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write 
field end error 5:DECIMAL: ", p), err) } - } - return err + if p.IsSetDECIMAL() { + if err := oprot.WriteFieldBegin(ctx, "DECIMAL", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) + } + if err := p.DECIMAL.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DECIMAL), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:DECIMAL: ", p), err) + } + } + return err } func (p *LogicalType) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDATE() { - if err := oprot.WriteFieldBegin(ctx, "DATE", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) } - if err := p.DATE.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) } - } - return err + if p.IsSetDATE() { + if err := oprot.WriteFieldBegin(ctx, "DATE", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) + } + if err := p.DATE.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) + } + } + return err } func (p *LogicalType) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTIME() { - if err := oprot.WriteFieldBegin(ctx, "TIME", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) } - if err := p.TIME.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) } - } - return err + if p.IsSetTIME() { + if err := oprot.WriteFieldBegin(ctx, "TIME", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) + } + if err := p.TIME.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) + } + } + return err } func (p *LogicalType) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTIMESTAMP() { - if err := oprot.WriteFieldBegin(ctx, "TIMESTAMP", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) } - if err := p.TIMESTAMP.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) } - } - return err + if p.IsSetTIMESTAMP() { + if err := oprot.WriteFieldBegin(ctx, "TIMESTAMP", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) + } + if err := 
p.TIMESTAMP.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) + } + } + return err } func (p *LogicalType) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetINTEGER() { - if err := oprot.WriteFieldBegin(ctx, "INTEGER", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) } - if err := p.INTEGER.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) } - } - return err + if p.IsSetINTEGER() { + if err := oprot.WriteFieldBegin(ctx, "INTEGER", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) + } + if err := p.INTEGER.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) + } + } + return err } func (p *LogicalType) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUNKNOWN() { - if err := oprot.WriteFieldBegin(ctx, "UNKNOWN", thrift.STRUCT, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) } - if err := p.UNKNOWN.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) } - } - return err + if p.IsSetUNKNOWN() { + if err := oprot.WriteFieldBegin(ctx, "UNKNOWN", thrift.STRUCT, 11); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) + } + if err := p.UNKNOWN.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) + } + } + return err } func (p *LogicalType) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetJSON() { - if err := oprot.WriteFieldBegin(ctx, "JSON", thrift.STRUCT, 12); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) } - if err := p.JSON.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 12:JSON: ", p), err) } - } - return err + if p.IsSetJSON() { + if err := oprot.WriteFieldBegin(ctx, "JSON", thrift.STRUCT, 12); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) + } + if err := p.JSON.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write 
field end error 12:JSON: ", p), err) + } + } + return err } func (p *LogicalType) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBSON() { - if err := oprot.WriteFieldBegin(ctx, "BSON", thrift.STRUCT, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) } - if err := p.BSON.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BSON), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) } - } - return err + if p.IsSetBSON() { + if err := oprot.WriteFieldBegin(ctx, "BSON", thrift.STRUCT, 13); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) + } + if err := p.BSON.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BSON), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) + } + } + return err } func (p *LogicalType) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUUID() { - if err := oprot.WriteFieldBegin(ctx, "UUID", thrift.STRUCT, 14); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) } - if err := p.UUID.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) } - } - return err + if p.IsSetUUID() { + if err := oprot.WriteFieldBegin(ctx, "UUID", thrift.STRUCT, 14); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) + } + if err := p.UUID.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) + } + } + return err } func (p *LogicalType) writeField15(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFLOAT16() { - if err := oprot.WriteFieldBegin(ctx, "FLOAT16", thrift.STRUCT, 15); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 15:FLOAT16: ", p), err) } - if err := p.FLOAT16.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.FLOAT16), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 15:FLOAT16: ", p), err) } - } - return err + if p.IsSetFLOAT16() { + if err := oprot.WriteFieldBegin(ctx, "FLOAT16", thrift.STRUCT, 15); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 15:FLOAT16: ", p), err) + } + if err := p.FLOAT16.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.FLOAT16), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 15:FLOAT16: ", p), err) + } + } + return err } func (p *LogicalType) Equals(other *LogicalType) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.STRING.Equals(other.STRING) { return false } - if 
!p.MAP.Equals(other.MAP) { return false } - if !p.LIST.Equals(other.LIST) { return false } - if !p.ENUM.Equals(other.ENUM) { return false } - if !p.DECIMAL.Equals(other.DECIMAL) { return false } - if !p.DATE.Equals(other.DATE) { return false } - if !p.TIME.Equals(other.TIME) { return false } - if !p.TIMESTAMP.Equals(other.TIMESTAMP) { return false } - if !p.INTEGER.Equals(other.INTEGER) { return false } - if !p.UNKNOWN.Equals(other.UNKNOWN) { return false } - if !p.JSON.Equals(other.JSON) { return false } - if !p.BSON.Equals(other.BSON) { return false } - if !p.UUID.Equals(other.UUID) { return false } - if !p.FLOAT16.Equals(other.FLOAT16) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.STRING.Equals(other.STRING) { + return false + } + if !p.MAP.Equals(other.MAP) { + return false + } + if !p.LIST.Equals(other.LIST) { + return false + } + if !p.ENUM.Equals(other.ENUM) { + return false + } + if !p.DECIMAL.Equals(other.DECIMAL) { + return false + } + if !p.DATE.Equals(other.DATE) { + return false + } + if !p.TIME.Equals(other.TIME) { + return false + } + if !p.TIMESTAMP.Equals(other.TIMESTAMP) { + return false + } + if !p.INTEGER.Equals(other.INTEGER) { + return false + } + if !p.UNKNOWN.Equals(other.UNKNOWN) { + return false + } + if !p.JSON.Equals(other.JSON) { + return false + } + if !p.BSON.Equals(other.BSON) { + return false + } + if !p.UUID.Equals(other.UUID) { + return false + } + if !p.FLOAT16.Equals(other.FLOAT16) { + return false + } + return true } func (p *LogicalType) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("LogicalType(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("LogicalType(%+v)", *p) } func (p *LogicalType) Validate() error { - return nil + return nil } + // Represents a element inside a schema definition. -// - if it is a group (inner node) then type is undefined and num_children is defined -// - if it is a primitive type (leaf) then type is defined and num_children is undefined +// - if it is a group (inner node) then type is undefined and num_children is defined +// - if it is a primitive type (leaf) then type is defined and num_children is undefined +// // the nodes are listed in depth first traversal order. -// +// // Attributes: -// - Type: Data type for this field. Not set if the current element is a non-leaf node -// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. +// - Type: Data type for this field. Not set if the current element is a non-leaf node +// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the values. +// // Otherwise, if specified, this is the maximum bit length to store any of the values. // (e.g. a low cardinality INT col could have this set to 3). Note that this is // in the schema, and therefore fixed for the entire file. -// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. +// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. +// // All other nodes must have one -// - Name: Name of the field in the schema -// - NumChildren: Nested fields. Since thrift does not support nested fields, +// - Name: Name of the field in the schema +// - NumChildren: Nested fields. Since thrift does not support nested fields, +// // the nesting is flattened to a single list by a depth-first traversal. // The children count is used to construct the nested relationship. 
// This field is not set when the element is a primitive type -// - ConvertedType: DEPRECATED: When the schema is the result of a conversion from another model. +// - ConvertedType: DEPRECATED: When the schema is the result of a conversion from another model. +// // Used to record the original type to help with cross conversion. -// +// // This is superseded by logicalType. -// - Scale: DEPRECATED: Used when this column contains decimal data. +// - Scale: DEPRECATED: Used when this column contains decimal data. +// // See the DECIMAL converted type for more details. -// +// // This is superseded by using the DecimalType annotation in logicalType. -// - Precision -// - FieldID: When the original schema supports field ids, this will save the +// - Precision +// - FieldID: When the original schema supports field ids, this will save the +// // original field id in the parquet schema -// - LogicalType: The logical type of this SchemaElement -// +// - LogicalType: The logical type of this SchemaElement +// // LogicalType replaces ConvertedType, but ConvertedType is still required // for some logical types to ensure forward-compatibility in format v1. type SchemaElement struct { - Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` - TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` - RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` - Name string `thrift:"name,4,required" db:"name" json:"name"` - NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` - ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` - Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` - Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` - FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` - LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` + Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` + TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` + RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` + Name string `thrift:"name,4,required" db:"name" json:"name"` + NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` + ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` + Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` + Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` + FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` + LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` } func NewSchemaElement() *SchemaElement { - return &SchemaElement{} + return &SchemaElement{} } var SchemaElement_Type_DEFAULT Type + func (p *SchemaElement) GetType() Type { - if !p.IsSetType() { - return SchemaElement_Type_DEFAULT - } -return *p.Type + if !p.IsSetType() { + return SchemaElement_Type_DEFAULT + } + return *p.Type } + var SchemaElement_TypeLength_DEFAULT int32 + func (p *SchemaElement) GetTypeLength() int32 { - if !p.IsSetTypeLength() { - return SchemaElement_TypeLength_DEFAULT - } -return *p.TypeLength + if !p.IsSetTypeLength() { + return SchemaElement_TypeLength_DEFAULT + } + return 
*p.TypeLength } + var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType + func (p *SchemaElement) GetRepetitionType() FieldRepetitionType { - if !p.IsSetRepetitionType() { - return SchemaElement_RepetitionType_DEFAULT - } -return *p.RepetitionType + if !p.IsSetRepetitionType() { + return SchemaElement_RepetitionType_DEFAULT + } + return *p.RepetitionType } func (p *SchemaElement) GetName() string { - return p.Name + return p.Name } + var SchemaElement_NumChildren_DEFAULT int32 + func (p *SchemaElement) GetNumChildren() int32 { - if !p.IsSetNumChildren() { - return SchemaElement_NumChildren_DEFAULT - } -return *p.NumChildren + if !p.IsSetNumChildren() { + return SchemaElement_NumChildren_DEFAULT + } + return *p.NumChildren } + var SchemaElement_ConvertedType_DEFAULT ConvertedType + func (p *SchemaElement) GetConvertedType() ConvertedType { - if !p.IsSetConvertedType() { - return SchemaElement_ConvertedType_DEFAULT - } -return *p.ConvertedType + if !p.IsSetConvertedType() { + return SchemaElement_ConvertedType_DEFAULT + } + return *p.ConvertedType } + var SchemaElement_Scale_DEFAULT int32 + func (p *SchemaElement) GetScale() int32 { - if !p.IsSetScale() { - return SchemaElement_Scale_DEFAULT - } -return *p.Scale + if !p.IsSetScale() { + return SchemaElement_Scale_DEFAULT + } + return *p.Scale } + var SchemaElement_Precision_DEFAULT int32 + func (p *SchemaElement) GetPrecision() int32 { - if !p.IsSetPrecision() { - return SchemaElement_Precision_DEFAULT - } -return *p.Precision + if !p.IsSetPrecision() { + return SchemaElement_Precision_DEFAULT + } + return *p.Precision } + var SchemaElement_FieldID_DEFAULT int32 + func (p *SchemaElement) GetFieldID() int32 { - if !p.IsSetFieldID() { - return SchemaElement_FieldID_DEFAULT - } -return *p.FieldID + if !p.IsSetFieldID() { + return SchemaElement_FieldID_DEFAULT + } + return *p.FieldID } + var SchemaElement_LogicalType_DEFAULT *LogicalType + func (p *SchemaElement) GetLogicalType() *LogicalType { - if !p.IsSetLogicalType() { - return SchemaElement_LogicalType_DEFAULT - } -return p.LogicalType + if !p.IsSetLogicalType() { + return SchemaElement_LogicalType_DEFAULT + } + return p.LogicalType } func (p *SchemaElement) IsSetType() bool { - return p.Type != nil + return p.Type != nil } func (p *SchemaElement) IsSetTypeLength() bool { - return p.TypeLength != nil + return p.TypeLength != nil } func (p *SchemaElement) IsSetRepetitionType() bool { - return p.RepetitionType != nil + return p.RepetitionType != nil } func (p *SchemaElement) IsSetNumChildren() bool { - return p.NumChildren != nil + return p.NumChildren != nil } func (p *SchemaElement) IsSetConvertedType() bool { - return p.ConvertedType != nil + return p.ConvertedType != nil } func (p *SchemaElement) IsSetScale() bool { - return p.Scale != nil + return p.Scale != nil } func (p *SchemaElement) IsSetPrecision() bool { - return p.Precision != nil + return p.Precision != nil } func (p *SchemaElement) IsSetFieldID() bool { - return p.FieldID != nil + return p.FieldID != nil } func (p *SchemaElement) IsSetLogicalType() bool { - return p.LogicalType != nil + return p.LogicalType != nil } func (p *SchemaElement) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetName bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), 
err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.STRING { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetName = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I32 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I32 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I32 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.I32 { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.I32 { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 10: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField10(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetName{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")); - } - return nil -} - -func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := Type(v) - p.Type = &temp -} - return nil -} - -func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.TypeLength = &v -} - return nil -} - -func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - temp := FieldRepetitionType(v) - p.RepetitionType = &temp -} - return nil -} - -func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.Name = v -} - 
return nil -} - -func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.NumChildren = &v -} - return nil -} - -func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - temp := ConvertedType(v) - p.ConvertedType = &temp -} - return nil -} - -func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.Scale = &v -} - return nil -} - -func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 8: ", err) -} else { - p.Precision = &v -} - return nil -} - -func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.FieldID = &v -} - return nil -} - -func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { - p.LogicalType = &LogicalType{} - if err := p.LogicalType.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetName bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.STRING { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetName = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I32 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I32 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I32 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.I32 { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err 
!= nil { + return err + } + } + case 9: + if fieldTypeId == thrift.I32 { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 10: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField10(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetName { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")) + } + return nil +} + +func (p *SchemaElement) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := Type(v) + p.Type = &temp + } + return nil +} + +func (p *SchemaElement) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.TypeLength = &v + } + return nil +} + +func (p *SchemaElement) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + temp := FieldRepetitionType(v) + p.RepetitionType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Name = v + } + return nil +} + +func (p *SchemaElement) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.NumChildren = &v + } + return nil +} + +func (p *SchemaElement) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + temp := ConvertedType(v) + p.ConvertedType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.Scale = &v + } + return nil +} + +func (p *SchemaElement) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 8: ", err) + } else { + p.Precision = &v + } + return nil +} + +func (p *SchemaElement) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.FieldID = &v + } + return nil +} + +func (p *SchemaElement) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { + p.LogicalType = &LogicalType{} + if err := p.LogicalType.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) + } + return nil } func (p *SchemaElement) Write(ctx context.Context, 
oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SchemaElement"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - if err := p.writeField10(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SchemaElement"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + if err := p.writeField10(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SchemaElement) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetType() { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - } - return err + if p.IsSetType() { + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTypeLength() { - if err := oprot.WriteFieldBegin(ctx, "type_length", thrift.I32, 2); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.TypeLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) } - } - return err + if p.IsSetTypeLength() { + if err := oprot.WriteFieldBegin(ctx, "type_length", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.TypeLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) + } + } + return err } func (p *SchemaElement) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetRepetitionType() { - if err := oprot.WriteFieldBegin(ctx, "repetition_type", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.RepetitionType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) } - } - return err + if p.IsSetRepetitionType() { + if err := oprot.WriteFieldBegin(ctx, "repetition_type", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.RepetitionType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "name", thrift.STRING, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) } - if err := oprot.WriteString(ctx, string(p.Name)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "name", thrift.STRING, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) + } + if err := oprot.WriteString(ctx, string(p.Name)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) + } + return err } func (p *SchemaElement) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNumChildren() { - if err := oprot.WriteFieldBegin(ctx, "num_children", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) } - if err := 
oprot.WriteI32(ctx, int32(*p.NumChildren)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) } - } - return err + if p.IsSetNumChildren() { + if err := oprot.WriteFieldBegin(ctx, "num_children", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.NumChildren)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) + } + } + return err } func (p *SchemaElement) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetConvertedType() { - if err := oprot.WriteFieldBegin(ctx, "converted_type", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.ConvertedType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) } - } - return err + if p.IsSetConvertedType() { + if err := oprot.WriteFieldBegin(ctx, "converted_type", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.ConvertedType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) + } + } + return err } func (p *SchemaElement) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetScale() { - if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Scale)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) } - } - return err + if p.IsSetScale() { + if err := oprot.WriteFieldBegin(ctx, "scale", thrift.I32, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) + } + } + return err } func (p *SchemaElement) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetPrecision() { - if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.Precision)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) } - } - return err + if p.IsSetPrecision() { + if err := oprot.WriteFieldBegin(ctx, "precision", thrift.I32, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Precision)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) + } + } + return err } func (p *SchemaElement) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFieldID() { - if err := oprot.WriteFieldBegin(ctx, "field_id", thrift.I32, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.FieldID)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) } - } - return err + if p.IsSetFieldID() { + if err := oprot.WriteFieldBegin(ctx, "field_id", thrift.I32, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.FieldID)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) + } + } + return err } func (p *SchemaElement) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetLogicalType() { - if err := oprot.WriteFieldBegin(ctx, "logicalType", thrift.STRUCT, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) } - if err := p.LogicalType.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) } - } - return err + if p.IsSetLogicalType() { + if err := oprot.WriteFieldBegin(ctx, "logicalType", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) + } + if err := p.LogicalType.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) + } + } + return err } func (p *SchemaElement) Equals(other *SchemaElement) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { - if p.Type == nil || other.Type == nil { - return false - } - if (*p.Type) != (*other.Type) { return false } - } - if p.TypeLength != other.TypeLength { - if p.TypeLength == nil || other.TypeLength == nil { - return false - } - if (*p.TypeLength) != (*other.TypeLength) { return false } - } - if 
p.RepetitionType != other.RepetitionType { - if p.RepetitionType == nil || other.RepetitionType == nil { - return false - } - if (*p.RepetitionType) != (*other.RepetitionType) { return false } - } - if p.Name != other.Name { return false } - if p.NumChildren != other.NumChildren { - if p.NumChildren == nil || other.NumChildren == nil { - return false - } - if (*p.NumChildren) != (*other.NumChildren) { return false } - } - if p.ConvertedType != other.ConvertedType { - if p.ConvertedType == nil || other.ConvertedType == nil { - return false - } - if (*p.ConvertedType) != (*other.ConvertedType) { return false } - } - if p.Scale != other.Scale { - if p.Scale == nil || other.Scale == nil { - return false - } - if (*p.Scale) != (*other.Scale) { return false } - } - if p.Precision != other.Precision { - if p.Precision == nil || other.Precision == nil { - return false - } - if (*p.Precision) != (*other.Precision) { return false } - } - if p.FieldID != other.FieldID { - if p.FieldID == nil || other.FieldID == nil { - return false - } - if (*p.FieldID) != (*other.FieldID) { return false } - } - if !p.LogicalType.Equals(other.LogicalType) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + if p.Type == nil || other.Type == nil { + return false + } + if (*p.Type) != (*other.Type) { + return false + } + } + if p.TypeLength != other.TypeLength { + if p.TypeLength == nil || other.TypeLength == nil { + return false + } + if (*p.TypeLength) != (*other.TypeLength) { + return false + } + } + if p.RepetitionType != other.RepetitionType { + if p.RepetitionType == nil || other.RepetitionType == nil { + return false + } + if (*p.RepetitionType) != (*other.RepetitionType) { + return false + } + } + if p.Name != other.Name { + return false + } + if p.NumChildren != other.NumChildren { + if p.NumChildren == nil || other.NumChildren == nil { + return false + } + if (*p.NumChildren) != (*other.NumChildren) { + return false + } + } + if p.ConvertedType != other.ConvertedType { + if p.ConvertedType == nil || other.ConvertedType == nil { + return false + } + if (*p.ConvertedType) != (*other.ConvertedType) { + return false + } + } + if p.Scale != other.Scale { + if p.Scale == nil || other.Scale == nil { + return false + } + if (*p.Scale) != (*other.Scale) { + return false + } + } + if p.Precision != other.Precision { + if p.Precision == nil || other.Precision == nil { + return false + } + if (*p.Precision) != (*other.Precision) { + return false + } + } + if p.FieldID != other.FieldID { + if p.FieldID == nil || other.FieldID == nil { + return false + } + if (*p.FieldID) != (*other.FieldID) { + return false + } + } + if !p.LogicalType.Equals(other.LogicalType) { + return false + } + return true } func (p *SchemaElement) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SchemaElement(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SchemaElement(%+v)", *p) } func (p *SchemaElement) Validate() error { - return nil + return nil } + // Data page header -// +// // Attributes: -// - NumValues: Number of values, including NULLs, in this data page. * -// - Encoding: Encoding used for this data page * -// - DefinitionLevelEncoding: Encoding used for definition levels * -// - RepetitionLevelEncoding: Encoding used for repetition levels * -// - Statistics: Optional statistics for the data in this page* +// - NumValues: Number of values, including NULLs, in this data page. 
* +// - Encoding: Encoding used for this data page * +// - DefinitionLevelEncoding: Encoding used for definition levels * +// - RepetitionLevelEncoding: Encoding used for repetition levels * +// - Statistics: Optional statistics for the data in this page* type DataPageHeader struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"` - RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"` - Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"` + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"` + RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"` + Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"` } func NewDataPageHeader() *DataPageHeader { - return &DataPageHeader{} + return &DataPageHeader{} } - func (p *DataPageHeader) GetNumValues() int32 { - return p.NumValues + return p.NumValues } func (p *DataPageHeader) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding { - return p.DefinitionLevelEncoding + return p.DefinitionLevelEncoding } func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding { - return p.RepetitionLevelEncoding + return p.RepetitionLevelEncoding } + var DataPageHeader_Statistics_DEFAULT *Statistics + func (p *DataPageHeader) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return DataPageHeader_Statistics_DEFAULT - } -return p.Statistics + if !p.IsSetStatistics() { + return DataPageHeader_Statistics_DEFAULT + } + return p.Statistics } func (p *DataPageHeader) IsSetStatistics() bool { - return p.Statistics != nil + return p.Statistics != nil } func (p *DataPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false; - var issetEncoding bool = false; - var issetDefinitionLevelEncoding bool = false; - var issetRepetitionLevelEncoding bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - 
issetDefinitionLevelEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetRepetitionLevelEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - if !issetDefinitionLevelEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set")); - } - if !issetRepetitionLevelEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set")); - } - return nil -} - -func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - temp := Encoding(v) - p.DefinitionLevelEncoding = temp -} - return nil -} - -func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := Encoding(v) - p.RepetitionLevelEncoding = temp -} - return nil -} - -func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetEncoding bool = false + var issetDefinitionLevelEncoding bool = false + var issetRepetitionLevelEncoding bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumValues = 
true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetDefinitionLevelEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetRepetitionLevelEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetDefinitionLevelEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set")) + } + if !issetRepetitionLevelEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set")) + } + return nil +} + +func (p *DataPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DataPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + temp := Encoding(v) + p.DefinitionLevelEncoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := Encoding(v) + p.RepetitionLevelEncoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil } func (p *DataPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DataPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), 
err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DataPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DataPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err } func (p *DataPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err } 
func (p *DataPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "definition_level_encoding", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "definition_level_encoding", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) + } + return err } func (p *DataPageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "repetition_level_encoding", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelEncoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "repetition_level_encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) + } + return err } func (p *DataPageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) + } + } + return err } func (p *DataPageHeader) Equals(other *DataPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.Encoding != other.Encoding { return false } - if p.DefinitionLevelEncoding != other.DefinitionLevelEncoding { return false } - if p.RepetitionLevelEncoding != other.RepetitionLevelEncoding { return false } - if !p.Statistics.Equals(other.Statistics) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.DefinitionLevelEncoding != other.DefinitionLevelEncoding { + return false + } + if p.RepetitionLevelEncoding != other.RepetitionLevelEncoding { + return false + } + if !p.Statistics.Equals(other.Statistics) { + return false + } + return true } func (p *DataPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeader(%+v)", *p) } func (p *DataPageHeader) Validate() error { - return nil + return nil } + type IndexPageHeader struct { } func NewIndexPageHeader() *IndexPageHeader { - return &IndexPageHeader{} + return &IndexPageHeader{} } func (p *IndexPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *IndexPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "IndexPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "IndexPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := 
oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *IndexPageHeader) Equals(other *IndexPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *IndexPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("IndexPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("IndexPageHeader(%+v)", *p) } func (p *IndexPageHeader) Validate() error { - return nil + return nil } + // The dictionary page must be placed at the first position of the column chunk // if it is partly or completely dictionary encoded. At most one dictionary page // can be placed in a column chunk. -// -// +// // Attributes: -// - NumValues: Number of values in the dictionary * -// - Encoding: Encoding using this dictionary page * -// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * +// - NumValues: Number of values in the dictionary * +// - Encoding: Encoding using this dictionary page * +// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * type DictionaryPageHeader struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` } func NewDictionaryPageHeader() *DictionaryPageHeader { - return &DictionaryPageHeader{} + return &DictionaryPageHeader{} } - func (p *DictionaryPageHeader) GetNumValues() int32 { - return p.NumValues + return p.NumValues } func (p *DictionaryPageHeader) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } + var DictionaryPageHeader_IsSorted_DEFAULT bool + func (p *DictionaryPageHeader) GetIsSorted() bool { - if !p.IsSetIsSorted() { - return DictionaryPageHeader_IsSorted_DEFAULT - } -return *p.IsSorted + if !p.IsSetIsSorted() { + return DictionaryPageHeader_IsSorted_DEFAULT + } + return *p.IsSorted } func (p *DictionaryPageHeader) IsSetIsSorted() bool { - return p.IsSorted != nil + return p.IsSorted != nil } func (p *DictionaryPageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false; - var issetEncoding bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncoding 
= true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - return nil -} - -func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.IsSorted = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetEncoding bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + return nil +} + +func (p *DictionaryPageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: 
", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DictionaryPageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DictionaryPageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.IsSorted = &v + } + return nil } func (p *DictionaryPageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DictionaryPageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DictionaryPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DictionaryPageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err } func (p *DictionaryPageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err } func (p *DictionaryPageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIsSorted() { - if err := oprot.WriteFieldBegin(ctx, "is_sorted", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.IsSorted)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) } - } - return err + if p.IsSetIsSorted() { + if err := oprot.WriteFieldBegin(ctx, "is_sorted", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(*p.IsSorted)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) + } + } + return err } func (p *DictionaryPageHeader) Equals(other *DictionaryPageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.Encoding != other.Encoding { return false } - if p.IsSorted != other.IsSorted { - if p.IsSorted == nil || other.IsSorted == nil { - return false - } - if (*p.IsSorted) != (*other.IsSorted) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.IsSorted != other.IsSorted { + if p.IsSorted == nil || other.IsSorted == nil { + return false + } + if (*p.IsSorted) != (*other.IsSorted) { + return false + } + } + return true } func (p *DictionaryPageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DictionaryPageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DictionaryPageHeader(%+v)", *p) } func (p *DictionaryPageHeader) Validate() error { - return nil + return nil } + // New page format allowing reading levels without decompressing the data // Repetition and definition levels are uncompressed // The remaining section containing the data is compressed if is_compressed is true -// -// +// // Attributes: -// - NumValues: Number of values, including NULLs, in this data page. * -// - NumNulls: Number of NULL values, in this data page. +// - NumValues: Number of values, including NULLs, in this data page. * +// - NumNulls: Number of NULL values, in this data page. +// // Number of non-null = num_values - num_nulls which is also the number of values in the data section * -// - NumRows: Number of rows in this data page. 
which means pages change on record boundaries (r = 0) * -// - Encoding: Encoding used for data in this page * -// - DefinitionLevelsByteLength: length of the definition levels -// - RepetitionLevelsByteLength: length of the repetition levels -// - IsCompressed: whether the values are compressed. +// - NumRows: Number of rows in this data page. which means pages change on record boundaries (r = 0) * +// - Encoding: Encoding used for data in this page * +// - DefinitionLevelsByteLength: length of the definition levels +// - RepetitionLevelsByteLength: length of the repetition levels +// - IsCompressed: whether the values are compressed. +// // Which means the section of the page between // definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) // is compressed with the compression_codec. // If missing it is considered compressed -// - Statistics: optional statistics for the data in this page * +// - Statistics: optional statistics for the data in this page * type DataPageHeaderV2 struct { - NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` - NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` - NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` - DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` - RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` - IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"` - Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` + NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` + DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` + RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` + IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed"` + Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` } func NewDataPageHeaderV2() *DataPageHeaderV2 { - return &DataPageHeaderV2{ -IsCompressed: true, -} + return &DataPageHeaderV2{ + IsCompressed: true, + } } - func (p *DataPageHeaderV2) GetNumValues() int32 { - return p.NumValues + return p.NumValues } func (p *DataPageHeaderV2) GetNumNulls() int32 { - return p.NumNulls + return p.NumNulls } func (p *DataPageHeaderV2) GetNumRows() int32 { - return p.NumRows + return p.NumRows } func (p *DataPageHeaderV2) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32 { - return p.DefinitionLevelsByteLength + return p.DefinitionLevelsByteLength } func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32 { - return p.RepetitionLevelsByteLength + return p.RepetitionLevelsByteLength } + var DataPageHeaderV2_IsCompressed_DEFAULT 
bool = true func (p *DataPageHeaderV2) GetIsCompressed() bool { - return p.IsCompressed + return p.IsCompressed } + var DataPageHeaderV2_Statistics_DEFAULT *Statistics + func (p *DataPageHeaderV2) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return DataPageHeaderV2_Statistics_DEFAULT - } -return p.Statistics + if !p.IsSetStatistics() { + return DataPageHeaderV2_Statistics_DEFAULT + } + return p.Statistics } func (p *DataPageHeaderV2) IsSetIsCompressed() bool { - return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT + return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT } func (p *DataPageHeaderV2) IsSetStatistics() bool { - return p.Statistics != nil + return p.Statistics != nil } func (p *DataPageHeaderV2) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumValues bool = false; - var issetNumNulls bool = false; - var issetNumRows bool = false; - var issetEncoding bool = false; - var issetDefinitionLevelsByteLength bool = false; - var issetRepetitionLevelsByteLength bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetNumNulls = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNumRows = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I32 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - issetDefinitionLevelsByteLength = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I32 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - issetRepetitionLevelsByteLength = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumValues{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetNumNulls{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")); - } - if !issetNumRows{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - if !issetDefinitionLevelsByteLength{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")); - } - if !issetRepetitionLevelsByteLength{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")); - } - return nil -} - -func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.NumNulls = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NumRows = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.DefinitionLevelsByteLength = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.RepetitionLevelsByteLength = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.IsCompressed = v -} - return nil -} - -func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetNumNulls bool = false + var issetNumRows bool = false + var issetEncoding bool = false + var issetDefinitionLevelsByteLength bool = false + var issetRepetitionLevelsByteLength bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + 
} + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetNumNulls = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNumRows = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I32 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + issetDefinitionLevelsByteLength = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I32 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + issetRepetitionLevelsByteLength = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetNumNulls { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetDefinitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")) + } + if !issetRepetitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")) + } + return nil +} + +func (p *DataPageHeaderV2) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.NumNulls = v + } 
+ return nil +} + +func (p *DataPageHeaderV2) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DataPageHeaderV2) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.DefinitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.RepetitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.IsCompressed = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil } func (p *DataPageHeaderV2) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "DataPageHeaderV2"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "DataPageHeaderV2"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != 
nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *DataPageHeaderV2) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_nulls", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumNulls)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_nulls", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumNulls)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumRows)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 4); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:encoding: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "definition_levels_byte_length", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelsByteLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "definition_levels_byte_length", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.DefinitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "repetition_levels_byte_length", thrift.I32, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelsByteLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "repetition_levels_byte_length", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.RepetitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) + } + return err } func (p *DataPageHeaderV2) writeField7(ctx context.Context, oprot thrift.TProtocol) 
(err error) { - if p.IsSetIsCompressed() { - if err := oprot.WriteFieldBegin(ctx, "is_compressed", thrift.BOOL, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.IsCompressed)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) } - } - return err + if p.IsSetIsCompressed() { + if err := oprot.WriteFieldBegin(ctx, "is_compressed", thrift.BOOL, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.IsCompressed)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) + } + } + return err } func (p *DataPageHeaderV2) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) + } + } + return err } func (p *DataPageHeaderV2) Equals(other *DataPageHeaderV2) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumValues != other.NumValues { return false } - if p.NumNulls != other.NumNulls { return false } - if p.NumRows != other.NumRows { return false } - if p.Encoding != other.Encoding { return false } - if p.DefinitionLevelsByteLength != other.DefinitionLevelsByteLength { return false } - if p.RepetitionLevelsByteLength != other.RepetitionLevelsByteLength { return false } - if p.IsCompressed != other.IsCompressed { return false } - if !p.Statistics.Equals(other.Statistics) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.NumNulls != other.NumNulls { + return false + } + if p.NumRows != other.NumRows { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.DefinitionLevelsByteLength != other.DefinitionLevelsByteLength { + return false + } + if p.RepetitionLevelsByteLength != other.RepetitionLevelsByteLength { + return false + } + if p.IsCompressed != other.IsCompressed { + return false + } + if !p.Statistics.Equals(other.Statistics) { + return false + } + 
return true } func (p *DataPageHeaderV2) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) } func (p *DataPageHeaderV2) Validate() error { - return nil + return nil } + // Block-based algorithm type annotation. * type SplitBlockAlgorithm struct { } func NewSplitBlockAlgorithm() *SplitBlockAlgorithm { - return &SplitBlockAlgorithm{} + return &SplitBlockAlgorithm{} } func (p *SplitBlockAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *SplitBlockAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SplitBlockAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SplitBlockAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SplitBlockAlgorithm) Equals(other *SplitBlockAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *SplitBlockAlgorithm) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SplitBlockAlgorithm(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SplitBlockAlgorithm(%+v)", *p) } func (p *SplitBlockAlgorithm) Validate() error { - return nil + return nil } + // The algorithm used in Bloom filter. * -// +// // Attributes: -// - BLOCK: Block-based Bloom filter. 
* +// - BLOCK: Block-based Bloom filter. * type BloomFilterAlgorithm struct { - BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"` + BLOCK *SplitBlockAlgorithm `thrift:"BLOCK,1" db:"BLOCK" json:"BLOCK,omitempty"` } func NewBloomFilterAlgorithm() *BloomFilterAlgorithm { - return &BloomFilterAlgorithm{} + return &BloomFilterAlgorithm{} } var BloomFilterAlgorithm_BLOCK_DEFAULT *SplitBlockAlgorithm + func (p *BloomFilterAlgorithm) GetBLOCK() *SplitBlockAlgorithm { - if !p.IsSetBLOCK() { - return BloomFilterAlgorithm_BLOCK_DEFAULT - } -return p.BLOCK + if !p.IsSetBLOCK() { + return BloomFilterAlgorithm_BLOCK_DEFAULT + } + return p.BLOCK } func (p *BloomFilterAlgorithm) CountSetFieldsBloomFilterAlgorithm() int { - count := 0 - if (p.IsSetBLOCK()) { - count++ - } - return count + count := 0 + if p.IsSetBLOCK() { + count++ + } + return count } func (p *BloomFilterAlgorithm) IsSetBLOCK() bool { - return p.BLOCK != nil + return p.BLOCK != nil } func (p *BloomFilterAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.BLOCK = &SplitBlockAlgorithm{} - if err := p.BLOCK.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BLOCK), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.BLOCK = &SplitBlockAlgorithm{} + if err := p.BLOCK.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BLOCK), err) + } + return nil } func (p *BloomFilterAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterAlgorithm(); c != 1 { 
- return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterAlgorithm(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBLOCK() { - if err := oprot.WriteFieldBegin(ctx, "BLOCK", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:BLOCK: ", p), err) } - if err := p.BLOCK.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BLOCK), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:BLOCK: ", p), err) } - } - return err + if p.IsSetBLOCK() { + if err := oprot.WriteFieldBegin(ctx, "BLOCK", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:BLOCK: ", p), err) + } + if err := p.BLOCK.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.BLOCK), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:BLOCK: ", p), err) + } + } + return err } func (p *BloomFilterAlgorithm) Equals(other *BloomFilterAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.BLOCK.Equals(other.BLOCK) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.BLOCK.Equals(other.BLOCK) { + return false + } + return true } func (p *BloomFilterAlgorithm) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterAlgorithm(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterAlgorithm(%+v)", *p) } func (p *BloomFilterAlgorithm) Validate() error { - return nil + return nil } + // Hash strategy type annotation. xxHash is an extremely fast non-cryptographic hash // algorithm. It uses 64 bits version of xxHash. 
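[Editor's note, not part of the patch] The union handling above deserves a concrete illustration. BloomFilterAlgorithm is a Thrift union: its Write method refuses to serialize unless CountSetFieldsBloomFilterAlgorithm() returns exactly 1, because a reader interprets whichever single field is present and zero or multiple set fields would be ambiguous. Below is a minimal sketch of exercising that invariant. The parquet import path is hypothetical (in arrow-go the generated file lives under an internal gen-go directory and cannot be imported directly); the thrift calls used (NewTMemoryBuffer, NewTCompactProtocolConf, TConfiguration) are real APIs of github.com/apache/thrift/lib/go/thrift.

package main

import (
	"context"
	"fmt"

	"github.com/apache/thrift/lib/go/thrift"

	parquet "example.invalid/gen-go/parquet" // hypothetical import path for the generated package
)

func main() {
	alg := parquet.NewBloomFilterAlgorithm()
	// Set exactly one union field; leaving all fields nil would make
	// Write fail its CountSetFieldsBloomFilterAlgorithm() != 1 check.
	alg.BLOCK = parquet.NewSplitBlockAlgorithm()

	buf := thrift.NewTMemoryBuffer()
	oprot := thrift.NewTCompactProtocolConf(buf, &thrift.TConfiguration{})
	if err := alg.Write(context.Background(), oprot); err != nil {
		panic(err) // would fire if no union field (or, in a larger union, more than one) were set
	}
	fmt.Printf("serialized %d byte(s)\n", buf.Len())
}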
-// type XxHash struct { } func NewXxHash() *XxHash { - return &XxHash{} + return &XxHash{} } func (p *XxHash) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *XxHash) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "XxHash"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "XxHash"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *XxHash) Equals(other *XxHash) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *XxHash) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("XxHash(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("XxHash(%+v)", *p) } func (p *XxHash) Validate() error { - return nil + return nil } + // The hash function used in Bloom filter. This function takes the hash of a column value // using plain encoding. -// -// +// // Attributes: -// - XXHASH: xxHash Strategy. * +// - XXHASH: xxHash Strategy. 
* type BloomFilterHash struct { - XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"` + XXHASH *XxHash `thrift:"XXHASH,1" db:"XXHASH" json:"XXHASH,omitempty"` } func NewBloomFilterHash() *BloomFilterHash { - return &BloomFilterHash{} + return &BloomFilterHash{} } var BloomFilterHash_XXHASH_DEFAULT *XxHash + func (p *BloomFilterHash) GetXXHASH() *XxHash { - if !p.IsSetXXHASH() { - return BloomFilterHash_XXHASH_DEFAULT - } -return p.XXHASH + if !p.IsSetXXHASH() { + return BloomFilterHash_XXHASH_DEFAULT + } + return p.XXHASH } func (p *BloomFilterHash) CountSetFieldsBloomFilterHash() int { - count := 0 - if (p.IsSetXXHASH()) { - count++ - } - return count + count := 0 + if p.IsSetXXHASH() { + count++ + } + return count } func (p *BloomFilterHash) IsSetXXHASH() bool { - return p.XXHASH != nil + return p.XXHASH != nil } func (p *BloomFilterHash) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterHash) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.XXHASH = &XxHash{} - if err := p.XXHASH.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.XXHASH), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterHash) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.XXHASH = &XxHash{} + if err := p.XXHASH.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.XXHASH), err) + } + return nil } func (p *BloomFilterHash) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterHash(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterHash"); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterHash(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterHash"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterHash) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetXXHASH() { - if err := oprot.WriteFieldBegin(ctx, "XXHASH", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:XXHASH: ", p), err) } - if err := p.XXHASH.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.XXHASH), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:XXHASH: ", p), err) } - } - return err + if p.IsSetXXHASH() { + if err := oprot.WriteFieldBegin(ctx, "XXHASH", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:XXHASH: ", p), err) + } + if err := p.XXHASH.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.XXHASH), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:XXHASH: ", p), err) + } + } + return err } func (p *BloomFilterHash) Equals(other *BloomFilterHash) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.XXHASH.Equals(other.XXHASH) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.XXHASH.Equals(other.XXHASH) { + return false + } + return true } func (p *BloomFilterHash) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterHash(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterHash(%+v)", *p) } func (p *BloomFilterHash) Validate() error { - return nil + return nil } + // The compression used in the Bloom filter. 
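[Editor's note, not part of the patch] A second hedged sketch, continuing the same hypothetical file and parquet import as above: it round-trips a BloomFilterHash through a compact-protocol memory buffer to show the Read/Write symmetry of these generated methods and the use of the generated Equals. This is an illustration under the stated import assumption, not code from the commit.

func roundTripHash() error {
	ctx := context.Background()

	in := parquet.NewBloomFilterHash()
	in.XXHASH = parquet.NewXxHash() // the only hash strategy defined in this file

	buf := thrift.NewTMemoryBuffer()
	proto := thrift.NewTCompactProtocolConf(buf, &thrift.TConfiguration{})
	if err := in.Write(ctx, proto); err != nil {
		return err
	}

	out := parquet.NewBloomFilterHash()
	// Read consumes fields until thrift.STOP, skipping any field whose
	// id or wire type it does not recognize, as in the loops above.
	if err := out.Read(ctx, proto); err != nil {
		return err
	}
	if !in.Equals(out) {
		return fmt.Errorf("round-trip mismatch: %v vs %v", in, out)
	}
	return nil
}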
-// type Uncompressed struct { } func NewUncompressed() *Uncompressed { - return &Uncompressed{} + return &Uncompressed{} } func (p *Uncompressed) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *Uncompressed) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "Uncompressed"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "Uncompressed"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *Uncompressed) Equals(other *Uncompressed) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *Uncompressed) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("Uncompressed(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("Uncompressed(%+v)", *p) } func (p *Uncompressed) Validate() error { - return nil + return nil } + // Attributes: -// - UNCOMPRESSED +// - UNCOMPRESSED type BloomFilterCompression struct { - UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"` + UNCOMPRESSED *Uncompressed `thrift:"UNCOMPRESSED,1" db:"UNCOMPRESSED" json:"UNCOMPRESSED,omitempty"` } func NewBloomFilterCompression() *BloomFilterCompression { - return &BloomFilterCompression{} + return &BloomFilterCompression{} } var BloomFilterCompression_UNCOMPRESSED_DEFAULT *Uncompressed + func (p *BloomFilterCompression) GetUNCOMPRESSED() *Uncompressed { - if !p.IsSetUNCOMPRESSED() 
{ - return BloomFilterCompression_UNCOMPRESSED_DEFAULT - } -return p.UNCOMPRESSED + if !p.IsSetUNCOMPRESSED() { + return BloomFilterCompression_UNCOMPRESSED_DEFAULT + } + return p.UNCOMPRESSED } func (p *BloomFilterCompression) CountSetFieldsBloomFilterCompression() int { - count := 0 - if (p.IsSetUNCOMPRESSED()) { - count++ - } - return count + count := 0 + if p.IsSetUNCOMPRESSED() { + count++ + } + return count } func (p *BloomFilterCompression) IsSetUNCOMPRESSED() bool { - return p.UNCOMPRESSED != nil + return p.UNCOMPRESSED != nil } func (p *BloomFilterCompression) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *BloomFilterCompression) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.UNCOMPRESSED = &Uncompressed{} - if err := p.UNCOMPRESSED.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNCOMPRESSED), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BloomFilterCompression) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.UNCOMPRESSED = &Uncompressed{} + if err := p.UNCOMPRESSED.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNCOMPRESSED), err) + } + return nil } func (p *BloomFilterCompression) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsBloomFilterCompression(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "BloomFilterCompression"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - 
return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsBloomFilterCompression(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "BloomFilterCompression"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *BloomFilterCompression) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetUNCOMPRESSED() { - if err := oprot.WriteFieldBegin(ctx, "UNCOMPRESSED", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:UNCOMPRESSED: ", p), err) } - if err := p.UNCOMPRESSED.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNCOMPRESSED), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:UNCOMPRESSED: ", p), err) } - } - return err + if p.IsSetUNCOMPRESSED() { + if err := oprot.WriteFieldBegin(ctx, "UNCOMPRESSED", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:UNCOMPRESSED: ", p), err) + } + if err := p.UNCOMPRESSED.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNCOMPRESSED), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:UNCOMPRESSED: ", p), err) + } + } + return err } func (p *BloomFilterCompression) Equals(other *BloomFilterCompression) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.UNCOMPRESSED.Equals(other.UNCOMPRESSED) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.UNCOMPRESSED.Equals(other.UNCOMPRESSED) { + return false + } + return true } func (p *BloomFilterCompression) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterCompression(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterCompression(%+v)", *p) } func (p *BloomFilterCompression) Validate() error { - return nil + return nil } + // Bloom filter header is stored at beginning of Bloom filter data of each column // and followed by its bitset. -// -// +// // Attributes: -// - NumBytes: The size of bitset in bytes * -// - Algorithm: The algorithm for setting bits. * -// - Hash: The hash function used for Bloom filter. * -// - Compression: The compression used in the Bloom filter * +// - NumBytes: The size of bitset in bytes * +// - Algorithm: The algorithm for setting bits. * +// - Hash: The hash function used for Bloom filter. 
* +// - Compression: The compression used in the Bloom filter * type BloomFilterHeader struct { - NumBytes int32 `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"` - Algorithm *BloomFilterAlgorithm `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"` - Hash *BloomFilterHash `thrift:"hash,3,required" db:"hash" json:"hash"` - Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"` + NumBytes int32 `thrift:"numBytes,1,required" db:"numBytes" json:"numBytes"` + Algorithm *BloomFilterAlgorithm `thrift:"algorithm,2,required" db:"algorithm" json:"algorithm"` + Hash *BloomFilterHash `thrift:"hash,3,required" db:"hash" json:"hash"` + Compression *BloomFilterCompression `thrift:"compression,4,required" db:"compression" json:"compression"` } func NewBloomFilterHeader() *BloomFilterHeader { - return &BloomFilterHeader{} + return &BloomFilterHeader{} } - func (p *BloomFilterHeader) GetNumBytes() int32 { - return p.NumBytes + return p.NumBytes } + var BloomFilterHeader_Algorithm_DEFAULT *BloomFilterAlgorithm + func (p *BloomFilterHeader) GetAlgorithm() *BloomFilterAlgorithm { - if !p.IsSetAlgorithm() { - return BloomFilterHeader_Algorithm_DEFAULT - } -return p.Algorithm + if !p.IsSetAlgorithm() { + return BloomFilterHeader_Algorithm_DEFAULT + } + return p.Algorithm } + var BloomFilterHeader_Hash_DEFAULT *BloomFilterHash + func (p *BloomFilterHeader) GetHash() *BloomFilterHash { - if !p.IsSetHash() { - return BloomFilterHeader_Hash_DEFAULT - } -return p.Hash + if !p.IsSetHash() { + return BloomFilterHeader_Hash_DEFAULT + } + return p.Hash } + var BloomFilterHeader_Compression_DEFAULT *BloomFilterCompression + func (p *BloomFilterHeader) GetCompression() *BloomFilterCompression { - if !p.IsSetCompression() { - return BloomFilterHeader_Compression_DEFAULT - } -return p.Compression + if !p.IsSetCompression() { + return BloomFilterHeader_Compression_DEFAULT + } + return p.Compression } func (p *BloomFilterHeader) IsSetAlgorithm() bool { - return p.Algorithm != nil + return p.Algorithm != nil } func (p *BloomFilterHeader) IsSetHash() bool { - return p.Hash != nil + return p.Hash != nil } func (p *BloomFilterHeader) IsSetCompression() bool { - return p.Compression != nil + return p.Compression != nil } func (p *BloomFilterHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNumBytes bool = false; - var issetAlgorithm bool = false; - var issetHash bool = false; - var issetCompression bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNumBytes = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetAlgorithm = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetHash = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - 
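BloomFilterCompression above, like the BloomFilterAlgorithm and BloomFilterHash unions before it, is a thrift union: Write rejects any value whose CountSetFields* count is not exactly one. A minimal construction sketch, assuming the BLOCK/XXHASH field names and the NewSplitBlockAlgorithm/NewXxHash constructors generated earlier in this file:

	// Give each union member exactly one field, satisfying the
	// CountSetFields* checks performed by Write.
	header := NewBloomFilterHeader()
	header.NumBytes = 1024
	header.Algorithm = &BloomFilterAlgorithm{BLOCK: NewSplitBlockAlgorithm()}
	header.Hash = &BloomFilterHash{XXHASH: NewXxHash()}
	header.Compression = &BloomFilterCompression{UNCOMPRESSED: NewUncompressed()}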
case 4: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetCompression = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNumBytes{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumBytes is not set")); - } - if !issetAlgorithm{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Algorithm is not set")); - } - if !issetHash{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Hash is not set")); - } - if !issetCompression{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Compression is not set")); - } - return nil -} - -func (p *BloomFilterHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.NumBytes = v -} - return nil -} - -func (p *BloomFilterHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.Algorithm = &BloomFilterAlgorithm{} - if err := p.Algorithm.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Algorithm), err) - } - return nil -} - -func (p *BloomFilterHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.Hash = &BloomFilterHash{} - if err := p.Hash.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Hash), err) - } - return nil -} - -func (p *BloomFilterHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - p.Compression = &BloomFilterCompression{} - if err := p.Compression.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Compression), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumBytes bool = false + var issetAlgorithm bool = false + var issetHash bool = false + var issetCompression bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNumBytes = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetAlgorithm = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetHash = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetCompression = true + } 
else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumBytes { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumBytes is not set")) + } + if !issetAlgorithm { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Algorithm is not set")) + } + if !issetHash { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Hash is not set")) + } + if !issetCompression { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Compression is not set")) + } + return nil +} + +func (p *BloomFilterHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumBytes = v + } + return nil +} + +func (p *BloomFilterHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.Algorithm = &BloomFilterAlgorithm{} + if err := p.Algorithm.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Algorithm), err) + } + return nil +} + +func (p *BloomFilterHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + p.Hash = &BloomFilterHash{} + if err := p.Hash.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Hash), err) + } + return nil +} + +func (p *BloomFilterHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + p.Compression = &BloomFilterCompression{} + if err := p.Compression.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Compression), err) + } + return nil } func (p *BloomFilterHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "BloomFilterHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "BloomFilterHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop 
error: ", err) + } + return nil } func (p *BloomFilterHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "numBytes", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:numBytes: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.NumBytes)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.numBytes (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:numBytes: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "numBytes", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:numBytes: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.NumBytes)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.numBytes (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:numBytes: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "algorithm", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:algorithm: ", p), err) } - if err := p.Algorithm.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Algorithm), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:algorithm: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "algorithm", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:algorithm: ", p), err) + } + if err := p.Algorithm.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Algorithm), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:algorithm: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "hash", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:hash: ", p), err) } - if err := p.Hash.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Hash), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:hash: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "hash", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:hash: ", p), err) + } + if err := p.Hash.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Hash), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:hash: ", p), err) + } + return err } func (p *BloomFilterHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compression", thrift.STRUCT, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:compression: ", p), err) } - if err := p.Compression.Write(ctx, oprot); 
err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Compression), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:compression: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compression", thrift.STRUCT, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:compression: ", p), err) + } + if err := p.Compression.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Compression), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:compression: ", p), err) + } + return err } func (p *BloomFilterHeader) Equals(other *BloomFilterHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.NumBytes != other.NumBytes { return false } - if !p.Algorithm.Equals(other.Algorithm) { return false } - if !p.Hash.Equals(other.Hash) { return false } - if !p.Compression.Equals(other.Compression) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.NumBytes != other.NumBytes { + return false + } + if !p.Algorithm.Equals(other.Algorithm) { + return false + } + if !p.Hash.Equals(other.Hash) { + return false + } + if !p.Compression.Equals(other.Compression) { + return false + } + return true } func (p *BloomFilterHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("BloomFilterHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("BloomFilterHeader(%+v)", *p) } func (p *BloomFilterHeader) Validate() error { - return nil + return nil } + // Attributes: -// - Type: the type of the page: indicates which of the *_header fields is set * -// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * -// - CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header * -// - Crc: The 32-bit CRC checksum for the page, to be be calculated as follows: -// -// - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, -// the same as in e.g. GZip). -// - All page types can have a CRC (v1 and v2 data pages, dictionary pages, -// etc.). -// - The CRC is computed on the serialization binary representation of the page -// (as written to disk), excluding the page header. For example, for v1 -// data pages, the CRC is computed on the concatenation of repetition levels, -// definition levels and column values (optionally compressed, optionally -// encrypted). -// - The CRC computation therefore takes place after any compression -// and encryption steps, if any. -// +// +// - Type: the type of the page: indicates which of the *_header fields is set * +// +// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * +// +// - CompressedPageSize: Compressed (and potentially encrypted) page size in bytes, not including this header * +// +// - Crc: The 32-bit CRC checksum for the page, to be calculated as follows: +// +// - The standard CRC32 algorithm is used (with polynomial 0x04C11DB7, +// the same as in e.g. GZip). +// +// - All page types can have a CRC (v1 and v2 data pages, dictionary pages, +// etc.). +// +// - The CRC is computed on the serialization binary representation of the page +// (as written to disk), excluding the page header. 
For example, for v1 +// data pages, the CRC is computed on the concatenation of repetition levels, +// definition levels and column values (optionally compressed, optionally +// encrypted). +// +// - The CRC computation therefore takes place after any compression +// and encryption steps, if any. +// // If enabled, this allows for disabling checksumming in HDFS if only a few // pages need to be read. -// - DataPageHeader -// - IndexPageHeader -// - DictionaryPageHeader -// - DataPageHeaderV2 +// - DataPageHeader +// - IndexPageHeader +// - DictionaryPageHeader +// - DataPageHeaderV2 type PageHeader struct { - Type PageType `thrift:"type,1,required" db:"type" json:"type"` - UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` - CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` - Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` - DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` - IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` - DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` - DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` + Type PageType `thrift:"type,1,required" db:"type" json:"type"` + UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` + CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` + Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` + DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` + IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` + DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` + DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` } func NewPageHeader() *PageHeader { - return &PageHeader{} + return &PageHeader{} } - func (p *PageHeader) GetType() PageType { - return p.Type + return p.Type } func (p *PageHeader) GetUncompressedPageSize() int32 { - return p.UncompressedPageSize + return p.UncompressedPageSize } func (p *PageHeader) GetCompressedPageSize() int32 { - return p.CompressedPageSize + return p.CompressedPageSize } + var PageHeader_Crc_DEFAULT int32 + func (p *PageHeader) GetCrc() int32 { - if !p.IsSetCrc() { - return PageHeader_Crc_DEFAULT - } -return *p.Crc + if !p.IsSetCrc() { + return PageHeader_Crc_DEFAULT + } + return *p.Crc } + var PageHeader_DataPageHeader_DEFAULT *DataPageHeader + func (p *PageHeader) GetDataPageHeader() *DataPageHeader { - if !p.IsSetDataPageHeader() { - return PageHeader_DataPageHeader_DEFAULT - } -return p.DataPageHeader + if !p.IsSetDataPageHeader() { + return PageHeader_DataPageHeader_DEFAULT + } + return p.DataPageHeader } + var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader + func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader { - if !p.IsSetIndexPageHeader() { - return 
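The Crc rule in the comment above maps directly onto Go's hash/crc32 package: crc32.IEEE is the bit-reversed form of the stated polynomial 0x04C11DB7, i.e. the same CRC32 GZip uses. A sketch of a writer filling the optional field, where pageBytes is a hypothetical buffer holding the page body exactly as it will be written to disk, excluding this header:

	// The CRC is computed after any compression/encryption, over the
	// on-disk page body only (the page header itself is excluded).
	crc := int32(crc32.ChecksumIEEE(pageBytes)) // import "hash/crc32"
	hdr := NewPageHeader()
	hdr.Crc = &crc // optional field, hence the pointer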
PageHeader_IndexPageHeader_DEFAULT - } -return p.IndexPageHeader + if !p.IsSetIndexPageHeader() { + return PageHeader_IndexPageHeader_DEFAULT + } + return p.IndexPageHeader } + var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader + func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader { - if !p.IsSetDictionaryPageHeader() { - return PageHeader_DictionaryPageHeader_DEFAULT - } -return p.DictionaryPageHeader + if !p.IsSetDictionaryPageHeader() { + return PageHeader_DictionaryPageHeader_DEFAULT + } + return p.DictionaryPageHeader } + var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2 + func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2 { - if !p.IsSetDataPageHeaderV2() { - return PageHeader_DataPageHeaderV2_DEFAULT - } -return p.DataPageHeaderV2 + if !p.IsSetDataPageHeaderV2() { + return PageHeader_DataPageHeaderV2_DEFAULT + } + return p.DataPageHeaderV2 } func (p *PageHeader) IsSetCrc() bool { - return p.Crc != nil + return p.Crc != nil } func (p *PageHeader) IsSetDataPageHeader() bool { - return p.DataPageHeader != nil + return p.DataPageHeader != nil } func (p *PageHeader) IsSetIndexPageHeader() bool { - return p.IndexPageHeader != nil + return p.IndexPageHeader != nil } func (p *PageHeader) IsSetDictionaryPageHeader() bool { - return p.DictionaryPageHeader != nil + return p.DictionaryPageHeader != nil } func (p *PageHeader) IsSetDataPageHeaderV2() bool { - return p.DataPageHeaderV2 != nil + return p.DataPageHeaderV2 != nil } func (p *PageHeader) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false; - var issetUncompressedPageSize bool = false; - var issetCompressedPageSize bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetUncompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetCompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 
8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")); - } - if !issetUncompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")); - } - if !issetCompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")); - } - return nil -} - -func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := PageType(v) - p.Type = temp -} - return nil -} - -func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.UncompressedPageSize = v -} - return nil -} - -func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.CompressedPageSize = v -} - return nil -} - -func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.Crc = &v -} - return nil -} - -func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - p.DataPageHeader = &DataPageHeader{} - if err := p.DataPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - p.IndexPageHeader = &IndexPageHeader{} - if err := p.IndexPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - p.DictionaryPageHeader = &DictionaryPageHeader{} - if err := p.DictionaryPageHeader.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) - } - return nil -} - -func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.DataPageHeaderV2 = &DataPageHeaderV2{ - IsCompressed: true, -} - if err := p.DataPageHeaderV2.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetType bool = false + var issetUncompressedPageSize bool = false + var issetCompressedPageSize bool = false + + for { + _, fieldTypeId, fieldId, err := 
iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetUncompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetCompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) + } + if !issetUncompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")) + } + if !issetCompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + return nil +} + +func (p *PageHeader) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.Type = temp + } + return nil +} + +func (p *PageHeader) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.UncompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := 
iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Crc = &v + } + return nil +} + +func (p *PageHeader) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + p.DataPageHeader = &DataPageHeader{} + if err := p.DataPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + p.IndexPageHeader = &IndexPageHeader{} + if err := p.IndexPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + p.DictionaryPageHeader = &DictionaryPageHeader{} + if err := p.DictionaryPageHeader.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.DataPageHeaderV2 = &DataPageHeaderV2{ + IsCompressed: true, + } + if err := p.DataPageHeaderV2.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) + } + return nil } func (p *PageHeader) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageHeader"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageHeader) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); 
err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + return err } func (p *PageHeader) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "uncompressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.UncompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "uncompressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.UncompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) + } + return err } func (p *PageHeader) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) + } + return err } func (p *PageHeader) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetCrc() { - if err := oprot.WriteFieldBegin(ctx, "crc", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), 
err) } - if err := oprot.WriteI32(ctx, int32(*p.Crc)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) } - } - return err + if p.IsSetCrc() { + if err := oprot.WriteFieldBegin(ctx, "crc", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.Crc)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) + } + } + return err } func (p *PageHeader) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "data_page_header", thrift.STRUCT, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) } - if err := p.DataPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) } - } - return err + if p.IsSetDataPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "data_page_header", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) + } + if err := p.DataPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "index_page_header", thrift.STRUCT, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) } - if err := p.IndexPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) } - } - return err + if p.IsSetIndexPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "index_page_header", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) + } + if err := p.IndexPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageHeader() { - if err := oprot.WriteFieldBegin(ctx, "dictionary_page_header", thrift.STRUCT, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), 
err) } - if err := p.DictionaryPageHeader.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) } - } - return err + if p.IsSetDictionaryPageHeader() { + if err := oprot.WriteFieldBegin(ctx, "dictionary_page_header", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), err) + } + if err := p.DictionaryPageHeader.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) + } + } + return err } func (p *PageHeader) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDataPageHeaderV2() { - if err := oprot.WriteFieldBegin(ctx, "data_page_header_v2", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) } - if err := p.DataPageHeaderV2.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) } - } - return err + if p.IsSetDataPageHeaderV2() { + if err := oprot.WriteFieldBegin(ctx, "data_page_header_v2", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) + } + if err := p.DataPageHeaderV2.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) + } + } + return err } func (p *PageHeader) Equals(other *PageHeader) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { return false } - if p.UncompressedPageSize != other.UncompressedPageSize { return false } - if p.CompressedPageSize != other.CompressedPageSize { return false } - if p.Crc != other.Crc { - if p.Crc == nil || other.Crc == nil { - return false - } - if (*p.Crc) != (*other.Crc) { return false } - } - if !p.DataPageHeader.Equals(other.DataPageHeader) { return false } - if !p.IndexPageHeader.Equals(other.IndexPageHeader) { return false } - if !p.DictionaryPageHeader.Equals(other.DictionaryPageHeader) { return false } - if !p.DataPageHeaderV2.Equals(other.DataPageHeaderV2) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + return false + } + if p.UncompressedPageSize != other.UncompressedPageSize { + return false + } + if p.CompressedPageSize != other.CompressedPageSize { + return false + } + if p.Crc != other.Crc { + if p.Crc == nil || other.Crc == nil { + return false + } + if (*p.Crc) != (*other.Crc) { + return false + } + } + if !p.DataPageHeader.Equals(other.DataPageHeader) { + return false + } + if !p.IndexPageHeader.Equals(other.IndexPageHeader) { + return false + } + if 
!p.DictionaryPageHeader.Equals(other.DictionaryPageHeader) { + return false + } + if !p.DataPageHeaderV2.Equals(other.DataPageHeaderV2) { + return false + } + return true } func (p *PageHeader) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageHeader(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageHeader(%+v)", *p) } func (p *PageHeader) Validate() error { - return nil + return nil } + // Wrapper struct to store key values -// +// // Attributes: -// - Key -// - Value +// - Key +// - Value type KeyValue struct { - Key string `thrift:"key,1,required" db:"key" json:"key"` - Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` + Key string `thrift:"key,1,required" db:"key" json:"key"` + Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` } func NewKeyValue() *KeyValue { - return &KeyValue{} + return &KeyValue{} } - func (p *KeyValue) GetKey() string { - return p.Key + return p.Key } + var KeyValue_Value_DEFAULT string + func (p *KeyValue) GetValue() string { - if !p.IsSetValue() { - return KeyValue_Value_DEFAULT - } -return *p.Value + if !p.IsSetValue() { + return KeyValue_Value_DEFAULT + } + return *p.Value } func (p *KeyValue) IsSetValue() bool { - return p.Value != nil + return p.Value != nil } func (p *KeyValue) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetKey bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetKey = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetKey{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")); - } - return nil -} - -func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Key = v -} - return nil -} - -func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Value = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetKey bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING 
{ + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetKey = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetKey { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")) + } + return nil +} + +func (p *KeyValue) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Key = v + } + return nil +} + +func (p *KeyValue) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Value = &v + } + return nil } func (p *KeyValue) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "KeyValue"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "KeyValue"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *KeyValue) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "key", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:key: ", p), err) } - if err := oprot.WriteString(ctx, string(p.Key)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "key", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:key: ", p), err) + } + if err := oprot.WriteString(ctx, string(p.Key)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) + } + return err } func (p *KeyValue) 
writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetValue() { - if err := oprot.WriteFieldBegin(ctx, "value", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) } - if err := oprot.WriteString(ctx, string(*p.Value)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) } - } - return err + if p.IsSetValue() { + if err := oprot.WriteFieldBegin(ctx, "value", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) + } + if err := oprot.WriteString(ctx, string(*p.Value)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) + } + } + return err } func (p *KeyValue) Equals(other *KeyValue) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Key != other.Key { return false } - if p.Value != other.Value { - if p.Value == nil || other.Value == nil { - return false - } - if (*p.Value) != (*other.Value) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Key != other.Key { + return false + } + if p.Value != other.Value { + if p.Value == nil || other.Value == nil { + return false + } + if (*p.Value) != (*other.Value) { + return false + } + } + return true } func (p *KeyValue) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("KeyValue(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("KeyValue(%+v)", *p) } func (p *KeyValue) Validate() error { - return nil + return nil } + // Wrapper struct to specify sort order -// +// // Attributes: -// - ColumnIdx: The column index (in this row group) * -// - Descending: If true, indicates this column is sorted in descending order. * -// - NullsFirst: If true, nulls will come before non-null values, otherwise, +// - ColumnIdx: The column index (in this row group) * +// - Descending: If true, indicates this column is sorted in descending order. * +// - NullsFirst: If true, nulls will come before non-null values, otherwise, +// // nulls go at the end. 
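Two usage sketches for the wrapper structs in this hunk, assuming caller code in the same generated package: KeyValue models its optional value as a *string, so nil cleanly distinguishes "unset" from an empty string, and SortingColumn (defined just below) pins down the ordering of one column in a row group:

	// Optional thrift fields are pointers; leaving Value nil means "unset".
	kv := NewKeyValue()
	kv.Key = "created_by" // hypothetical metadata key
	val := "example-writer"
	kv.Value = &val

	// First column, ascending, with nulls sorted after non-null values.
	sc := NewSortingColumn()
	sc.ColumnIdx = 0
	sc.Descending = false
	sc.NullsFirst = false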
type SortingColumn struct { - ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` - Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` - NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` + ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` + Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` + NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` } func NewSortingColumn() *SortingColumn { - return &SortingColumn{} + return &SortingColumn{} } - func (p *SortingColumn) GetColumnIdx() int32 { - return p.ColumnIdx + return p.ColumnIdx } func (p *SortingColumn) GetDescending() bool { - return p.Descending + return p.Descending } func (p *SortingColumn) GetNullsFirst() bool { - return p.NullsFirst + return p.NullsFirst } func (p *SortingColumn) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetColumnIdx bool = false; - var issetDescending bool = false; - var issetNullsFirst bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetColumnIdx = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetDescending = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNullsFirst = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetColumnIdx{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")); - } - if !issetDescending{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")); - } - if !issetNullsFirst{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")); - } - return nil -} - -func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.ColumnIdx = v -} - return nil -} - -func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.Descending = v -} - return nil -} - -func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return 
thrift.PrependError("error reading field 3: ", err) -} else { - p.NullsFirst = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetColumnIdx bool = false + var issetDescending bool = false + var issetNullsFirst bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetColumnIdx = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetDescending = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNullsFirst = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetColumnIdx { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")) + } + if !issetDescending { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")) + } + if !issetNullsFirst { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")) + } + return nil +} + +func (p *SortingColumn) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.ColumnIdx = v + } + return nil +} + +func (p *SortingColumn) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Descending = v + } + return nil +} + +func (p *SortingColumn) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NullsFirst = v + } + return nil } func (p *SortingColumn) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "SortingColumn"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "SortingColumn"); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *SortingColumn) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "column_idx", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.ColumnIdx)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "column_idx", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.ColumnIdx)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) + } + return err } func (p *SortingColumn) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "descending", thrift.BOOL, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.Descending)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "descending", thrift.BOOL, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(p.Descending)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) + } + return err } func (p *SortingColumn) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "nulls_first", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) } - if err := oprot.WriteBool(ctx, bool(p.NullsFirst)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "nulls_first", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) + } + if err := 
oprot.WriteBool(ctx, bool(p.NullsFirst)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) + } + return err } func (p *SortingColumn) Equals(other *SortingColumn) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.ColumnIdx != other.ColumnIdx { return false } - if p.Descending != other.Descending { return false } - if p.NullsFirst != other.NullsFirst { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.ColumnIdx != other.ColumnIdx { + return false + } + if p.Descending != other.Descending { + return false + } + if p.NullsFirst != other.NullsFirst { + return false + } + return true } func (p *SortingColumn) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("SortingColumn(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("SortingColumn(%+v)", *p) } func (p *SortingColumn) Validate() error { - return nil + return nil } + // statistics of a given page type and encoding -// +// // Attributes: -// - PageType: the page type (data/dic/...) * -// - Encoding: encoding of the page * -// - Count: number of pages of this type with this encoding * +// - PageType: the page type (data/dic/...) * +// - Encoding: encoding of the page * +// - Count: number of pages of this type with this encoding * type PageEncodingStats struct { - PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` - Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` - Count int32 `thrift:"count,3,required" db:"count" json:"count"` + PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + Count int32 `thrift:"count,3,required" db:"count" json:"count"` } func NewPageEncodingStats() *PageEncodingStats { - return &PageEncodingStats{} + return &PageEncodingStats{} } - func (p *PageEncodingStats) GetPageType() PageType { - return p.PageType + return p.PageType } func (p *PageEncodingStats) GetEncoding() Encoding { - return p.Encoding + return p.Encoding } func (p *PageEncodingStats) GetCount() int32 { - return p.Count + return p.Count } func (p *PageEncodingStats) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageType bool = false; - var issetEncoding bool = false; - var issetCount bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetPageType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncoding = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I32 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err 
- } - issetCount = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")); - } - if !issetEncoding{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")); - } - if !issetCount{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")); - } - return nil -} - -func (p *PageEncodingStats) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := PageType(v) - p.PageType = temp -} - return nil -} - -func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - temp := Encoding(v) - p.Encoding = temp -} - return nil -} - -func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.Count = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageType bool = false + var issetEncoding bool = false + var issetCount bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetPageType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncoding = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I32 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetCount = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetCount { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")) + } + return nil +} + +func (p *PageEncodingStats) ReadField1(ctx 
context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.PageType = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.Count = v + } + return nil } func (p *PageEncodingStats) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageEncodingStats"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageEncodingStats"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageEncodingStats) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "page_type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.PageType)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "page_type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.PageType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) + } + return err } func (p *PageEncodingStats) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err } func (p *PageEncodingStats) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "count", thrift.I32, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Count)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "count", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Count)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) + } + return err } func (p *PageEncodingStats) Equals(other *PageEncodingStats) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.PageType != other.PageType { return false } - if p.Encoding != other.Encoding { return false } - if p.Count != other.Count { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.PageType != other.PageType { + return false + } + if p.Encoding != other.Encoding { + return false + } + if p.Count != other.Count { + return false + } + return true } func (p *PageEncodingStats) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageEncodingStats(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageEncodingStats(%+v)", *p) } func (p *PageEncodingStats) Validate() error { - return nil + return nil } + // Description for column metadata -// +// // Attributes: -// - Type: Type of this column * -// - Encodings: Set of all encodings used for this column. The purpose is to validate +// - Type: Type of this column * +// - Encodings: Set of all encodings used for this column. The purpose is to validate +// // whether we can decode those pages. 
* -// - PathInSchema: Path in schema * -// - Codec: Compression codec * -// - NumValues: Number of values in this column * -// - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) * -// - TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages +// - PathInSchema: Path in schema * +// - Codec: Compression codec * +// - NumValues: Number of values in this column * +// - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) * +// - TotalCompressedSize: total byte size of all compressed, and potentially encrypted, pages +// // in this column chunk (including the headers) * -// - KeyValueMetadata: Optional key/value metadata * -// - DataPageOffset: Byte offset from beginning of file to first data page * -// - IndexPageOffset: Byte offset from beginning of file to root index page * -// - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page * -// - Statistics: optional statistics for this column chunk -// - EncodingStats: Set of all encodings used for pages in this column chunk. +// - KeyValueMetadata: Optional key/value metadata * +// - DataPageOffset: Byte offset from beginning of file to first data page * +// - IndexPageOffset: Byte offset from beginning of file to root index page * +// - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page * +// - Statistics: optional statistics for this column chunk +// - EncodingStats: Set of all encodings used for pages in this column chunk. +// // This information can be used to determine if all data pages are // dictionary encoded for example * -// - BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. * +// - BloomFilterOffset: Byte offset from beginning of file to Bloom filter data. 
* type ColumnMetaData struct { - Type Type `thrift:"type,1,required" db:"type" json:"type"` - Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` - PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` - Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` - NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` - TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` - TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` - IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` - DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` - Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` - EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` - BloomFilterOffset *int64 `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"` + Type Type `thrift:"type,1,required" db:"type" json:"type"` + Encodings []Encoding `thrift:"encodings,2,required" db:"encodings" json:"encodings"` + PathInSchema []string `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"` + Codec CompressionCodec `thrift:"codec,4,required" db:"codec" json:"codec"` + NumValues int64 `thrift:"num_values,5,required" db:"num_values" json:"num_values"` + TotalUncompressedSize int64 `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"` + TotalCompressedSize int64 `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"` + KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"` + DataPageOffset int64 `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"` + IndexPageOffset *int64 `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"` + DictionaryPageOffset *int64 `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"` + Statistics *Statistics `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"` + EncodingStats []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"` + BloomFilterOffset *int64 `thrift:"bloom_filter_offset,14" db:"bloom_filter_offset" json:"bloom_filter_offset,omitempty"` } func NewColumnMetaData() *ColumnMetaData { - return &ColumnMetaData{} + return &ColumnMetaData{} } - func (p *ColumnMetaData) GetType() Type { - return p.Type + return p.Type } func (p *ColumnMetaData) GetEncodings() []Encoding { - return p.Encodings + return p.Encodings } func (p *ColumnMetaData) GetPathInSchema() []string { - return p.PathInSchema + return p.PathInSchema } func (p *ColumnMetaData) GetCodec() CompressionCodec { - return p.Codec + return p.Codec } func (p *ColumnMetaData) GetNumValues() int64 { - return 
p.NumValues + return p.NumValues } func (p *ColumnMetaData) GetTotalUncompressedSize() int64 { - return p.TotalUncompressedSize + return p.TotalUncompressedSize } func (p *ColumnMetaData) GetTotalCompressedSize() int64 { - return p.TotalCompressedSize + return p.TotalCompressedSize } + var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata + return p.KeyValueMetadata } func (p *ColumnMetaData) GetDataPageOffset() int64 { - return p.DataPageOffset + return p.DataPageOffset } + var ColumnMetaData_IndexPageOffset_DEFAULT int64 + func (p *ColumnMetaData) GetIndexPageOffset() int64 { - if !p.IsSetIndexPageOffset() { - return ColumnMetaData_IndexPageOffset_DEFAULT - } -return *p.IndexPageOffset + if !p.IsSetIndexPageOffset() { + return ColumnMetaData_IndexPageOffset_DEFAULT + } + return *p.IndexPageOffset } + var ColumnMetaData_DictionaryPageOffset_DEFAULT int64 + func (p *ColumnMetaData) GetDictionaryPageOffset() int64 { - if !p.IsSetDictionaryPageOffset() { - return ColumnMetaData_DictionaryPageOffset_DEFAULT - } -return *p.DictionaryPageOffset + if !p.IsSetDictionaryPageOffset() { + return ColumnMetaData_DictionaryPageOffset_DEFAULT + } + return *p.DictionaryPageOffset } + var ColumnMetaData_Statistics_DEFAULT *Statistics + func (p *ColumnMetaData) GetStatistics() *Statistics { - if !p.IsSetStatistics() { - return ColumnMetaData_Statistics_DEFAULT - } -return p.Statistics + if !p.IsSetStatistics() { + return ColumnMetaData_Statistics_DEFAULT + } + return p.Statistics } + var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats func (p *ColumnMetaData) GetEncodingStats() []*PageEncodingStats { - return p.EncodingStats + return p.EncodingStats } + var ColumnMetaData_BloomFilterOffset_DEFAULT int64 + func (p *ColumnMetaData) GetBloomFilterOffset() int64 { - if !p.IsSetBloomFilterOffset() { - return ColumnMetaData_BloomFilterOffset_DEFAULT - } -return *p.BloomFilterOffset + if !p.IsSetBloomFilterOffset() { + return ColumnMetaData_BloomFilterOffset_DEFAULT + } + return *p.BloomFilterOffset } func (p *ColumnMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil + return p.KeyValueMetadata != nil } func (p *ColumnMetaData) IsSetIndexPageOffset() bool { - return p.IndexPageOffset != nil + return p.IndexPageOffset != nil } func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool { - return p.DictionaryPageOffset != nil + return p.DictionaryPageOffset != nil } func (p *ColumnMetaData) IsSetStatistics() bool { - return p.Statistics != nil + return p.Statistics != nil } func (p *ColumnMetaData) IsSetEncodingStats() bool { - return p.EncodingStats != nil + return p.EncodingStats != nil } func (p *ColumnMetaData) IsSetBloomFilterOffset() bool { - return p.BloomFilterOffset != nil + return p.BloomFilterOffset != nil } func (p *ColumnMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetType bool = false; - var issetEncodings bool = false; - var issetPathInSchema bool = false; - var issetCodec bool = false; - var issetNumValues bool = false; - var issetTotalUncompressedSize bool = false; - var issetTotalCompressedSize bool = false; - var issetDataPageOffset bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), 
err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetType = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetEncodings = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.LIST { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetPathInSchema = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetCodec = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I64 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - issetNumValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I64 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - issetTotalUncompressedSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I64 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - issetTotalCompressedSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.LIST { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.I64 { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - issetDataPageOffset = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 10: - if fieldTypeId == thrift.I64 { - if err := p.ReadField10(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 11: - if fieldTypeId == thrift.I64 { - if err := p.ReadField11(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 12: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField12(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 13: - if fieldTypeId == thrift.LIST { - if err := p.ReadField13(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 14: - if fieldTypeId == thrift.I64 { - if err := p.ReadField14(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetType{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")); - } - if !issetEncodings{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set")); - } - if !issetPathInSchema{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")); - } - if !issetCodec{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set")); - } - if !issetNumValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")); - } - if !issetTotalUncompressedSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set")); - } - if !issetTotalCompressedSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set")); - } - if !issetDataPageOffset{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set")); - } - return nil -} - -func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - temp := Type(v) - p.Type = temp -} - return nil -} - -func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]Encoding, 0, size) - p.Encodings = tSlice - for i := 0; i < size; i ++ { -var _elem0 Encoding - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - temp := Encoding(v) - _elem0 = temp -} - p.Encodings = append(p.Encodings, _elem0) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]string, 0, size) - p.PathInSchema = tSlice - for i := 0; i < size; i ++ { -var _elem1 string - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem1 = v -} - p.PathInSchema = append(p.PathInSchema, _elem1) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := CompressionCodec(v) - p.Codec = temp -} - return nil -} - -func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.NumValues = v -} - return nil -} - -func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.TotalUncompressedSize = v -} - return nil -} - -func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := 
iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.TotalCompressedSize = v -} - return nil -} - -func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i ++ { - _elem2 := &KeyValue{} - if err := _elem2.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err) - } - p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.DataPageOffset = v -} - return nil -} - -func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 10: ", err) -} else { - p.IndexPageOffset = &v -} - return nil -} - -func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 11: ", err) -} else { - p.DictionaryPageOffset = &v -} - return nil -} - -func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { - p.Statistics = &Statistics{} - if err := p.Statistics.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) - } - return nil -} - -func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageEncodingStats, 0, size) - p.EncodingStats = tSlice - for i := 0; i < size; i ++ { - _elem3 := &PageEncodingStats{} - if err := _elem3.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err) - } - p.EncodingStats = append(p.EncodingStats, _elem3) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnMetaData) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 14: ", err) -} else { - p.BloomFilterOffset = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetType bool = false + var issetEncodings bool = false + var issetPathInSchema bool = false + var issetCodec bool = false + var issetNumValues bool = false + var issetTotalUncompressedSize bool = false + var issetTotalCompressedSize bool = false + var issetDataPageOffset bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := 
p.ReadField1(ctx, iprot); err != nil { + return err + } + issetType = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetEncodings = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.LIST { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetPathInSchema = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetCodec = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I64 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + issetNumValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I64 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + issetTotalUncompressedSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I64 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + issetTotalCompressedSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.LIST { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 9: + if fieldTypeId == thrift.I64 { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + issetDataPageOffset = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 10: + if fieldTypeId == thrift.I64 { + if err := p.ReadField10(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 11: + if fieldTypeId == thrift.I64 { + if err := p.ReadField11(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 12: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField12(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 13: + if fieldTypeId == thrift.LIST { + if err := p.ReadField13(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 14: + if fieldTypeId == thrift.I64 { + if err := p.ReadField14(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) + } + if !issetEncodings { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set")) + } + if !issetPathInSchema { + return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")) + } + if !issetCodec { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set")) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetTotalUncompressedSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set")) + } + if !issetTotalCompressedSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set")) + } + if !issetDataPageOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set")) + } + return nil +} + +func (p *ColumnMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := Type(v) + p.Type = temp + } + return nil +} + +func (p *ColumnMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]Encoding, 0, size) + p.Encodings = tSlice + for i := 0; i < size; i++ { + var _elem0 Encoding + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + temp := Encoding(v) + _elem0 = temp + } + p.Encodings = append(p.Encodings, _elem0) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]string, 0, size) + p.PathInSchema = tSlice + for i := 0; i < size; i++ { + var _elem1 string + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem1 = v + } + p.PathInSchema = append(p.PathInSchema, _elem1) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := CompressionCodec(v) + p.Codec = temp + } + return nil +} + +func (p *ColumnMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *ColumnMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.TotalUncompressedSize = v + } + return nil +} + +func (p *ColumnMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.TotalCompressedSize = v + } + return nil +} + 
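+// ---------------------------------------------------------------------------
+// Editorial sketch, not part of the generated file or of this patch: how the
+// generated Read/Write pair for ColumnMetaData is typically exercised. Parquet
+// encodes file metadata with Thrift's compact protocol, so a round trip
+// through an in-memory transport looks like the function below. The enum
+// values chosen and the in-package placement are illustrative assumptions;
+// thrift here is the standard github.com/apache/thrift/lib/go/thrift runtime.
+//
+//	func roundTripColumnMetaData() (*ColumnMetaData, error) {
+//		ctx := context.Background()
+//		buf := thrift.NewTMemoryBuffer()
+//		proto := thrift.NewTCompactProtocol(buf)
+//
+//		in := NewColumnMetaData()
+//		in.Type = Type_INT32                      // required fields 1-7 and 9
+//		in.Encodings = []Encoding{Encoding_PLAIN} // must be set before Write
+//		in.PathInSchema = []string{"id"}
+//		in.Codec = CompressionCodec_SNAPPY
+//		in.NumValues = 100
+//		in.TotalUncompressedSize = 400
+//		in.TotalCompressedSize = 300
+//		in.DataPageOffset = 4
+//		if err := in.Write(ctx, proto); err != nil {
+//			return nil, err
+//		}
+//
+//		// Read tolerates unknown fields (it skips them) but returns a
+//		// protocol exception if any required field is missing.
+//		out := NewColumnMetaData()
+//		return out, out.Read(ctx, proto)
+//	}
+// ---------------------------------------------------------------------------
+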
+func (p *ColumnMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*KeyValue, 0, size) + p.KeyValueMetadata = tSlice + for i := 0; i < size; i++ { + _elem2 := &KeyValue{} + if err := _elem2.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err) + } + p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.DataPageOffset = v + } + return nil +} + +func (p *ColumnMetaData) ReadField10(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 10: ", err) + } else { + p.IndexPageOffset = &v + } + return nil +} + +func (p *ColumnMetaData) ReadField11(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 11: ", err) + } else { + p.DictionaryPageOffset = &v + } + return nil +} + +func (p *ColumnMetaData) ReadField12(ctx context.Context, iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil +} + +func (p *ColumnMetaData) ReadField13(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*PageEncodingStats, 0, size) + p.EncodingStats = tSlice + for i := 0; i < size; i++ { + _elem3 := &PageEncodingStats{} + if err := _elem3.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err) + } + p.EncodingStats = append(p.EncodingStats, _elem3) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnMetaData) ReadField14(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 14: ", err) + } else { + p.BloomFilterOffset = &v + } + return nil } func (p *ColumnMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ColumnMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - if err := 
p.writeField10(ctx, oprot); err != nil { return err } - if err := p.writeField11(ctx, oprot); err != nil { return err } - if err := p.writeField12(ctx, oprot); err != nil { return err } - if err := p.writeField13(ctx, oprot); err != nil { return err } - if err := p.writeField14(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ColumnMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + if err := p.writeField10(ctx, oprot); err != nil { + return err + } + if err := p.writeField11(ctx, oprot); err != nil { + return err + } + if err := p.writeField12(ctx, oprot); err != nil { + return err + } + if err := p.writeField13(ctx, oprot); err != nil { + return err + } + if err := p.writeField14(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + return err } func (p *ColumnMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "encodings", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.I32, len(p.Encodings)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Encodings { - if err := oprot.WriteI32(ctx, int32(v)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "encodings", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.I32, len(p.Encodings)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Encodings { + if err := oprot.WriteI32(ctx, int32(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err) + } + return err } func (p *ColumnMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PathInSchema { - if err := oprot.WriteString(ctx, string(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PathInSchema { + if err := oprot.WriteString(ctx, string(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err) + } + return err } func (p *ColumnMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "codec", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.Codec)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "codec", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.Codec)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err) + } + return err } func (p *ColumnMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.NumValues)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_values", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err) + } + return err } func (p *ColumnMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_uncompressed_size", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalUncompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_uncompressed_size", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalUncompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err) + } + if err := 
oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err) + } + return err } func (p *ColumnMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalCompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalCompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err) + } + return err } func (p *ColumnMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetKeyValueMetadata() { - if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.KeyValueMetadata { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) } - } - return err + if p.IsSetKeyValueMetadata() { + if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.KeyValueMetadata { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "data_page_offset", thrift.I64, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.DataPageOffset)); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "data_page_offset", thrift.I64, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.DataPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err) + } + return err } func (p *ColumnMetaData) writeField10(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetIndexPageOffset() { - if err := oprot.WriteFieldBegin(ctx, "index_page_offset", thrift.I64, 10); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.IndexPageOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) } - } - return err + if p.IsSetIndexPageOffset() { + if err := oprot.WriteFieldBegin(ctx, "index_page_offset", thrift.I64, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.IndexPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField11(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetDictionaryPageOffset() { - if err := oprot.WriteFieldBegin(ctx, "dictionary_page_offset", thrift.I64, 11); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.DictionaryPageOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) } - } - return err + if p.IsSetDictionaryPageOffset() { + if err := oprot.WriteFieldBegin(ctx, "dictionary_page_offset", thrift.I64, 11); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.DictionaryPageOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField12(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetStatistics() { - if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 12); err != nil 
{ - return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) } - if err := p.Statistics.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) } - } - return err + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin(ctx, "statistics", thrift.STRUCT, 12); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err) + } + if err := p.Statistics.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField13(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetEncodingStats() { - if err := oprot.WriteFieldBegin(ctx, "encoding_stats", thrift.LIST, 13); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.EncodingStats)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.EncodingStats { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) } - } - return err + if p.IsSetEncodingStats() { + if err := oprot.WriteFieldBegin(ctx, "encoding_stats", thrift.LIST, 13); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.EncodingStats)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.EncodingStats { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err) + } + } + return err } func (p *ColumnMetaData) writeField14(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetBloomFilterOffset() { - if err := oprot.WriteFieldBegin(ctx, "bloom_filter_offset", thrift.I64, 14); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:bloom_filter_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.BloomFilterOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.bloom_filter_offset (14) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 14:bloom_filter_offset: ", p), err) } - } - return err + if p.IsSetBloomFilterOffset() { + if err := oprot.WriteFieldBegin(ctx, "bloom_filter_offset", thrift.I64, 14); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 14:bloom_filter_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.BloomFilterOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.bloom_filter_offset (14) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 14:bloom_filter_offset: ", p), err) + } + } + return err } func (p *ColumnMetaData) Equals(other *ColumnMetaData) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Type != other.Type { return false } - if len(p.Encodings) != len(other.Encodings) { return false } - for i, _tgt := range p.Encodings { - _src4 := other.Encodings[i] - if _tgt != _src4 { return false } - } - if len(p.PathInSchema) != len(other.PathInSchema) { return false } - for i, _tgt := range p.PathInSchema { - _src5 := other.PathInSchema[i] - if _tgt != _src5 { return false } - } - if p.Codec != other.Codec { return false } - if p.NumValues != other.NumValues { return false } - if p.TotalUncompressedSize != other.TotalUncompressedSize { return false } - if p.TotalCompressedSize != other.TotalCompressedSize { return false } - if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false } - for i, _tgt := range p.KeyValueMetadata { - _src6 := other.KeyValueMetadata[i] - if !_tgt.Equals(_src6) { return false } - } - if p.DataPageOffset != other.DataPageOffset { return false } - if p.IndexPageOffset != other.IndexPageOffset { - if p.IndexPageOffset == nil || other.IndexPageOffset == nil { - return false - } - if (*p.IndexPageOffset) != (*other.IndexPageOffset) { return false } - } - if p.DictionaryPageOffset != other.DictionaryPageOffset { - if p.DictionaryPageOffset == nil || other.DictionaryPageOffset == nil { - return false - } - if (*p.DictionaryPageOffset) != (*other.DictionaryPageOffset) { return false } - } - if !p.Statistics.Equals(other.Statistics) { return false } - if len(p.EncodingStats) != len(other.EncodingStats) { return false } - for i, _tgt := range p.EncodingStats { - _src7 := other.EncodingStats[i] - if !_tgt.Equals(_src7) { return false } - } - if p.BloomFilterOffset != other.BloomFilterOffset { - if p.BloomFilterOffset == nil || other.BloomFilterOffset == nil { - return false - } - if (*p.BloomFilterOffset) != (*other.BloomFilterOffset) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Type != other.Type { + return false + } + if len(p.Encodings) != len(other.Encodings) { + return false + } + for i, _tgt := range p.Encodings { + _src4 := other.Encodings[i] + if _tgt != _src4 { + return false + } + } + if len(p.PathInSchema) != len(other.PathInSchema) { + return false + } + for i, _tgt := range p.PathInSchema { + _src5 := other.PathInSchema[i] + if _tgt != _src5 { + return false + } + } + if p.Codec != other.Codec { + return false + } + if p.NumValues != other.NumValues { + return false + } + if p.TotalUncompressedSize != other.TotalUncompressedSize { + return false + } + if p.TotalCompressedSize != other.TotalCompressedSize { + return false + } + if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { + return false + } + for i, _tgt := range p.KeyValueMetadata { + _src6 := other.KeyValueMetadata[i] + if !_tgt.Equals(_src6) { + return false + } + } + if p.DataPageOffset != other.DataPageOffset { + return false + } + if p.IndexPageOffset != other.IndexPageOffset { + if 
p.IndexPageOffset == nil || other.IndexPageOffset == nil { + return false + } + if (*p.IndexPageOffset) != (*other.IndexPageOffset) { + return false + } + } + if p.DictionaryPageOffset != other.DictionaryPageOffset { + if p.DictionaryPageOffset == nil || other.DictionaryPageOffset == nil { + return false + } + if (*p.DictionaryPageOffset) != (*other.DictionaryPageOffset) { + return false + } + } + if !p.Statistics.Equals(other.Statistics) { + return false + } + if len(p.EncodingStats) != len(other.EncodingStats) { + return false + } + for i, _tgt := range p.EncodingStats { + _src7 := other.EncodingStats[i] + if !_tgt.Equals(_src7) { + return false + } + } + if p.BloomFilterOffset != other.BloomFilterOffset { + if p.BloomFilterOffset == nil || other.BloomFilterOffset == nil { + return false + } + if (*p.BloomFilterOffset) != (*other.BloomFilterOffset) { + return false + } + } + return true } func (p *ColumnMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnMetaData(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnMetaData(%+v)", *p) } func (p *ColumnMetaData) Validate() error { - return nil + return nil } + type EncryptionWithFooterKey struct { } func NewEncryptionWithFooterKey() *EncryptionWithFooterKey { - return &EncryptionWithFooterKey{} + return &EncryptionWithFooterKey{} } func (p *EncryptionWithFooterKey) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *EncryptionWithFooterKey) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "EncryptionWithFooterKey"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "EncryptionWithFooterKey"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop 
error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EncryptionWithFooterKey) Equals(other *EncryptionWithFooterKey) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *EncryptionWithFooterKey) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EncryptionWithFooterKey(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("EncryptionWithFooterKey(%+v)", *p) } func (p *EncryptionWithFooterKey) Validate() error { - return nil + return nil } + // Attributes: -// - PathInSchema: Column path in schema * -// - KeyMetadata: Retrieval metadata of column encryption key * +// - PathInSchema: Column path in schema * +// - KeyMetadata: Retrieval metadata of column encryption key * type EncryptionWithColumnKey struct { - PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"` - KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"` + PathInSchema []string `thrift:"path_in_schema,1,required" db:"path_in_schema" json:"path_in_schema"` + KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"` } func NewEncryptionWithColumnKey() *EncryptionWithColumnKey { - return &EncryptionWithColumnKey{} + return &EncryptionWithColumnKey{} } - func (p *EncryptionWithColumnKey) GetPathInSchema() []string { - return p.PathInSchema + return p.PathInSchema } + var EncryptionWithColumnKey_KeyMetadata_DEFAULT []byte func (p *EncryptionWithColumnKey) GetKeyMetadata() []byte { - return p.KeyMetadata + return p.KeyMetadata } func (p *EncryptionWithColumnKey) IsSetKeyMetadata() bool { - return p.KeyMetadata != nil + return p.KeyMetadata != nil } func (p *EncryptionWithColumnKey) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPathInSchema bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetPathInSchema = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPathInSchema{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")); - } - return nil -} - -func (p *EncryptionWithColumnKey) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error 
reading list begin: ", err) - } - tSlice := make([]string, 0, size) - p.PathInSchema = tSlice - for i := 0; i < size; i ++ { -var _elem8 string - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem8 = v -} - p.PathInSchema = append(p.PathInSchema, _elem8) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *EncryptionWithColumnKey) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.KeyMetadata = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPathInSchema bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetPathInSchema = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPathInSchema { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set")) + } + return nil +} + +func (p *EncryptionWithColumnKey) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]string, 0, size) + p.PathInSchema = tSlice + for i := 0; i < size; i++ { + var _elem8 string + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem8 = v + } + p.PathInSchema = append(p.PathInSchema, _elem8) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *EncryptionWithColumnKey) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.KeyMetadata = v + } + return nil } func (p *EncryptionWithColumnKey) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "EncryptionWithColumnKey"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return 
thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "EncryptionWithColumnKey"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EncryptionWithColumnKey) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:path_in_schema: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PathInSchema { - if err := oprot.WriteString(ctx, string(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:path_in_schema: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "path_in_schema", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:path_in_schema: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.PathInSchema)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PathInSchema { + if err := oprot.WriteString(ctx, string(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:path_in_schema: ", p), err) + } + return err } func (p *EncryptionWithColumnKey) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetKeyMetadata() { - if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) } - if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) } - } - return err + if p.IsSetKeyMetadata() { + if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) + } + } + return err } func (p *EncryptionWithColumnKey) Equals(other *EncryptionWithColumnKey) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.PathInSchema) != len(other.PathInSchema) { return false } - for i, _tgt := range p.PathInSchema { - _src9 := other.PathInSchema[i] - if _tgt != _src9 { return false } - } - if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.PathInSchema) != len(other.PathInSchema) { + return false + } + for i, _tgt := range p.PathInSchema { + _src9 := other.PathInSchema[i] + if _tgt != _src9 { + return false + } + } + if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { + return false + } + return true } func (p *EncryptionWithColumnKey) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("EncryptionWithColumnKey(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("EncryptionWithColumnKey(%+v)", *p) } func (p *EncryptionWithColumnKey) Validate() error { - return nil + return nil } + // Attributes: -// - ENCRYPTION_WITH_FOOTER_KEY -// - ENCRYPTION_WITH_COLUMN_KEY +// - ENCRYPTION_WITH_FOOTER_KEY +// - ENCRYPTION_WITH_COLUMN_KEY type ColumnCryptoMetaData struct { - ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"` - ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"` + ENCRYPTION_WITH_FOOTER_KEY *EncryptionWithFooterKey `thrift:"ENCRYPTION_WITH_FOOTER_KEY,1" db:"ENCRYPTION_WITH_FOOTER_KEY" json:"ENCRYPTION_WITH_FOOTER_KEY,omitempty"` + ENCRYPTION_WITH_COLUMN_KEY *EncryptionWithColumnKey `thrift:"ENCRYPTION_WITH_COLUMN_KEY,2" db:"ENCRYPTION_WITH_COLUMN_KEY" json:"ENCRYPTION_WITH_COLUMN_KEY,omitempty"` } func NewColumnCryptoMetaData() 
*ColumnCryptoMetaData { - return &ColumnCryptoMetaData{} + return &ColumnCryptoMetaData{} } var ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT *EncryptionWithFooterKey + func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_FOOTER_KEY() *EncryptionWithFooterKey { - if !p.IsSetENCRYPTION_WITH_FOOTER_KEY() { - return ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT - } -return p.ENCRYPTION_WITH_FOOTER_KEY + if !p.IsSetENCRYPTION_WITH_FOOTER_KEY() { + return ColumnCryptoMetaData_ENCRYPTION_WITH_FOOTER_KEY_DEFAULT + } + return p.ENCRYPTION_WITH_FOOTER_KEY } + var ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT *EncryptionWithColumnKey + func (p *ColumnCryptoMetaData) GetENCRYPTION_WITH_COLUMN_KEY() *EncryptionWithColumnKey { - if !p.IsSetENCRYPTION_WITH_COLUMN_KEY() { - return ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT - } -return p.ENCRYPTION_WITH_COLUMN_KEY + if !p.IsSetENCRYPTION_WITH_COLUMN_KEY() { + return ColumnCryptoMetaData_ENCRYPTION_WITH_COLUMN_KEY_DEFAULT + } + return p.ENCRYPTION_WITH_COLUMN_KEY } func (p *ColumnCryptoMetaData) CountSetFieldsColumnCryptoMetaData() int { - count := 0 - if (p.IsSetENCRYPTION_WITH_FOOTER_KEY()) { - count++ - } - if (p.IsSetENCRYPTION_WITH_COLUMN_KEY()) { - count++ - } - return count + count := 0 + if p.IsSetENCRYPTION_WITH_FOOTER_KEY() { + count++ + } + if p.IsSetENCRYPTION_WITH_COLUMN_KEY() { + count++ + } + return count } func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_FOOTER_KEY() bool { - return p.ENCRYPTION_WITH_FOOTER_KEY != nil + return p.ENCRYPTION_WITH_FOOTER_KEY != nil } func (p *ColumnCryptoMetaData) IsSetENCRYPTION_WITH_COLUMN_KEY() bool { - return p.ENCRYPTION_WITH_COLUMN_KEY != nil + return p.ENCRYPTION_WITH_COLUMN_KEY != nil } func (p *ColumnCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *ColumnCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.ENCRYPTION_WITH_FOOTER_KEY = &EncryptionWithFooterKey{} - if err := p.ENCRYPTION_WITH_FOOTER_KEY.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err) - } - return nil -} - -func (p *ColumnCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.ENCRYPTION_WITH_COLUMN_KEY = &EncryptionWithColumnKey{} - if err := p.ENCRYPTION_WITH_COLUMN_KEY.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error 
reading struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *ColumnCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.ENCRYPTION_WITH_FOOTER_KEY = &EncryptionWithFooterKey{} + if err := p.ENCRYPTION_WITH_FOOTER_KEY.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err) + } + return nil +} + +func (p *ColumnCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.ENCRYPTION_WITH_COLUMN_KEY = &EncryptionWithColumnKey{} + if err := p.ENCRYPTION_WITH_COLUMN_KEY.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err) + } + return nil } func (p *ColumnCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsColumnCryptoMetaData(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "ColumnCryptoMetaData"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsColumnCryptoMetaData(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "ColumnCryptoMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetENCRYPTION_WITH_FOOTER_KEY() { - if err := 
oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_FOOTER_KEY", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) } - if err := p.ENCRYPTION_WITH_FOOTER_KEY.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) } - } - return err + if p.IsSetENCRYPTION_WITH_FOOTER_KEY() { + if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_FOOTER_KEY", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) + } + if err := p.ENCRYPTION_WITH_FOOTER_KEY.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_FOOTER_KEY), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:ENCRYPTION_WITH_FOOTER_KEY: ", p), err) + } + } + return err } func (p *ColumnCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetENCRYPTION_WITH_COLUMN_KEY() { - if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_COLUMN_KEY", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) } - if err := p.ENCRYPTION_WITH_COLUMN_KEY.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) } - } - return err + if p.IsSetENCRYPTION_WITH_COLUMN_KEY() { + if err := oprot.WriteFieldBegin(ctx, "ENCRYPTION_WITH_COLUMN_KEY", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) + } + if err := p.ENCRYPTION_WITH_COLUMN_KEY.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENCRYPTION_WITH_COLUMN_KEY), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:ENCRYPTION_WITH_COLUMN_KEY: ", p), err) + } + } + return err } func (p *ColumnCryptoMetaData) Equals(other *ColumnCryptoMetaData) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.ENCRYPTION_WITH_FOOTER_KEY.Equals(other.ENCRYPTION_WITH_FOOTER_KEY) { return false } - if !p.ENCRYPTION_WITH_COLUMN_KEY.Equals(other.ENCRYPTION_WITH_COLUMN_KEY) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.ENCRYPTION_WITH_FOOTER_KEY.Equals(other.ENCRYPTION_WITH_FOOTER_KEY) { + return false + } + if !p.ENCRYPTION_WITH_COLUMN_KEY.Equals(other.ENCRYPTION_WITH_COLUMN_KEY) { + return false + } + return true } func (p *ColumnCryptoMetaData) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnCryptoMetaData(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnCryptoMetaData(%+v)", *p) } func (p *ColumnCryptoMetaData) Validate() error { - return nil + return nil } + // Attributes: -// - FilePath: File where column 
data is stored. If not set, assumed to be same file as +// - FilePath: File where column data is stored. If not set, assumed to be same file as +// // metadata. This path is relative to the current file. -// -// - FileOffset: Byte offset in file_path to the ColumnMetaData * -// - MetaData: Column metadata for this chunk. This is the same content as what is at +// +// - FileOffset: Byte offset in file_path to the ColumnMetaData * +// - MetaData: Column metadata for this chunk. This is the same content as what is at +// // file_path/file_offset. Having it here has it replicated in the file // metadata. -// -// - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex * -// - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes * -// - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex * -// - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes * -// - CryptoMetadata: Crypto metadata of encrypted columns * -// - EncryptedColumnMetadata: Encrypted column metadata for this chunk * +// +// - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex * +// - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes * +// - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex * +// - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes * +// - CryptoMetadata: Crypto metadata of encrypted columns * +// - EncryptedColumnMetadata: Encrypted column metadata for this chunk * type ColumnChunk struct { - FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"` - FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"` - MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"` - OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"` - OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"` - ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"` - ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"` - CryptoMetadata *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"` - EncryptedColumnMetadata []byte `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"` + FilePath *string `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"` + FileOffset int64 `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"` + MetaData *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"` + OffsetIndexOffset *int64 `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"` + OffsetIndexLength *int32 `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"` + ColumnIndexOffset *int64 `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"` + ColumnIndexLength *int32 `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"` + CryptoMetadata *ColumnCryptoMetaData `thrift:"crypto_metadata,8" db:"crypto_metadata" json:"crypto_metadata,omitempty"` + EncryptedColumnMetadata []byte `thrift:"encrypted_column_metadata,9" db:"encrypted_column_metadata" json:"encrypted_column_metadata,omitempty"` } func NewColumnChunk() *ColumnChunk { - 
return &ColumnChunk{} + return &ColumnChunk{} } var ColumnChunk_FilePath_DEFAULT string + func (p *ColumnChunk) GetFilePath() string { - if !p.IsSetFilePath() { - return ColumnChunk_FilePath_DEFAULT - } -return *p.FilePath + if !p.IsSetFilePath() { + return ColumnChunk_FilePath_DEFAULT + } + return *p.FilePath } func (p *ColumnChunk) GetFileOffset() int64 { - return p.FileOffset + return p.FileOffset } + var ColumnChunk_MetaData_DEFAULT *ColumnMetaData + func (p *ColumnChunk) GetMetaData() *ColumnMetaData { - if !p.IsSetMetaData() { - return ColumnChunk_MetaData_DEFAULT - } -return p.MetaData + if !p.IsSetMetaData() { + return ColumnChunk_MetaData_DEFAULT + } + return p.MetaData } + var ColumnChunk_OffsetIndexOffset_DEFAULT int64 + func (p *ColumnChunk) GetOffsetIndexOffset() int64 { - if !p.IsSetOffsetIndexOffset() { - return ColumnChunk_OffsetIndexOffset_DEFAULT - } -return *p.OffsetIndexOffset + if !p.IsSetOffsetIndexOffset() { + return ColumnChunk_OffsetIndexOffset_DEFAULT + } + return *p.OffsetIndexOffset } + var ColumnChunk_OffsetIndexLength_DEFAULT int32 + func (p *ColumnChunk) GetOffsetIndexLength() int32 { - if !p.IsSetOffsetIndexLength() { - return ColumnChunk_OffsetIndexLength_DEFAULT - } -return *p.OffsetIndexLength + if !p.IsSetOffsetIndexLength() { + return ColumnChunk_OffsetIndexLength_DEFAULT + } + return *p.OffsetIndexLength } + var ColumnChunk_ColumnIndexOffset_DEFAULT int64 + func (p *ColumnChunk) GetColumnIndexOffset() int64 { - if !p.IsSetColumnIndexOffset() { - return ColumnChunk_ColumnIndexOffset_DEFAULT - } -return *p.ColumnIndexOffset + if !p.IsSetColumnIndexOffset() { + return ColumnChunk_ColumnIndexOffset_DEFAULT + } + return *p.ColumnIndexOffset } + var ColumnChunk_ColumnIndexLength_DEFAULT int32 + func (p *ColumnChunk) GetColumnIndexLength() int32 { - if !p.IsSetColumnIndexLength() { - return ColumnChunk_ColumnIndexLength_DEFAULT - } -return *p.ColumnIndexLength + if !p.IsSetColumnIndexLength() { + return ColumnChunk_ColumnIndexLength_DEFAULT + } + return *p.ColumnIndexLength } + var ColumnChunk_CryptoMetadata_DEFAULT *ColumnCryptoMetaData + func (p *ColumnChunk) GetCryptoMetadata() *ColumnCryptoMetaData { - if !p.IsSetCryptoMetadata() { - return ColumnChunk_CryptoMetadata_DEFAULT - } -return p.CryptoMetadata + if !p.IsSetCryptoMetadata() { + return ColumnChunk_CryptoMetadata_DEFAULT + } + return p.CryptoMetadata } + var ColumnChunk_EncryptedColumnMetadata_DEFAULT []byte func (p *ColumnChunk) GetEncryptedColumnMetadata() []byte { - return p.EncryptedColumnMetadata + return p.EncryptedColumnMetadata } func (p *ColumnChunk) IsSetFilePath() bool { - return p.FilePath != nil + return p.FilePath != nil } func (p *ColumnChunk) IsSetMetaData() bool { - return p.MetaData != nil + return p.MetaData != nil } func (p *ColumnChunk) IsSetOffsetIndexOffset() bool { - return p.OffsetIndexOffset != nil + return p.OffsetIndexOffset != nil } func (p *ColumnChunk) IsSetOffsetIndexLength() bool { - return p.OffsetIndexLength != nil + return p.OffsetIndexLength != nil } func (p *ColumnChunk) IsSetColumnIndexOffset() bool { - return p.ColumnIndexOffset != nil + return p.ColumnIndexOffset != nil } func (p *ColumnChunk) IsSetColumnIndexLength() bool { - return p.ColumnIndexLength != nil + return p.ColumnIndexLength != nil } func (p *ColumnChunk) IsSetCryptoMetadata() bool { - return p.CryptoMetadata != nil + return p.CryptoMetadata != nil } func (p *ColumnChunk) IsSetEncryptedColumnMetadata() bool { - return p.EncryptedColumnMetadata != nil + return p.EncryptedColumnMetadata != nil } 
func (p *ColumnChunk) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetFileOffset bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I64 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetFileOffset = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I64 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I32 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I64 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I32 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.STRING { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetFileOffset{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set")); - } - return nil -} - -func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.FilePath = &v -} - return nil -} - -func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.FileOffset = v -} - return nil -} - -func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - p.MetaData = &ColumnMetaData{} - if err := p.MetaData.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err) - } - return nil -} - -func (p 
*ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - p.OffsetIndexOffset = &v -} - return nil -} - -func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.OffsetIndexLength = &v -} - return nil -} - -func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.ColumnIndexOffset = &v -} - return nil -} - -func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.ColumnIndexLength = &v -} - return nil -} - -func (p *ColumnChunk) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.CryptoMetadata = &ColumnCryptoMetaData{} - if err := p.CryptoMetadata.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.CryptoMetadata), err) - } - return nil -} - -func (p *ColumnChunk) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.EncryptedColumnMetadata = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetFileOffset bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I64 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetFileOffset = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I64 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I32 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I64 { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I32 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, 
fieldTypeId); err != nil { + return err + } + } + case 9: + if fieldTypeId == thrift.STRING { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetFileOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set")) + } + return nil +} + +func (p *ColumnChunk) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.FilePath = &v + } + return nil +} + +func (p *ColumnChunk) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.FileOffset = v + } + return nil +} + +func (p *ColumnChunk) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + p.MetaData = &ColumnMetaData{} + if err := p.MetaData.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err) + } + return nil +} + +func (p *ColumnChunk) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.OffsetIndexOffset = &v + } + return nil +} + +func (p *ColumnChunk) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.OffsetIndexLength = &v + } + return nil +} + +func (p *ColumnChunk) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.ColumnIndexOffset = &v + } + return nil +} + +func (p *ColumnChunk) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.ColumnIndexLength = &v + } + return nil +} + +func (p *ColumnChunk) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { + p.CryptoMetadata = &ColumnCryptoMetaData{} + if err := p.CryptoMetadata.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.CryptoMetadata), err) + } + return nil +} + +func (p *ColumnChunk) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { + p.EncryptedColumnMetadata = v + } + return nil } func (p *ColumnChunk) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ColumnChunk"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := 
p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - if err := p.writeField8(ctx, oprot); err != nil { return err } - if err := p.writeField9(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ColumnChunk"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + if err := p.writeField8(ctx, oprot); err != nil { + return err + } + if err := p.writeField9(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnChunk) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFilePath() { - if err := oprot.WriteFieldBegin(ctx, "file_path", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err) } - if err := oprot.WriteString(ctx, string(*p.FilePath)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err) } - } - return err + if p.IsSetFilePath() { + if err := oprot.WriteFieldBegin(ctx, "file_path", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err) + } + if err := oprot.WriteString(ctx, string(*p.FilePath)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.FileOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 2); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.FileOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err) + } + return err } func (p *ColumnChunk) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetMetaData() { - if err := oprot.WriteFieldBegin(ctx, "meta_data", thrift.STRUCT, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err) } - if err := p.MetaData.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err) } - } - return err + if p.IsSetMetaData() { + if err := oprot.WriteFieldBegin(ctx, "meta_data", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err) + } + if err := p.MetaData.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetOffsetIndexOffset() { - if err := oprot.WriteFieldBegin(ctx, "offset_index_offset", thrift.I64, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.OffsetIndexOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err) } - } - return err + if p.IsSetOffsetIndexOffset() { + if err := oprot.WriteFieldBegin(ctx, "offset_index_offset", thrift.I64, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.OffsetIndexOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetOffsetIndexLength() { - if err := oprot.WriteFieldBegin(ctx, "offset_index_length", thrift.I32, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.OffsetIndexLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err) } - } - return err + if p.IsSetOffsetIndexLength() { + if err := oprot.WriteFieldBegin(ctx, 
"offset_index_length", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.OffsetIndexLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetColumnIndexOffset() { - if err := oprot.WriteFieldBegin(ctx, "column_index_offset", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.ColumnIndexOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err) } - } - return err + if p.IsSetColumnIndexOffset() { + if err := oprot.WriteFieldBegin(ctx, "column_index_offset", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.ColumnIndexOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetColumnIndexLength() { - if err := oprot.WriteFieldBegin(ctx, "column_index_length", thrift.I32, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err) } - if err := oprot.WriteI32(ctx, int32(*p.ColumnIndexLength)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err) } - } - return err + if p.IsSetColumnIndexLength() { + if err := oprot.WriteFieldBegin(ctx, "column_index_length", thrift.I32, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(*p.ColumnIndexLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetCryptoMetadata() { - if err := oprot.WriteFieldBegin(ctx, "crypto_metadata", thrift.STRUCT, 8); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:crypto_metadata: ", p), err) } - if err := p.CryptoMetadata.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.CryptoMetadata), err) - } - if err := oprot.WriteFieldEnd(ctx); err 
!= nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 8:crypto_metadata: ", p), err) } - } - return err + if p.IsSetCryptoMetadata() { + if err := oprot.WriteFieldBegin(ctx, "crypto_metadata", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:crypto_metadata: ", p), err) + } + if err := p.CryptoMetadata.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.CryptoMetadata), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:crypto_metadata: ", p), err) + } + } + return err } func (p *ColumnChunk) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetEncryptedColumnMetadata() { - if err := oprot.WriteFieldBegin(ctx, "encrypted_column_metadata", thrift.STRING, 9); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:encrypted_column_metadata: ", p), err) } - if err := oprot.WriteBinary(ctx, p.EncryptedColumnMetadata); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.encrypted_column_metadata (9) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 9:encrypted_column_metadata: ", p), err) } - } - return err + if p.IsSetEncryptedColumnMetadata() { + if err := oprot.WriteFieldBegin(ctx, "encrypted_column_metadata", thrift.STRING, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:encrypted_column_metadata: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.EncryptedColumnMetadata); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encrypted_column_metadata (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:encrypted_column_metadata: ", p), err) + } + } + return err } func (p *ColumnChunk) Equals(other *ColumnChunk) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.FilePath != other.FilePath { - if p.FilePath == nil || other.FilePath == nil { - return false - } - if (*p.FilePath) != (*other.FilePath) { return false } - } - if p.FileOffset != other.FileOffset { return false } - if !p.MetaData.Equals(other.MetaData) { return false } - if p.OffsetIndexOffset != other.OffsetIndexOffset { - if p.OffsetIndexOffset == nil || other.OffsetIndexOffset == nil { - return false - } - if (*p.OffsetIndexOffset) != (*other.OffsetIndexOffset) { return false } - } - if p.OffsetIndexLength != other.OffsetIndexLength { - if p.OffsetIndexLength == nil || other.OffsetIndexLength == nil { - return false - } - if (*p.OffsetIndexLength) != (*other.OffsetIndexLength) { return false } - } - if p.ColumnIndexOffset != other.ColumnIndexOffset { - if p.ColumnIndexOffset == nil || other.ColumnIndexOffset == nil { - return false - } - if (*p.ColumnIndexOffset) != (*other.ColumnIndexOffset) { return false } - } - if p.ColumnIndexLength != other.ColumnIndexLength { - if p.ColumnIndexLength == nil || other.ColumnIndexLength == nil { - return false - } - if (*p.ColumnIndexLength) != (*other.ColumnIndexLength) { return false } - } - if !p.CryptoMetadata.Equals(other.CryptoMetadata) { return false } - if bytes.Compare(p.EncryptedColumnMetadata, other.EncryptedColumnMetadata) != 0 { return false } - return true + if p == other { + return true + } else if p == nil || 
other == nil { + return false + } + if p.FilePath != other.FilePath { + if p.FilePath == nil || other.FilePath == nil { + return false + } + if (*p.FilePath) != (*other.FilePath) { + return false + } + } + if p.FileOffset != other.FileOffset { + return false + } + if !p.MetaData.Equals(other.MetaData) { + return false + } + if p.OffsetIndexOffset != other.OffsetIndexOffset { + if p.OffsetIndexOffset == nil || other.OffsetIndexOffset == nil { + return false + } + if (*p.OffsetIndexOffset) != (*other.OffsetIndexOffset) { + return false + } + } + if p.OffsetIndexLength != other.OffsetIndexLength { + if p.OffsetIndexLength == nil || other.OffsetIndexLength == nil { + return false + } + if (*p.OffsetIndexLength) != (*other.OffsetIndexLength) { + return false + } + } + if p.ColumnIndexOffset != other.ColumnIndexOffset { + if p.ColumnIndexOffset == nil || other.ColumnIndexOffset == nil { + return false + } + if (*p.ColumnIndexOffset) != (*other.ColumnIndexOffset) { + return false + } + } + if p.ColumnIndexLength != other.ColumnIndexLength { + if p.ColumnIndexLength == nil || other.ColumnIndexLength == nil { + return false + } + if (*p.ColumnIndexLength) != (*other.ColumnIndexLength) { + return false + } + } + if !p.CryptoMetadata.Equals(other.CryptoMetadata) { + return false + } + if bytes.Compare(p.EncryptedColumnMetadata, other.EncryptedColumnMetadata) != 0 { + return false + } + return true } func (p *ColumnChunk) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnChunk(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnChunk(%+v)", *p) } func (p *ColumnChunk) Validate() error { - return nil + return nil } + // Attributes: -// - Columns: Metadata for each column chunk in this row group. +// - Columns: Metadata for each column chunk in this row group. +// // This list must have the same order as the SchemaElement list in FileMetaData. -// -// - TotalByteSize: Total byte size of all the uncompressed column data in this row group * -// - NumRows: Number of rows in this row group * -// - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. +// +// - TotalByteSize: Total byte size of all the uncompressed column data in this row group * +// - NumRows: Number of rows in this row group * +// - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. +// // The sorting columns can be a subset of all the columns. 
-// - FileOffset: Byte offset from beginning of file to first page (data or dictionary) +// - FileOffset: Byte offset from beginning of file to first page (data or dictionary) +// // in this row group * -// - TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data +// - TotalCompressedSize: Total byte size of all compressed (and potentially encrypted) column data +// // in this row group * -// - Ordinal: Row group ordinal in the file * +// - Ordinal: Row group ordinal in the file * type RowGroup struct { - Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"` - TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"` - NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"` - FileOffset *int64 `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"` - TotalCompressedSize *int64 `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"` - Ordinal *int16 `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"` + Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"` + TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"` + NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"` + FileOffset *int64 `thrift:"file_offset,5" db:"file_offset" json:"file_offset,omitempty"` + TotalCompressedSize *int64 `thrift:"total_compressed_size,6" db:"total_compressed_size" json:"total_compressed_size,omitempty"` + Ordinal *int16 `thrift:"ordinal,7" db:"ordinal" json:"ordinal,omitempty"` } func NewRowGroup() *RowGroup { - return &RowGroup{} + return &RowGroup{} } - func (p *RowGroup) GetColumns() []*ColumnChunk { - return p.Columns + return p.Columns } func (p *RowGroup) GetTotalByteSize() int64 { - return p.TotalByteSize + return p.TotalByteSize } func (p *RowGroup) GetNumRows() int64 { - return p.NumRows + return p.NumRows } + var RowGroup_SortingColumns_DEFAULT []*SortingColumn func (p *RowGroup) GetSortingColumns() []*SortingColumn { - return p.SortingColumns + return p.SortingColumns } + var RowGroup_FileOffset_DEFAULT int64 + func (p *RowGroup) GetFileOffset() int64 { - if !p.IsSetFileOffset() { - return RowGroup_FileOffset_DEFAULT - } -return *p.FileOffset + if !p.IsSetFileOffset() { + return RowGroup_FileOffset_DEFAULT + } + return *p.FileOffset } + var RowGroup_TotalCompressedSize_DEFAULT int64 + func (p *RowGroup) GetTotalCompressedSize() int64 { - if !p.IsSetTotalCompressedSize() { - return RowGroup_TotalCompressedSize_DEFAULT - } -return *p.TotalCompressedSize + if !p.IsSetTotalCompressedSize() { + return RowGroup_TotalCompressedSize_DEFAULT + } + return *p.TotalCompressedSize } + var RowGroup_Ordinal_DEFAULT int16 + func (p *RowGroup) GetOrdinal() int16 { - if !p.IsSetOrdinal() { - return RowGroup_Ordinal_DEFAULT - } -return *p.Ordinal + if !p.IsSetOrdinal() { + return RowGroup_Ordinal_DEFAULT + } + return *p.Ordinal } func (p *RowGroup) IsSetSortingColumns() bool { - return p.SortingColumns != nil + return p.SortingColumns != nil } func (p *RowGroup) IsSetFileOffset() bool { - return p.FileOffset != nil + return p.FileOffset != nil } func (p *RowGroup) IsSetTotalCompressedSize() bool { - 
return p.TotalCompressedSize != nil + return p.TotalCompressedSize != nil } func (p *RowGroup) IsSetOrdinal() bool { - return p.Ordinal != nil + return p.Ordinal != nil } func (p *RowGroup) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetColumns bool = false; - var issetTotalByteSize bool = false; - var issetNumRows bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetColumns = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I64 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetTotalByteSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNumRows = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.LIST { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.I64 { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.I64 { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.I16 { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetColumns{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set")); - } - if !issetTotalByteSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set")); - } - if !issetNumRows{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")); - } - return nil -} - -func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*ColumnChunk, 0, size) - p.Columns = tSlice - for i := 0; i < size; i ++ { - _elem10 := &ColumnChunk{} - if err := _elem10.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem10), err) - } - p.Columns = append(p.Columns, _elem10) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: 
", err) - } - return nil -} - -func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.TotalByteSize = v -} - return nil -} - -func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NumRows = v -} - return nil -} - -func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*SortingColumn, 0, size) - p.SortingColumns = tSlice - for i := 0; i < size; i ++ { - _elem11 := &SortingColumn{} - if err := _elem11.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err) - } - p.SortingColumns = append(p.SortingColumns, _elem11) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *RowGroup) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 5: ", err) -} else { - p.FileOffset = &v -} - return nil -} - -func (p *RowGroup) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.TotalCompressedSize = &v -} - return nil -} - -func (p *RowGroup) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI16(ctx); err != nil { - return thrift.PrependError("error reading field 7: ", err) -} else { - p.Ordinal = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetColumns bool = false + var issetTotalByteSize bool = false + var issetNumRows bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetColumns = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I64 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetTotalByteSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNumRows = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.LIST { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.I64 { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.I64 { + if err := p.ReadField6(ctx, iprot); err 
!= nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.I16 { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetColumns { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set")) + } + if !issetTotalByteSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + return nil +} + +func (p *RowGroup) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*ColumnChunk, 0, size) + p.Columns = tSlice + for i := 0; i < size; i++ { + _elem10 := &ColumnChunk{} + if err := _elem10.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem10), err) + } + p.Columns = append(p.Columns, _elem10) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *RowGroup) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.TotalByteSize = v + } + return nil +} + +func (p *RowGroup) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *RowGroup) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*SortingColumn, 0, size) + p.SortingColumns = tSlice + for i := 0; i < size; i++ { + _elem11 := &SortingColumn{} + if err := _elem11.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err) + } + p.SortingColumns = append(p.SortingColumns, _elem11) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *RowGroup) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.FileOffset = &v + } + return nil +} + +func (p *RowGroup) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.TotalCompressedSize = &v + } + return nil +} + +func (p *RowGroup) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI16(ctx); err != nil { + return thrift.PrependError("error reading 
field 7: ", err) + } else { + p.Ordinal = &v + } + return nil } func (p *RowGroup) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "RowGroup"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - if err := p.writeField6(ctx, oprot); err != nil { return err } - if err := p.writeField7(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "RowGroup"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + if err := p.writeField6(ctx, oprot); err != nil { + return err + } + if err := p.writeField7(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *RowGroup) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "columns", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Columns)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.Columns { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "columns", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Columns)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Columns { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) + } + return err } func (p 
*RowGroup) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "total_byte_size", thrift.I64, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.TotalByteSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "total_byte_size", thrift.I64, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.TotalByteSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) + } + return err } func (p *RowGroup) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err } func (p *RowGroup) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSortingColumns() { - if err := oprot.WriteFieldBegin(ctx, "sorting_columns", thrift.LIST, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.SortingColumns)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.SortingColumns { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) } - } - return err + if p.IsSetSortingColumns() { + if err := oprot.WriteFieldBegin(ctx, "sorting_columns", thrift.LIST, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.SortingColumns)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.SortingColumns { + if 
err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) + } + } + return err } func (p *RowGroup) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetFileOffset() { - if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:file_offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.FileOffset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.file_offset (5) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:file_offset: ", p), err) } - } - return err + if p.IsSetFileOffset() { + if err := oprot.WriteFieldBegin(ctx, "file_offset", thrift.I64, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:file_offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.FileOffset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.file_offset (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:file_offset: ", p), err) + } + } + return err } func (p *RowGroup) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTotalCompressedSize() { - if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 6); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_compressed_size: ", p), err) } - if err := oprot.WriteI64(ctx, int64(*p.TotalCompressedSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (6) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_compressed_size: ", p), err) } - } - return err + if p.IsSetTotalCompressedSize() { + if err := oprot.WriteFieldBegin(ctx, "total_compressed_size", thrift.I64, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_compressed_size: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(*p.TotalCompressedSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_compressed_size: ", p), err) + } + } + return err } func (p *RowGroup) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetOrdinal() { - if err := oprot.WriteFieldBegin(ctx, "ordinal", thrift.I16, 7); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:ordinal: ", p), err) } - if err := oprot.WriteI16(ctx, int16(*p.Ordinal)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.ordinal (7) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 7:ordinal: ", p), err) } - } - return err + if p.IsSetOrdinal() { + if err := oprot.WriteFieldBegin(ctx, "ordinal", thrift.I16, 
7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:ordinal: ", p), err) + } + if err := oprot.WriteI16(ctx, int16(*p.Ordinal)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.ordinal (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:ordinal: ", p), err) + } + } + return err } func (p *RowGroup) Equals(other *RowGroup) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.Columns) != len(other.Columns) { return false } - for i, _tgt := range p.Columns { - _src12 := other.Columns[i] - if !_tgt.Equals(_src12) { return false } - } - if p.TotalByteSize != other.TotalByteSize { return false } - if p.NumRows != other.NumRows { return false } - if len(p.SortingColumns) != len(other.SortingColumns) { return false } - for i, _tgt := range p.SortingColumns { - _src13 := other.SortingColumns[i] - if !_tgt.Equals(_src13) { return false } - } - if p.FileOffset != other.FileOffset { - if p.FileOffset == nil || other.FileOffset == nil { - return false - } - if (*p.FileOffset) != (*other.FileOffset) { return false } - } - if p.TotalCompressedSize != other.TotalCompressedSize { - if p.TotalCompressedSize == nil || other.TotalCompressedSize == nil { - return false - } - if (*p.TotalCompressedSize) != (*other.TotalCompressedSize) { return false } - } - if p.Ordinal != other.Ordinal { - if p.Ordinal == nil || other.Ordinal == nil { - return false - } - if (*p.Ordinal) != (*other.Ordinal) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.Columns) != len(other.Columns) { + return false + } + for i, _tgt := range p.Columns { + _src12 := other.Columns[i] + if !_tgt.Equals(_src12) { + return false + } + } + if p.TotalByteSize != other.TotalByteSize { + return false + } + if p.NumRows != other.NumRows { + return false + } + if len(p.SortingColumns) != len(other.SortingColumns) { + return false + } + for i, _tgt := range p.SortingColumns { + _src13 := other.SortingColumns[i] + if !_tgt.Equals(_src13) { + return false + } + } + if p.FileOffset != other.FileOffset { + if p.FileOffset == nil || other.FileOffset == nil { + return false + } + if (*p.FileOffset) != (*other.FileOffset) { + return false + } + } + if p.TotalCompressedSize != other.TotalCompressedSize { + if p.TotalCompressedSize == nil || other.TotalCompressedSize == nil { + return false + } + if (*p.TotalCompressedSize) != (*other.TotalCompressedSize) { + return false + } + } + if p.Ordinal != other.Ordinal { + if p.Ordinal == nil || other.Ordinal == nil { + return false + } + if (*p.Ordinal) != (*other.Ordinal) { + return false + } + } + return true } func (p *RowGroup) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("RowGroup(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("RowGroup(%+v)", *p) } func (p *RowGroup) Validate() error { - return nil + return nil } + // Empty struct to signal the order defined by the physical or logical type type TypeDefinedOrder struct { } func NewTypeDefinedOrder() *TypeDefinedOrder { - return &TypeDefinedOrder{} + return &TypeDefinedOrder{} } func (p *TypeDefinedOrder) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err 
:= iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil } func (p *TypeDefinedOrder) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "TypeDefinedOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "TypeDefinedOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *TypeDefinedOrder) Equals(other *TypeDefinedOrder) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + return true } func (p *TypeDefinedOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) } func (p *TypeDefinedOrder) Validate() error { - return nil + return nil } + // Union to specify the order used for the min_value and max_value fields for a // column. This union takes the role of an enhanced enum that allows rich // elements (which will be needed for a collation-based ordering in the future). -// +// // Possible values are: -// * TypeDefinedOrder - the column uses the order defined by its logical or -// physical type (if there is no logical type). -// +// - TypeDefinedOrder - the column uses the order defined by its logical or +// physical type (if there is no logical type). +// // If the reader does not support the value of this union, min and max stats // for this column should be ignored. 
-// +// // Attributes: -// - TYPE_ORDER: The sort orders for logical types are: -// UTF8 - unsigned byte-wise comparison -// INT8 - signed comparison -// INT16 - signed comparison -// INT32 - signed comparison -// INT64 - signed comparison -// UINT8 - unsigned comparison -// UINT16 - unsigned comparison -// UINT32 - unsigned comparison -// UINT64 - unsigned comparison -// DECIMAL - signed comparison of the represented value -// DATE - signed comparison -// TIME_MILLIS - signed comparison -// TIME_MICROS - signed comparison -// TIMESTAMP_MILLIS - signed comparison -// TIMESTAMP_MICROS - signed comparison -// INTERVAL - unsigned comparison -// JSON - unsigned byte-wise comparison -// BSON - unsigned byte-wise comparison -// ENUM - unsigned byte-wise comparison -// LIST - undefined -// MAP - undefined -// +// - TYPE_ORDER: The sort orders for logical types are: +// UTF8 - unsigned byte-wise comparison +// INT8 - signed comparison +// INT16 - signed comparison +// INT32 - signed comparison +// INT64 - signed comparison +// UINT8 - unsigned comparison +// UINT16 - unsigned comparison +// UINT32 - unsigned comparison +// UINT64 - unsigned comparison +// DECIMAL - signed comparison of the represented value +// DATE - signed comparison +// TIME_MILLIS - signed comparison +// TIME_MICROS - signed comparison +// TIMESTAMP_MILLIS - signed comparison +// TIMESTAMP_MICROS - signed comparison +// INTERVAL - unsigned comparison +// JSON - unsigned byte-wise comparison +// BSON - unsigned byte-wise comparison +// ENUM - unsigned byte-wise comparison +// LIST - undefined +// MAP - undefined +// // In the absence of logical types, the sort order is determined by the physical type: -// BOOLEAN - false, true -// INT32 - signed comparison -// INT64 - signed comparison -// INT96 (only used for legacy timestamps) - undefined -// FLOAT - signed comparison of the represented value (*) -// DOUBLE - signed comparison of the represented value (*) -// BYTE_ARRAY - unsigned byte-wise comparison -// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison -// +// +// BOOLEAN - false, true +// INT32 - signed comparison +// INT64 - signed comparison +// INT96 (only used for legacy timestamps) - undefined +// FLOAT - signed comparison of the represented value (*) +// DOUBLE - signed comparison of the represented value (*) +// BYTE_ARRAY - unsigned byte-wise comparison +// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison +// // (*) Because the sorting order is not specified properly for floating -// point values (relations vs. total ordering) the following -// compatibility rules should be applied when reading statistics: -// - If the min is a NaN, it should be ignored. -// - If the max is a NaN, it should be ignored. -// - If the min is +0, the row group may contain -0 values as well. -// - If the max is -0, the row group may contain +0 values as well. -// - When looking for NaN values, min and max should be ignored. -// -// When writing statistics the following rules should be followed: -// - NaNs should not be written to min or max statistics fields. -// - If the computed max value is zero (whether negative or positive), -// `+0.0` should be written into the max statistics field. -// - If the computed min value is zero (whether negative or positive), -// `-0.0` should be written into the min statistics field. +// +// point values (relations vs. total ordering) the following +// compatibility rules should be applied when reading statistics: +// - If the min is a NaN, it should be ignored. 
+// - If the max is a NaN, it should be ignored. +// - If the min is +0, the row group may contain -0 values as well. +// - If the max is -0, the row group may contain +0 values as well. +// - When looking for NaN values, min and max should be ignored. +// +// When writing statistics the following rules should be followed: +// - NaNs should not be written to min or max statistics fields. +// - If the computed max value is zero (whether negative or positive), +// `+0.0` should be written into the max statistics field. +// - If the computed min value is zero (whether negative or positive), +// `-0.0` should be written into the min statistics field. type ColumnOrder struct { - TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` + TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` } func NewColumnOrder() *ColumnOrder { - return &ColumnOrder{} + return &ColumnOrder{} } var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder + func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder { - if !p.IsSetTYPE_ORDER() { - return ColumnOrder_TYPE_ORDER_DEFAULT - } -return p.TYPE_ORDER + if !p.IsSetTYPE_ORDER() { + return ColumnOrder_TYPE_ORDER_DEFAULT + } + return p.TYPE_ORDER } func (p *ColumnOrder) CountSetFieldsColumnOrder() int { - count := 0 - if (p.IsSetTYPE_ORDER()) { - count++ - } - return count + count := 0 + if p.IsSetTYPE_ORDER() { + count++ + } + return count } func (p *ColumnOrder) IsSetTYPE_ORDER() bool { - return p.TYPE_ORDER != nil + return p.TYPE_ORDER != nil } func (p *ColumnOrder) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.TYPE_ORDER = &TypeDefinedOrder{} - if err := p.TYPE_ORDER.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } 
+ } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *ColumnOrder) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.TYPE_ORDER = &TypeDefinedOrder{} + if err := p.TYPE_ORDER.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) + } + return nil } func (p *ColumnOrder) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsColumnOrder(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "ColumnOrder"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsColumnOrder(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "ColumnOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *ColumnOrder) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetTYPE_ORDER() { - if err := oprot.WriteFieldBegin(ctx, "TYPE_ORDER", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) } - if err := p.TYPE_ORDER.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) } - } - return err + if p.IsSetTYPE_ORDER() { + if err := oprot.WriteFieldBegin(ctx, "TYPE_ORDER", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) + } + if err := p.TYPE_ORDER.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) + } + } + return err } func (p *ColumnOrder) Equals(other *ColumnOrder) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.TYPE_ORDER.Equals(other.TYPE_ORDER) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.TYPE_ORDER.Equals(other.TYPE_ORDER) { + return false + } + return true } func (p *ColumnOrder) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnOrder(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnOrder(%+v)", *p) } func (p *ColumnOrder) Validate() 
error { - return nil + return nil } + // Attributes: -// - Offset: Offset of the page in the file * -// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header +// - Offset: Offset of the page in the file * +// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header +// // length -// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages +// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages +// // change on record boundaries (r = 0). type PageLocation struct { - Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` - CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` - FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` + Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` + CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` + FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` } func NewPageLocation() *PageLocation { - return &PageLocation{} + return &PageLocation{} } - func (p *PageLocation) GetOffset() int64 { - return p.Offset + return p.Offset } func (p *PageLocation) GetCompressedPageSize() int32 { - return p.CompressedPageSize + return p.CompressedPageSize } func (p *PageLocation) GetFirstRowIndex() int64 { - return p.FirstRowIndex + return p.FirstRowIndex } func (p *PageLocation) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetOffset bool = false; - var issetCompressedPageSize bool = false; - var issetFirstRowIndex bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I64 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetOffset = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.I32 { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetCompressedPageSize = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetFirstRowIndex = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetOffset{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")); - } - if !issetCompressedPageSize{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")); - } - if !issetFirstRowIndex{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, 
fmt.Errorf("Required field FirstRowIndex is not set")); - } - return nil -} - -func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Offset = v -} - return nil -} - -func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.CompressedPageSize = v -} - return nil -} - -func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.FirstRowIndex = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetOffset bool = false + var issetCompressedPageSize bool = false + var issetFirstRowIndex bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I64 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetOffset = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.I32 { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetCompressedPageSize = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetFirstRowIndex = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")) + } + if !issetCompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + if !issetFirstRowIndex { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FirstRowIndex is not set")) + } + return nil +} + +func (p *PageLocation) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Offset = v + } + return nil +} + +func (p *PageLocation) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageLocation) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.FirstRowIndex = v + } + return nil } func (p *PageLocation) Write(ctx context.Context, 
oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "PageLocation"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "PageLocation"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *PageLocation) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "offset", thrift.I64, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.Offset)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "offset", thrift.I64, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.Offset)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) + } + return err } func (p *PageLocation) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "compressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) + } + return err } func (p 
*PageLocation) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "first_row_index", thrift.I64, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) } - if err := oprot.WriteI64(ctx, int64(p.FirstRowIndex)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "first_row_index", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) + } + if err := oprot.WriteI64(ctx, int64(p.FirstRowIndex)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) + } + return err } func (p *PageLocation) Equals(other *PageLocation) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if p.Offset != other.Offset { return false } - if p.CompressedPageSize != other.CompressedPageSize { return false } - if p.FirstRowIndex != other.FirstRowIndex { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if p.Offset != other.Offset { + return false + } + if p.CompressedPageSize != other.CompressedPageSize { + return false + } + if p.FirstRowIndex != other.FirstRowIndex { + return false + } + return true } func (p *PageLocation) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("PageLocation(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("PageLocation(%+v)", *p) } func (p *PageLocation) Validate() error { - return nil + return nil } + // Attributes: -// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required +// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required +// // that page_locations[i].first_row_index < page_locations[i+1].first_row_index. 
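The two invariants stated in this comment (page_locations ordered by increasing offset, and strictly increasing first_row_index) are what let a reader resolve a row number to its page with a binary search rather than a linear scan. A hedged sketch of that lookup against the generated PageLocation type above; findPageForRow is an illustrative helper, not part of the generated API, and assumes the standard library "sort" package is imported:

	// findPageForRow returns the index of the page containing rowIndex,
	// or -1 if rowIndex precedes the first page. It relies on the
	// OffsetIndex invariant that
	// page_locations[i].first_row_index < page_locations[i+1].first_row_index.
	func findPageForRow(locations []*PageLocation, rowIndex int64) int {
		// sort.Search finds the smallest index whose FirstRowIndex
		// exceeds rowIndex...
		i := sort.Search(len(locations), func(i int) bool {
			return locations[i].FirstRowIndex > rowIndex
		})
		// ...so the page containing rowIndex is the one just before it.
		return i - 1
	}
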
type OffsetIndex struct { - PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` + PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` } func NewOffsetIndex() *OffsetIndex { - return &OffsetIndex{} + return &OffsetIndex{} } - func (p *OffsetIndex) GetPageLocations() []*PageLocation { - return p.PageLocations + return p.PageLocations } func (p *OffsetIndex) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetPageLocations bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetPageLocations = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetPageLocations{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")); - } - return nil -} - -func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*PageLocation, 0, size) - p.PageLocations = tSlice - for i := 0; i < size; i ++ { - _elem14 := &PageLocation{} - if err := _elem14.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) - } - p.PageLocations = append(p.PageLocations, _elem14) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageLocations bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetPageLocations = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageLocations { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")) + } + return nil +} + +func (p *OffsetIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { 
+ return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*PageLocation, 0, size) + p.PageLocations = tSlice + for i := 0; i < size; i++ { + _elem14 := &PageLocation{} + if err := _elem14.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) + } + p.PageLocations = append(p.PageLocations, _elem14) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil } func (p *OffsetIndex) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "OffsetIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "OffsetIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *OffsetIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "page_locations", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.PageLocations)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.PageLocations { - if err := v.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) - } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "page_locations", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.PageLocations)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PageLocations { + if err := v.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) + } + return err } func (p *OffsetIndex) Equals(other *OffsetIndex) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.PageLocations) != len(other.PageLocations) { return false } - for i, _tgt := range p.PageLocations { - _src15 := 
other.PageLocations[i] - if !_tgt.Equals(_src15) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.PageLocations) != len(other.PageLocations) { + return false + } + for i, _tgt := range p.PageLocations { + _src15 := other.PageLocations[i] + if !_tgt.Equals(_src15) { + return false + } + } + return true } func (p *OffsetIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("OffsetIndex(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("OffsetIndex(%+v)", *p) } func (p *OffsetIndex) Validate() error { - return nil + return nil } + // Description for ColumnIndex. // Each [i] refers to the page at OffsetIndex.page_locations[i] -// +// // Attributes: -// - NullPages: A list of Boolean values to determine the validity of the corresponding +// - NullPages: A list of Boolean values to determine the validity of the corresponding +// // min and max values. If true, a page contains only null values, and writers // have to set the corresponding entries in min_values and max_values to // byte[0], so that all lists have the same length. If false, the // corresponding entries in min_values and max_values must be valid. -// - MinValues: Two lists containing lower and upper bounds for the values of each page +// - MinValues: Two lists containing lower and upper bounds for the values of each page +// // determined by the ColumnOrder of the column. These may be the actual // minimum and maximum values found on a page, but can also be (more compact) // values that do not exist on a page. For example, instead of storing ""Blart @@ -9528,1722 +10840,1957 @@ func (p *OffsetIndex) Validate() error { // Such more compact values must still be valid values within the column's // logical type. Readers must make sure that list entries are populated before // using them by inspecting null_pages. -// - MaxValues -// - BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in +// - MaxValues +// - BoundaryOrder: Stores whether both min_values and max_values are ordered and if so, in +// // which direction. This allows readers to perform binary searches in both // lists. Readers cannot assume that max_values[i] <= min_values[i+1], even // if the lists are ordered. 
-// - NullCounts: A list containing the number of null values for each page * +// - NullCounts: A list containing the number of null values for each page * type ColumnIndex struct { - NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` - MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` - MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` - BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` - NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` + NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` + MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` + MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` + BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` + NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` } func NewColumnIndex() *ColumnIndex { - return &ColumnIndex{} + return &ColumnIndex{} } - func (p *ColumnIndex) GetNullPages() []bool { - return p.NullPages + return p.NullPages } func (p *ColumnIndex) GetMinValues() [][]byte { - return p.MinValues + return p.MinValues } func (p *ColumnIndex) GetMaxValues() [][]byte { - return p.MaxValues + return p.MaxValues } func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder { - return p.BoundaryOrder + return p.BoundaryOrder } + var ColumnIndex_NullCounts_DEFAULT []int64 func (p *ColumnIndex) GetNullCounts() []int64 { - return p.NullCounts + return p.NullCounts } func (p *ColumnIndex) IsSetNullCounts() bool { - return p.NullCounts != nil + return p.NullCounts != nil } func (p *ColumnIndex) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetNullPages bool = false; - var issetMinValues bool = false; - var issetMaxValues bool = false; - var issetBoundaryOrder bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.LIST { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetNullPages = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetMinValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.LIST { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetMaxValues = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.I32 { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetBoundaryOrder = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.LIST { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := 
iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetNullPages{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")); - } - if !issetMinValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")); - } - if !issetMaxValues{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")); - } - if !issetBoundaryOrder{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")); - } - return nil -} - -func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]bool, 0, size) - p.NullPages = tSlice - for i := 0; i < size; i ++ { -var _elem16 bool - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem16 = v -} - p.NullPages = append(p.NullPages, _elem16) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MinValues = tSlice - for i := 0; i < size; i ++ { -var _elem17 []byte - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem17 = v -} - p.MinValues = append(p.MinValues, _elem17) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([][]byte, 0, size) - p.MaxValues = tSlice - for i := 0; i < size; i ++ { -var _elem18 []byte - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem18 = v -} - p.MaxValues = append(p.MaxValues, _elem18) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 4: ", err) -} else { - temp := BoundaryOrder(v) - p.BoundaryOrder = temp -} - return nil -} - -func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]int64, 0, size) - p.NullCounts = tSlice - for i := 0; i < size; i ++ { -var _elem19 int64 - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 0: ", err) -} else { - _elem19 = v -} - 
p.NullCounts = append(p.NullCounts, _elem19) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNullPages bool = false + var issetMinValues bool = false + var issetMaxValues bool = false + var issetBoundaryOrder bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.LIST { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetNullPages = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetMinValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.LIST { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetMaxValues = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.I32 { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetBoundaryOrder = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.LIST { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNullPages { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")) + } + if !issetMinValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")) + } + if !issetMaxValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")) + } + if !issetBoundaryOrder { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")) + } + return nil +} + +func (p *ColumnIndex) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]bool, 0, size) + p.NullPages = tSlice + for i := 0; i < size; i++ { + var _elem16 bool + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem16 = v + } + p.NullPages = append(p.NullPages, _elem16) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice 
:= make([][]byte, 0, size) + p.MinValues = tSlice + for i := 0; i < size; i++ { + var _elem17 []byte + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem17 = v + } + p.MinValues = append(p.MinValues, _elem17) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([][]byte, 0, size) + p.MaxValues = tSlice + for i := 0; i < size; i++ { + var _elem18 []byte + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem18 = v + } + p.MaxValues = append(p.MaxValues, _elem18) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := BoundaryOrder(v) + p.BoundaryOrder = temp + } + return nil +} + +func (p *ColumnIndex) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]int64, 0, size) + p.NullCounts = tSlice + for i := 0; i < size; i++ { + var _elem19 int64 + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem19 = v + } + p.NullCounts = append(p.NullCounts, _elem19) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil } func (p *ColumnIndex) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "ColumnIndex"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - if err := p.writeField4(ctx, oprot); err != nil { return err } - if err := p.writeField5(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "ColumnIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + if err := p.writeField4(ctx, oprot); err != nil { + return err + } + if err := p.writeField5(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write 
struct stop error: ", err) + } + return nil } func (p *ColumnIndex) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "null_pages", thrift.LIST, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.BOOL, len(p.NullPages)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.NullPages { - if err := oprot.WriteBool(ctx, bool(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "null_pages", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.BOOL, len(p.NullPages)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullPages { + if err := oprot.WriteBool(ctx, bool(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) + } + return err } func (p *ColumnIndex) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "min_values", thrift.LIST, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MinValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MinValues { - if err := oprot.WriteBinary(ctx, v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "min_values", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MinValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MinValues { + if err := oprot.WriteBinary(ctx, v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) + } + return err } func (p *ColumnIndex) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "max_values", thrift.LIST, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MaxValues)); err != nil { - return thrift.PrependError("error writing list begin: ", err) - } - for _, v := range p.MaxValues { - if err := oprot.WriteBinary(ctx, v); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "max_values", thrift.LIST, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.STRING, len(p.MaxValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MaxValues { + if err := oprot.WriteBinary(ctx, v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) + } + return err } func (p *ColumnIndex) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) { - if err := oprot.WriteFieldBegin(ctx, "boundary_order", thrift.I32, 4); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) } - if err := oprot.WriteI32(ctx, int32(p.BoundaryOrder)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) } - return err + if err := oprot.WriteFieldBegin(ctx, "boundary_order", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) + } + if err := oprot.WriteI32(ctx, int32(p.BoundaryOrder)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) + } + return err } func (p *ColumnIndex) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetNullCounts() { - if err := oprot.WriteFieldBegin(ctx, "null_counts", thrift.LIST, 5); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) } - if err := oprot.WriteListBegin(ctx, thrift.I64, len(p.NullCounts)); err != nil { - return thrift.PrependError("error writing list 
begin: ", err) - } - for _, v := range p.NullCounts { - if err := oprot.WriteI64(ctx, int64(v)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) } - } - if err := oprot.WriteListEnd(ctx); err != nil { - return thrift.PrependError("error writing list end: ", err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) } - } - return err + if p.IsSetNullCounts() { + if err := oprot.WriteFieldBegin(ctx, "null_counts", thrift.LIST, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) + } + if err := oprot.WriteListBegin(ctx, thrift.I64, len(p.NullCounts)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullCounts { + if err := oprot.WriteI64(ctx, int64(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(ctx); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) + } + } + return err } func (p *ColumnIndex) Equals(other *ColumnIndex) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if len(p.NullPages) != len(other.NullPages) { return false } - for i, _tgt := range p.NullPages { - _src20 := other.NullPages[i] - if _tgt != _src20 { return false } - } - if len(p.MinValues) != len(other.MinValues) { return false } - for i, _tgt := range p.MinValues { - _src21 := other.MinValues[i] - if bytes.Compare(_tgt, _src21) != 0 { return false } - } - if len(p.MaxValues) != len(other.MaxValues) { return false } - for i, _tgt := range p.MaxValues { - _src22 := other.MaxValues[i] - if bytes.Compare(_tgt, _src22) != 0 { return false } - } - if p.BoundaryOrder != other.BoundaryOrder { return false } - if len(p.NullCounts) != len(other.NullCounts) { return false } - for i, _tgt := range p.NullCounts { - _src23 := other.NullCounts[i] - if _tgt != _src23 { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if len(p.NullPages) != len(other.NullPages) { + return false + } + for i, _tgt := range p.NullPages { + _src20 := other.NullPages[i] + if _tgt != _src20 { + return false + } + } + if len(p.MinValues) != len(other.MinValues) { + return false + } + for i, _tgt := range p.MinValues { + _src21 := other.MinValues[i] + if bytes.Compare(_tgt, _src21) != 0 { + return false + } + } + if len(p.MaxValues) != len(other.MaxValues) { + return false + } + for i, _tgt := range p.MaxValues { + _src22 := other.MaxValues[i] + if bytes.Compare(_tgt, _src22) != 0 { + return false + } + } + if p.BoundaryOrder != other.BoundaryOrder { + return false + } + if len(p.NullCounts) != len(other.NullCounts) { + return false + } + for i, _tgt := range p.NullCounts { + _src23 := other.NullCounts[i] + if _tgt != _src23 { + return false + } + } + return true } func (p *ColumnIndex) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("ColumnIndex(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("ColumnIndex(%+v)", *p) } func (p *ColumnIndex) Validate() error { - return nil + return nil } + // Attributes: -// - AadPrefix: AAD prefix * -// - AadFileUnique: Unique file identifier part of AAD 
suffix * -// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// - AadPrefix: AAD prefix * +// - AadFileUnique: Unique file identifier part of AAD suffix * +// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// // readers must supply the prefix * type AesGcmV1 struct { - AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` - AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` - SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` + AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` + AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` + SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` } func NewAesGcmV1() *AesGcmV1 { - return &AesGcmV1{} + return &AesGcmV1{} } var AesGcmV1_AadPrefix_DEFAULT []byte func (p *AesGcmV1) GetAadPrefix() []byte { - return p.AadPrefix + return p.AadPrefix } + var AesGcmV1_AadFileUnique_DEFAULT []byte func (p *AesGcmV1) GetAadFileUnique() []byte { - return p.AadFileUnique + return p.AadFileUnique } + var AesGcmV1_SupplyAadPrefix_DEFAULT bool + func (p *AesGcmV1) GetSupplyAadPrefix() bool { - if !p.IsSetSupplyAadPrefix() { - return AesGcmV1_SupplyAadPrefix_DEFAULT - } -return *p.SupplyAadPrefix + if !p.IsSetSupplyAadPrefix() { + return AesGcmV1_SupplyAadPrefix_DEFAULT + } + return *p.SupplyAadPrefix } func (p *AesGcmV1) IsSetAadPrefix() bool { - return p.AadPrefix != nil + return p.AadPrefix != nil } func (p *AesGcmV1) IsSetAadFileUnique() bool { - return p.AadFileUnique != nil + return p.AadFileUnique != nil } func (p *AesGcmV1) IsSetSupplyAadPrefix() bool { - return p.SupplyAadPrefix != nil + return p.SupplyAadPrefix != nil } func (p *AesGcmV1) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *AesGcmV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.AadPrefix = v -} - return nil -} - -func (p *AesGcmV1) 
ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.AadFileUnique = v -} - return nil -} - -func (p *AesGcmV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.SupplyAadPrefix = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *AesGcmV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.AadPrefix = v + } + return nil +} + +func (p *AesGcmV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.AadFileUnique = v + } + return nil +} + +func (p *AesGcmV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.SupplyAadPrefix = &v + } + return nil } func (p *AesGcmV1) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "AesGcmV1"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "AesGcmV1"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { 
+ return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *AesGcmV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) } - } - return err + if p.IsSetAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadFileUnique() { - if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) } - } - return err + if p.IsSetAadFileUnique() { + if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) + } + } + return err } func (p *AesGcmV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSupplyAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) } - } - return err + if p.IsSetSupplyAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) + } + if err := oprot.WriteBool(ctx, 
bool(*p.SupplyAadPrefix)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmV1) Equals(other *AesGcmV1) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false } - if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false } - if p.SupplyAadPrefix != other.SupplyAadPrefix { - if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { - return false - } - if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { + return false + } + if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { + return false + } + if p.SupplyAadPrefix != other.SupplyAadPrefix { + if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { + return false + } + if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { + return false + } + } + return true } func (p *AesGcmV1) String() string { - if p == nil { - return "<nil>" - } - return fmt.Sprintf("AesGcmV1(%+v)", *p) + if p == nil { + return "<nil>" + } + return fmt.Sprintf("AesGcmV1(%+v)", *p) } func (p *AesGcmV1) Validate() error { - return nil + return nil } + // Attributes: -// - AadPrefix: AAD prefix * -// - AadFileUnique: Unique file identifier part of AAD suffix * -// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// - AadPrefix: AAD prefix * +// - AadFileUnique: Unique file identifier part of AAD suffix * +// - SupplyAadPrefix: In files encrypted with AAD prefix without storing it, +// // readers must supply the prefix * type AesGcmCtrV1 struct { - AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` - AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` - SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` + AadPrefix []byte `thrift:"aad_prefix,1" db:"aad_prefix" json:"aad_prefix,omitempty"` + AadFileUnique []byte `thrift:"aad_file_unique,2" db:"aad_file_unique" json:"aad_file_unique,omitempty"` + SupplyAadPrefix *bool `thrift:"supply_aad_prefix,3" db:"supply_aad_prefix" json:"supply_aad_prefix,omitempty"` } func NewAesGcmCtrV1() *AesGcmCtrV1 { - return &AesGcmCtrV1{} + return &AesGcmCtrV1{} } var AesGcmCtrV1_AadPrefix_DEFAULT []byte func (p *AesGcmCtrV1) GetAadPrefix() []byte { - return p.AadPrefix + return p.AadPrefix } + var AesGcmCtrV1_AadFileUnique_DEFAULT []byte func (p *AesGcmCtrV1) GetAadFileUnique() []byte { - return p.AadFileUnique + return p.AadFileUnique } + var AesGcmCtrV1_SupplyAadPrefix_DEFAULT bool + func (p *AesGcmCtrV1) GetSupplyAadPrefix() bool { - if !p.IsSetSupplyAadPrefix() { - return AesGcmCtrV1_SupplyAadPrefix_DEFAULT - } -return *p.SupplyAadPrefix + if !p.IsSetSupplyAadPrefix() { + return AesGcmCtrV1_SupplyAadPrefix_DEFAULT + } + return *p.SupplyAadPrefix } func (p *AesGcmCtrV1) IsSetAadPrefix() bool { - return p.AadPrefix != nil + return p.AadPrefix != nil } func (p *AesGcmCtrV1) IsSetAadFileUnique() bool { - return p.AadFileUnique != nil + return p.AadFileUnique != nil } func (p *AesGcmCtrV1) IsSetSupplyAadPrefix()
bool { - return p.SupplyAadPrefix != nil + return p.SupplyAadPrefix != nil } func (p *AesGcmCtrV1) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRING { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRING { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.BOOL { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *AesGcmCtrV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.AadPrefix = v -} - return nil -} - -func (p *AesGcmCtrV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 2: ", err) -} else { - p.AadFileUnique = v -} - return nil -} - -func (p *AesGcmCtrV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBool(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.SupplyAadPrefix = &v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRING { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRING { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.BOOL { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *AesGcmCtrV1) ReadField1(ctx context.Context, iprot thrift.TProtocol) 
error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.AadPrefix = v + } + return nil +} + +func (p *AesGcmCtrV1) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(ctx); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.AadFileUnique = v + } + return nil +} + +func (p *AesGcmCtrV1) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.SupplyAadPrefix = &v + } + return nil } func (p *AesGcmCtrV1) Write(ctx context.Context, oprot thrift.TProtocol) error { - if err := oprot.WriteStructBegin(ctx, "AesGcmCtrV1"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - if err := p.writeField3(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if err := oprot.WriteStructBegin(ctx, "AesGcmCtrV1"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + if err := p.writeField3(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *AesGcmCtrV1) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) } - } - return err + if p.IsSetAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "aad_prefix", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:aad_prefix: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadPrefix); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_prefix (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAadFileUnique() { - if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) } - if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) } - } - return err + if p.IsSetAadFileUnique() { + if err := oprot.WriteFieldBegin(ctx, "aad_file_unique", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:aad_file_unique: ", p), err) + } + if err := oprot.WriteBinary(ctx, p.AadFileUnique); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.aad_file_unique (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:aad_file_unique: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetSupplyAadPrefix() { - if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) } - if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { - return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) } - } - return err + if p.IsSetSupplyAadPrefix() { + if err := oprot.WriteFieldBegin(ctx, "supply_aad_prefix", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:supply_aad_prefix: ", p), err) + } + if err := oprot.WriteBool(ctx, bool(*p.SupplyAadPrefix)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.supply_aad_prefix (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:supply_aad_prefix: ", p), err) + } + } + return err } func (p *AesGcmCtrV1) Equals(other *AesGcmCtrV1) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { return false } - if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { return false } - if p.SupplyAadPrefix != other.SupplyAadPrefix { - if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { - return false - } - if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { return false } - } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if bytes.Compare(p.AadPrefix, other.AadPrefix) != 0 { + return false + } + if bytes.Compare(p.AadFileUnique, other.AadFileUnique) != 0 { + return false + } + if p.SupplyAadPrefix != other.SupplyAadPrefix { + if p.SupplyAadPrefix == nil || other.SupplyAadPrefix == nil { + return false + } + if (*p.SupplyAadPrefix) != (*other.SupplyAadPrefix) { + return false + } + } + return true } func (p *AesGcmCtrV1) String() string { - if p == nil { - return "" - } - return fmt.Sprintf("AesGcmCtrV1(%+v)", *p) + if p == nil { + return "" + } + return fmt.Sprintf("AesGcmCtrV1(%+v)", *p) } func (p *AesGcmCtrV1) Validate() error { - return nil + return nil } + // Attributes: -// - AES_GCM_V1 -// - AES_GCM_CTR_V1 +// - AES_GCM_V1 +// - AES_GCM_CTR_V1 type EncryptionAlgorithm struct { - AES_GCM_V1 *AesGcmV1 `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"` - AES_GCM_CTR_V1 
*AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"` + AES_GCM_V1 *AesGcmV1 `thrift:"AES_GCM_V1,1" db:"AES_GCM_V1" json:"AES_GCM_V1,omitempty"` + AES_GCM_CTR_V1 *AesGcmCtrV1 `thrift:"AES_GCM_CTR_V1,2" db:"AES_GCM_CTR_V1" json:"AES_GCM_CTR_V1,omitempty"` } func NewEncryptionAlgorithm() *EncryptionAlgorithm { - return &EncryptionAlgorithm{} + return &EncryptionAlgorithm{} } var EncryptionAlgorithm_AES_GCM_V1_DEFAULT *AesGcmV1 + func (p *EncryptionAlgorithm) GetAES_GCM_V1() *AesGcmV1 { - if !p.IsSetAES_GCM_V1() { - return EncryptionAlgorithm_AES_GCM_V1_DEFAULT - } -return p.AES_GCM_V1 + if !p.IsSetAES_GCM_V1() { + return EncryptionAlgorithm_AES_GCM_V1_DEFAULT + } + return p.AES_GCM_V1 } + var EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT *AesGcmCtrV1 + func (p *EncryptionAlgorithm) GetAES_GCM_CTR_V1() *AesGcmCtrV1 { - if !p.IsSetAES_GCM_CTR_V1() { - return EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT - } -return p.AES_GCM_CTR_V1 + if !p.IsSetAES_GCM_CTR_V1() { + return EncryptionAlgorithm_AES_GCM_CTR_V1_DEFAULT + } + return p.AES_GCM_CTR_V1 } func (p *EncryptionAlgorithm) CountSetFieldsEncryptionAlgorithm() int { - count := 0 - if (p.IsSetAES_GCM_V1()) { - count++ - } - if (p.IsSetAES_GCM_CTR_V1()) { - count++ - } - return count + count := 0 + if p.IsSetAES_GCM_V1() { + count++ + } + if p.IsSetAES_GCM_CTR_V1() { + count++ + } + return count } func (p *EncryptionAlgorithm) IsSetAES_GCM_V1() bool { - return p.AES_GCM_V1 != nil + return p.AES_GCM_V1 != nil } func (p *EncryptionAlgorithm) IsSetAES_GCM_CTR_V1() bool { - return p.AES_GCM_CTR_V1 != nil + return p.AES_GCM_CTR_V1 != nil } func (p *EncryptionAlgorithm) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - return nil -} - -func (p *EncryptionAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - p.AES_GCM_V1 = &AesGcmV1{} - if err := p.AES_GCM_V1.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_V1), err) - } - return nil -} - -func (p *EncryptionAlgorithm) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - p.AES_GCM_CTR_V1 = &AesGcmCtrV1{} - if err := p.AES_GCM_CTR_V1.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_CTR_V1), err) - } - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, 
fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *EncryptionAlgorithm) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + p.AES_GCM_V1 = &AesGcmV1{} + if err := p.AES_GCM_V1.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_V1), err) + } + return nil +} + +func (p *EncryptionAlgorithm) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + p.AES_GCM_CTR_V1 = &AesGcmCtrV1{} + if err := p.AES_GCM_CTR_V1.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.AES_GCM_CTR_V1), err) + } + return nil } func (p *EncryptionAlgorithm) Write(ctx context.Context, oprot thrift.TProtocol) error { - if c := p.CountSetFieldsEncryptionAlgorithm(); c != 1 { - return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) - } - if err := oprot.WriteStructBegin(ctx, "EncryptionAlgorithm"); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) } - if p != nil { - if err := p.writeField1(ctx, oprot); err != nil { return err } - if err := p.writeField2(ctx, oprot); err != nil { return err } - } - if err := oprot.WriteFieldStop(ctx); err != nil { - return thrift.PrependError("write field stop error: ", err) } - if err := oprot.WriteStructEnd(ctx); err != nil { - return thrift.PrependError("write struct stop error: ", err) } - return nil + if c := p.CountSetFieldsEncryptionAlgorithm(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set)", p, c) + } + if err := oprot.WriteStructBegin(ctx, "EncryptionAlgorithm"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(ctx, oprot); err != nil { + return err + } + if err := p.writeField2(ctx, oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(ctx); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(ctx); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil } func (p *EncryptionAlgorithm) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAES_GCM_V1() { - if err := oprot.WriteFieldBegin(ctx, "AES_GCM_V1", thrift.STRUCT, 1); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:AES_GCM_V1: ", p), err) } - if err := p.AES_GCM_V1.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_V1), err) - } - if err := oprot.WriteFieldEnd(ctx); err 
!= nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 1:AES_GCM_V1: ", p), err) } - } - return err + if p.IsSetAES_GCM_V1() { + if err := oprot.WriteFieldBegin(ctx, "AES_GCM_V1", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:AES_GCM_V1: ", p), err) + } + if err := p.AES_GCM_V1.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_V1), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:AES_GCM_V1: ", p), err) + } + } + return err } func (p *EncryptionAlgorithm) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) { - if p.IsSetAES_GCM_CTR_V1() { - if err := oprot.WriteFieldBegin(ctx, "AES_GCM_CTR_V1", thrift.STRUCT, 2); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:AES_GCM_CTR_V1: ", p), err) } - if err := p.AES_GCM_CTR_V1.Write(ctx, oprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_CTR_V1), err) - } - if err := oprot.WriteFieldEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T write field end error 2:AES_GCM_CTR_V1: ", p), err) } - } - return err + if p.IsSetAES_GCM_CTR_V1() { + if err := oprot.WriteFieldBegin(ctx, "AES_GCM_CTR_V1", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:AES_GCM_CTR_V1: ", p), err) + } + if err := p.AES_GCM_CTR_V1.Write(ctx, oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.AES_GCM_CTR_V1), err) + } + if err := oprot.WriteFieldEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:AES_GCM_CTR_V1: ", p), err) + } + } + return err } func (p *EncryptionAlgorithm) Equals(other *EncryptionAlgorithm) bool { - if p == other { - return true - } else if p == nil || other == nil { - return false - } - if !p.AES_GCM_V1.Equals(other.AES_GCM_V1) { return false } - if !p.AES_GCM_CTR_V1.Equals(other.AES_GCM_CTR_V1) { return false } - return true + if p == other { + return true + } else if p == nil || other == nil { + return false + } + if !p.AES_GCM_V1.Equals(other.AES_GCM_V1) { + return false + } + if !p.AES_GCM_CTR_V1.Equals(other.AES_GCM_CTR_V1) { + return false + } + return true } func (p *EncryptionAlgorithm) String() string { - if p == nil { - return "<nil>" - } - return fmt.Sprintf("EncryptionAlgorithm(%+v)", *p) + if p == nil { + return "<nil>" + } + return fmt.Sprintf("EncryptionAlgorithm(%+v)", *p) } func (p *EncryptionAlgorithm) Validate() error { - return nil + return nil } + // Description for file metadata -// +// // Attributes: -// - Version: Version of this file * -// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. +// - Version: Version of this file * +// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. +// // The schema is represented as a tree with a single root. The nodes of the tree // are flattened to a list by doing a depth-first traversal. // The column metadata contains the path in the schema for that column which can be // used to map columns to nodes in the schema. // The first element is the root * -// - NumRows: Number of rows in this file * -// - RowGroups: Row groups in this file * -// - KeyValueMetadata: Optional key/value metadata * -// - CreatedBy: String for application that wrote this file.
This should be in the format +// - NumRows: Number of rows in this file * +// - RowGroups: Row groups in this file * +// - KeyValueMetadata: Optional key/value metadata * +// - CreatedBy: String for application that wrote this file. This should be in the format +// // <Application> version <App Version> (build <App Build Hash>). // e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) -// -// - ColumnOrders: Sort order used for the min_value and max_value fields in the Statistics +// - ColumnOrders: Sort order used for the min_value and max_value fields in the Statistics +// // objects and the min_values and max_values fields in the ColumnIndex // objects of each column in this file. Sort orders are listed in the order // matching the columns in the schema. The indexes are not necessary the same // though, because only leaf nodes of the schema are represented in the list // of sort orders. -// // Without column_orders, the meaning of the min_value and max_value fields // in the Statistics object and the ColumnIndex object is undefined. To ensure // well-defined behaviour, if these fields are written to a Parquet file, // column_orders must be written as well. -// // The obsolete min and max fields in the Statistics object are always sorted // by signed comparison regardless of column_orders. -// - EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files +// - EncryptionAlgorithm: Encryption algorithm. This field is set only in encrypted files +// // with plaintext footer. Files with encrypted footer store algorithm id // in FileCryptoMetaData structure. -// - FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer. +// - FooterSigningKeyMetadata: Retrieval metadata of key used for signing the footer. +// // Used only in encrypted files with plaintext footer.
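A brief aside on how this generated type is consumed. Parquet stores FileMetaData at the end of the file, serialized with Thrift's compact protocol, so a decode helper over the generated code can be sketched as below. This is an illustrative sketch only, not part of this patch: the helper name decodeFooter, the footer byte slice (the raw Thrift payload, without the trailing 4-byte length and "PAR1" magic), and the TConfiguration-based constructor (thrift >= v0.14) are all assumptions.

	// decodeFooter is a hypothetical helper: it deserializes a Parquet footer
	// payload into the generated FileMetaData type via the compact protocol.
	func decodeFooter(footer []byte) (*FileMetaData, error) {
		buf := thrift.NewTMemoryBufferLen(len(footer))
		buf.Write(footer) // stage the raw bytes in an in-memory Thrift transport
		fm := NewFileMetaData()
		proto := thrift.NewTCompactProtocolConf(buf, &thrift.TConfiguration{})
		if err := fm.Read(context.Background(), proto); err != nil {
			return nil, err
		}
		return fm, nil
	}
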
type FileMetaData struct { - Version int32 `thrift:"version,1,required" db:"version" json:"version"` - Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` - NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` - RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` - KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` - CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` - ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` - EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"` - FooterSigningKeyMetadata []byte `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"` + Version int32 `thrift:"version,1,required" db:"version" json:"version"` + Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` + NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` + KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` + CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` + ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` + EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,8" db:"encryption_algorithm" json:"encryption_algorithm,omitempty"` + FooterSigningKeyMetadata []byte `thrift:"footer_signing_key_metadata,9" db:"footer_signing_key_metadata" json:"footer_signing_key_metadata,omitempty"` } func NewFileMetaData() *FileMetaData { - return &FileMetaData{} + return &FileMetaData{} } - func (p *FileMetaData) GetVersion() int32 { - return p.Version + return p.Version } func (p *FileMetaData) GetSchema() []*SchemaElement { - return p.Schema + return p.Schema } func (p *FileMetaData) GetNumRows() int64 { - return p.NumRows + return p.NumRows } func (p *FileMetaData) GetRowGroups() []*RowGroup { - return p.RowGroups + return p.RowGroups } + var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue { - return p.KeyValueMetadata + return p.KeyValueMetadata } + var FileMetaData_CreatedBy_DEFAULT string + func (p *FileMetaData) GetCreatedBy() string { - if !p.IsSetCreatedBy() { - return FileMetaData_CreatedBy_DEFAULT - } -return *p.CreatedBy + if !p.IsSetCreatedBy() { + return FileMetaData_CreatedBy_DEFAULT + } + return *p.CreatedBy } + var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder func (p *FileMetaData) GetColumnOrders() []*ColumnOrder { - return p.ColumnOrders + return p.ColumnOrders } + var FileMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm + func (p *FileMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm { - if !p.IsSetEncryptionAlgorithm() { - return FileMetaData_EncryptionAlgorithm_DEFAULT - } -return p.EncryptionAlgorithm + if !p.IsSetEncryptionAlgorithm() { + return FileMetaData_EncryptionAlgorithm_DEFAULT + } + return p.EncryptionAlgorithm } + var FileMetaData_FooterSigningKeyMetadata_DEFAULT []byte func (p *FileMetaData) GetFooterSigningKeyMetadata() []byte { - return p.FooterSigningKeyMetadata + return p.FooterSigningKeyMetadata } func (p 
*FileMetaData) IsSetKeyValueMetadata() bool { - return p.KeyValueMetadata != nil + return p.KeyValueMetadata != nil } func (p *FileMetaData) IsSetCreatedBy() bool { - return p.CreatedBy != nil + return p.CreatedBy != nil } func (p *FileMetaData) IsSetColumnOrders() bool { - return p.ColumnOrders != nil + return p.ColumnOrders != nil } func (p *FileMetaData) IsSetEncryptionAlgorithm() bool { - return p.EncryptionAlgorithm != nil + return p.EncryptionAlgorithm != nil } func (p *FileMetaData) IsSetFooterSigningKeyMetadata() bool { - return p.FooterSigningKeyMetadata != nil + return p.FooterSigningKeyMetadata != nil } func (p *FileMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error { - if _, err := iprot.ReadStructBegin(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) - } - - var issetVersion bool = false; - var issetSchema bool = false; - var issetNumRows bool = false; - var issetRowGroups bool = false; - - for { - _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) - if err != nil { - return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) - } - if fieldTypeId == thrift.STOP { break; } - switch fieldId { - case 1: - if fieldTypeId == thrift.I32 { - if err := p.ReadField1(ctx, iprot); err != nil { - return err - } - issetVersion = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 2: - if fieldTypeId == thrift.LIST { - if err := p.ReadField2(ctx, iprot); err != nil { - return err - } - issetSchema = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 3: - if fieldTypeId == thrift.I64 { - if err := p.ReadField3(ctx, iprot); err != nil { - return err - } - issetNumRows = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 4: - if fieldTypeId == thrift.LIST { - if err := p.ReadField4(ctx, iprot); err != nil { - return err - } - issetRowGroups = true - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 5: - if fieldTypeId == thrift.LIST { - if err := p.ReadField5(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 6: - if fieldTypeId == thrift.STRING { - if err := p.ReadField6(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 7: - if fieldTypeId == thrift.LIST { - if err := p.ReadField7(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 8: - if fieldTypeId == thrift.STRUCT { - if err := p.ReadField8(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - case 9: - if fieldTypeId == thrift.STRING { - if err := p.ReadField9(ctx, iprot); err != nil { - return err - } - } else { - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - default: - if err := iprot.Skip(ctx, fieldTypeId); err != nil { - return err - } - } - if err := iprot.ReadFieldEnd(ctx); err != nil { - return err - } - } - if err := iprot.ReadStructEnd(ctx); err != nil { - return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) - } - if !issetVersion{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")); - } - if !issetSchema{ - return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")); - } - if !issetNumRows{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")); - } - if !issetRowGroups{ - return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")); - } - return nil -} - -func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI32(ctx); err != nil { - return thrift.PrependError("error reading field 1: ", err) -} else { - p.Version = v -} - return nil -} - -func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*SchemaElement, 0, size) - p.Schema = tSlice - for i := 0; i < size; i ++ { - _elem24 := &SchemaElement{} - if err := _elem24.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem24), err) - } - p.Schema = append(p.Schema, _elem24) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadI64(ctx); err != nil { - return thrift.PrependError("error reading field 3: ", err) -} else { - p.NumRows = v -} - return nil -} - -func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*RowGroup, 0, size) - p.RowGroups = tSlice - for i := 0; i < size; i ++ { - _elem25 := &RowGroup{} - if err := _elem25.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem25), err) - } - p.RowGroups = append(p.RowGroups, _elem25) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*KeyValue, 0, size) - p.KeyValueMetadata = tSlice - for i := 0; i < size; i ++ { - _elem26 := &KeyValue{} - if err := _elem26.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem26), err) - } - p.KeyValueMetadata = append(p.KeyValueMetadata, _elem26) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadString(ctx); err != nil { - return thrift.PrependError("error reading field 6: ", err) -} else { - p.CreatedBy = &v -} - return nil -} - -func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { - _, size, err := iprot.ReadListBegin(ctx) - if err != nil { - return thrift.PrependError("error reading list begin: ", err) - } - tSlice := make([]*ColumnOrder, 0, size) - p.ColumnOrders = tSlice - for i := 0; i < size; i ++ { - _elem27 := &ColumnOrder{} - if err := _elem27.Read(ctx, iprot); err != nil { - return 
thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem27), err) - } - p.ColumnOrders = append(p.ColumnOrders, _elem27) - } - if err := iprot.ReadListEnd(ctx); err != nil { - return thrift.PrependError("error reading list end: ", err) - } - return nil -} - -func (p *FileMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error { - p.EncryptionAlgorithm = &EncryptionAlgorithm{} - if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil { - return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err) - } - return nil -} - -func (p *FileMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error { - if v, err := iprot.ReadBinary(ctx); err != nil { - return thrift.PrependError("error reading field 9: ", err) -} else { - p.FooterSigningKeyMetadata = v -} - return nil + if _, err := iprot.ReadStructBegin(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetVersion bool = false + var issetSchema bool = false + var issetNumRows bool = false + var issetRowGroups bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx) + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if fieldTypeId == thrift.I32 { + if err := p.ReadField1(ctx, iprot); err != nil { + return err + } + issetVersion = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 2: + if fieldTypeId == thrift.LIST { + if err := p.ReadField2(ctx, iprot); err != nil { + return err + } + issetSchema = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 3: + if fieldTypeId == thrift.I64 { + if err := p.ReadField3(ctx, iprot); err != nil { + return err + } + issetNumRows = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 4: + if fieldTypeId == thrift.LIST { + if err := p.ReadField4(ctx, iprot); err != nil { + return err + } + issetRowGroups = true + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 5: + if fieldTypeId == thrift.LIST { + if err := p.ReadField5(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 6: + if fieldTypeId == thrift.STRING { + if err := p.ReadField6(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 7: + if fieldTypeId == thrift.LIST { + if err := p.ReadField7(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 8: + if fieldTypeId == thrift.STRUCT { + if err := p.ReadField8(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + case 9: + if fieldTypeId == thrift.STRING { + if err := p.ReadField9(ctx, iprot); err != nil { + return err + } + } else { + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + default: + if err := iprot.Skip(ctx, fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(ctx); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(ctx); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetVersion { + 
return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")) + } + if !issetSchema { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetRowGroups { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")) + } + return nil +} + +func (p *FileMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(ctx); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Version = v + } + return nil +} + +func (p *FileMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*SchemaElement, 0, size) + p.Schema = tSlice + for i := 0; i < size; i++ { + _elem24 := &SchemaElement{} + if err := _elem24.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem24), err) + } + p.Schema = append(p.Schema, _elem24) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField3(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(ctx); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *FileMetaData) ReadField4(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*RowGroup, 0, size) + p.RowGroups = tSlice + for i := 0; i < size; i++ { + _elem25 := &RowGroup{} + if err := _elem25.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem25), err) + } + p.RowGroups = append(p.RowGroups, _elem25) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField5(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*KeyValue, 0, size) + p.KeyValueMetadata = tSlice + for i := 0; i < size; i++ { + _elem26 := &KeyValue{} + if err := _elem26.Read(ctx, iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem26), err) + } + p.KeyValueMetadata = append(p.KeyValueMetadata, _elem26) + } + if err := iprot.ReadListEnd(ctx); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField6(ctx context.Context, iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(ctx); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.CreatedBy = &v + } + return nil +} + +func (p *FileMetaData) ReadField7(ctx context.Context, iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin(ctx) + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*ColumnOrder, 0, size) 
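+	// All of the generated list-valued fields follow this same read pattern:
+	// ReadListBegin reports the element count, the destination slice is
+	// preallocated with that capacity, each element is decoded in order, and
+	// ReadListEnd closes the collection.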
+	p.ColumnOrders = tSlice
+	for i := 0; i < size; i++ {
+		_elem27 := &ColumnOrder{}
+		if err := _elem27.Read(ctx, iprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem27), err)
+		}
+		p.ColumnOrders = append(p.ColumnOrders, _elem27)
+	}
+	if err := iprot.ReadListEnd(ctx); err != nil {
+		return thrift.PrependError("error reading list end: ", err)
+	}
+	return nil
+}
+
+func (p *FileMetaData) ReadField8(ctx context.Context, iprot thrift.TProtocol) error {
+	p.EncryptionAlgorithm = &EncryptionAlgorithm{}
+	if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err)
+	}
+	return nil
+}
+
+func (p *FileMetaData) ReadField9(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadBinary(ctx); err != nil {
+		return thrift.PrependError("error reading field 9: ", err)
+	} else {
+		p.FooterSigningKeyMetadata = v
+	}
+	return nil
 }
 
 func (p *FileMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "FileMetaData"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-    if err := p.writeField3(ctx, oprot); err != nil { return err }
-    if err := p.writeField4(ctx, oprot); err != nil { return err }
-    if err := p.writeField5(ctx, oprot); err != nil { return err }
-    if err := p.writeField6(ctx, oprot); err != nil { return err }
-    if err := p.writeField7(ctx, oprot); err != nil { return err }
-    if err := p.writeField8(ctx, oprot); err != nil { return err }
-    if err := p.writeField9(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "FileMetaData"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField3(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField4(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField5(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField6(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField7(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField8(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField9(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }
 
 func (p *FileMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "version", thrift.I32, 1); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) }
-  if err := oprot.WriteI32(ctx, int32(p.Version)); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "version", thrift.I32, 1); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err)
+	}
+	if err := oprot.WriteI32(ctx, int32(p.Version)); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err)
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "schema", thrift.LIST, 2); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) }
-  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Schema)); err != nil {
-    return thrift.PrependError("error writing list begin: ", err)
-  }
-  for _, v := range p.Schema {
-    if err := v.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
-    }
-  }
-  if err := oprot.WriteListEnd(ctx); err != nil {
-    return thrift.PrependError("error writing list end: ", err)
-  }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "schema", thrift.LIST, 2); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err)
+	}
+	if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.Schema)); err != nil {
+		return thrift.PrependError("error writing list begin: ", err)
+	}
+	for _, v := range p.Schema {
+		if err := v.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+		}
+	}
+	if err := oprot.WriteListEnd(ctx); err != nil {
+		return thrift.PrependError("error writing list end: ", err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err)
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField3(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) }
-  if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "num_rows", thrift.I64, 3); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err)
+	}
+	if err := oprot.WriteI64(ctx, int64(p.NumRows)); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err)
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField4(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "row_groups", thrift.LIST, 4); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) }
-  if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.RowGroups)); err != nil {
-    return thrift.PrependError("error writing list begin: ", err)
-  }
-  for _, v := range p.RowGroups {
-    if err := v.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
-    }
-  }
-  if err := oprot.WriteListEnd(ctx); err != nil {
-    return thrift.PrependError("error writing list end: ", err)
-  }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "row_groups", thrift.LIST, 4); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err)
+	}
+	if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.RowGroups)); err != nil {
+		return thrift.PrependError("error writing list begin: ", err)
+	}
+	for _, v := range p.RowGroups {
+		if err := v.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+		}
+	}
+	if err := oprot.WriteListEnd(ctx); err != nil {
+		return thrift.PrependError("error writing list end: ", err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err)
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField5(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetKeyValueMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 5); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) }
-    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil {
-      return thrift.PrependError("error writing list begin: ", err)
-    }
-    for _, v := range p.KeyValueMetadata {
-      if err := v.Write(ctx, oprot); err != nil {
-        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
-      }
-    }
-    if err := oprot.WriteListEnd(ctx); err != nil {
-      return thrift.PrependError("error writing list end: ", err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetKeyValueMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "key_value_metadata", thrift.LIST, 5); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err)
+		}
+		if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.KeyValueMetadata)); err != nil {
+			return thrift.PrependError("error writing list begin: ", err)
+		}
+		for _, v := range p.KeyValueMetadata {
+			if err := v.Write(ctx, oprot); err != nil {
+				return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+			}
+		}
+		if err := oprot.WriteListEnd(ctx); err != nil {
+			return thrift.PrependError("error writing list end: ", err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField6(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetCreatedBy() {
-    if err := oprot.WriteFieldBegin(ctx, "created_by", thrift.STRING, 6); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err) }
-    if err := oprot.WriteString(ctx, string(*p.CreatedBy)); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) }
-  }
-  return err
+	if p.IsSetCreatedBy() {
+		if err := oprot.WriteFieldBegin(ctx, "created_by", thrift.STRING, 6); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err)
+		}
+		if err := oprot.WriteString(ctx, string(*p.CreatedBy)); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField7(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetColumnOrders() {
-    if err := oprot.WriteFieldBegin(ctx, "column_orders", thrift.LIST, 7); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) }
-    if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.ColumnOrders)); err != nil {
-      return thrift.PrependError("error writing list begin: ", err)
-    }
-    for _, v := range p.ColumnOrders {
-      if err := v.Write(ctx, oprot); err != nil {
-        return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
-      }
-    }
-    if err := oprot.WriteListEnd(ctx); err != nil {
-      return thrift.PrependError("error writing list end: ", err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) }
-  }
-  return err
+	if p.IsSetColumnOrders() {
+		if err := oprot.WriteFieldBegin(ctx, "column_orders", thrift.LIST, 7); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err)
+		}
+		if err := oprot.WriteListBegin(ctx, thrift.STRUCT, len(p.ColumnOrders)); err != nil {
+			return thrift.PrependError("error writing list begin: ", err)
+		}
+		for _, v := range p.ColumnOrders {
+			if err := v.Write(ctx, oprot); err != nil {
+				return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
+			}
+		}
+		if err := oprot.WriteListEnd(ctx); err != nil {
+			return thrift.PrependError("error writing list end: ", err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField8(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetEncryptionAlgorithm() {
-    if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 8); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:encryption_algorithm: ", p), err) }
-    if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
-    }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 8:encryption_algorithm: ", p), err) }
-  }
-  return err
+	if p.IsSetEncryptionAlgorithm() {
+		if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 8); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:encryption_algorithm: ", p), err)
+		}
+		if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 8:encryption_algorithm: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileMetaData) writeField9(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetFooterSigningKeyMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "footer_signing_key_metadata", thrift.STRING, 9); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:footer_signing_key_metadata: ", p), err) }
-    if err := oprot.WriteBinary(ctx, p.FooterSigningKeyMetadata); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.footer_signing_key_metadata (9) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 9:footer_signing_key_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetFooterSigningKeyMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "footer_signing_key_metadata", thrift.STRING, 9); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:footer_signing_key_metadata: ", p), err)
+		}
+		if err := oprot.WriteBinary(ctx, p.FooterSigningKeyMetadata); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.footer_signing_key_metadata (9) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 9:footer_signing_key_metadata: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileMetaData) Equals(other *FileMetaData) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if p.Version != other.Version { return false }
-  if len(p.Schema) != len(other.Schema) { return false }
-  for i, _tgt := range p.Schema {
-    _src28 := other.Schema[i]
-    if !_tgt.Equals(_src28) { return false }
-  }
-  if p.NumRows != other.NumRows { return false }
-  if len(p.RowGroups) != len(other.RowGroups) { return false }
-  for i, _tgt := range p.RowGroups {
-    _src29 := other.RowGroups[i]
-    if !_tgt.Equals(_src29) { return false }
-  }
-  if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) { return false }
-  for i, _tgt := range p.KeyValueMetadata {
-    _src30 := other.KeyValueMetadata[i]
-    if !_tgt.Equals(_src30) { return false }
-  }
-  if p.CreatedBy != other.CreatedBy {
-    if p.CreatedBy == nil || other.CreatedBy == nil {
-      return false
-    }
-    if (*p.CreatedBy) != (*other.CreatedBy) { return false }
-  }
-  if len(p.ColumnOrders) != len(other.ColumnOrders) { return false }
-  for i, _tgt := range p.ColumnOrders {
-    _src31 := other.ColumnOrders[i]
-    if !_tgt.Equals(_src31) { return false }
-  }
-  if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false }
-  if bytes.Compare(p.FooterSigningKeyMetadata, other.FooterSigningKeyMetadata) != 0 { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if p.Version != other.Version {
+		return false
+	}
+	if len(p.Schema) != len(other.Schema) {
+		return false
+	}
+	for i, _tgt := range p.Schema {
+		_src28 := other.Schema[i]
+		if !_tgt.Equals(_src28) {
+			return false
+		}
+	}
+	if p.NumRows != other.NumRows {
+		return false
+	}
+	if len(p.RowGroups) != len(other.RowGroups) {
+		return false
+	}
+	for i, _tgt := range p.RowGroups {
+		_src29 := other.RowGroups[i]
+		if !_tgt.Equals(_src29) {
+			return false
+		}
+	}
+	if len(p.KeyValueMetadata) != len(other.KeyValueMetadata) {
+		return false
+	}
+	for i, _tgt := range p.KeyValueMetadata {
+		_src30 := other.KeyValueMetadata[i]
+		if !_tgt.Equals(_src30) {
+			return false
+		}
+	}
+	if p.CreatedBy != other.CreatedBy {
+		if p.CreatedBy == nil || other.CreatedBy == nil {
+			return false
+		}
+		if (*p.CreatedBy) != (*other.CreatedBy) {
+			return false
+		}
+	}
+	if len(p.ColumnOrders) != len(other.ColumnOrders) {
+		return false
+	}
+	for i, _tgt := range p.ColumnOrders {
+		_src31 := other.ColumnOrders[i]
+		if !_tgt.Equals(_src31) {
+			return false
+		}
+	}
+	if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) {
+		return false
+	}
+	if bytes.Compare(p.FooterSigningKeyMetadata, other.FooterSigningKeyMetadata) != 0 {
+		return false
+	}
+	return true
 }
 
 func (p *FileMetaData) String() string {
-  if p == nil {
-    return "<nil>"
-  }
-  return fmt.Sprintf("FileMetaData(%+v)", *p)
+	if p == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("FileMetaData(%+v)", *p)
 }
 
 func (p *FileMetaData) Validate() error {
-  return nil
+	return nil
 }
+
 // Crypto metadata for files with encrypted footer *
-//
+//
 // Attributes:
-//  - EncryptionAlgorithm: Encryption algorithm. This field is only used for files
+//   - EncryptionAlgorithm: Encryption algorithm. This field is only used for files
+//
 // with encrypted footer. Files with plaintext footer store algorithm id
 // inside footer (FileMetaData structure).
-//  - KeyMetadata: Retrieval metadata of key used for encryption of footer,
+//   - KeyMetadata: Retrieval metadata of key used for encryption of footer,
+//
 // and (possibly) columns *
 type FileCryptoMetaData struct {
-  EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"`
-  KeyMetadata []byte `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
+	EncryptionAlgorithm *EncryptionAlgorithm `thrift:"encryption_algorithm,1,required" db:"encryption_algorithm" json:"encryption_algorithm"`
+	KeyMetadata         []byte               `thrift:"key_metadata,2" db:"key_metadata" json:"key_metadata,omitempty"`
 }
 
 func NewFileCryptoMetaData() *FileCryptoMetaData {
-  return &FileCryptoMetaData{}
+	return &FileCryptoMetaData{}
 }
 
 var FileCryptoMetaData_EncryptionAlgorithm_DEFAULT *EncryptionAlgorithm
+
 func (p *FileCryptoMetaData) GetEncryptionAlgorithm() *EncryptionAlgorithm {
-  if !p.IsSetEncryptionAlgorithm() {
-    return FileCryptoMetaData_EncryptionAlgorithm_DEFAULT
-  }
-return p.EncryptionAlgorithm
+	if !p.IsSetEncryptionAlgorithm() {
+		return FileCryptoMetaData_EncryptionAlgorithm_DEFAULT
+	}
+	return p.EncryptionAlgorithm
 }
+
 var FileCryptoMetaData_KeyMetadata_DEFAULT []byte
 
 func (p *FileCryptoMetaData) GetKeyMetadata() []byte {
-  return p.KeyMetadata
+	return p.KeyMetadata
 }
 
 func (p *FileCryptoMetaData) IsSetEncryptionAlgorithm() bool {
-  return p.EncryptionAlgorithm != nil
+	return p.EncryptionAlgorithm != nil
 }
 
 func (p *FileCryptoMetaData) IsSetKeyMetadata() bool {
-  return p.KeyMetadata != nil
+	return p.KeyMetadata != nil
 }
 
 func (p *FileCryptoMetaData) Read(ctx context.Context, iprot thrift.TProtocol) error {
-  if _, err := iprot.ReadStructBegin(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
-  }
-
-  var issetEncryptionAlgorithm bool = false;
-
-  for {
-    _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
-    if err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
-    }
-    if fieldTypeId == thrift.STOP { break; }
-    switch fieldId {
-    case 1:
-      if fieldTypeId == thrift.STRUCT {
-        if err := p.ReadField1(ctx, iprot); err != nil {
-          return err
-        }
-        issetEncryptionAlgorithm = true
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    case 2:
-      if fieldTypeId == thrift.STRING {
-        if err := p.ReadField2(ctx, iprot); err != nil {
-          return err
-        }
-      } else {
-        if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-          return err
-        }
-      }
-    default:
-      if err := iprot.Skip(ctx, fieldTypeId); err != nil {
-        return err
-      }
-    }
-    if err := iprot.ReadFieldEnd(ctx); err != nil {
-      return err
-    }
-  }
-  if err := iprot.ReadStructEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
-  }
-  if !issetEncryptionAlgorithm{
-    return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field EncryptionAlgorithm is not set"));
-  }
-  return nil
-}
-
-func (p *FileCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
-  p.EncryptionAlgorithm = &EncryptionAlgorithm{}
-  if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err)
-  }
-  return nil
-}
-
-func (p *FileCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
-  if v, err := iprot.ReadBinary(ctx); err != nil {
-    return thrift.PrependError("error reading field 2: ", err)
-} else {
-  p.KeyMetadata = v
-}
-  return nil
+	if _, err := iprot.ReadStructBegin(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
+	}
+
+	var issetEncryptionAlgorithm bool = false
+
+	for {
+		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin(ctx)
+		if err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
+		}
+		if fieldTypeId == thrift.STOP {
+			break
+		}
+		switch fieldId {
+		case 1:
+			if fieldTypeId == thrift.STRUCT {
+				if err := p.ReadField1(ctx, iprot); err != nil {
+					return err
+				}
+				issetEncryptionAlgorithm = true
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		case 2:
+			if fieldTypeId == thrift.STRING {
+				if err := p.ReadField2(ctx, iprot); err != nil {
+					return err
+				}
+			} else {
+				if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+					return err
+				}
+			}
+		default:
+			if err := iprot.Skip(ctx, fieldTypeId); err != nil {
+				return err
+			}
+		}
+		if err := iprot.ReadFieldEnd(ctx); err != nil {
+			return err
+		}
+	}
+	if err := iprot.ReadStructEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
+	}
+	if !issetEncryptionAlgorithm {
+		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field EncryptionAlgorithm is not set"))
+	}
+	return nil
+}
+
+func (p *FileCryptoMetaData) ReadField1(ctx context.Context, iprot thrift.TProtocol) error {
+	p.EncryptionAlgorithm = &EncryptionAlgorithm{}
+	if err := p.EncryptionAlgorithm.Read(ctx, iprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.EncryptionAlgorithm), err)
+	}
+	return nil
+}
+
+func (p *FileCryptoMetaData) ReadField2(ctx context.Context, iprot thrift.TProtocol) error {
+	if v, err := iprot.ReadBinary(ctx); err != nil {
+		return thrift.PrependError("error reading field 2: ", err)
+	} else {
+		p.KeyMetadata = v
+	}
+	return nil
 }
 
 func (p *FileCryptoMetaData) Write(ctx context.Context, oprot thrift.TProtocol) error {
-  if err := oprot.WriteStructBegin(ctx, "FileCryptoMetaData"); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) }
-  if p != nil {
-    if err := p.writeField1(ctx, oprot); err != nil { return err }
-    if err := p.writeField2(ctx, oprot); err != nil { return err }
-  }
-  if err := oprot.WriteFieldStop(ctx); err != nil {
-    return thrift.PrependError("write field stop error: ", err) }
-  if err := oprot.WriteStructEnd(ctx); err != nil {
-    return thrift.PrependError("write struct stop error: ", err) }
-  return nil
+	if err := oprot.WriteStructBegin(ctx, "FileCryptoMetaData"); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
+	}
+	if p != nil {
+		if err := p.writeField1(ctx, oprot); err != nil {
+			return err
+		}
+		if err := p.writeField2(ctx, oprot); err != nil {
+			return err
+		}
+	}
+	if err := oprot.WriteFieldStop(ctx); err != nil {
+		return thrift.PrependError("write field stop error: ", err)
+	}
+	if err := oprot.WriteStructEnd(ctx); err != nil {
+		return thrift.PrependError("write struct stop error: ", err)
+	}
+	return nil
 }
 
 func (p *FileCryptoMetaData) writeField1(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 1); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:encryption_algorithm: ", p), err) }
-  if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
-  }
-  if err := oprot.WriteFieldEnd(ctx); err != nil {
-    return thrift.PrependError(fmt.Sprintf("%T write field end error 1:encryption_algorithm: ", p), err) }
-  return err
+	if err := oprot.WriteFieldBegin(ctx, "encryption_algorithm", thrift.STRUCT, 1); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:encryption_algorithm: ", p), err)
+	}
+	if err := p.EncryptionAlgorithm.Write(ctx, oprot); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.EncryptionAlgorithm), err)
+	}
+	if err := oprot.WriteFieldEnd(ctx); err != nil {
+		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:encryption_algorithm: ", p), err)
+	}
+	return err
 }
 
 func (p *FileCryptoMetaData) writeField2(ctx context.Context, oprot thrift.TProtocol) (err error) {
-  if p.IsSetKeyMetadata() {
-    if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err) }
-    if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err) }
-    if err := oprot.WriteFieldEnd(ctx); err != nil {
-      return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err) }
-  }
-  return err
+	if p.IsSetKeyMetadata() {
+		if err := oprot.WriteFieldBegin(ctx, "key_metadata", thrift.STRING, 2); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:key_metadata: ", p), err)
+		}
+		if err := oprot.WriteBinary(ctx, p.KeyMetadata); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T.key_metadata (2) field write error: ", p), err)
+		}
+		if err := oprot.WriteFieldEnd(ctx); err != nil {
+			return thrift.PrependError(fmt.Sprintf("%T write field end error 2:key_metadata: ", p), err)
+		}
+	}
+	return err
 }
 
 func (p *FileCryptoMetaData) Equals(other *FileCryptoMetaData) bool {
-  if p == other {
-    return true
-  } else if p == nil || other == nil {
-    return false
-  }
-  if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) { return false }
-  if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 { return false }
-  return true
+	if p == other {
+		return true
+	} else if p == nil || other == nil {
+		return false
+	}
+	if !p.EncryptionAlgorithm.Equals(other.EncryptionAlgorithm) {
+		return false
+	}
+	if bytes.Compare(p.KeyMetadata, other.KeyMetadata) != 0 {
+		return false
+	}
+	return true
 }
 
 func (p *FileCryptoMetaData) String() string {
-  if p == nil {
-    return "<nil>"
-  }
-  return fmt.Sprintf("FileCryptoMetaData(%+v)", *p)
+	if p == nil {
+		return "<nil>"
+	}
+	return fmt.Sprintf("FileCryptoMetaData(%+v)", *p)
 }
 
 func (p *FileCryptoMetaData) Validate() error {
-  return nil
+	return nil
 }
diff --git a/go/parquet/internal/utils/bit_packing_avx2_amd64.go b/go/parquet/internal/utils/bit_packing_avx2_amd64.go
index ee01f002b5ece..0455ccc505bfe 100644
--- a/go/parquet/internal/utils/bit_packing_avx2_amd64.go
+++ b/go/parquet/internal/utils/bit_packing_avx2_amd64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/internal/utils/bit_packing_neon_arm64.go b/go/parquet/internal/utils/bit_packing_neon_arm64.go
index 8d09c891155ef..09154e3e4b7dd 100755
--- a/go/parquet/internal/utils/bit_packing_neon_arm64.go
+++ b/go/parquet/internal/utils/bit_packing_neon_arm64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/internal/utils/unpack_bool_amd64.go b/go/parquet/internal/utils/unpack_bool_amd64.go
index 1e9680db4b21a..2b2054f3b00b8 100644
--- a/go/parquet/internal/utils/unpack_bool_amd64.go
+++ b/go/parquet/internal/utils/unpack_bool_amd64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/internal/utils/unpack_bool_arm64.go b/go/parquet/internal/utils/unpack_bool_arm64.go
index 2c3b19eca458b..879ffd3c9540d 100644
--- a/go/parquet/internal/utils/unpack_bool_arm64.go
+++ b/go/parquet/internal/utils/unpack_bool_arm64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
@@ -21,13 +22,14 @@ package utils
 import (
 	"os"
 	"strings"
+
+	"golang.org/x/sys/cpu"
 )
-import "golang.org/x/sys/cpu"
 
 var byteToBoolFunc func([]byte, []bool)
 
 func init() {
-  // Added ability to enable extension via environment:
+	// Added ability to enable extension via environment:
 	// ARM_ENABLE_EXT=NEON go test
 	if ext, ok := os.LookupEnv("ARM_ENABLE_EXT"); ok {
 		exts := strings.Split(ext, ",")
diff --git a/go/parquet/internal/utils/unpack_bool_avx2_amd64.go b/go/parquet/internal/utils/unpack_bool_avx2_amd64.go
index e0065e5aad16d..cec772a2ccf97 100644
--- a/go/parquet/internal/utils/unpack_bool_avx2_amd64.go
+++ b/go/parquet/internal/utils/unpack_bool_avx2_amd64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/internal/utils/unpack_bool_neon_arm64.go b/go/parquet/internal/utils/unpack_bool_neon_arm64.go
index 2e9808abbf157..ed46ce29e0309 100755
--- a/go/parquet/internal/utils/unpack_bool_neon_arm64.go
+++ b/go/parquet/internal/utils/unpack_bool_neon_arm64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/internal/utils/unpack_bool_noasm.go b/go/parquet/internal/utils/unpack_bool_noasm.go
index a715366c6418d..eba20fa9c0f56 100644
--- a/go/parquet/internal/utils/unpack_bool_noasm.go
+++ b/go/parquet/internal/utils/unpack_bool_noasm.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build noasm
 // +build noasm
 
 package utils
diff --git a/go/parquet/internal/utils/unpack_bool_sse4_amd64.go b/go/parquet/internal/utils/unpack_bool_sse4_amd64.go
index 85e4aa77df73b..d00c37474e61c 100644
--- a/go/parquet/internal/utils/unpack_bool_sse4_amd64.go
+++ b/go/parquet/internal/utils/unpack_bool_sse4_amd64.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build !noasm
 // +build !noasm
 
 package utils
diff --git a/go/parquet/metadata/app_version.go b/go/parquet/metadata/app_version.go
index f61c4c9703f78..fa54aec347575 100644
--- a/go/parquet/metadata/app_version.go
+++ b/go/parquet/metadata/app_version.go
@@ -74,7 +74,8 @@ func NewAppVersionExplicit(app string, major, minor, patch int) *AppVersion {
 // NewAppVersion parses a "created by" string such as "parquet-go 1.0.0".
 //
 // It also supports handling pre-releases and build info such as
-// parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
+//
+//	parquet-cpp version 1.5.0ab-xyz5.5.0+cd (build abcd)
 func NewAppVersion(createdby string) *AppVersion {
 	v := &AppVersion{}
diff --git a/go/parquet/schema/reflection.go b/go/parquet/schema/reflection.go
index 1ec9c72f3dfc1..f961c6ef26d08 100644
--- a/go/parquet/schema/reflection.go
+++ b/go/parquet/schema/reflection.go
@@ -551,7 +551,7 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info
 // NewSchemaFromStruct generates a schema from an object type via reflection of
 // the type and reading struct tags for "parquet".
 //
-// Rules
+// # Rules
 //
 // Everything defaults to Required repetition, unless otherwise specified.
 // Pointer types become Optional repetition.
@@ -571,7 +571,7 @@ func typeToNode(name string, typ reflect.Type, repType parquet.Repetition, info
 //
 // maps will become appropriate Map structures in the schema of the defined key and values.
 //
-// Available Tags
+// # Available Tags
 //
 // name: by default the node will have the same name as the field, this tag let's you specify a name
 //
diff --git a/go/parquet/tools.go b/go/parquet/tools.go
index b9ce84def5ae0..64e9419e4f711 100644
--- a/go/parquet/tools.go
+++ b/go/parquet/tools.go
@@ -14,6 +14,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+//go:build tools
 // +build tools
 
 package tools
diff --git a/swift/data-generator/swift-datagen/main.go b/swift/data-generator/swift-datagen/main.go
index a60fb562932fe..2f2e244ab5891 100644
--- a/swift/data-generator/swift-datagen/main.go
+++ b/swift/data-generator/swift-datagen/main.go
@@ -22,8 +22,8 @@ import (
 
 	"github.com/apache/arrow/go/v12/arrow"
 	"github.com/apache/arrow/go/v12/arrow/array"
-	"github.com/apache/arrow/go/v12/arrow/memory"
 	"github.com/apache/arrow/go/v12/arrow/ipc"
+	"github.com/apache/arrow/go/v12/arrow/memory"
 )
 
 func writeBytes(rec arrow.Record, file_name string) {
@@ -42,7 +42,6 @@ func writeBytes(rec arrow.Record, file_name string) {
 	rr.Close()
 }
 
-
 func writeBoolData() {
 	alloc := memory.NewGoAllocator()
 	schema := arrow.NewSchema([]arrow.Field{
@@ -53,14 +52,13 @@ func writeBoolData() {
 	b := array.NewRecordBuilder(alloc, schema)
 	defer b.Release()
 
-	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false,}, nil)
+	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{true, false}, nil)
 	b.Field(0).(*array.BooleanBuilder).AppendNull()
-	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{false, true,}, nil)
+	b.Field(0).(*array.BooleanBuilder).AppendValues([]bool{false, true}, nil)
 	b.Field(1).(*array.StringBuilder).AppendValues([]string{"zero", "one", "two", "three", "four"}, nil)
 
 	rec := b.NewRecord()
 	defer rec.Release()
-
 	writeBytes(rec, "testdata_bool.arrow")
 }
 
@@ -81,11 +79,10 @@ func writeDoubleData() {
 	rec := b.NewRecord()
 	defer rec.Release()
-
 	writeBytes(rec, "testdata_double.arrow")
 }
 
 func main() {
-	writeBoolData();
-	writeDoubleData();
+	writeBoolData()
+	writeDoubleData()
 }

From 813fe2596751fe9577dbe9beca2c50a351c4c2dd Mon Sep 17 00:00:00 2001
From: Nic Crane
Date: Mon, 3 Jun 2024 18:18:38 +0100
Subject: [PATCH 209/261] GH-41829: [R] Update relative URLs in README to
 absolute paths to prevent CRAN check failures (#41830)

### Rationale for this change

Relative URLs in README mean we fail CRAN checks

### What changes are included in this PR?

Update relative URLs to absolute URLs

### Are these changes tested?

Nope

### Are there any user-facing changes?

Nope
* GitHub Issue: #41829

Authored-by: Nic Crane
Signed-off-by: Nic Crane
---
 r/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/r/README.md b/r/README.md
index 710fa8e8d7cb5..c3cd5a32eaf69 100644
--- a/r/README.md
+++ b/r/README.md
@@ -12,7 +12,7 @@
 
 The R `{arrow}` package provides access to many of the features of the [Apache Arrow C++ library](https://arrow.apache.org/docs/cpp/index.html) for R users. The goal of arrow is to provide an Arrow C++ backend to `{dplyr}`, and access to the Arrow C++ library through familiar base R and tidyverse functions, or `{R6}` classes.
 
-To learn more about the Apache Arrow project, see the parent documentation of the [Arrow Project](https://arrow.apache.org/). The Arrow project provides functionality for a wide range of data analysis tasks to store, process and move data fast. See the [read/write article](articles/read_write.html) to learn about reading and writing data files, [data wrangling](articles/data_wrangling.html) to learn how to use dplyr syntax with arrow objects, and the [function documentation](reference/acero.html) for a full list of supported functions within dplyr queries.
+To learn more about the Apache Arrow project, see the parent documentation of the [Arrow Project](https://arrow.apache.org/). The Arrow project provides functionality for a wide range of data analysis tasks to store, process and move data fast. See the [read/write article](https://arrow.apache.org/docs/r/articles/read_write.html) to learn about reading and writing data files, [data wrangling](https://arrow.apache.org/docs/r/articles/data_wrangling.html) to learn how to use dplyr syntax with arrow objects, and the [function documentation](https://arrow.apache.org/docs/r/reference/acero.html) for a full list of supported functions within dplyr queries.
 
 ## Installation
 
@@ -33,11 +33,11 @@ There are some special cases to note:
 
 - On macOS, the R you use with Arrow should match the architecture of the machine you are using. If you're using an ARM (aka M1, M2, etc.) processor use R compiled for arm64. If you're using an Intel based mac, use R compiled for x86. Using R and Arrow compiled for Intel based macs on an ARM based mac will result in segfaults and crashes.
 
-- On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the [installation guide](articles/install.html).
+- On Linux the installation process can sometimes be more involved because CRAN does not host binaries for Linux. For more information please see the [installation guide](https://arrow.apache.org/docs/r/articles/install.html).
 
 - If you are compiling arrow from source, please note that as of version 10.0.0, arrow requires C++17 to build. This has implications on Windows and CentOS 7. For Windows users it means you need to be running an R version of 4.0 or later. On CentOS 7, it means you need to install a newer compiler than the default system compiler gcc. See the [installation details article](https://arrow.apache.org/docs/r/articles/developers/install_details.html) for guidance.
 
-- Development versions of arrow are released nightly. For information on how to installl nighhtly builds please see the [installing nightly builds](articles/install_nightly.html) article.
+- Development versions of arrow are released nightly. For information on how to install nightly builds please see the [installing nightly builds](https://arrow.apache.org/docs/r/articles/install_nightly.html) article.
 
 ## What can the arrow package do?
 

From 1598782d4ffd3f7a961f379148f17f34e16caf2b Mon Sep 17 00:00:00 2001
From: Steve Lord <72518652+stevelorddremio@users.noreply.github.com>
Date: Mon, 3 Jun 2024 12:57:42 -0700
Subject: [PATCH 210/261] GH-41262: [Java][FlightSQL] Implement stateless
 prepared statements (#41237)

### Rationale for this change

Expand the number of implemented languages for stateless prepared statements to include Java.

### What changes are included in this PR?

Update FlightSqlClient and include a stateless server implementation example with tests.

### Are these changes tested?

Yes, tests are added to cover a stateless server implementation.

### Are there any user-facing changes?

There is a modified FlightSqlClient that is required to enable use of stateless prepared statements.
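For reference, a minimal client-side sketch might look like the following (the `intTable` name comes from the bundled Derby example; the exact query, column vector type, and allocator setup are illustrative assumptions, not part of this PR):

```java
// Hypothetical usage sketch: bind one parameter and execute a prepared
// statement. The statelessness is entirely server-side; client code is
// unchanged apart from using the updated FlightSqlClient.
try (FlightSqlClient.PreparedStatement prepared =
         sqlClient.prepare("SELECT * FROM intTable WHERE id = ?")) {
  try (VectorSchemaRoot parameters =
           VectorSchemaRoot.create(prepared.getParameterSchema(), allocator)) {
    IntVector id = (IntVector) parameters.getVector(0); // assumes an INT parameter
    id.setSafe(0, 1);
    parameters.setRowCount(1);
    prepared.setParameters(parameters);

    // DoPut uploads the parameters; with a stateless server the returned
    // handle embeds the query plus the bound values.
    FlightInfo info = prepared.execute();
    try (FlightStream stream =
             sqlClient.getStream(info.getEndpoints().get(0).getTicket())) {
      while (stream.next()) {
        System.out.println(stream.getRoot().contentToTSVString());
      }
    }
  }
}
```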
* GitHub Issue: #41262

Lead-authored-by: Steve Lord
Co-authored-by: Mateusz Rzeszutek
Signed-off-by: David Li
---
 .../arrow/flight/sql/FlightSqlClient.java     |  27 +-
 .../DoPutPreparedStatementResultPOJO.java     |  38 +++
 .../flight/sql/example/FlightSqlExample.java  |  60 +++--
 .../example/FlightSqlStatelessExample.java    | 238 ++++++++++++++++++
 .../arrow/flight/sql/test/TestFlightSql.java  |  63 +++--
 .../sql/test/TestFlightSqlStateless.java      |  99 ++++++++
 6 files changed, 474 insertions(+), 51 deletions(-)
 create mode 100644 java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java
 create mode 100644 java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java
 create mode 100644 java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java

diff --git a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java
index 6fe31fae9216b..a94dc563cfbcc 100644
--- a/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java
+++ b/java/flight/flight-sql/src/main/java/org/apache/arrow/flight/sql/FlightSqlClient.java
@@ -78,6 +78,7 @@
 import org.apache.arrow.flight.SetSessionOptionsResult;
 import org.apache.arrow.flight.SyncPutListener;
 import org.apache.arrow.flight.Ticket;
+import org.apache.arrow.flight.sql.impl.FlightSql;
 import org.apache.arrow.flight.sql.impl.FlightSql.ActionCreatePreparedStatementResult;
 import org.apache.arrow.flight.sql.impl.FlightSql.CommandPreparedStatementQuery;
 import org.apache.arrow.flight.sql.util.TableRef;
@@ -1048,15 +1049,35 @@ private Schema deserializeSchema(final ByteString bytes) {
     public FlightInfo execute(final CallOption... options) {
       checkOpen();
 
-      final FlightDescriptor descriptor = FlightDescriptor
+      FlightDescriptor descriptor = FlightDescriptor
           .command(Any.pack(CommandPreparedStatementQuery.newBuilder()
               .setPreparedStatementHandle(preparedStatementResult.getPreparedStatementHandle())
               .build())
              .toByteArray());
 
       if (parameterBindingRoot != null && parameterBindingRoot.getRowCount() > 0) {
-        try (final SyncPutListener listener = putParameters(descriptor, options)) {
-          listener.getResult();
+        try (final SyncPutListener putListener = putParameters(descriptor, options)) {
+          if (getParameterSchema().getFields().size() > 0 &&
+              parameterBindingRoot != null &&
+              parameterBindingRoot.getRowCount() > 0) {
+            final PutResult read = putListener.read();
+            if (read != null) {
+              try (final ArrowBuf metadata = read.getApplicationMetadata()) {
+                final FlightSql.DoPutPreparedStatementResult doPutPreparedStatementResult =
+                    FlightSql.DoPutPreparedStatementResult.parseFrom(metadata.nioBuffer());
+                descriptor = FlightDescriptor
+                    .command(Any.pack(CommandPreparedStatementQuery.newBuilder()
+                        .setPreparedStatementHandle(
+                            doPutPreparedStatementResult.getPreparedStatementHandle())
+                        .build())
+                        .toByteArray());
+              }
+            }
+          }
+        } catch (final InterruptedException | ExecutionException e) {
+          throw CallStatus.CANCELLED.withCause(e).toRuntimeException();
+        } catch (final InvalidProtocolBufferException e) {
+          throw CallStatus.INVALID_ARGUMENT.withCause(e).toRuntimeException();
         }
       }
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java
new file mode 100644
index 0000000000000..ace78862b014d
--- /dev/null
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/DoPutPreparedStatementResultPOJO.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.sql.example;
+
+import java.io.Serializable;
+
+public class DoPutPreparedStatementResultPOJO implements Serializable {
+  private String query;
+  private byte[] parameters;
+
+  public DoPutPreparedStatementResultPOJO(String query, byte[] parameters) {
+    this.query = query;
+    this.parameters = parameters.clone();
+  }
+
+  public String getQuery() {
+    return query;
+  }
+
+  public byte[] getParameters() {
+    return parameters;
+  }
+}
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
index 52c402efd6f0b..36362fd8681d3 100644
--- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlExample.java
@@ -156,21 +156,22 @@
  * supports all current features of Flight SQL.
  */
 public class FlightSqlExample implements FlightSqlProducer, AutoCloseable {
-  private static final String DATABASE_URI = "jdbc:derby:target/derbyDB";
   private static final Logger LOGGER = getLogger(FlightSqlExample.class);
-  private static final Calendar DEFAULT_CALENDAR = JdbcToArrowUtils.getUtcCalendar();
+  protected static final Calendar DEFAULT_CALENDAR = JdbcToArrowUtils.getUtcCalendar();
+  public static final String DB_NAME = "derbyDB";
+  private final String databaseUri;
   // ARROW-15315: Use ExecutorService to simulate an async scenario
   private final ExecutorService executorService = Executors.newFixedThreadPool(10);
   private final Location location;
-  private final PoolingDataSource<PoolableConnection> dataSource;
-  private final BufferAllocator rootAllocator = new RootAllocator();
+  protected final PoolingDataSource<PoolableConnection> dataSource;
+  protected final BufferAllocator rootAllocator = new RootAllocator();
   private final Cache<ByteString, StatementContext<PreparedStatement>> preparedStatementLoadingCache;
   private final Cache<ByteString, StatementContext<Statement>> statementLoadingCache;
   private final SqlInfoBuilder sqlInfoBuilder;
 
   public static void main(String[] args) throws Exception {
     Location location = Location.forGrpcInsecure("localhost", 55555);
-    final FlightSqlExample example = new FlightSqlExample(location);
+    final FlightSqlExample example = new FlightSqlExample(location, DB_NAME);
     Location listenLocation = Location.forGrpcInsecure("0.0.0.0", 55555);
     try (final BufferAllocator allocator = new RootAllocator();
          final FlightServer server = FlightServer.builder(allocator, listenLocation, example).build()) {
@@ -179,13 +180,14 @@
     }
   }
 
-  public FlightSqlExample(final Location location) {
+  public FlightSqlExample(final Location location, final String dbName) {
     // TODO Constructor should not be doing work.
     checkState(
-        removeDerbyDatabaseIfExists() && populateDerbyDatabase(),
+        removeDerbyDatabaseIfExists(dbName) && populateDerbyDatabase(dbName),
         "Failed to reset Derby database!");
+    databaseUri = "jdbc:derby:target/" + dbName;
 
     final ConnectionFactory connectionFactory =
-        new DriverManagerConnectionFactory(DATABASE_URI, new Properties());
+        new DriverManagerConnectionFactory(databaseUri, new Properties());
     final PoolableConnectionFactory poolableConnectionFactory =
         new PoolableConnectionFactory(connectionFactory, null);
     final ObjectPool<PoolableConnection> connectionPool = new GenericObjectPool<>(poolableConnectionFactory);
@@ -248,9 +250,9 @@ public FlightSqlExample(final Location location) {
 
   }
 
-  private static boolean removeDerbyDatabaseIfExists() {
+  public static boolean removeDerbyDatabaseIfExists(final String dbName) {
    boolean wasSuccess;
-    final Path path = Paths.get("target" + File.separator + "derbyDB");
+    final Path path = Paths.get("target" + File.separator + dbName);
     try (final Stream<Path> walk = Files.walk(path)) {
 
       /*
@@ -262,7 +264,7 @@ private static boolean removeDerbyDatabaseIfExists() {
        * this not expected.
       */
       wasSuccess = walk.sorted(Comparator.reverseOrder()).map(Path::toFile).map(File::delete)
-          .reduce(Boolean::logicalAnd).orElseThrow(IOException::new);
+              .reduce(Boolean::logicalAnd).orElseThrow(IOException::new);
     } catch (IOException e) {
       /*
        * The only acceptable scenario for an `IOException` to be thrown here is if
@@ -277,9 +279,12 @@ private static boolean removeDerbyDatabaseIfExists() {
     return wasSuccess;
   }
 
-  private static boolean populateDerbyDatabase() {
-    try (final Connection connection = DriverManager.getConnection("jdbc:derby:target/derbyDB;create=true");
+  private static boolean populateDerbyDatabase(final String dbName) {
+    try (final Connection connection = DriverManager.getConnection("jdbc:derby:target/" + dbName + ";create=true");
          Statement statement = connection.createStatement()) {
+
+      dropTable(statement, "intTable");
+      dropTable(statement, "foreignTable");
       statement.execute("CREATE TABLE foreignTable (" +
           "id INT not null primary key GENERATED ALWAYS AS IDENTITY (START WITH 1, INCREMENT BY 1), " +
           "foreignName varchar(100), " +
@@ -302,6 +307,18 @@ private static boolean populateDerbyDatabase() {
     return true;
   }
 
+  private static void dropTable(final Statement statement, final String tableName) throws SQLException {
+    try {
+      statement.execute("DROP TABLE " + tableName);
+    } catch (SQLException e) {
+      // sql error code for "object does not exist"; which is fine, we're trying to delete the table
+      // see https://db.apache.org/derby/docs/10.17/ref/rrefexcept71493.html
+      if (!"42Y55".equals(e.getSQLState())) {
+        throw e;
+      }
+    }
+  }
+
   private static ArrowType getArrowTypeFromJdbcType(final int jdbcDataType, final int precision, final int scale) {
     try {
       return JdbcToArrowUtils.getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale),
@@ -778,7 +795,7 @@ public void createPreparedStatement(final ActionCreatePreparedStatementRequest r
     // Running on another thread
     Future<?> unused = executorService.submit(() -> {
       try {
-        final ByteString preparedStatementHandle = copyFrom(randomUUID().toString().getBytes(StandardCharsets.UTF_8));
+        final ByteString preparedStatementHandle = copyFrom(request.getQuery().getBytes(StandardCharsets.UTF_8));
         // Ownership of the connection will be passed to the context. Do NOT close!
         final Connection connection = dataSource.getConnection();
         final PreparedStatement preparedStatement = connection.prepareStatement(request.getQuery(),
@@ -882,7 +899,7 @@ public Runnable acceptPutPreparedStatementUpdate(CommandPreparedStatementUpdate
         while (binder.next()) {
           preparedStatement.addBatch();
         }
-        int[] recordCounts = preparedStatement.executeBatch();
+        final int[] recordCounts = preparedStatement.executeBatch();
         recordCount = Arrays.stream(recordCounts).sum();
       }
 
@@ -928,6 +945,7 @@ public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery co
             .toRuntimeException());
         return;
       }
+      ackStream.onCompleted();
     };
   }
 
@@ -1035,7 +1053,7 @@ public void getStreamTables(final CommandGetTables command, final CallContext co
     final String[] tableTypes =
         protocolSize == 0 ? null : protocolStringList.toArray(new String[protocolSize]);
 
-    try (final Connection connection = DriverManager.getConnection(DATABASE_URI);
+    try (final Connection connection = DriverManager.getConnection(databaseUri);
          final VectorSchemaRoot vectorSchemaRoot = getTablesRoot(
              connection.getMetaData(),
             rootAllocator,
@@ -1086,7 +1104,7 @@ public void getStreamPrimaryKeys(final CommandGetPrimaryKeys command, final Call
     final String schema = command.hasDbSchema() ? command.getDbSchema() : null;
     final String table = command.getTable();
 
-    try (Connection connection = DriverManager.getConnection(DATABASE_URI)) {
+    try (Connection connection = DriverManager.getConnection(databaseUri)) {
       final ResultSet primaryKeys = connection.getMetaData().getPrimaryKeys(catalog, schema, table);
 
       final VarCharVector catalogNameVector = new VarCharVector("catalog_name", rootAllocator);
@@ -1140,7 +1158,7 @@ public void getStreamExportedKeys(final CommandGetExportedKeys command, final Ca
     String schema = command.hasDbSchema() ? command.getDbSchema() : null;
     String table = command.getTable();
 
-    try (Connection connection = DriverManager.getConnection(DATABASE_URI);
+    try (Connection connection = DriverManager.getConnection(databaseUri);
          ResultSet keys = connection.getMetaData().getExportedKeys(catalog, schema, table);
         VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) {
       listener.start(vectorSchemaRoot);
@@ -1165,7 +1183,7 @@ public void getStreamImportedKeys(final CommandGetImportedKeys command, final Ca
     String schema = command.hasDbSchema() ? command.getDbSchema() : null;
     String table = command.getTable();
 
-    try (Connection connection = DriverManager.getConnection(DATABASE_URI);
+    try (Connection connection = DriverManager.getConnection(databaseUri);
          ResultSet keys = connection.getMetaData().getImportedKeys(catalog, schema, table);
         VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) {
       listener.start(vectorSchemaRoot);
@@ -1193,7 +1211,7 @@ public void getStreamCrossReference(CommandGetCrossReference command, CallContex
     final String pkTable = command.getPkTable();
     final String fkTable = command.getFkTable();
 
-    try (Connection connection = DriverManager.getConnection(DATABASE_URI);
+    try (Connection connection = DriverManager.getConnection(databaseUri);
          ResultSet keys = connection.getMetaData()
              .getCrossReference(pkCatalog, pkSchema, pkTable, fkCatalog, fkSchema, fkTable);
         VectorSchemaRoot vectorSchemaRoot = createVectors(keys)) {
@@ -1280,7 +1298,7 @@ public void getStreamStatement(final TicketStatementQuery ticketStatementQuery,
     }
   }
 
-  private <T extends Message> FlightInfo getFlightInfoForSchema(final T request, final FlightDescriptor descriptor,
+  protected <T extends Message> FlightInfo getFlightInfoForSchema(final T request, final FlightDescriptor descriptor,
                                                 final Schema schema) {
     final Ticket ticket = new Ticket(pack(request).toByteArray());
     // TODO Support multiple endpoints.
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java
new file mode 100644
index 0000000000000..c79c09c0967dc
--- /dev/null
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/example/FlightSqlStatelessExample.java
@@ -0,0 +1,238 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.arrow.flight.sql.example;
+
+import static java.lang.String.format;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrow.sqlToArrowVectorIterator;
+import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.jdbcToArrowSchema;
+import static org.apache.arrow.flight.sql.impl.FlightSql.*;
+import static org.slf4j.LoggerFactory.getLogger;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.StreamCorruptedException;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.sql.Connection;
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+
+import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
+import org.apache.arrow.adapter.jdbc.JdbcParameterBinder;
+import org.apache.arrow.flight.CallStatus;
+import org.apache.arrow.flight.FlightDescriptor;
+import org.apache.arrow.flight.FlightInfo;
+import org.apache.arrow.flight.FlightStream;
+import org.apache.arrow.flight.Location;
+import org.apache.arrow.flight.PutResult;
+import org.apache.arrow.flight.sql.FlightSqlProducer;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.vector.VectorLoader;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.VectorUnloader;
+import org.apache.arrow.vector.ipc.ArrowFileReader;
+import org.apache.arrow.vector.ipc.ArrowFileWriter;
+import org.apache.arrow.vector.ipc.SeekableReadChannel;
+import org.apache.arrow.vector.ipc.message.ArrowBlock;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel;
+import org.slf4j.Logger;
+
+import com.google.protobuf.ByteString;
+
+/**
+ * Example {@link FlightSqlProducer} implementation showing an Apache Derby backed Flight SQL server that generally
+ * supports all current features of Flight SQL.
+ */
+public class FlightSqlStatelessExample extends FlightSqlExample {
+  private static final Logger LOGGER = getLogger(FlightSqlStatelessExample.class);
+  public static final String DB_NAME = "derbyStatelessDB";
+
+
+  public FlightSqlStatelessExample(final Location location, final String dbName) {
+    super(location, dbName);
+  }
+
+  @Override
+  public Runnable acceptPutPreparedStatementQuery(CommandPreparedStatementQuery command, CallContext context,
+                                                  FlightStream flightStream, StreamListener<PutResult> ackStream) {
+
+    return () -> {
+      final String query = new String(command.getPreparedStatementHandle().toStringUtf8());
+      try (Connection connection = dataSource.getConnection();
+           PreparedStatement preparedStatement = createPreparedStatement(connection, query)) {
+        while (flightStream.next()) {
+          final VectorSchemaRoot root = flightStream.getRoot();
+          final JdbcParameterBinder binder = JdbcParameterBinder.builder(preparedStatement, root).bindAll().build();
+          while (binder.next()) {
+            // Do not execute() - will be done in a getStream call
+          }
+
+          final ByteArrayOutputStream parametersStream = new ByteArrayOutputStream();
+          try (ArrowFileWriter writer = new ArrowFileWriter(root, null, Channels.newChannel(parametersStream))
+          ) {
+            writer.start();
+            writer.writeBatch();
+          }
+
+          if (parametersStream.size() > 0) {
+            final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO =
+                new DoPutPreparedStatementResultPOJO(query, parametersStream.toByteArray());
+
+            final byte[] doPutPreparedStatementResultPOJOArr = serializePOJO(doPutPreparedStatementResultPOJO);
+            final DoPutPreparedStatementResult doPutPreparedStatementResult =
+                DoPutPreparedStatementResult.newBuilder()
+                    .setPreparedStatementHandle(
+                        ByteString.copyFrom(ByteBuffer.wrap(doPutPreparedStatementResultPOJOArr)))
+                    .build();
+
+            try (final ArrowBuf buffer = rootAllocator.buffer(doPutPreparedStatementResult.getSerializedSize())) {
+              buffer.writeBytes(doPutPreparedStatementResult.toByteArray());
+              ackStream.onNext(PutResult.metadata(buffer));
+            }
+          }
+        }
+
+      } catch (SQLException | IOException e) {
+        ackStream.onError(CallStatus.INTERNAL
+            .withDescription("Failed to bind parameters: " + e.getMessage())
+            .withCause(e)
+            .toRuntimeException());
+        return;
+      }
+
+      ackStream.onCompleted();
+    };
+  }
+
+  @Override
+  public void getStreamPreparedStatement(final CommandPreparedStatementQuery command, final CallContext context,
+                                         final ServerStreamListener listener) {
+    final byte[] handle = command.getPreparedStatementHandle().toByteArray();
+    try {
+      // Case where there are parameters
+      try {
+        final DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO =
+            deserializePOJO(handle);
+        final String query = doPutPreparedStatementResultPOJO.getQuery();
+
+        try (Connection connection = dataSource.getConnection();
+             PreparedStatement statement = createPreparedStatement(connection, query);
+             ArrowFileReader reader = new ArrowFileReader(new SeekableReadChannel(
+                 new ByteArrayReadableSeekableByteChannel(
+                     doPutPreparedStatementResultPOJO.getParameters())), rootAllocator)) {
+
+          for (ArrowBlock arrowBlock : reader.getRecordBlocks()) {
+            reader.loadRecordBatch(arrowBlock);
+            VectorSchemaRoot vectorSchemaRootRecover = reader.getVectorSchemaRoot();
+            JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, vectorSchemaRootRecover)
+                .bindAll().build();
+
+            while (binder.next()) {
+              executeQuery(statement, listener);
+            }
+          }
+        }
+      } catch (StreamCorruptedException e) {
+        // Case where there are no parameters
+        final String query = new String(command.getPreparedStatementHandle().toStringUtf8());
+        try (Connection connection = dataSource.getConnection();
+             PreparedStatement preparedStatement = createPreparedStatement(connection, query)) {
+          executeQuery(preparedStatement, listener);
+        }
+      }
+    } catch (final SQLException | IOException | ClassNotFoundException e) {
+      LOGGER.error(format("Failed to getStreamPreparedStatement: <%s>.", e.getMessage()), e);
+      listener.error(CallStatus.INTERNAL.withDescription("Failed to prepare statement: " + e).toRuntimeException());
+    } finally {
+      listener.completed();
+    }
+  }
+
+  private void executeQuery(PreparedStatement statement,
+                            final ServerStreamListener listener) throws IOException, SQLException {
+    try (final ResultSet resultSet = statement.executeQuery()) {
+      final Schema schema = jdbcToArrowSchema(resultSet.getMetaData(), DEFAULT_CALENDAR);
+      try (final VectorSchemaRoot vectorSchemaRoot = VectorSchemaRoot.create(schema, rootAllocator)) {
+        final VectorLoader loader = new VectorLoader(vectorSchemaRoot);
+        listener.start(vectorSchemaRoot);
+
+        final ArrowVectorIterator iterator = sqlToArrowVectorIterator(resultSet, rootAllocator);
+        while (iterator.hasNext()) {
+          final VectorSchemaRoot batch = iterator.next();
+          if (batch.getRowCount() == 0) {
+            break;
+          }
+          final VectorUnloader unloader = new VectorUnloader(batch);
+          loader.load(unloader.getRecordBatch());
+          listener.putNext();
+          vectorSchemaRoot.clear();
+        }
+        listener.putNext();
+      }
+    }
+  }
+
+  @Override
+  public FlightInfo getFlightInfoPreparedStatement(final CommandPreparedStatementQuery command,
+                                                   final CallContext context,
+                                                   final FlightDescriptor descriptor) {
+    final byte[] handle = command.getPreparedStatementHandle().toByteArray();
+    try {
+      String query;
+      try {
+        query = deserializePOJO(handle).getQuery();
+      } catch (StreamCorruptedException e) {
+        query = new String(command.getPreparedStatementHandle().toStringUtf8());
+      }
+      try (Connection connection = dataSource.getConnection();
+           PreparedStatement statement = createPreparedStatement(connection, query)) {
+        ResultSetMetaData metaData = statement.getMetaData();
+        return getFlightInfoForSchema(command, descriptor,
+            jdbcToArrowSchema(metaData, DEFAULT_CALENDAR));
+      }
+    } catch (final SQLException | IOException | ClassNotFoundException e) {
+      LOGGER.error(format("There was a problem executing the prepared statement: <%s>.", e.getMessage()), e);
+      throw CallStatus.INTERNAL.withCause(e).toRuntimeException();
+    }
+  }
+
+  private DoPutPreparedStatementResultPOJO deserializePOJO(byte[] handle) throws IOException, ClassNotFoundException {
+    try (ByteArrayInputStream bis = new ByteArrayInputStream(handle);
+         ObjectInputStream ois = new ObjectInputStream(bis)) {
+      return (DoPutPreparedStatementResultPOJO) ois.readObject();
+    }
+  }
+
+  private byte[] serializePOJO(DoPutPreparedStatementResultPOJO doPutPreparedStatementResultPOJO) throws IOException {
+    try (ByteArrayOutputStream bos = new ByteArrayOutputStream();
+         ObjectOutputStream oos = new ObjectOutputStream(bos)) {
+      oos.writeObject(doPutPreparedStatementResultPOJO);
+      return bos.toByteArray();
+    }
+  }
+
+  private PreparedStatement createPreparedStatement(Connection connection, String query) throws SQLException {
+    return connection.prepareStatement(query, ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
+  }
+}
diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
index a39736e939f0b..ffffdd62ac950 100644
--- a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
+++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSql.java
@@ -87,63 +87,72 @@ public class TestFlightSql {
           Field.nullable("FOREIGNID", MinorType.INT.getType())));
   private static final List<List<String>> EXPECTED_RESULTS_FOR_STAR_SELECT_QUERY = ImmutableList.of(
       asList("1", "one", "1", "1"), asList("2", "zero", "0", "1"), asList("3", "negative one", "-1", "1"));
-  private static final List<List<String>> EXPECTED_RESULTS_FOR_PARAMETER_BINDING = ImmutableList.of(
+  protected static final List<List<String>> EXPECTED_RESULTS_FOR_PARAMETER_BINDING = ImmutableList.of(
      asList("1", "one", "1", "1"));
   private static final Map<String, String> GET_SQL_INFO_EXPECTED_RESULTS_MAP = new LinkedHashMap<>();
-  private static final String LOCALHOST = "localhost";
-  private static BufferAllocator allocator;
-  private static FlightServer server;
-  private static FlightSqlClient sqlClient;
+  protected static final String LOCALHOST = "localhost";
+  protected static BufferAllocator allocator;
+  protected static FlightServer server;
+  protected static FlightSqlClient sqlClient;
 
   @BeforeAll
   public static void setUp() throws Exception {
+    setUpClientServer();
+    setUpExpectedResultsMap();
+  }
+
+  private static void setUpClientServer() throws Exception {
     allocator = new RootAllocator(Integer.MAX_VALUE);
 
     final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0);
-    server = FlightServer.builder(allocator, serverLocation, new FlightSqlExample(serverLocation))
-        .build()
-        .start();
+    server = FlightServer.builder(allocator, serverLocation,
+            new FlightSqlExample(serverLocation, FlightSqlExample.DB_NAME))
+        .build()
+        .start();
 
     final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort());
     sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build());
+  }
 
+  protected static void setUpExpectedResultsMap() {
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby");
+            .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_NAME_VALUE), "Apache Derby");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), "10.14.2.0 - (1828579)");
+            .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_VERSION_VALUE), "10.14.2.0 - (1828579)");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), "10.14.2.0 - (1828579)");
+            .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_ARROW_VERSION_VALUE), "10.14.2.0 - (1828579)");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false");
+            .put(Integer.toString(FlightSql.SqlInfo.FLIGHT_SQL_SERVER_READ_ONLY_VALUE), "false");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE), "true");
+            .put(Integer.toString(FlightSql.SqlInfo.SQL_ALL_TABLES_ARE_SELECTABLE_VALUE), "true");
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(
-            Integer.toString(FlightSql.SqlInfo.SQL_NULL_ORDERING_VALUE),
-            Integer.toString(FlightSql.SqlNullOrdering.SQL_NULLS_SORTED_AT_END_VALUE));
+            .put(
+                Integer.toString(FlightSql.SqlInfo.SQL_NULL_ORDERING_VALUE),
+                Integer.toString(FlightSql.SqlNullOrdering.SQL_NULLS_SORTED_AT_END_VALUE));
     GET_SQL_INFO_EXPECTED_RESULTS_MAP
-        .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_CATALOG_VALUE), "false");
+
.put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_CATALOG_VALUE), "false"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_SCHEMA_VALUE), "true"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_SCHEMA_VALUE), "true"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_TABLE_VALUE), "true"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_DDL_TABLE_VALUE), "true"); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put( - Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE_VALUE)); + .put( + Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_CASE_VALUE), + Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_UPPERCASE_VALUE)); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE), "\""); + .put(Integer.toString(FlightSql.SqlInfo.SQL_IDENTIFIER_QUOTE_CHAR_VALUE), "\""); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put( - Integer.toString(FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE), - Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE_VALUE)); + .put( + Integer.toString(FlightSql.SqlInfo.SQL_QUOTED_IDENTIFIER_CASE_VALUE), + Integer.toString(SqlSupportedCaseSensitivity.SQL_CASE_SENSITIVITY_CASE_INSENSITIVE_VALUE)); GET_SQL_INFO_EXPECTED_RESULTS_MAP - .put(Integer.toString(FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE), "42"); + .put(Integer.toString(FlightSql.SqlInfo.SQL_MAX_COLUMNS_IN_TABLE_VALUE), "42"); } @AfterAll public static void tearDown() throws Exception { close(sqlClient, server, allocator); + FlightSqlExample.removeDerbyDatabaseIfExists(FlightSqlExample.DB_NAME); } private static List> getNonConformingResultsForGetSqlInfo(final List> results) { diff --git a/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java new file mode 100644 index 0000000000000..09c7b2ef87f45 --- /dev/null +++ b/java/flight/flight-sql/src/test/java/org/apache/arrow/flight/sql/test/TestFlightSqlStateless.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.flight.sql.test; + +import static org.apache.arrow.flight.sql.util.FlightStreamUtils.getResults; +import static org.apache.arrow.util.AutoCloseables.close; +import static org.hamcrest.CoreMatchers.*; + +import org.apache.arrow.flight.FlightClient; +import org.apache.arrow.flight.FlightEndpoint; +import org.apache.arrow.flight.FlightInfo; +import org.apache.arrow.flight.FlightServer; +import org.apache.arrow.flight.FlightStream; +import org.apache.arrow.flight.Location; +import org.apache.arrow.flight.sql.FlightSqlClient; +import org.apache.arrow.flight.sql.FlightSqlClient.PreparedStatement; +import org.apache.arrow.flight.sql.example.FlightSqlStatelessExample; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import org.hamcrest.MatcherAssert; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +/** + * Test direct usage of Flight SQL workflows. + */ +public class TestFlightSqlStateless extends TestFlightSql { + + @BeforeAll + public static void setUp() throws Exception { + setUpClientServer(); + setUpExpectedResultsMap(); + } + + @AfterAll + public static void tearDown() throws Exception { + close(sqlClient, server, allocator); + FlightSqlStatelessExample.removeDerbyDatabaseIfExists(FlightSqlStatelessExample.DB_NAME); + } + + private static void setUpClientServer() throws Exception { + allocator = new RootAllocator(Integer.MAX_VALUE); + + final Location serverLocation = Location.forGrpcInsecure(LOCALHOST, 0); + server = FlightServer.builder(allocator, serverLocation, + new FlightSqlStatelessExample(serverLocation, FlightSqlStatelessExample.DB_NAME)) + .build() + .start(); + + final Location clientLocation = Location.forGrpcInsecure(LOCALHOST, server.getPort()); + sqlClient = new FlightSqlClient(FlightClient.builder(allocator, clientLocation).build()); + } + + @Override + @Test + public void testSimplePreparedStatementResultsWithParameterBinding() throws Exception { + try (PreparedStatement prepare = sqlClient.prepare("SELECT * FROM intTable WHERE id = ?")) { + final Schema parameterSchema = prepare.getParameterSchema(); + try (final VectorSchemaRoot insertRoot = VectorSchemaRoot.create(parameterSchema, allocator)) { + insertRoot.allocateNew(); + + final IntVector valueVector = (IntVector) insertRoot.getVector(0); + valueVector.setSafe(0, 1); + insertRoot.setRowCount(1); + + prepare.setParameters(insertRoot); + final FlightInfo flightInfo = prepare.execute(); + + for (FlightEndpoint endpoint: flightInfo.getEndpoints()) { + try (FlightStream stream = sqlClient.getStream(endpoint.getTicket())) { + Assertions.assertAll( + () -> MatcherAssert.assertThat(stream.getSchema(), is(SCHEMA_INT_TABLE)), + () -> MatcherAssert.assertThat(getResults(stream), is(EXPECTED_RESULTS_FOR_PARAMETER_BINDING)) + ); + } + } + } + } + } +} From 7f0c4070dd723b2f7e1967d7f7f2cccf6fb256b7 Mon Sep 17 00:00:00 2001 From: Curt Hagenlocher Date: Mon, 3 Jun 2024 13:16:05 -0700 Subject: [PATCH 211/261] GH-41397: [C#] Downgrade macOS test runner to avoid infrastructure bug (#41934) ### What changes are included in this PR? Downgrades the macOS test image for C# to use an older operating system. This works around https://github.com/pythonnet/pythonnet/issues/2396. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #41397 Authored-by: Curt Hagenlocher Signed-off-by: Sutou Kouhei --- .github/workflows/csharp.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/csharp.yml b/.github/workflows/csharp.yml index 7ae3606a44812..e4db9f482e206 100644 --- a/.github/workflows/csharp.yml +++ b/.github/workflows/csharp.yml @@ -94,8 +94,8 @@ jobs: run: ci/scripts/csharp_test.sh $(pwd) macos: - name: ARM64 macOS 14 C# ${{ matrix.dotnet }} - runs-on: macos-latest + name: AMD64 macOS 13 C# ${{ matrix.dotnet }} + runs-on: macos-13 # Pending https://github.com/pythonnet/pythonnet/issues/2396 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} timeout-minutes: 15 strategy: From 2b1593d78f915b1d5e12a83ba759ed95124dd300 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:32:16 +0900 Subject: [PATCH 212/261] MINOR: [Go] Bump github.com/hamba/avro/v2 from 2.22.0 to 2.22.1 in /go (#41937) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [github.com/hamba/avro/v2](https://github.com/hamba/avro) from 2.22.0 to 2.22.1.
Release notes

Sourced from github.com/hamba/avro/v2's releases.

v2.22.1

Full Changelog: https://github.com/hamba/avro/compare/v2.22.0...v2.22.1

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=github.com/hamba/avro/v2&package-manager=go_modules&previous-version=2.22.0&new-version=2.22.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- go/go.mod | 2 +- go/go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go/go.mod b/go/go.mod index 9c70544539b16..b6a3ed207c6ad 100644 --- a/go/go.mod +++ b/go/go.mod @@ -47,7 +47,7 @@ require ( require ( github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.22.0 + github.com/hamba/avro/v2 v2.22.1 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 ) diff --git a/go/go.sum b/go/go.sum index 9e11041c333ac..79350f4a1cf27 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.22.0 h1:IaBMFv5xmjo38f0oaP9jZiJFXg+lmHPPg7d9YotMnPg= -github.com/hamba/avro/v2 v2.22.0/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= +github.com/hamba/avro/v2 v2.22.1 h1:q1rAbfJsrbMaZPDLQvwUQMfQzp6H+hGXvckmU/lXemk= +github.com/hamba/avro/v2 v2.22.1/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= From 9b59157a443dcfd7bf6f7db53a1cad02ff04645f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:32:36 +0900 Subject: [PATCH 213/261] MINOR: [Java] Bump org.apache.maven.plugins:maven-gpg-plugin from 3.2.2 to 3.2.4 in /java (#41939) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.apache.maven.plugins:maven-gpg-plugin](https://github.com/apache/maven-gpg-plugin) from 3.2.2 to 3.2.4.
Release notes

Sourced from org.apache.maven.plugins:maven-gpg-plugin's releases.

3.2.4

Release Notes - Maven GPG Plugin - Version 3.2.4


📦 Dependency updates

3.2.3

Release Notes - Maven GPG Plugin - Version 3.2.3

... (truncated)

Commits
  • 789149e [maven-release-plugin] prepare release maven-gpg-plugin-3.2.4
  • 893aedc [MGPG-125] Fix "bestPractices" (#95)
  • b6f0324 [MGPG-126] Bump commons-io:commons-io from 2.16.0 to 2.16.1 (#94)
  • 3c5878b [maven-release-plugin] prepare for next development iteration
  • 89b91a4 [maven-release-plugin] prepare release maven-gpg-plugin-3.2.3
  • fc2efa3 [MGPG-123][MGPG-124] Dependency upgrades (#93)
  • 50222d3 [MGPG-120] New mojo sign-deployed (#88)
  • a6c3a09 [MGPG-122] Bump org.apache.maven.plugins:maven-invoker-plugin from 3.6.0 to 3...
  • 78f5e37 [MGPG-121] Return the workaround for pseudo security (#90)
  • 582df74 [MGPG-117] Improve passphrase handling (#86)
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-gpg-plugin&package-manager=maven&previous-version=3.2.2&new-version=3.2.4)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/gandiva/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index 26a28d55d238e..a87f26028ba86 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -131,7 +131,7 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.2 + 3.2.4 sign-artifacts From 9ab28c68263d6e059dd88908aaf8d2a5cf9b5eeb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:32:55 +0900 Subject: [PATCH 214/261] MINOR: [Java] Bump dep.slf4j.version from 2.0.12 to 2.0.13 in /java (#41940) Bumps `dep.slf4j.version` from 2.0.12 to 2.0.13. Updates `org.slf4j:slf4j-api` from 2.0.12 to 2.0.13 Updates `org.slf4j:slf4j-jdk14` from 2.0.12 to 2.0.13 Updates `org.slf4j:jul-to-slf4j` from 2.0.12 to 2.0.13 Updates `org.slf4j:jcl-over-slf4j` from 2.0.12 to 2.0.13 Updates `org.slf4j:log4j-over-slf4j` from 2.0.12 to 2.0.13 Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/maven/pom.xml | 2 +- java/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/maven/pom.xml b/java/maven/pom.xml index f290ded2e2913..470e198caebc1 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -272,7 +272,7 @@ org.slf4j jcl-over-slf4j - 2.0.12 + 2.0.13 diff --git a/java/pom.xml b/java/pom.xml index 289810daba3ac..e932c749bd832 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -81,7 +81,7 @@ ${project.build.directory}/generated-sources 1.9.0 5.10.2 - 2.0.12 + 2.0.13 33.0.0-jre 4.1.108.Final 1.63.0 From fd54260f2982c898fc8fc91752278446e2fdc56a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:33:16 +0900 Subject: [PATCH 215/261] MINOR: [Java] Bump org.apache.maven.plugins:maven-install-plugin from 3.1.1 to 3.1.2 in /java (#41941) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [org.apache.maven.plugins:maven-install-plugin](https://github.com/apache/maven-install-plugin) from 3.1.1 to 3.1.2.
Release notes

Sourced from org.apache.maven.plugins:maven-install-plugin's releases.

3.1.2

Release Notes - Maven Install Plugin - Version 3.1.2


Full Changelog: https://github.com/apache/maven-install-plugin/compare/maven-install-plugin-3.1.1...maven-install-plugin-3.1.2

Commits
  • e1494c1 [maven-release-plugin] prepare release maven-install-plugin-3.1.2
  • 6712ae8 Remove workaround, handle it centrally
  • 1fa847f Bump workflow
  • bdc93d5 Quickfix: move to zulu
  • 7b9bf51 [MINSTALL-193] Parent 42, min Maven 3.6.3 (#64)
  • e914367 [MINSTALL-195] Include artifactId in InstallMojo#processProject messages
  • 3ebb448 [MINSTALL-197] Update to parent 41, cleanup (#61)
  • 30d2b53 [MINSTALL-192] - Code cleanups
  • 429ad5b [MNG-6829] Replace StringUtils#isEmpty(String) & #isNotEmpty(String) (#58)
  • f6377c4 configure notifications
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.apache.maven.plugins:maven-install-plugin&package-manager=maven&previous-version=3.1.1&new-version=3.1.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/maven/module-info-compiler-maven-plugin/pom.xml | 2 +- java/performance/pom.xml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 9c1e8fe058110..57ba7933ea1c6 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -87,7 +87,7 @@
maven-install-plugin - 3.1.1 + 3.1.2 maven-deploy-plugin diff --git a/java/performance/pom.xml b/java/performance/pom.xml index 765b6a58cd8f0..f01e8d9a4e0e4 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -95,7 +95,7 @@ maven-install-plugin - 3.1.1 + 3.1.2 maven-jar-plugin From b08c8be3d63718b1ffb1699ba21b34d7c8c831c6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 08:39:19 +0900 Subject: [PATCH 216/261] MINOR: [Java] Bump com.google.guava:guava-bom from 33.0.0-jre to 33.2.1-jre in /java (#41943) Bumps [com.google.guava:guava-bom](https://github.com/google/guava) from 33.0.0-jre to 33.2.1-jre.
Release notes

Sourced from com.google.guava:guava-bom's releases.

33.2.1

<dependency>
  <groupId>com.google.guava</groupId>
  <artifactId>guava</artifactId>
  <version>33.2.1-jre</version>
  <!-- or, for Android: -->
  <version>33.2.1-android</version>
</dependency>

Jar files

Guava requires one runtime dependency, which you can download here:

Changelog

  • net: Changed InetAddress-String conversion methods to preserve the IPv6 scope ID if present. The scope ID can be necessary for IPv6-capable devices with multiple network interfaces. However, preserving it can also lead to problems for callers that rely on the returned values not to include the scope ID:
    • Callers might compensate for the old behavior of the methods by appending the scope ID to a returned string themselves. If so, you can update your code to stop doing so at the same time as you upgrade Guava. Or, if your code might run against multiple versions of Guava, you can check whether Guava has included a scope ID before you add one yourself.
    • Callers might pass the returned string to another system that does not understand scope IDs. If so, you can strip the scope ID off, whether by truncating the string form at a % character (leaving behind any trailing ] character in the case of forUriString) or by replacing the returned InetAddress with a new instance constructed by calling InetAddress.getByAddress(addr). (A sketch of the truncation approach follows this list.)
    • java.net.InetAddress validates any provided scope ID against the interfaces available on the machine. As a result, methods in InetAddresses may now fail if the scope ID fails validation.
      • Notable cases in which this may happen include:
        • if the code runs in an Android app without networking permission
        • if code passes InetAddress instances or strings across devices
      • If this is not the behavior that you want, then you can strip off the scope ID from the input string before passing it to Guava, as discussed above. (3f61870ac6)
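A minimal sketch of the truncation workaround described above, assuming a caller-side helper; the class and method names (`ScopeIdExample`, `stripScopeId`) are illustrative, not part of Guava's API:

```java
import com.google.common.net.InetAddresses;
import java.net.InetAddress;

public final class ScopeIdExample {
  // Truncate at the first '%' to drop a scope ID such as "%eth0",
  // restoring the trailing ']' for bracketed literals like "[fe80::1%eth0]".
  static String stripScopeId(String literal) {
    int percent = literal.indexOf('%');
    if (percent < 0) {
      return literal; // no scope ID present
    }
    String stripped = literal.substring(0, percent);
    return literal.endsWith("]") ? stripped + "]" : stripped;
  }

  public static void main(String[] args) {
    // Per the notes above, a scope ID that does not match an interface on
    // this machine can now make InetAddresses fail, so strip it first.
    InetAddress address = InetAddresses.forString(stripScopeId("fe80::1%eth0"));
    System.out.println(address.getHostAddress());
  }
}
```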

33.2.0

Android users: Please test recent Guava versions

If you know of Guava Android users who have not yet upgraded to at least release 33.0.0, please encourage them to upgrade, preferably to today's release, 33.2.0. These releases have begun adding Java 8+ APIs to guava-android. While we don't anticipate problems, we do anticipate that any unexpected problems could force a disruptive rollback. To minimize any disruption, we'd like to catch any such problems early.

Please let us know of any problems you encounter.

Maven

... (truncated)

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=com.google.guava:guava-bom&package-manager=maven&previous-version=33.0.0-jre&new-version=33.2.1-jre)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- java/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/pom.xml b/java/pom.xml index e932c749bd832..9be9d431d4776 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -82,7 +82,7 @@ 1.9.0 5.10.2 2.0.13 - 33.0.0-jre + 33.2.1-jre 4.1.108.Final 1.63.0 3.23.1 From 2c2c6c505d11d0db0d41013ccdf50527c9a8ed18 Mon Sep 17 00:00:00 2001 From: Dominik Moritz Date: Mon, 3 Jun 2024 22:00:52 -0400 Subject: [PATCH 217/261] GH-41905: [JS] Update dependencies (#41906) --- js/bin/integration.ts | 2 +- js/package.json | 24 +- js/test/unit/builders/builder-tests.ts | 2 +- js/test/unit/builders/utils.ts | 2 +- js/test/unit/ipc/writer/streams-dom-tests.ts | 4 +- js/test/unit/ipc/writer/streams-node-tests.ts | 4 +- js/test/unit/table/assign-tests.ts | 2 +- js/yarn.lock | 846 ++++++++---------- 8 files changed, 414 insertions(+), 472 deletions(-) diff --git a/js/bin/integration.ts b/js/bin/integration.ts index f73388cc85cf0..d41ce08aa16b6 100755 --- a/js/bin/integration.ts +++ b/js/bin/integration.ts @@ -20,7 +20,7 @@ import * as fs from 'node:fs'; import * as Path from 'node:path'; import { glob } from 'glob'; -import { zip } from 'ix/iterable/zip.js'; +import { zip } from 'ix/iterable/zip'; import commandLineArgs from 'command-line-args'; // @ts-ignore import { parse as bignumJSONParse } from 'json-bignum'; diff --git a/js/package.json b/js/package.json index 7ed0daddfada0..9e61d94dc3b7b 100644 --- a/js/package.json +++ b/js/package.json @@ -52,10 +52,10 @@ "jest.config.js" ], "dependencies": { - "@swc/helpers": "^0.5.10", + "@swc/helpers": "^0.5.11", "@types/command-line-args": "^5.2.3", "@types/command-line-usage": "^5.0.4", - "@types/node": "^20.12.7", + "@types/node": "^20.13.0", "command-line-args": "^5.2.1", "command-line-usage": "^7.0.1", "flatbuffers": "^24.3.25", @@ -67,26 +67,26 @@ "@rollup/plugin-alias": "5.1.0", "@rollup/plugin-node-resolve": "15.2.3", "@rollup/stream": "3.0.1", - "@swc/core": "1.4.17", + "@swc/core": "1.5.24", "@types/benchmark": "2.1.5", "@types/glob": "8.1.0", "@types/jest": "29.5.12", "@types/multistream": "4.1.3", - "@typescript-eslint/eslint-plugin": "7.8.0", - "@typescript-eslint/parser": "7.8.0", + "@typescript-eslint/eslint-plugin": "7.11.0", + "@typescript-eslint/parser": "7.11.0", "async-done": "2.0.0", "benny": "3.7.1", "cross-env": "7.0.3", "del": "7.1.0", "del-cli": "5.1.0", - "esbuild": "0.20.2", + "esbuild": "0.21.4", "esbuild-plugin-alias": "0.2.1", "eslint": "8.57.0", - "eslint-plugin-jest": "28.4.0", + "eslint-plugin-jest": "28.5.0", "eslint-plugin-unicorn": "52.0.0", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", "gulp": "4.0.2", - "glob": "10.3.12", + "glob": "10.4.1", "google-closure-compiler": "20240317.0.0", "gulp-esbuild": "0.12.0", "gulp-json-transform": "0.5.0", @@ -96,16 +96,16 @@ "gulp-terser": "2.1.0", "gulp-typescript": "5.0.1", "gulp-vinyl-size": "1.1.4", - "ix": "5.0.0", + "ix": "6.0.0", "jest": "29.7.0", - "jest-silent-reporter": "0.5.0", + "jest-silent-reporter": "0.6.0", "memfs": "4.9.2", "mkdirp": "3.0.1", "multistream": "4.1.0", "regenerator-runtime": "0.14.1", - "rollup": "4.17.2", + "rollup": "4.18.0", "rxjs": "7.8.1", - "ts-jest": "29.1.2", + "ts-jest": "29.1.4", "ts-node": "10.9.2", "typedoc": "0.25.13", "typescript": "5.4.5", diff --git a/js/test/unit/builders/builder-tests.ts b/js/test/unit/builders/builder-tests.ts index 4d1be9b225b08..c9174023f6dae 100644 --- 
a/js/test/unit/builders/builder-tests.ts +++ b/js/test/unit/builders/builder-tests.ts @@ -18,7 +18,7 @@ import 'web-streams-polyfill'; import '../../jest-extensions.js'; -import { from, fromDOMStream, toArray } from 'ix/asynciterable'; +import { from, fromDOMStream, toArray } from 'ix/Ix.asynciterable'; import { fromNodeStream } from 'ix/asynciterable/fromnodestream'; import { validateVector } from './utils.js'; diff --git a/js/test/unit/builders/utils.ts b/js/test/unit/builders/utils.ts index 1d0707a6ca5d9..7cc0a075d84d4 100644 --- a/js/test/unit/builders/utils.ts +++ b/js/test/unit/builders/utils.ts @@ -17,7 +17,7 @@ import 'web-streams-polyfill'; -import { from, fromDOMStream, toArray } from 'ix/asynciterable'; +import { from, fromDOMStream, toArray } from 'ix/Ix.asynciterable'; import { fromNodeStream } from 'ix/asynciterable/fromnodestream'; import 'ix/Ix.node'; diff --git a/js/test/unit/ipc/writer/streams-dom-tests.ts b/js/test/unit/ipc/writer/streams-dom-tests.ts index dc792c9cf82be..2040e89a48802 100644 --- a/js/test/unit/ipc/writer/streams-dom-tests.ts +++ b/js/test/unit/ipc/writer/streams-dom-tests.ts @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -import { as, from } from 'ix/asynciterable'; -import { flatMap, tap } from 'ix/asynciterable/operators'; +import { as, from } from 'ix/Ix.asynciterable'; +import { flatMap, tap } from 'ix/Ix.asynciterable.operators'; import { generateRandomTables } from '../../../data/tables.js'; import { diff --git a/js/test/unit/ipc/writer/streams-node-tests.ts b/js/test/unit/ipc/writer/streams-node-tests.ts index 1f4c9c7a02cfb..afcb6deb1e053 100644 --- a/js/test/unit/ipc/writer/streams-node-tests.ts +++ b/js/test/unit/ipc/writer/streams-node-tests.ts @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. 
-import { as, from } from 'ix/asynciterable'; -import { flatMap, tap } from 'ix/asynciterable/operators'; +import { as, from } from 'ix/Ix.asynciterable'; +import { flatMap, tap } from 'ix/Ix.asynciterable.operators'; import 'ix/Ix.node'; import { generateRandomTables } from '../../../data/tables.js'; diff --git a/js/test/unit/table/assign-tests.ts b/js/test/unit/table/assign-tests.ts index f2a5ff4f37cac..8e1cdfec41cbf 100644 --- a/js/test/unit/table/assign-tests.ts +++ b/js/test/unit/table/assign-tests.ts @@ -17,7 +17,7 @@ /* eslint-disable jest/no-standalone-expect */ -import { zip } from 'ix/iterable'; +import { zip } from 'ix/Ix.iterable'; import '../../jest-extensions.js'; import * as generate from '../../generate-test-data.js'; diff --git a/js/yarn.lock b/js/yarn.lock index eb7ed33520f0a..d5527097340d9 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -362,230 +362,230 @@ resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.19.12.tgz#d1bc06aedb6936b3b6d313bf809a5a40387d2b7f" integrity sha512-bmoCYyWdEL3wDQIVbcyzRyeKLgk2WtWLTWz1ZIAZF/EGbNOwSA6ew3PftJ1PqMiOOGu0OyFMzG53L0zqIpPeNA== -"@esbuild/aix-ppc64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.20.2.tgz#a70f4ac11c6a1dfc18b8bbb13284155d933b9537" - integrity sha512-D+EBOJHXdNZcLJRBkhENNG8Wji2kgc9AZ9KiPr1JuZjsNtyHzrsfLRrY0tk2H2aoFu6RANO1y1iPPUCDYWkb5g== +"@esbuild/aix-ppc64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/aix-ppc64/-/aix-ppc64-0.21.4.tgz#f83eb142df3ca7b49531c1ed680b81e484316508" + integrity sha512-Zrm+B33R4LWPLjDEVnEqt2+SLTATlru1q/xYKVn8oVTbiRBGmK2VIMoIYGJDGyftnGaC788IuzGFAlb7IQ0Y8A== "@esbuild/android-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.19.12.tgz#7ad65a36cfdb7e0d429c353e00f680d737c2aed4" integrity sha512-P0UVNGIienjZv3f5zq0DP3Nt2IE/3plFzuaS96vihvD0Hd6H/q4WXUGpCxD/E8YrSXfNyRPbpTq+T8ZQioSuPA== -"@esbuild/android-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.20.2.tgz#db1c9202a5bc92ea04c7b6840f1bbe09ebf9e6b9" - integrity sha512-mRzjLacRtl/tWU0SvD8lUEwb61yP9cqQo6noDZP/O8VkwafSYwZ4yWy24kan8jE/IMERpYncRt2dw438LP3Xmg== +"@esbuild/android-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm64/-/android-arm64-0.21.4.tgz#dd328039daccd6033b2d1e536c054914bfc92287" + integrity sha512-fYFnz+ObClJ3dNiITySBUx+oNalYUT18/AryMxfovLkYWbutXsct3Wz2ZWAcGGppp+RVVX5FiXeLYGi97umisA== "@esbuild/android-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.19.12.tgz#b0c26536f37776162ca8bde25e42040c203f2824" integrity sha512-qg/Lj1mu3CdQlDEEiWrlC4eaPZ1KztwGJ9B6J+/6G+/4ewxJg7gqj8eVYWvao1bXrqGiW2rsBZFSX3q2lcW05w== -"@esbuild/android-arm@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.20.2.tgz#3b488c49aee9d491c2c8f98a909b785870d6e995" - integrity sha512-t98Ra6pw2VaDhqNWO2Oph2LXbz/EJcnLmKLGBJwEwXX/JAN83Fym1rU8l0JUWK6HkIbWONCSSatf4sf2NBRx/w== +"@esbuild/android-arm@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-arm/-/android-arm-0.21.4.tgz#76767a989720a97b206ea14c52af6e4589e48b0d" + integrity sha512-E7H/yTd8kGQfY4z9t3nRPk/hrhaCajfA3YSQSBrst8B+3uTcgsi8N+ZWYCaeIDsiVs6m65JPCaQN/DxBRclF3A== "@esbuild/android-x64@0.19.12": version "0.19.12" resolved 
"https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.19.12.tgz#cb13e2211282012194d89bf3bfe7721273473b3d" integrity sha512-3k7ZoUW6Q6YqhdhIaq/WZ7HwBpnFBlW905Fa4s4qWJyiNOgT1dOqDiVAQFwBH7gBRZr17gLrlFCRzF6jFh7Kew== -"@esbuild/android-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.20.2.tgz#3b1628029e5576249d2b2d766696e50768449f98" - integrity sha512-btzExgV+/lMGDDa194CcUQm53ncxzeBrWJcncOBxuC6ndBkKxnHdFJn86mCIgTELsooUmwUm9FkhSp5HYu00Rg== +"@esbuild/android-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/android-x64/-/android-x64-0.21.4.tgz#14a8ae3c35702d882086efb5a8f8d7b0038d8d35" + integrity sha512-mDqmlge3hFbEPbCWxp4fM6hqq7aZfLEHZAKGP9viq9wMUBVQx202aDIfc3l+d2cKhUJM741VrCXEzRFhPDKH3Q== "@esbuild/darwin-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.19.12.tgz#cbee41e988020d4b516e9d9e44dd29200996275e" integrity sha512-B6IeSgZgtEzGC42jsI+YYu9Z3HKRxp8ZT3cqhvliEHovq8HSX2YX8lNocDn79gCKJXOSaEot9MVYky7AKjCs8g== -"@esbuild/darwin-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.20.2.tgz#6e8517a045ddd86ae30c6608c8475ebc0c4000bb" - integrity sha512-4J6IRT+10J3aJH3l1yzEg9y3wkTDgDk7TSDFX+wKFiWjqWp/iCfLIYzGyasx9l0SAFPT1HwSCR+0w/h1ES/MjA== +"@esbuild/darwin-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-arm64/-/darwin-arm64-0.21.4.tgz#7e735046005e4c12e9139e0bdd1fa6a754430d57" + integrity sha512-72eaIrDZDSiWqpmCzVaBD58c8ea8cw/U0fq/PPOTqE3c53D0xVMRt2ooIABZ6/wj99Y+h4ksT/+I+srCDLU9TA== "@esbuild/darwin-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.19.12.tgz#e37d9633246d52aecf491ee916ece709f9d5f4cd" integrity sha512-hKoVkKzFiToTgn+41qGhsUJXFlIjxI/jSYeZf3ugemDYZldIXIxhvwN6erJGlX4t5h417iFuheZ7l+YVn05N3A== -"@esbuild/darwin-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.20.2.tgz#90ed098e1f9dd8a9381695b207e1cff45540a0d0" - integrity sha512-tBcXp9KNphnNH0dfhv8KYkZhjc+H3XBkF5DKtswJblV7KlT9EI2+jeA8DgBjp908WEuYll6pF+UStUCfEpdysA== +"@esbuild/darwin-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/darwin-x64/-/darwin-x64-0.21.4.tgz#db623553547a5fe3502a63aa88306e9023178482" + integrity sha512-uBsuwRMehGmw1JC7Vecu/upOjTsMhgahmDkWhGLWxIgUn2x/Y4tIwUZngsmVb6XyPSTXJYS4YiASKPcm9Zitag== "@esbuild/freebsd-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.19.12.tgz#1ee4d8b682ed363b08af74d1ea2b2b4dbba76487" integrity sha512-4aRvFIXmwAcDBw9AueDQ2YnGmz5L6obe5kmPT8Vd+/+x/JMVKCgdcRwH6APrbpNXsPz+K653Qg8HB/oXvXVukA== -"@esbuild/freebsd-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.20.2.tgz#d71502d1ee89a1130327e890364666c760a2a911" - integrity sha512-d3qI41G4SuLiCGCFGUrKsSeTXyWG6yem1KcGZVS+3FYlYhtNoNgYrWcvkOoaqMhwXSMrZRl69ArHsGJ9mYdbbw== +"@esbuild/freebsd-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-arm64/-/freebsd-arm64-0.21.4.tgz#91cbad647c079bf932086fbd4749d7f563df67b8" + integrity sha512-8JfuSC6YMSAEIZIWNL3GtdUT5NhUA/CMUCpZdDRolUXNAXEE/Vbpe6qlGLpfThtY5NwXq8Hi4nJy4YfPh+TwAg== "@esbuild/freebsd-x64@0.19.12": version "0.19.12" resolved 
"https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.19.12.tgz#37a693553d42ff77cd7126764b535fb6cc28a11c" integrity sha512-EYoXZ4d8xtBoVN7CEwWY2IN4ho76xjYXqSXMNccFSx2lgqOG/1TBPW0yPx1bJZk94qu3tX0fycJeeQsKovA8gg== -"@esbuild/freebsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.20.2.tgz#aa5ea58d9c1dd9af688b8b6f63ef0d3d60cea53c" - integrity sha512-d+DipyvHRuqEeM5zDivKV1KuXn9WeRX6vqSqIDgwIfPQtwMP4jaDsQsDncjTDDsExT4lR/91OLjRo8bmC1e+Cw== +"@esbuild/freebsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/freebsd-x64/-/freebsd-x64-0.21.4.tgz#723299b9859ccbe5532fecbadba3ac33019ba8e8" + integrity sha512-8d9y9eQhxv4ef7JmXny7591P/PYsDFc4+STaxC1GBv0tMyCdyWfXu2jBuqRsyhY8uL2HU8uPyscgE2KxCY9imQ== "@esbuild/linux-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.19.12.tgz#be9b145985ec6c57470e0e051d887b09dddb2d4b" integrity sha512-EoTjyYyLuVPfdPLsGVVVC8a0p1BFFvtpQDB/YLEhaXyf/5bczaGeN15QkR+O4S5LeJ92Tqotve7i1jn35qwvdA== -"@esbuild/linux-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.20.2.tgz#055b63725df678379b0f6db9d0fa85463755b2e5" - integrity sha512-9pb6rBjGvTFNira2FLIWqDk/uaf42sSyLE8j1rnUpuzsODBq7FvpwHYZxQ/It/8b+QOS1RYfqgGFNLRI+qlq2A== +"@esbuild/linux-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm64/-/linux-arm64-0.21.4.tgz#531743f861e1ef6e50b874d6c784cda37aa5e685" + integrity sha512-/GLD2orjNU50v9PcxNpYZi+y8dJ7e7/LhQukN3S4jNDXCKkyyiyAz9zDw3siZ7Eh1tRcnCHAo/WcqKMzmi4eMQ== "@esbuild/linux-arm@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.19.12.tgz#207ecd982a8db95f7b5279207d0ff2331acf5eef" integrity sha512-J5jPms//KhSNv+LO1S1TX1UWp1ucM6N6XuL6ITdKWElCu8wXP72l9MM0zDTzzeikVyqFE6U8YAV9/tFyj0ti+w== -"@esbuild/linux-arm@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.20.2.tgz#76b3b98cb1f87936fbc37f073efabad49dcd889c" - integrity sha512-VhLPeR8HTMPccbuWWcEUD1Az68TqaTYyj6nfE4QByZIQEQVWBB8vup8PpR7y1QHL3CpcF6xd5WVBU/+SBEvGTg== +"@esbuild/linux-arm@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-arm/-/linux-arm-0.21.4.tgz#1144b5654764960dd97d90ddf0893a9afc63ad91" + integrity sha512-2rqFFefpYmpMs+FWjkzSgXg5vViocqpq5a1PSRgT0AvSgxoXmGF17qfGAzKedg6wAwyM7UltrKVo9kxaJLMF/g== "@esbuild/linux-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.19.12.tgz#d0d86b5ca1562523dc284a6723293a52d5860601" integrity sha512-Thsa42rrP1+UIGaWz47uydHSBOgTUnwBwNq59khgIwktK6x60Hivfbux9iNR0eHCHzOLjLMLfUMLCypBkZXMHA== -"@esbuild/linux-ia32@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.20.2.tgz#c0e5e787c285264e5dfc7a79f04b8b4eefdad7fa" - integrity sha512-o10utieEkNPFDZFQm9CoP7Tvb33UutoJqg3qKf1PWVeeJhJw0Q347PxMvBgVVFgouYLGIhFYG0UGdBumROyiig== +"@esbuild/linux-ia32@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ia32/-/linux-ia32-0.21.4.tgz#c81b6f2ed3308d3b75ccefb5ac63bc4cf3a9d2e9" + integrity sha512-pNftBl7m/tFG3t2m/tSjuYeWIffzwAZT9m08+9DPLizxVOsUl8DdFzn9HvJrTQwe3wvJnwTdl92AonY36w/25g== "@esbuild/linux-loong64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.19.12.tgz#9a37f87fec4b8408e682b528391fa22afd952299" integrity 
sha512-LiXdXA0s3IqRRjm6rV6XaWATScKAXjI4R4LoDlvO7+yQqFdlr1Bax62sRwkVvRIrwXxvtYEHHI4dm50jAXkuAA== -"@esbuild/linux-loong64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.20.2.tgz#a6184e62bd7cdc63e0c0448b83801001653219c5" - integrity sha512-PR7sp6R/UC4CFVomVINKJ80pMFlfDfMQMYynX7t1tNTeivQ6XdX5r2XovMmha/VjR1YN/HgHWsVcTRIMkymrgQ== +"@esbuild/linux-loong64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-loong64/-/linux-loong64-0.21.4.tgz#87b6af7cd0f2551653955fc2dc465b7f4464af0a" + integrity sha512-cSD2gzCK5LuVX+hszzXQzlWya6c7hilO71L9h4KHwqI4qeqZ57bAtkgcC2YioXjsbfAv4lPn3qe3b00Zt+jIfQ== "@esbuild/linux-mips64el@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.19.12.tgz#4ddebd4e6eeba20b509d8e74c8e30d8ace0b89ec" integrity sha512-fEnAuj5VGTanfJ07ff0gOA6IPsvrVHLVb6Lyd1g2/ed67oU1eFzL0r9WL7ZzscD+/N6i3dWumGE1Un4f7Amf+w== -"@esbuild/linux-mips64el@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.20.2.tgz#d08e39ce86f45ef8fc88549d29c62b8acf5649aa" - integrity sha512-4BlTqeutE/KnOiTG5Y6Sb/Hw6hsBOZapOVF6njAESHInhlQAghVVZL1ZpIctBOoTFbQyGW+LsVYZ8lSSB3wkjA== +"@esbuild/linux-mips64el@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-mips64el/-/linux-mips64el-0.21.4.tgz#fec73cd39490a0c45d052bef03e011a0ad366c06" + integrity sha512-qtzAd3BJh7UdbiXCrg6npWLYU0YpufsV9XlufKhMhYMJGJCdfX/G6+PNd0+v877X1JG5VmjBLUiFB0o8EUSicA== "@esbuild/linux-ppc64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.19.12.tgz#adb67dadb73656849f63cd522f5ecb351dd8dee8" integrity sha512-nYJA2/QPimDQOh1rKWedNOe3Gfc8PabU7HT3iXWtNUbRzXS9+vgB0Fjaqr//XNbd82mCxHzik2qotuI89cfixg== -"@esbuild/linux-ppc64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.20.2.tgz#8d252f0b7756ffd6d1cbde5ea67ff8fd20437f20" - integrity sha512-rD3KsaDprDcfajSKdn25ooz5J5/fWBylaaXkuotBDGnMnDP1Uv5DLAN/45qfnf3JDYyJv/ytGHQaziHUdyzaAg== +"@esbuild/linux-ppc64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-ppc64/-/linux-ppc64-0.21.4.tgz#ea3b5e13b0fc8666bd4c6f7ea58bd1830f3e6e78" + integrity sha512-yB8AYzOTaL0D5+2a4xEy7OVvbcypvDR05MsB/VVPVA7nL4hc5w5Dyd/ddnayStDgJE59fAgNEOdLhBxjfx5+dg== "@esbuild/linux-riscv64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.19.12.tgz#11bc0698bf0a2abf8727f1c7ace2112612c15adf" integrity sha512-2MueBrlPQCw5dVJJpQdUYgeqIzDQgw3QtiAHUC4RBz9FXPrskyyU3VI1hw7C0BSKB9OduwSJ79FTCqtGMWqJHg== -"@esbuild/linux-riscv64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.20.2.tgz#19f6dcdb14409dae607f66ca1181dd4e9db81300" - integrity sha512-snwmBKacKmwTMmhLlz/3aH1Q9T8v45bKYGE3j26TsaOVtjIag4wLfWSiZykXzXuE1kbCE+zJRmwp+ZbIHinnVg== +"@esbuild/linux-riscv64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-riscv64/-/linux-riscv64-0.21.4.tgz#80d406f653fc6b193edaeb55ac88d4ac22c8f155" + integrity sha512-Y5AgOuVzPjQdgU59ramLoqSSiXddu7F3F+LI5hYy/d1UHN7K5oLzYBDZe23QmQJ9PIVUXwOdKJ/jZahPdxzm9w== "@esbuild/linux-s390x@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.19.12.tgz#e86fb8ffba7c5c92ba91fc3b27ed5a70196c3cc8" integrity 
sha512-+Pil1Nv3Umes4m3AZKqA2anfhJiVmNCYkPchwFJNEJN5QxmTs1uzyy4TvmDrCRNT2ApwSari7ZIgrPeUx4UZDg== -"@esbuild/linux-s390x@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.20.2.tgz#3c830c90f1a5d7dd1473d5595ea4ebb920988685" - integrity sha512-wcWISOobRWNm3cezm5HOZcYz1sKoHLd8VL1dl309DiixxVFoFe/o8HnwuIwn6sXre88Nwj+VwZUvJf4AFxkyrQ== +"@esbuild/linux-s390x@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-s390x/-/linux-s390x-0.21.4.tgz#9cbd26854b5b12cf22fb54c96cd1adffaf6ace6f" + integrity sha512-Iqc/l/FFwtt8FoTK9riYv9zQNms7B8u+vAI/rxKuN10HgQIXaPzKZc479lZ0x6+vKVQbu55GdpYpeNWzjOhgbA== "@esbuild/linux-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.19.12.tgz#5f37cfdc705aea687dfe5dfbec086a05acfe9c78" integrity sha512-B71g1QpxfwBvNrfyJdVDexenDIt1CiDN1TIXLbhOw0KhJzE78KIFGX6OJ9MrtC0oOqMWf+0xop4qEU8JrJTwCg== -"@esbuild/linux-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.20.2.tgz#86eca35203afc0d9de0694c64ec0ab0a378f6fff" - integrity sha512-1MdwI6OOTsfQfek8sLwgyjOXAu+wKhLEoaOLTjbijk6E2WONYpH9ZU2mNtR+lZ2B4uwr+usqGuVfFT9tMtGvGw== +"@esbuild/linux-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/linux-x64/-/linux-x64-0.21.4.tgz#44dfe1c5cad855362c830c604dba97fbb16fc114" + integrity sha512-Td9jv782UMAFsuLZINfUpoF5mZIbAj+jv1YVtE58rFtfvoKRiKSkRGQfHTgKamLVT/fO7203bHa3wU122V/Bdg== "@esbuild/netbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.19.12.tgz#29da566a75324e0d0dd7e47519ba2f7ef168657b" integrity sha512-3ltjQ7n1owJgFbuC61Oj++XhtzmymoCihNFgT84UAmJnxJfm4sYCiSLTXZtE00VWYpPMYc+ZQmB6xbSdVh0JWA== -"@esbuild/netbsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.20.2.tgz#e771c8eb0e0f6e1877ffd4220036b98aed5915e6" - integrity sha512-K8/DhBxcVQkzYc43yJXDSyjlFeHQJBiowJ0uVL6Tor3jGQfSGHNNJcWxNbOI8v5k82prYqzPuwkzHt3J1T1iZQ== +"@esbuild/netbsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/netbsd-x64/-/netbsd-x64-0.21.4.tgz#89b97d823e1cc4bf8c4e5dc8f76c8d6ceb1c87f3" + integrity sha512-Awn38oSXxsPMQxaV0Ipb7W/gxZtk5Tx3+W+rAPdZkyEhQ6968r9NvtkjhnhbEgWXYbgV+JEONJ6PcdBS+nlcpA== "@esbuild/openbsd-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.19.12.tgz#306c0acbdb5a99c95be98bdd1d47c916e7dc3ff0" integrity sha512-RbrfTB9SWsr0kWmb9srfF+L933uMDdu9BIzdA7os2t0TXhCRjrQyCeOt6wVxr79CKD4c+p+YhCj31HBkYcXebw== -"@esbuild/openbsd-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.20.2.tgz#9a795ae4b4e37e674f0f4d716f3e226dd7c39baf" - integrity sha512-eMpKlV0SThJmmJgiVyN9jTPJ2VBPquf6Kt/nAoo6DgHAoN57K15ZghiHaMvqjCye/uU4X5u3YSMgVBI1h3vKrQ== +"@esbuild/openbsd-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/openbsd-x64/-/openbsd-x64-0.21.4.tgz#080715bb4981c326364320d7b56835608e2bd98d" + integrity sha512-IsUmQeCY0aU374R82fxIPu6vkOybWIMc3hVGZ3ChRwL9hA1TwY+tS0lgFWV5+F1+1ssuvvXt3HFqe8roCip8Hg== "@esbuild/sunos-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.19.12.tgz#0933eaab9af8b9b2c930236f62aae3fc593faf30" integrity sha512-HKjJwRrW8uWtCQnQOz9qcU3mUZhTUQvi56Q8DPTLLB+DawoiQdjsYq+j+D3s9I8VFtDr+F9CjgXKKC4ss89IeA== -"@esbuild/sunos-x64@0.20.2": - version "0.20.2" - 
resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.20.2.tgz#7df23b61a497b8ac189def6e25a95673caedb03f" - integrity sha512-2UyFtRC6cXLyejf/YEld4Hajo7UHILetzE1vsRcGL3earZEW77JxrFjH4Ez2qaTiEfMgAXxfAZCm1fvM/G/o8w== +"@esbuild/sunos-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/sunos-x64/-/sunos-x64-0.21.4.tgz#8d838a8ac80e211536490108b72fb0091a811626" + integrity sha512-hsKhgZ4teLUaDA6FG/QIu2q0rI6I36tZVfM4DBZv3BG0mkMIdEnMbhc4xwLvLJSS22uWmaVkFkqWgIS0gPIm+A== "@esbuild/win32-arm64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.19.12.tgz#773bdbaa1971b36db2f6560088639ccd1e6773ae" integrity sha512-URgtR1dJnmGvX864pn1B2YUYNzjmXkuJOIqG2HdU62MVS4EHpU2946OZoTMnRUHklGtJdJZ33QfzdjGACXhn1A== -"@esbuild/win32-arm64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.20.2.tgz#f1ae5abf9ca052ae11c1bc806fb4c0f519bacf90" - integrity sha512-GRibxoawM9ZCnDxnP3usoUDO9vUkpAxIIZ6GQI+IlVmr5kP3zUq+l17xELTHMWTWzjxa2guPNyrpq1GWmPvcGQ== +"@esbuild/win32-arm64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-arm64/-/win32-arm64-0.21.4.tgz#94afb4c2ac89b0f09791606d6d93fdab322f81c8" + integrity sha512-UUfMgMoXPoA/bvGUNfUBFLCh0gt9dxZYIx9W4rfJr7+hKe5jxxHmfOK8YSH4qsHLLN4Ck8JZ+v7Q5fIm1huErg== "@esbuild/win32-ia32@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.19.12.tgz#000516cad06354cc84a73f0943a4aa690ef6fd67" integrity sha512-+ZOE6pUkMOJfmxmBZElNOx72NKpIa/HFOMGzu8fqzQJ5kgf6aTGrcJaFsNiVMH4JKpMipyK+7k0n2UXN7a8YKQ== -"@esbuild/win32-ia32@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.20.2.tgz#241fe62c34d8e8461cd708277813e1d0ba55ce23" - integrity sha512-HfLOfn9YWmkSKRQqovpnITazdtquEW8/SoHW7pWpuEeguaZI4QnCRW6b+oZTztdBnZOS2hqJ6im/D5cPzBTTlQ== +"@esbuild/win32-ia32@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-ia32/-/win32-ia32-0.21.4.tgz#822085cd52f2f1dd90eabb59346ffa779c0bab83" + integrity sha512-yIxbspZb5kGCAHWm8dexALQ9en1IYDfErzjSEq1KzXFniHv019VT3mNtTK7t8qdy4TwT6QYHI9sEZabONHg+aw== "@esbuild/win32-x64@0.19.12": version "0.19.12" resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.19.12.tgz#c57c8afbb4054a3ab8317591a0b7320360b444ae" integrity sha512-T1QyPSDCyMXaO3pzBkF96E8xMkiRYbUEZADd29SyPGabqxMViNoii+NcK7eWJAEoU6RZyEm5lVSIjTmcdoB9HA== -"@esbuild/win32-x64@0.20.2": - version "0.20.2" - resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.20.2.tgz#9c907b21e30a52db959ba4f80bb01a0cc403d5cc" - integrity sha512-N49X4lJX27+l9jbLKSqZ6bKNjzQvHaT8IIFUy+YIqmXQdjYCToGWwOItDrfby14c78aDd5NHQl29xingXfCdLQ== +"@esbuild/win32-x64@0.21.4": + version "0.21.4" + resolved "https://registry.yarnpkg.com/@esbuild/win32-x64/-/win32-x64-0.21.4.tgz#11ef0398f9abee161193461910a507ef0d4c0c32" + integrity sha512-sywLRD3UK/qRJt0oBwdpYLBibk7KiRfbswmWRDabuncQYSlf8aLEEUor/oP6KRz8KEG+HoiVLBhPRD5JWjS8Sg== "@eslint-community/eslint-utils@^4.2.0", "@eslint-community/eslint-utils@^4.4.0": version "4.4.0" @@ -1020,85 +1020,85 @@ estree-walker "^2.0.2" picomatch "^2.3.1" -"@rollup/rollup-android-arm-eabi@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.17.2.tgz#1a32112822660ee104c5dd3a7c595e26100d4c2d" - integrity sha512-NM0jFxY8bB8QLkoKxIQeObCaDlJKewVlIEkuyYKm5An1tdVZ966w2+MPQ2l8LBZLjR+SgyV+nRkTIunzOYBMLQ== - 
-"@rollup/rollup-android-arm64@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.17.2.tgz#5aeef206d65ff4db423f3a93f71af91b28662c5b" - integrity sha512-yeX/Usk7daNIVwkq2uGoq2BYJKZY1JfyLTaHO/jaiSwi/lsf8fTFoQW/n6IdAsx5tx+iotu2zCJwz8MxI6D/Bw== - -"@rollup/rollup-darwin-arm64@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.17.2.tgz#6b66aaf003c70454c292cd5f0236ebdc6ffbdf1a" - integrity sha512-kcMLpE6uCwls023+kknm71ug7MZOrtXo+y5p/tsg6jltpDtgQY1Eq5sGfHcQfb+lfuKwhBmEURDga9N0ol4YPw== - -"@rollup/rollup-darwin-x64@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.17.2.tgz#f64fc51ed12b19f883131ccbcea59fc68cbd6c0b" - integrity sha512-AtKwD0VEx0zWkL0ZjixEkp5tbNLzX+FCqGG1SvOu993HnSz4qDI6S4kGzubrEJAljpVkhRSlg5bzpV//E6ysTQ== - -"@rollup/rollup-linux-arm-gnueabihf@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.17.2.tgz#1a7641111be67c10111f7122d1e375d1226cbf14" - integrity sha512-3reX2fUHqN7sffBNqmEyMQVj/CKhIHZd4y631duy0hZqI8Qoqf6lTtmAKvJFYa6bhU95B1D0WgzHkmTg33In0A== - -"@rollup/rollup-linux-arm-musleabihf@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.17.2.tgz#c93fd632923e0fee25aacd2ae414288d0b7455bb" - integrity sha512-uSqpsp91mheRgw96xtyAGP9FW5ChctTFEoXP0r5FAzj/3ZRv3Uxjtc7taRQSaQM/q85KEKjKsZuiZM3GyUivRg== - -"@rollup/rollup-linux-arm64-gnu@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.17.2.tgz#fa531425dd21d058a630947527b4612d9d0b4a4a" - integrity sha512-EMMPHkiCRtE8Wdk3Qhtciq6BndLtstqZIroHiiGzB3C5LDJmIZcSzVtLRbwuXuUft1Cnv+9fxuDtDxz3k3EW2A== - -"@rollup/rollup-linux-arm64-musl@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.17.2.tgz#8acc16f095ceea5854caf7b07e73f7d1802ac5af" - integrity sha512-NMPylUUZ1i0z/xJUIx6VUhISZDRT+uTWpBcjdv0/zkp7b/bQDF+NfnfdzuTiB1G6HTodgoFa93hp0O1xl+/UbA== - -"@rollup/rollup-linux-powerpc64le-gnu@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.17.2.tgz#94e69a8499b5cf368911b83a44bb230782aeb571" - integrity sha512-T19My13y8uYXPw/L/k0JYaX1fJKFT/PWdXiHr8mTbXWxjVF1t+8Xl31DgBBvEKclw+1b00Chg0hxE2O7bTG7GQ== - -"@rollup/rollup-linux-riscv64-gnu@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.17.2.tgz#7ef1c781c7e59e85a6ce261cc95d7f1e0b56db0f" - integrity sha512-BOaNfthf3X3fOWAB+IJ9kxTgPmMqPPH5f5k2DcCsRrBIbWnaJCgX2ll77dV1TdSy9SaXTR5iDXRL8n7AnoP5cg== - -"@rollup/rollup-linux-s390x-gnu@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.17.2.tgz#f15775841c3232fca9b78cd25a7a0512c694b354" - integrity sha512-W0UP/x7bnn3xN2eYMql2T/+wpASLE5SjObXILTMPUBDB/Fg/FxC+gX4nvCfPBCbNhz51C+HcqQp2qQ4u25ok6g== - -"@rollup/rollup-linux-x64-gnu@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.17.2.tgz#b521d271798d037ad70c9f85dd97d25f8a52e811" - integrity sha512-Hy7pLwByUOuyaFC6mAr7m+oMC+V7qyifzs/nW2OJfC8H4hbCzOX07Ov0VFk/zP3kBsELWNFi7rJtgbKYsav9QQ== - 
-"@rollup/rollup-linux-x64-musl@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.17.2.tgz#9254019cc4baac35800991315d133cc9fd1bf385" - integrity sha512-h1+yTWeYbRdAyJ/jMiVw0l6fOOm/0D1vNLui9iPuqgRGnXA0u21gAqOyB5iHjlM9MMfNOm9RHCQ7zLIzT0x11Q== - -"@rollup/rollup-win32-arm64-msvc@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.17.2.tgz#27f65a89f6f52ee9426ec11e3571038e4671790f" - integrity sha512-tmdtXMfKAjy5+IQsVtDiCfqbynAQE/TQRpWdVataHmhMb9DCoJxp9vLcCBjEQWMiUYxO1QprH/HbY9ragCEFLA== - -"@rollup/rollup-win32-ia32-msvc@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.17.2.tgz#a2fbf8246ed0bb014f078ca34ae6b377a90cb411" - integrity sha512-7II/QCSTAHuE5vdZaQEwJq2ZACkBpQDOmQsE6D6XUbnBHW8IAhm4eTufL6msLJorzrHDFv3CF8oCA/hSIRuZeQ== - -"@rollup/rollup-win32-x64-msvc@4.17.2": - version "4.17.2" - resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.17.2.tgz#5a2d08b81e8064b34242d5cc9973ef8dd1e60503" - integrity sha512-TGGO7v7qOq4CYmSBVEYpI1Y5xDuCEnbVC5Vth8mOsW0gDSzxNrVERPc790IGHsrT2dQSimgMr9Ub3Y1Jci5/8w== +"@rollup/rollup-android-arm-eabi@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm-eabi/-/rollup-android-arm-eabi-4.18.0.tgz#bbd0e616b2078cd2d68afc9824d1fadb2f2ffd27" + integrity sha512-Tya6xypR10giZV1XzxmH5wr25VcZSncG0pZIjfePT0OVBvqNEurzValetGNarVrGiq66EBVAFn15iYX4w6FKgQ== + +"@rollup/rollup-android-arm64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-android-arm64/-/rollup-android-arm64-4.18.0.tgz#97255ef6384c5f73f4800c0de91f5f6518e21203" + integrity sha512-avCea0RAP03lTsDhEyfy+hpfr85KfyTctMADqHVhLAF3MlIkq83CP8UfAHUssgXTYd+6er6PaAhx/QGv4L1EiA== + +"@rollup/rollup-darwin-arm64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-arm64/-/rollup-darwin-arm64-4.18.0.tgz#b6dd74e117510dfe94541646067b0545b42ff096" + integrity sha512-IWfdwU7KDSm07Ty0PuA/W2JYoZ4iTj3TUQjkVsO/6U+4I1jN5lcR71ZEvRh52sDOERdnNhhHU57UITXz5jC1/w== + +"@rollup/rollup-darwin-x64@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-darwin-x64/-/rollup-darwin-x64-4.18.0.tgz#e07d76de1cec987673e7f3d48ccb8e106d42c05c" + integrity sha512-n2LMsUz7Ynu7DoQrSQkBf8iNrjOGyPLrdSg802vk6XT3FtsgX6JbE8IHRvposskFm9SNxzkLYGSq9QdpLYpRNA== + +"@rollup/rollup-linux-arm-gnueabihf@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-gnueabihf/-/rollup-linux-arm-gnueabihf-4.18.0.tgz#9f1a6d218b560c9d75185af4b8bb42f9f24736b8" + integrity sha512-C/zbRYRXFjWvz9Z4haRxcTdnkPt1BtCkz+7RtBSuNmKzMzp3ZxdM28Mpccn6pt28/UWUCTXa+b0Mx1k3g6NOMA== + +"@rollup/rollup-linux-arm-musleabihf@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm-musleabihf/-/rollup-linux-arm-musleabihf-4.18.0.tgz#53618b92e6ffb642c7b620e6e528446511330549" + integrity sha512-l3m9ewPgjQSXrUMHg93vt0hYCGnrMOcUpTz6FLtbwljo2HluS4zTXFy2571YQbisTnfTKPZ01u/ukJdQTLGh9A== + +"@rollup/rollup-linux-arm64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-gnu/-/rollup-linux-arm64-gnu-4.18.0.tgz#99a7ba5e719d4f053761a698f7b52291cefba577" + integrity sha512-rJ5D47d8WD7J+7STKdCUAgmQk49xuFrRi9pZkWoRD1UeSMakbcepWXPF8ycChBoAqs1pb2wzvbY6Q33WmN2ftw== + 
+"@rollup/rollup-linux-arm64-musl@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-arm64-musl/-/rollup-linux-arm64-musl-4.18.0.tgz#f53db99a45d9bc00ce94db8a35efa7c3c144a58c" + integrity sha512-be6Yx37b24ZwxQ+wOQXXLZqpq4jTckJhtGlWGZs68TgdKXJgw54lUUoFYrg6Zs/kjzAQwEwYbp8JxZVzZLRepQ== + +"@rollup/rollup-linux-powerpc64le-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-powerpc64le-gnu/-/rollup-linux-powerpc64le-gnu-4.18.0.tgz#cbb0837408fe081ce3435cf3730e090febafc9bf" + integrity sha512-hNVMQK+qrA9Todu9+wqrXOHxFiD5YmdEi3paj6vP02Kx1hjd2LLYR2eaN7DsEshg09+9uzWi2W18MJDlG0cxJA== + +"@rollup/rollup-linux-riscv64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-riscv64-gnu/-/rollup-linux-riscv64-gnu-4.18.0.tgz#8ed09c1d1262ada4c38d791a28ae0fea28b80cc9" + integrity sha512-ROCM7i+m1NfdrsmvwSzoxp9HFtmKGHEqu5NNDiZWQtXLA8S5HBCkVvKAxJ8U+CVctHwV2Gb5VUaK7UAkzhDjlg== + +"@rollup/rollup-linux-s390x-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-s390x-gnu/-/rollup-linux-s390x-gnu-4.18.0.tgz#938138d3c8e0c96f022252a28441dcfb17afd7ec" + integrity sha512-0UyyRHyDN42QL+NbqevXIIUnKA47A+45WyasO+y2bGJ1mhQrfrtXUpTxCOrfxCR4esV3/RLYyucGVPiUsO8xjg== + +"@rollup/rollup-linux-x64-gnu@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-gnu/-/rollup-linux-x64-gnu-4.18.0.tgz#1a7481137a54740bee1ded4ae5752450f155d942" + integrity sha512-xuglR2rBVHA5UsI8h8UbX4VJ470PtGCf5Vpswh7p2ukaqBGFTnsfzxUBetoWBWymHMxbIG0Cmx7Y9qDZzr648w== + +"@rollup/rollup-linux-x64-musl@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-linux-x64-musl/-/rollup-linux-x64-musl-4.18.0.tgz#f1186afc601ac4f4fc25fac4ca15ecbee3a1874d" + integrity sha512-LKaqQL9osY/ir2geuLVvRRs+utWUNilzdE90TpyoX0eNqPzWjRm14oMEE+YLve4k/NAqCdPkGYDaDF5Sw+xBfg== + +"@rollup/rollup-win32-arm64-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-arm64-msvc/-/rollup-win32-arm64-msvc-4.18.0.tgz#ed6603e93636a96203c6915be4117245c1bd2daf" + integrity sha512-7J6TkZQFGo9qBKH0pk2cEVSRhJbL6MtfWxth7Y5YmZs57Pi+4x6c2dStAUvaQkHQLnEQv1jzBUW43GvZW8OFqA== + +"@rollup/rollup-win32-ia32-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-ia32-msvc/-/rollup-win32-ia32-msvc-4.18.0.tgz#14e0b404b1c25ebe6157a15edb9c46959ba74c54" + integrity sha512-Txjh+IxBPbkUB9+SXZMpv+b/vnTEtFyfWZgJ6iyCmt2tdx0OF5WhFowLmnh8ENGNpfUlUZkdI//4IEmhwPieNg== + +"@rollup/rollup-win32-x64-msvc@4.18.0": + version "4.18.0" + resolved "https://registry.yarnpkg.com/@rollup/rollup-win32-x64-msvc/-/rollup-win32-x64-msvc-4.18.0.tgz#5d694d345ce36b6ecf657349e03eb87297e68da4" + integrity sha512-UOo5FdvOL0+eIVTgS4tIdbW+TtnBLWg1YBCcU2KWM7nuNwRz9bksDX1bekJJCpu25N1DVWaCwnT39dVQxzqS8g== "@rollup/stream@3.0.1": version "3.0.1" @@ -1124,91 +1124,91 @@ dependencies: "@sinonjs/commons" "^3.0.0" -"@swc/core-darwin-arm64@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.4.17.tgz#e62fa7f247bdd1c0c50a3f99722da4dd098c7c67" - integrity sha512-HVl+W4LezoqHBAYg2JCqR+s9ife9yPfgWSj37iIawLWzOmuuJ7jVdIB7Ee2B75bEisSEKyxRlTl6Y1Oq3owBgw== - -"@swc/core-darwin-x64@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.4.17.tgz#1145cbb7575e317204ed3a7d0274bd26fe9ffab6" - integrity 
sha512-WYRO9Fdzq4S/he8zjW5I95G1zcvyd9yyD3Tgi4/ic84P5XDlSMpBDpBLbr/dCPjmSg7aUXxNQqKqGkl6dQxYlA== - -"@swc/core-linux-arm-gnueabihf@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.4.17.tgz#7145b3ada5cf9b748eaacbc9a7c7037ba0fb26bb" - integrity sha512-cgbvpWOvtMH0XFjvwppUCR+Y+nf6QPaGu6AQ5hqCP+5Lv2zO5PG0RfasC4zBIjF53xgwEaaWmGP5/361P30X8Q== - -"@swc/core-linux-arm64-gnu@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.4.17.tgz#5c0833ef132af17bd3cbdf2253f35b57c0cf62bb" - integrity sha512-l7zHgaIY24cF9dyQ/FOWbmZDsEj2a9gRFbmgx2u19e3FzOPuOnaopFj0fRYXXKCmtdx+anD750iBIYnTR+pq/Q== - -"@swc/core-linux-arm64-musl@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.4.17.tgz#5bfe81eb23c905f04b669a7d2b060a147a263483" - integrity sha512-qhH4gr9gAlVk8MBtzXbzTP3BJyqbAfUOATGkyUtohh85fPXQYuzVlbExix3FZXTwFHNidGHY8C+ocscI7uDaYw== - -"@swc/core-linux-x64-gnu@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.4.17.tgz#a0c19bc9635e86ebd1c7f8e9e026503d1a1bf83d" - integrity sha512-vRDFATL1oN5oZMImkwbgSHEkp8xG1ofEASBypze01W1Tqto8t+yo6gsp69wzCZBlxldsvPpvFZW55Jq0Rn+UnA== - -"@swc/core-linux-x64-musl@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.4.17.tgz#2179b9536235a3b02a46997ddb1c178dfadf1667" - integrity sha512-zQNPXAXn3nmPqv54JVEN8k2JMEcMTQ6veVuU0p5O+A7KscJq+AGle/7ZQXzpXSfUCXlLMX4wvd+rwfGhh3J4cw== - -"@swc/core-win32-arm64-msvc@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.4.17.tgz#3004a431c836c6b16b4660ea2425dde467a8ee36" - integrity sha512-z86n7EhOwyzxwm+DLE5NoLkxCTme2lq7QZlDjbQyfCxOt6isWz8rkW5QowTX8w9Rdmk34ncrjSLvnHOeLY17+w== - -"@swc/core-win32-ia32-msvc@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.4.17.tgz#59155485d5307fb2a267e5acb215e0f440b6f48f" - integrity sha512-JBwuSTJIgiJJX6wtr4wmXbfvOswHFj223AumUrK544QV69k60FJ9q2adPW9Csk+a8wm1hLxq4HKa2K334UHJ/g== - -"@swc/core-win32-x64-msvc@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.4.17.tgz#b98f25fc277fb0e319f25f9fd00a82023662716b" - integrity sha512-jFkOnGQamtVDBm3MF5Kq1lgW8vx4Rm1UvJWRUfg+0gx7Uc3Jp3QMFeMNw/rDNQYRDYPG3yunCC+2463ycd5+dg== - -"@swc/core@1.4.17": - version "1.4.17" - resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.4.17.tgz#3ea4180fa5c54282b284006a6de1263ef1cf887f" - integrity sha512-tq+mdWvodMBNBBZbwFIMTVGYHe9N7zvEaycVVjfvAx20k1XozHbHhRv+9pEVFJjwRxLdXmtvFZd3QZHRAOpoNQ== - dependencies: - "@swc/counter" "^0.1.2" - "@swc/types" "^0.1.5" +"@swc/core-darwin-arm64@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.5.24.tgz#71875695bc617e57c2d93352f48317b4c41e0240" + integrity sha512-M7oLOcC0sw+UTyAuL/9uyB9GeO4ZpaBbH76JSH6g1m0/yg7LYJZGRmplhDmwVSDAR5Fq4Sjoi1CksmmGkgihGA== + +"@swc/core-darwin-x64@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.5.24.tgz#6b4c3eb9b21ab50b7324a82c9497ffeb2e8e0a57" + integrity sha512-MfcFjGGYognpSBSos2pYUNYJSmqEhuw5ceGr6qAdME7ddbjGXliza4W6FggsM+JnWwpqa31+e7/R+GetW4WkaQ== + +"@swc/core-linux-arm-gnueabihf@1.5.24": + version "1.5.24" 
+ resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.5.24.tgz#5730ed6ad86afe4ee8df04ee6f21430daead186c" + integrity sha512-amI2pwtcWV3E/m/nf+AQtn1LWDzKLZyjCmWd3ms7QjEueWYrY8cU1Y4Wp7wNNsxIoPOi8zek1Uj2wwFD/pttNQ== + +"@swc/core-linux-arm64-gnu@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.5.24.tgz#0a2478e8601391aa88f82bfece1dbc60d27cbcfd" + integrity sha512-sTSvmqMmgT1ynH/nP75Pc51s+iT4crZagHBiDOf5cq+kudUYjda9lWMs7xkXB/TUKFHPCRK0HGunl8bkwiIbuw== + +"@swc/core-linux-arm64-musl@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.5.24.tgz#e0199092dc611ca75f8a92dcea17de44e38f3fbf" + integrity sha512-vd2/hfOBGbrX21FxsFdXCUaffjkHvlZkeE2UMRajdXifwv79jqOHIJg3jXG1F3ZrhCghCzirFts4tAZgcG8XWg== + +"@swc/core-linux-x64-gnu@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.5.24.tgz#1fe347c9f28457c593f2fda5b0d4904a2b105ecd" + integrity sha512-Zrdzi7NqzQxm2BvAG5KyOSBEggQ7ayrxh599AqqevJmsUXJ8o2nMiWQOBvgCGp7ye+Biz3pvZn1EnRzAp+TpUg== + +"@swc/core-linux-x64-musl@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.5.24.tgz#bf6ac583fac211d704d2d78cfd0b7bf751268f5e" + integrity sha512-1F8z9NRi52jdZQCGc5sflwYSctL6omxiVmIFVp8TC9nngjQKc00TtX/JC2Eo2HwvgupkFVl5YQJidAck9YtmJw== + +"@swc/core-win32-arm64-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.5.24.tgz#41b9faf4db69cc08a43c3a176df2a7b94d765637" + integrity sha512-cKpP7KvS6Xr0jFSTBXY53HZX/YfomK5EMQYpCVDOvfsZeYHN20sQSKXfpVLvA/q2igVt1zzy1XJcOhpJcgiKLg== + +"@swc/core-win32-ia32-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.5.24.tgz#e123ad00e3b28d567d3851a86697fb3c54ed817a" + integrity sha512-IoPWfi0iwqjZuf7gE223+B97/ZwkKbu7qL5KzGP7g3hJrGSKAvv7eC5Y9r2iKKtLKyv5R/T6Ho0kFR/usi7rHw== + +"@swc/core-win32-x64-msvc@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.5.24.tgz#21fb87b1981253039e6d45255e31a875f446e397" + integrity sha512-zHgF2k1uVJL8KIW+PnVz1To4a3Cz9THbh2z2lbehaF/gKHugH4c3djBozU4das1v35KOqf5jWIEviBLql2wDLQ== + +"@swc/core@1.5.24": + version "1.5.24" + resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.5.24.tgz#9ecb4601cb6a4fb19f227ec5fb59d07e23347dca" + integrity sha512-Eph9zvO4xvqWZGVzTdtdEJ0Vqf0VIML/o/e4Qd2RLOqtfgnlRi7avmMu5C0oqciJ0tk+hqdUKVUZ4JPoPaiGvQ== + dependencies: + "@swc/counter" "^0.1.3" + "@swc/types" "^0.1.7" optionalDependencies: - "@swc/core-darwin-arm64" "1.4.17" - "@swc/core-darwin-x64" "1.4.17" - "@swc/core-linux-arm-gnueabihf" "1.4.17" - "@swc/core-linux-arm64-gnu" "1.4.17" - "@swc/core-linux-arm64-musl" "1.4.17" - "@swc/core-linux-x64-gnu" "1.4.17" - "@swc/core-linux-x64-musl" "1.4.17" - "@swc/core-win32-arm64-msvc" "1.4.17" - "@swc/core-win32-ia32-msvc" "1.4.17" - "@swc/core-win32-x64-msvc" "1.4.17" - -"@swc/counter@^0.1.2", "@swc/counter@^0.1.3": + "@swc/core-darwin-arm64" "1.5.24" + "@swc/core-darwin-x64" "1.5.24" + "@swc/core-linux-arm-gnueabihf" "1.5.24" + "@swc/core-linux-arm64-gnu" "1.5.24" + "@swc/core-linux-arm64-musl" "1.5.24" + "@swc/core-linux-x64-gnu" "1.5.24" + "@swc/core-linux-x64-musl" "1.5.24" + "@swc/core-win32-arm64-msvc" "1.5.24" + "@swc/core-win32-ia32-msvc" "1.5.24" + 
"@swc/core-win32-x64-msvc" "1.5.24" + +"@swc/counter@^0.1.3": version "0.1.3" resolved "https://registry.yarnpkg.com/@swc/counter/-/counter-0.1.3.tgz#cc7463bd02949611c6329596fccd2b0ec782b0e9" integrity sha512-e2BR4lsJkkRlKZ/qCHPw9ZaSxc0MVUd7gtbtaB7aMvHeJVYe8sOB8DBZkP2DtISHGSku9sCK6T6cnY0CtXrOCQ== -"@swc/helpers@^0.5.10": +"@swc/helpers@^0.5.11": version "0.5.11" resolved "https://registry.yarnpkg.com/@swc/helpers/-/helpers-0.5.11.tgz#5bab8c660a6e23c13b2d23fcd1ee44a2db1b0cb7" integrity sha512-YNlnKRWF2sVojTpIyzwou9XoTNbzbzONwRhOoniEioF1AtaitTvVZblaQRrAzChWQ1bLYyYSWzM18y4WwgzJ+A== dependencies: tslib "^2.4.0" -"@swc/types@^0.1.5": - version "0.1.6" - resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.6.tgz#2f13f748995b247d146de2784d3eb7195410faba" - integrity sha512-/JLo/l2JsT/LRd80C3HfbmVpxOAJ11FO2RCEslFrgzLltoP9j8XIbsyDcfCt2WWyX+CM96rBoNM+IToAkFOugg== +"@swc/types@^0.1.7": + version "0.1.7" + resolved "https://registry.yarnpkg.com/@swc/types/-/types-0.1.7.tgz#ea5d658cf460abff51507ca8d26e2d391bafb15e" + integrity sha512-scHWahbHF0eyj3JsxG9CFJgFdFNaVQCNAimBlT6PzS3n/HptxqREjsm4OH6AN3lYcffZYSPxXW8ua2BEHp0lJQ== dependencies: "@swc/counter" "^0.1.3" @@ -1348,7 +1348,7 @@ expect "^29.0.0" pretty-format "^29.0.0" -"@types/json-schema@*", "@types/json-schema@^7.0.12", "@types/json-schema@^7.0.15", "@types/json-schema@^7.0.8": +"@types/json-schema@*", "@types/json-schema@^7.0.8": version "7.0.15" resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== @@ -1370,17 +1370,19 @@ dependencies: "@types/node" "*" -"@types/node@*", "@types/node@^20.12.7": +"@types/node@*": version "20.12.8" resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.8.tgz#35897bf2bfe3469847ab04634636de09552e8256" integrity sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w== dependencies: undici-types "~5.26.4" -"@types/node@^13.7.4": - version "13.13.52" - resolved "https://registry.yarnpkg.com/@types/node/-/node-13.13.52.tgz#03c13be70b9031baaed79481c0c0cfb0045e53f7" - integrity sha512-s3nugnZumCC//n4moGGe6tkNMyYEdaDBitVjwPxXmR5lnMG5dHePinH2EdxkG3Rh1ghFHHixAG4NJhpJW1rthQ== +"@types/node@>=13.7.4", "@types/node@^20.13.0": + version "20.13.0" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.13.0.tgz#011a76bc1e71ae9a026dddcfd7039084f752c4b6" + integrity sha512-FM6AOb3khNkNIXPnHFDYaHerSv8uN22C91z098AnGccVu+Pcdhi+pNUFDi0iLmPIsVE0JBD0KVS7mzUYt4nRzQ== + dependencies: + undici-types "~5.26.4" "@types/normalize-package-data@^2.4.0": version "2.4.4" @@ -1392,11 +1394,6 @@ resolved "https://registry.yarnpkg.com/@types/resolve/-/resolve-1.20.2.tgz#97d26e00cd4a0423b4af620abecf3e6f442b7975" integrity sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q== -"@types/semver@^7.5.0", "@types/semver@^7.5.8": - version "7.5.8" - resolved "https://registry.yarnpkg.com/@types/semver/-/semver-7.5.8.tgz#8268a8c57a3e4abd25c165ecd36237db7948a55e" - integrity sha512-I8EUhyrgfLrcTkzV3TSsGyl1tSuPrEDzr0yd5m90UgNxQkyDXULk3b6MlQqTCpZpNtWe1K0hzclnZkTcLBe2UQ== - "@types/stack-utils@^2.0.0": version "2.0.3" resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.3.tgz#6209321eb2c1712a7e7466422b8cb1fc0d9dd5d8" @@ -1429,91 +1426,62 @@ dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@7.8.0": - version "7.8.0" - resolved 
"https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.8.0.tgz#c78e309fe967cb4de05b85cdc876fb95f8e01b6f" - integrity sha512-gFTT+ezJmkwutUPmB0skOj3GZJtlEGnlssems4AjkVweUPGj7jRwwqg0Hhg7++kPGJqKtTYx+R05Ftww372aIg== +"@typescript-eslint/eslint-plugin@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.11.0.tgz#f90f0914657ead08e1c75f66939c926edeab42dd" + integrity sha512-P+qEahbgeHW4JQ/87FuItjBj8O3MYv5gELDzr8QaQ7fsll1gSMTYb6j87MYyxwf3DtD7uGFB9ShwgmCJB5KmaQ== dependencies: "@eslint-community/regexpp" "^4.10.0" - "@typescript-eslint/scope-manager" "7.8.0" - "@typescript-eslint/type-utils" "7.8.0" - "@typescript-eslint/utils" "7.8.0" - "@typescript-eslint/visitor-keys" "7.8.0" - debug "^4.3.4" + "@typescript-eslint/scope-manager" "7.11.0" + "@typescript-eslint/type-utils" "7.11.0" + "@typescript-eslint/utils" "7.11.0" + "@typescript-eslint/visitor-keys" "7.11.0" graphemer "^1.4.0" ignore "^5.3.1" natural-compare "^1.4.0" - semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/parser@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.8.0.tgz#1e1db30c8ab832caffee5f37e677dbcb9357ddc8" - integrity sha512-KgKQly1pv0l4ltcftP59uQZCi4HUYswCLbTqVZEJu7uLX8CTLyswqMLqLN+2QFz4jCptqWVV4SB7vdxcH2+0kQ== +"@typescript-eslint/parser@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.11.0.tgz#525ad8bee54a8f015f134edd241d91b84ab64839" + integrity sha512-yimw99teuaXVWsBcPO1Ais02kwJ1jmNA1KxE7ng0aT7ndr1pT1wqj0OJnsYVGKKlc4QJai86l/025L6z8CljOg== dependencies: - "@typescript-eslint/scope-manager" "7.8.0" - "@typescript-eslint/types" "7.8.0" - "@typescript-eslint/typescript-estree" "7.8.0" - "@typescript-eslint/visitor-keys" "7.8.0" + "@typescript-eslint/scope-manager" "7.11.0" + "@typescript-eslint/types" "7.11.0" + "@typescript-eslint/typescript-estree" "7.11.0" + "@typescript-eslint/visitor-keys" "7.11.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@6.21.0": - version "6.21.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-6.21.0.tgz#ea8a9bfc8f1504a6ac5d59a6df308d3a0630a2b1" - integrity sha512-OwLUIWZJry80O99zvqXVEioyniJMa+d2GrqpUTqi5/v5D5rOrppJVBPa0yKCblcigC0/aYAzxxqQ1B+DS2RYsg== - dependencies: - "@typescript-eslint/types" "6.21.0" - "@typescript-eslint/visitor-keys" "6.21.0" - -"@typescript-eslint/scope-manager@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.8.0.tgz#bb19096d11ec6b87fb6640d921df19b813e02047" - integrity sha512-viEmZ1LmwsGcnr85gIq+FCYI7nO90DVbE37/ll51hjv9aG+YZMb4WDE2fyWpUR4O/UrhGRpYXK/XajcGTk2B8g== +"@typescript-eslint/scope-manager@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.11.0.tgz#cf5619b01de62a226a59add15a02bde457335d1d" + integrity sha512-27tGdVEiutD4POirLZX4YzT180vevUURJl4wJGmm6TrQoiYwuxTIY98PBp6L2oN+JQxzE0URvYlzJaBHIekXAw== dependencies: - "@typescript-eslint/types" "7.8.0" - "@typescript-eslint/visitor-keys" "7.8.0" + "@typescript-eslint/types" "7.11.0" + "@typescript-eslint/visitor-keys" "7.11.0" -"@typescript-eslint/type-utils@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.8.0.tgz#9de166f182a6e4d1c5da76e94880e91831e3e26f" - integrity sha512-H70R3AefQDQpz9mGv13Uhi121FNMh+WEaRqcXTX09YEDky21km4dV1ZXJIp8QjXc4ZaVkXVdohvWDzbnbHDS+A== 
+"@typescript-eslint/type-utils@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.11.0.tgz#ac216697d649084fedf4a910347b9642bd0ff099" + integrity sha512-WmppUEgYy+y1NTseNMJ6mCFxt03/7jTOy08bcg7bxJJdsM4nuhnchyBbE8vryveaJUf62noH7LodPSo5Z0WUCg== dependencies: - "@typescript-eslint/typescript-estree" "7.8.0" - "@typescript-eslint/utils" "7.8.0" + "@typescript-eslint/typescript-estree" "7.11.0" + "@typescript-eslint/utils" "7.11.0" debug "^4.3.4" ts-api-utils "^1.3.0" -"@typescript-eslint/types@6.21.0": - version "6.21.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-6.21.0.tgz#205724c5123a8fef7ecd195075fa6e85bac3436d" - integrity sha512-1kFmZ1rOm5epu9NZEZm1kckCDGj5UJEf7P1kliH4LKu/RkwpsfqqGmY2OOcUs18lSlQBKLDYBOGxRVtrMN5lpg== - -"@typescript-eslint/types@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.8.0.tgz#1fd2577b3ad883b769546e2d1ef379f929a7091d" - integrity sha512-wf0peJ+ZGlcH+2ZS23aJbOv+ztjeeP8uQ9GgwMJGVLx/Nj9CJt17GWgWWoSmoRVKAX2X+7fzEnAjxdvK2gqCLw== +"@typescript-eslint/types@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.11.0.tgz#5e9702a5e8b424b7fc690e338d359939257d6722" + integrity sha512-MPEsDRZTyCiXkD4vd3zywDCifi7tatc4K37KqTprCvaXptP7Xlpdw0NR2hRJTetG5TxbWDB79Ys4kLmHliEo/w== -"@typescript-eslint/typescript-estree@6.21.0": - version "6.21.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-6.21.0.tgz#c47ae7901db3b8bddc3ecd73daff2d0895688c46" - integrity sha512-6npJTkZcO+y2/kr+z0hc4HwNfrrP4kNYh57ek7yCNlrBjWQ1Y0OS7jiZTkgumrvkX5HkEKXFZkkdFNkaW2wmUQ== +"@typescript-eslint/typescript-estree@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.11.0.tgz#7cbc569bc7336c3a494ceaf8204fdee5d5dbb7fa" + integrity sha512-cxkhZ2C/iyi3/6U9EPc5y+a6csqHItndvN/CzbNXTNrsC3/ASoYQZEt9uMaEp+xFNjasqQyszp5TumAVKKvJeQ== dependencies: - "@typescript-eslint/types" "6.21.0" - "@typescript-eslint/visitor-keys" "6.21.0" - debug "^4.3.4" - globby "^11.1.0" - is-glob "^4.0.3" - minimatch "9.0.3" - semver "^7.5.4" - ts-api-utils "^1.0.1" - -"@typescript-eslint/typescript-estree@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.8.0.tgz#b028a9226860b66e623c1ee55cc2464b95d2987c" - integrity sha512-5pfUCOwK5yjPaJQNy44prjCwtr981dO8Qo9J9PwYXZ0MosgAbfEMB008dJ5sNo3+/BN6ytBPuSvXUg9SAqB0dg== - dependencies: - "@typescript-eslint/types" "7.8.0" - "@typescript-eslint/visitor-keys" "7.8.0" + "@typescript-eslint/types" "7.11.0" + "@typescript-eslint/visitor-keys" "7.11.0" debug "^4.3.4" globby "^11.1.0" is-glob "^4.0.3" @@ -1521,46 +1489,22 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/utils@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.8.0.tgz#57a79f9c0c0740ead2f622e444cfaeeb9fd047cd" - integrity sha512-L0yFqOCflVqXxiZyXrDr80lnahQfSOfc9ELAAZ75sqicqp2i36kEZZGuUymHNFoYOqxRT05up760b4iGsl02nQ== +"@typescript-eslint/utils@7.11.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.11.0.tgz#524f047f2209959424c3ef689b0d83b3bc09919c" + integrity sha512-xlAWwPleNRHwF37AhrZurOxA1wyXowW4PqVXZVUNCLjB48CqdPJoJWkrpH2nij9Q3Lb7rtWindtoXwxjxlKKCA== dependencies: "@eslint-community/eslint-utils" 
"^4.4.0" - "@types/json-schema" "^7.0.15" - "@types/semver" "^7.5.8" - "@typescript-eslint/scope-manager" "7.8.0" - "@typescript-eslint/types" "7.8.0" - "@typescript-eslint/typescript-estree" "7.8.0" - semver "^7.6.0" - -"@typescript-eslint/utils@^6.0.0": - version "6.21.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-6.21.0.tgz#4714e7a6b39e773c1c8e97ec587f520840cd8134" - integrity sha512-NfWVaC8HP9T8cbKQxHcsJBY5YE1O33+jpMwN45qzWWaPDZgLIbo12toGMWnmhvCpd3sIxkpDw3Wv1B3dYrbDQQ== - dependencies: - "@eslint-community/eslint-utils" "^4.4.0" - "@types/json-schema" "^7.0.12" - "@types/semver" "^7.5.0" - "@typescript-eslint/scope-manager" "6.21.0" - "@typescript-eslint/types" "6.21.0" - "@typescript-eslint/typescript-estree" "6.21.0" - semver "^7.5.4" + "@typescript-eslint/scope-manager" "7.11.0" + "@typescript-eslint/types" "7.11.0" + "@typescript-eslint/typescript-estree" "7.11.0" -"@typescript-eslint/visitor-keys@6.21.0": - version "6.21.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-6.21.0.tgz#87a99d077aa507e20e238b11d56cc26ade45fe47" - integrity sha512-JJtkDduxLi9bivAB+cYOVMtbkqdPOhZ+ZI5LC47MIRrDV4Yn2o+ZnW10Nkmr28xRpSpdJ6Sm42Hjf2+REYXm0A== +"@typescript-eslint/visitor-keys@7.11.0": + version "7.11.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.11.0.tgz#2c50cd292e67645eec05ac0830757071b4a4d597" + integrity sha512-7syYk4MzjxTEk0g/w3iqtgxnFQspDJfn6QKD36xMuuhTzjcxY7F8EmBLnALjVyaOF1/bVocu3bS/2/F7rXrveQ== dependencies: - "@typescript-eslint/types" "6.21.0" - eslint-visitor-keys "^3.4.1" - -"@typescript-eslint/visitor-keys@7.8.0": - version "7.8.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.8.0.tgz#7285aab991da8bee411a42edbd5db760d22fdd91" - integrity sha512-q4/gibTNBQNA0lGyYQCmWRS5D15n8rXh4QjK3KV+MBPlTYHpfBUT3D3PaPR/HeNiI9W6R7FvlkcGhNyAoP+caA== - dependencies: - "@typescript-eslint/types" "7.8.0" + "@typescript-eslint/types" "7.11.0" eslint-visitor-keys "^3.4.3" "@ungap/structured-clone@^1.2.0": @@ -2960,34 +2904,34 @@ esbuild-plugin-alias@0.2.1: resolved "https://registry.yarnpkg.com/esbuild-plugin-alias/-/esbuild-plugin-alias-0.2.1.tgz#45a86cb941e20e7c2bc68a2bea53562172494fcb" integrity sha512-jyfL/pwPqaFXyKnj8lP8iLk6Z0m099uXR45aSN8Av1XD4vhvQutxxPzgA2bTcAwQpa1zCXDcWOlhFgyP3GKqhQ== -esbuild@0.20.2: - version "0.20.2" - resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.20.2.tgz#9d6b2386561766ee6b5a55196c6d766d28c87ea1" - integrity sha512-WdOOppmUNU+IbZ0PaDiTst80zjnrOkyJNHoKupIcVyU8Lvla3Ugx94VzkQ32Ijqd7UhHJy75gNWDMUekcrSJ6g== +esbuild@0.21.4: + version "0.21.4" + resolved "https://registry.yarnpkg.com/esbuild/-/esbuild-0.21.4.tgz#ceb501def8edb12a5bfd9c55f3a96db698edf022" + integrity sha512-sFMcNNrj+Q0ZDolrp5pDhH0nRPN9hLIM3fRPwgbLYJeSHHgnXSnbV3xYgSVuOeLWH9c73VwmEverVzupIv5xuA== optionalDependencies: - "@esbuild/aix-ppc64" "0.20.2" - "@esbuild/android-arm" "0.20.2" - "@esbuild/android-arm64" "0.20.2" - "@esbuild/android-x64" "0.20.2" - "@esbuild/darwin-arm64" "0.20.2" - "@esbuild/darwin-x64" "0.20.2" - "@esbuild/freebsd-arm64" "0.20.2" - "@esbuild/freebsd-x64" "0.20.2" - "@esbuild/linux-arm" "0.20.2" - "@esbuild/linux-arm64" "0.20.2" - "@esbuild/linux-ia32" "0.20.2" - "@esbuild/linux-loong64" "0.20.2" - "@esbuild/linux-mips64el" "0.20.2" - "@esbuild/linux-ppc64" "0.20.2" - "@esbuild/linux-riscv64" "0.20.2" - "@esbuild/linux-s390x" "0.20.2" - "@esbuild/linux-x64" "0.20.2" - "@esbuild/netbsd-x64" "0.20.2" - "@esbuild/openbsd-x64" 
"0.20.2" - "@esbuild/sunos-x64" "0.20.2" - "@esbuild/win32-arm64" "0.20.2" - "@esbuild/win32-ia32" "0.20.2" - "@esbuild/win32-x64" "0.20.2" + "@esbuild/aix-ppc64" "0.21.4" + "@esbuild/android-arm" "0.21.4" + "@esbuild/android-arm64" "0.21.4" + "@esbuild/android-x64" "0.21.4" + "@esbuild/darwin-arm64" "0.21.4" + "@esbuild/darwin-x64" "0.21.4" + "@esbuild/freebsd-arm64" "0.21.4" + "@esbuild/freebsd-x64" "0.21.4" + "@esbuild/linux-arm" "0.21.4" + "@esbuild/linux-arm64" "0.21.4" + "@esbuild/linux-ia32" "0.21.4" + "@esbuild/linux-loong64" "0.21.4" + "@esbuild/linux-mips64el" "0.21.4" + "@esbuild/linux-ppc64" "0.21.4" + "@esbuild/linux-riscv64" "0.21.4" + "@esbuild/linux-s390x" "0.21.4" + "@esbuild/linux-x64" "0.21.4" + "@esbuild/netbsd-x64" "0.21.4" + "@esbuild/openbsd-x64" "0.21.4" + "@esbuild/sunos-x64" "0.21.4" + "@esbuild/win32-arm64" "0.21.4" + "@esbuild/win32-ia32" "0.21.4" + "@esbuild/win32-x64" "0.21.4" esbuild@^0.19.6: version "0.19.12" @@ -3043,12 +2987,12 @@ escape-string-regexp@^4.0.0: resolved "https://registry.yarnpkg.com/escape-string-regexp/-/escape-string-regexp-4.0.0.tgz#14ba83a5d373e3d311e5afca29cf5bfad965bf34" integrity sha512-TtpcNJ3XAzx3Gq8sWRzJaVajRs0uVxA2YAkdb1jm2YkPz4G6egUFAyA3n5vtEIZefPk5Wa4UXbKuS5fKkJWdgA== -eslint-plugin-jest@28.4.0: - version "28.4.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-28.4.0.tgz#213be88f799a35ca9d63ce1a30081bb32b8da765" - integrity sha512-ORVHiFPC8RQxHLyQJ37MxNilK9k+cPzjHz65T8gAbpYZunGutXvKqwfM3WXBCvFDF1QBeYJJu9LB/i5cuXBs+g== +eslint-plugin-jest@28.5.0: + version "28.5.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-jest/-/eslint-plugin-jest-28.5.0.tgz#b497b795de37f671eaccd38bd83030186ff5dc8d" + integrity sha512-6np6DGdmNq/eBbA7HOUNV8fkfL86PYwBfwyb8n23FXgJNTR8+ot3smRHjza9LGsBBZRypK3qyF79vMjohIL8eQ== dependencies: - "@typescript-eslint/utils" "^6.0.0" + "@typescript-eslint/utils" "^6.0.0 || ^7.0.0" eslint-plugin-unicorn@52.0.0: version "52.0.0" @@ -3663,16 +3607,16 @@ glob-watcher@^5.0.3: normalize-path "^3.0.0" object.defaults "^1.1.0" -glob@10.3.12: - version "10.3.12" - resolved "https://registry.yarnpkg.com/glob/-/glob-10.3.12.tgz#3a65c363c2e9998d220338e88a5f6ac97302960b" - integrity sha512-TCNv8vJ+xz4QiqTpfOJA7HvYv+tNIRHKfUWw/q+v2jdgN4ebz+KY9tGx5J4rHP0o84mNP+ApH66HRX8us3Khqg== +glob@10.4.1: + version "10.4.1" + resolved "https://registry.yarnpkg.com/glob/-/glob-10.4.1.tgz#0cfb01ab6a6b438177bfe6a58e2576f6efe909c2" + integrity sha512-2jelhlq3E4ho74ZyVLN03oKdAZVUa6UDZzFLVH1H7dnoax+y9qyaq8zBkfDIggjniU19z0wU18y16jMB2eyVIw== dependencies: foreground-child "^3.1.0" - jackspeak "^2.3.6" - minimatch "^9.0.1" - minipass "^7.0.4" - path-scurry "^1.10.2" + jackspeak "^3.1.2" + minimatch "^9.0.4" + minipass "^7.1.2" + path-scurry "^1.11.1" glob@^7.1.1, glob@^7.1.3, glob@^7.1.4: version "7.2.3" @@ -4405,18 +4349,18 @@ istextorbinary@^3.0.0: binaryextensions "^2.2.0" textextensions "^3.2.0" -ix@5.0.0: - version "5.0.0" - resolved "https://registry.yarnpkg.com/ix/-/ix-5.0.0.tgz#b9e292f79b1876bbf696809fe86e42930bdbfcd4" - integrity sha512-6LyyrHnvNrSy5pKtW/KA+KKusHrB223aBJCJlIGPN7QBfDkEEtNrAkAz9lLLShIcdJntq6BiPCHuKaCM/9wwXw== +ix@6.0.0: + version "6.0.0" + resolved "https://registry.yarnpkg.com/ix/-/ix-6.0.0.tgz#c1875523f8090c7146dc3ac3412a763663887f27" + integrity sha512-B/KeYkHtOWbr3ttckqWT9uha2ixw9fGVDxX+DwVXhO+P5eOhyCadt+aC30hRBvG+do+tbI3xbYDMYN6dp1C4Vw== dependencies: - "@types/node" "^13.7.4" - tslib "^2.3.0" + "@types/node" ">=13.7.4" + tslib "^2.6.2" -jackspeak@^2.3.6: - version "2.3.6" - resolved 
"https://registry.yarnpkg.com/jackspeak/-/jackspeak-2.3.6.tgz#647ecc472238aee4b06ac0e461acc21a8c505ca8" - integrity sha512-N3yCS/NegsOBokc8GAdM8UcmfsKiSS8cipheD/nivzr700H+nsMOxJjQnvwOcRYVuFkdH0wGUvW2WbXGmrZGbQ== +jackspeak@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/jackspeak/-/jackspeak-3.1.2.tgz#eada67ea949c6b71de50f1b09c92a961897b90ab" + integrity sha512-kWmLKn2tRtfYMF/BakihVVRzBKOxz4gJMiL2Rj91WnAB5TPZumSH99R/Yf1qE1u4uRimvCSJfm6hnxohXeEXjQ== dependencies: "@isaacs/cliui" "^8.0.2" optionalDependencies: @@ -4696,10 +4640,10 @@ jest-runtime@^29.7.0: slash "^3.0.0" strip-bom "^4.0.0" -jest-silent-reporter@0.5.0: - version "0.5.0" - resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.5.0.tgz#5fd8ccd61665227e3bf19d908b7350719d06ff38" - integrity sha512-epdLt8Oj0a1AyRiR6F8zx/1SVT1Mi7VU3y4wB2uOBHs/ohIquC7v2eeja7UN54uRPyHInIKWdL+RdG228n5pJQ== +jest-silent-reporter@0.6.0: + version "0.6.0" + resolved "https://registry.yarnpkg.com/jest-silent-reporter/-/jest-silent-reporter-0.6.0.tgz#e9c63a3b1e3c80571d690d998faf842f576b6a60" + integrity sha512-4nmS+5o7ycVlvbQOTx7CnGdbBtP2646hnDgQpQLaVhjHcQNHD+gqBAetyjRDlgpZ8+8N82MWI59K+EX2LsVk7g== dependencies: chalk "^4.0.0" jest-util "^26.0.0" @@ -5292,13 +5236,6 @@ min-indent@^1.0.0, min-indent@^1.0.1: resolved "https://registry.yarnpkg.com/min-indent/-/min-indent-1.0.1.tgz#a63f681673b30571fbe8bc25686ae746eefa9869" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== -minimatch@9.0.3: - version "9.0.3" - resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.3.tgz#a6e00c3de44c3a542bfaae70abfc22420a6da825" - integrity sha512-RHiac9mvaRw0x3AYRgDC1CxAP7HTcNrrECeA8YYJeWnpo+2Q5CegtZjaotWTWxDG3UeGA1coE05iH1mPjT/2mg== - dependencies: - brace-expansion "^2.0.1" - minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: version "3.1.2" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-3.1.2.tgz#19cd194bfd3e428f049a70817c038d89ab4be35b" @@ -5306,7 +5243,7 @@ minimatch@^3.0.4, minimatch@^3.0.5, minimatch@^3.1.1, minimatch@^3.1.2: dependencies: brace-expansion "^1.1.7" -minimatch@^9.0.1, minimatch@^9.0.3, minimatch@^9.0.4: +minimatch@^9.0.3, minimatch@^9.0.4: version "9.0.4" resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.4.tgz#8e49c731d1749cbec05050ee5145147b32496a51" integrity sha512-KqWh+VchfxcMNRAJjj2tnsSJdNbHsVgnkBhTNrW7AjVo6OvLtxw8zfT9oLw1JSohlFzJ8jCoTgaoXvJ+kHt6fw== @@ -5327,11 +5264,16 @@ minimist@1.x: resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== -"minipass@^5.0.0 || ^6.0.2 || ^7.0.0", minipass@^7.0.4: +"minipass@^5.0.0 || ^6.0.2 || ^7.0.0": version "7.0.4" resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.0.4.tgz#dbce03740f50a4786ba994c1fb908844d27b038c" integrity sha512-jYofLM5Dam9279rdkWzqHozUo4ybjdZmCsDHePy5V/PbBcVMiSZR97gmAy45aqi8CK1lG2ECd356FU86avfwUQ== +minipass@^7.1.2: + version "7.1.2" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-7.1.2.tgz#93a9626ce5e5e66bd4db86849e7515e92340a707" + integrity sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw== + mixin-deep@^1.2.0: version "1.3.2" resolved "https://registry.yarnpkg.com/mixin-deep/-/mixin-deep-1.3.2.tgz#1120b43dc359a785dce65b55b82e257ccf479566" @@ -5722,10 +5664,10 @@ path-root@^0.1.1: dependencies: 
path-root-regex "^0.1.0" -path-scurry@^1.10.2: - version "1.10.2" - resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.10.2.tgz#8f6357eb1239d5fa1da8b9f70e9c080675458ba7" - integrity sha512-7xTavNy5RQXnsjANvVvMkEjvloOinkAjv/Z6Ildz9v2RinZ4SBKTWFOVRbaF8p0vpHnyjV/UwNDdKuUv6M5qcA== +path-scurry@^1.11.1: + version "1.11.1" + resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.11.1.tgz#7960a668888594a0720b12a911d1a742ab9f11d2" + integrity sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA== dependencies: lru-cache "^10.2.0" minipass "^5.0.0 || ^6.0.2 || ^7.0.0" @@ -6191,29 +6133,29 @@ rimraf@^3.0.2: dependencies: glob "^7.1.3" -rollup@4.17.2: - version "4.17.2" - resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.17.2.tgz#26d1785d0144122277fdb20ab3a24729ae68301f" - integrity sha512-/9ClTJPByC0U4zNLowV1tMBe8yMEAxewtR3cUNX5BoEpGH3dQEWpJLr6CLp0fPdYRF/fzVOgvDb1zXuakwF5kQ== +rollup@4.18.0: + version "4.18.0" + resolved "https://registry.yarnpkg.com/rollup/-/rollup-4.18.0.tgz#497f60f0c5308e4602cf41136339fbf87d5f5dda" + integrity sha512-QmJz14PX3rzbJCN1SG4Xe/bAAX2a6NpCP8ab2vfu2GiUr8AQcr2nCV/oEO3yneFarB67zk8ShlIyWb2LGTb3Sg== dependencies: "@types/estree" "1.0.5" optionalDependencies: - "@rollup/rollup-android-arm-eabi" "4.17.2" - "@rollup/rollup-android-arm64" "4.17.2" - "@rollup/rollup-darwin-arm64" "4.17.2" - "@rollup/rollup-darwin-x64" "4.17.2" - "@rollup/rollup-linux-arm-gnueabihf" "4.17.2" - "@rollup/rollup-linux-arm-musleabihf" "4.17.2" - "@rollup/rollup-linux-arm64-gnu" "4.17.2" - "@rollup/rollup-linux-arm64-musl" "4.17.2" - "@rollup/rollup-linux-powerpc64le-gnu" "4.17.2" - "@rollup/rollup-linux-riscv64-gnu" "4.17.2" - "@rollup/rollup-linux-s390x-gnu" "4.17.2" - "@rollup/rollup-linux-x64-gnu" "4.17.2" - "@rollup/rollup-linux-x64-musl" "4.17.2" - "@rollup/rollup-win32-arm64-msvc" "4.17.2" - "@rollup/rollup-win32-ia32-msvc" "4.17.2" - "@rollup/rollup-win32-x64-msvc" "4.17.2" + "@rollup/rollup-android-arm-eabi" "4.18.0" + "@rollup/rollup-android-arm64" "4.18.0" + "@rollup/rollup-darwin-arm64" "4.18.0" + "@rollup/rollup-darwin-x64" "4.18.0" + "@rollup/rollup-linux-arm-gnueabihf" "4.18.0" + "@rollup/rollup-linux-arm-musleabihf" "4.18.0" + "@rollup/rollup-linux-arm64-gnu" "4.18.0" + "@rollup/rollup-linux-arm64-musl" "4.18.0" + "@rollup/rollup-linux-powerpc64le-gnu" "4.18.0" + "@rollup/rollup-linux-riscv64-gnu" "4.18.0" + "@rollup/rollup-linux-s390x-gnu" "4.18.0" + "@rollup/rollup-linux-x64-gnu" "4.18.0" + "@rollup/rollup-linux-x64-musl" "4.18.0" + "@rollup/rollup-win32-arm64-msvc" "4.18.0" + "@rollup/rollup-win32-ia32-msvc" "4.18.0" + "@rollup/rollup-win32-x64-msvc" "4.18.0" fsevents "~2.3.2" run-parallel@^1.1.9: @@ -6916,15 +6858,15 @@ trim-newlines@^4.0.2: resolved "https://registry.yarnpkg.com/trim-newlines/-/trim-newlines-4.1.1.tgz#28c88deb50ed10c7ba6dc2474421904a00139125" integrity sha512-jRKj0n0jXWo6kh62nA5TEh3+4igKDXLvzBJcPpiizP7oOolUrYIxmVBG9TOtHYFHoddUk6YvAkGeGoSVTXfQXQ== -ts-api-utils@^1.0.1, ts-api-utils@^1.3.0: +ts-api-utils@^1.3.0: version "1.3.0" resolved "https://registry.yarnpkg.com/ts-api-utils/-/ts-api-utils-1.3.0.tgz#4b490e27129f1e8e686b45cc4ab63714dc60eea1" integrity sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ== -ts-jest@29.1.2: - version "29.1.2" - resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.1.2.tgz#7613d8c81c43c8cb312c6904027257e814c40e09" - integrity 
sha512-br6GJoH/WUX4pu7FbZXuWGKGNDuU7b8Uj77g/Sp7puZV6EXzuByl6JrECvm0MzVzSTkSHWTihsXt+5XYER5b+g== +ts-jest@29.1.4: + version "29.1.4" + resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.1.4.tgz#26f8a55ce31e4d2ef7a1fd47dc7fa127e92793ef" + integrity sha512-YiHwDhSvCiItoAgsKtoLFCuakDzDsJ1DLDnSouTaTmdOcOwIkSzbLXduaQ6M5DRVhuZC/NYaaZ/mtHbWMv/S6Q== dependencies: bs-logger "0.x" fast-json-stable-stringify "2.x" @@ -6954,7 +6896,7 @@ ts-node@10.9.2: v8-compile-cache-lib "^3.0.1" yn "3.1.1" -tslib@^2.0.0, tslib@^2.1.0, tslib@^2.3.0, tslib@^2.4.0, tslib@^2.6.2: +tslib@^2.0.0, tslib@^2.1.0, tslib@^2.4.0, tslib@^2.6.2: version "2.6.2" resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae" integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q== From 4ec1c986a033bcb286b9a0d7eb76df8e56cdc1c3 Mon Sep 17 00:00:00 2001 From: abandy Date: Mon, 3 Jun 2024 22:21:46 -0400 Subject: [PATCH 218/261] GH-41945: [Swift] Add interface ArrowArrayHolderBuilder (#41946) ### Rationale for this change This change adds the implementation of the ArrowArrayHolderBuilder interface which allows appending to Arrays and completing them without needing the generic info. This is needed for Nested types as well as for the Swift arrow Codable implementation. ### What changes are included in this PR? Adding the interface and the implementation of the interface. ### Are these changes tested? Yes, test has been added. * GitHub Issue: #41945 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- .../Sources/Arrow/ArrowArrayBuilder.swift | 15 +++++++++- swift/Arrow/Tests/ArrowTests/ArrayTests.swift | 28 ++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index b78f0ccd74997..4865b8a791256 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -17,7 +17,12 @@ import Foundation -public class ArrowArrayBuilder> { +public protocol ArrowArrayHolderBuilder { + func toHolder() throws -> ArrowArrayHolder + func appendAny(_ val: Any?) +} + +public class ArrowArrayBuilder>: ArrowArrayHolderBuilder { let type: ArrowType let bufferBuilder: T public var length: UInt {return self.bufferBuilder.length} @@ -34,6 +39,10 @@ public class ArrowArrayBuilder> self.bufferBuilder.append(val) } + public func appendAny(_ val: Any?) { + self.bufferBuilder.append(val as? 
T.ItemType) + } + public func finish() throws -> ArrowArray { let buffers = self.bufferBuilder.finish() let arrowData = try ArrowData(self.type, buffers: buffers, nullCount: self.nullCount) @@ -43,6 +52,10 @@ public class ArrowArrayBuilder> public func getStride() -> Int { return self.type.getStride() } + + public func toHolder() throws -> ArrowArrayHolder { + return try ArrowArrayHolderImpl(self.finish()) + } } public class NumberArrayBuilder: ArrowArrayBuilder, FixedArray> { diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift index f5bfa0506e62f..10ffc4f96d83e 100644 --- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -245,4 +245,30 @@ final class ArrayTests: XCTestCase { try checkHolderForType(ArrowType(ArrowType.ArrowBool)) try checkHolderForType(ArrowType(ArrowType.ArrowString)) } -} + + func testArrowArrayHolderBuilder() throws { + let uint8HBuilder: ArrowArrayHolderBuilder = + (try ArrowArrayBuilders.loadNumberArrayBuilder() as NumberArrayBuilder) + for index in 0..<100 { + uint8HBuilder.appendAny(UInt8(index)) + } + + let uint8Holder = try uint8HBuilder.toHolder() + XCTAssertEqual(uint8Holder.nullCount, 0) + XCTAssertEqual(uint8Holder.length, 100) + + let stringHBuilder: ArrowArrayHolderBuilder = + (try ArrowArrayBuilders.loadStringArrayBuilder()) + for index in 0..<100 { + if index % 10 == 9 { + stringHBuilder.appendAny(nil) + } else { + stringHBuilder.appendAny("test" + String(index)) + } + } + + let stringHolder = try stringHBuilder.toHolder() + XCTAssertEqual(stringHolder.nullCount, 10) + XCTAssertEqual(stringHolder.length, 100) + } + } From 7d60148d24a454b135988a3a394938d207ad90e7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 14:08:15 +0900 Subject: [PATCH 219/261] MINOR: [JS] Bump @types/node from 20.13.0 to 20.14.1 in /js (#41948) Bumps [@ types/node](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/node) from 20.13.0 to 20.14.1.
Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ types/node&package-manager=npm_and_yarn&previous-version=20.13.0&new-version=20.14.1)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:

- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/yarn.lock | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/js/yarn.lock b/js/yarn.lock index d5527097340d9..c19f69a901bfa 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1370,17 +1370,10 @@ dependencies: "@types/node" "*" -"@types/node@*": - version "20.12.8" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.12.8.tgz#35897bf2bfe3469847ab04634636de09552e8256" - integrity sha512-NU0rJLJnshZWdE/097cdCBbyW1h4hEg0xpovcoAQYHl8dnEyp/NAOiE45pvc+Bd1Dt+2r94v2eGFpQJ4R7g+2w== - dependencies: - undici-types "~5.26.4" - -"@types/node@>=13.7.4", "@types/node@^20.13.0": - version "20.13.0" - resolved "https://registry.yarnpkg.com/@types/node/-/node-20.13.0.tgz#011a76bc1e71ae9a026dddcfd7039084f752c4b6" - integrity sha512-FM6AOb3khNkNIXPnHFDYaHerSv8uN22C91z098AnGccVu+Pcdhi+pNUFDi0iLmPIsVE0JBD0KVS7mzUYt4nRzQ== +"@types/node@*", "@types/node@>=13.7.4", "@types/node@^20.13.0": + version "20.14.1" + resolved "https://registry.yarnpkg.com/@types/node/-/node-20.14.1.tgz#2434dbcb1f039e31f2c0e9969da93f52cf6348f3" + integrity sha512-T2MzSGEu+ysB/FkWfqmhV3PLyQlowdptmmgD20C6QxsS8Fmv5SjpZ1ayXaEC0S21/h5UJ9iA6W/5vSNU5l00OA== dependencies: undici-types "~5.26.4" From 8d2db80586a2cd65e03efc9ccd66e08e5b484692 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 4 Jun 2024 14:08:35 +0900 Subject: [PATCH 220/261] MINOR: [JS] Bump @typescript-eslint/parser from 7.11.0 to 7.12.0 in /js (#41949) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [@ typescript-eslint/parser](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/parser) from 7.11.0 to 7.12.0.
Release notes

Sourced from @typescript-eslint/parser's releases.

v7.12.0

7.12.0 (2024-06-03)

🚀 Features

  • eslint-plugin: [no-useless-template-literals] rename to no-useless-template-expression (deprecate no-useless-template-literals) (#8821)
  • eslint-plugin: [no-floating-promises] add option 'allowForKnownSafePromises' (#9186)
  • rule-tester: check for parsing errors in suggestion fixes (#9052)
  • rule-tester: port checkDuplicateTestCases from ESLint (#9026)

🩹 Fixes

  • no-useless-template-expression -> no-unnecessary-template-expression (#9174)
  • eslint-plugin: [no-unnecessary-type-assertion] combine template literal check with const variable check (#8820)
  • eslint-plugin: [dot-notation] fix false positive when accessing private/protected property with optional chaining (#8851)
  • eslint-plugin: [explicit-member-accessibility] refine report locations (#8869)
  • eslint-plugin: [no-unnecessary-type-assertion] declares are always defined, so always check declares (#8901)
  • eslint-plugin: [prefer-literal-enum-member] allow using member it self on allowBitwiseExpressions (#9114)
  • eslint-plugin: [return-await] clean up in-try-catch detection and make autofixes safe (#9031)
  • eslint-plugin: [member-ordering] also TSMethodSignature can be get/set (#9193)
  • types: correct typing ParserOptions (#9202)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

Changelog

Sourced from @typescript-eslint/parser's changelog.

7.12.0 (2024-06-03)

🩹 Fixes

  • types: correct typing ParserOptions

❤️ Thank You

  • Abraham Guo
  • Han Yeong-woo
  • Joshua Chen
  • Kim Sang Du
  • Kirk Waiblinger
  • YeonJuan

You can read about our versioning strategy and releases on our website.

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@ typescript-eslint/parser&package-manager=npm_and_yarn&previous-version=7.11.0&new-version=7.12.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:

- `@ dependabot rebase` will rebase this PR
- `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@ dependabot merge` will merge this PR after your CI passes on it
- `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@ dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@ dependabot reopen` will reopen this PR if it is closed
- `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 53 ++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 10 deletions(-) diff --git a/js/package.json b/js/package.json index 9e61d94dc3b7b..b55b637e2750d 100644 --- a/js/package.json +++ b/js/package.json @@ -73,7 +73,7 @@ "@types/jest": "29.5.12", "@types/multistream": "4.1.3", "@typescript-eslint/eslint-plugin": "7.11.0", - "@typescript-eslint/parser": "7.11.0", + "@typescript-eslint/parser": "7.12.0", "async-done": "2.0.0", "benny": "3.7.1", "cross-env": "7.0.3", diff --git a/js/yarn.lock b/js/yarn.lock index c19f69a901bfa..ec311730b8918 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1434,15 +1434,15 @@ natural-compare "^1.4.0" ts-api-utils "^1.3.0" -"@typescript-eslint/parser@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.11.0.tgz#525ad8bee54a8f015f134edd241d91b84ab64839" - integrity sha512-yimw99teuaXVWsBcPO1Ais02kwJ1jmNA1KxE7ng0aT7ndr1pT1wqj0OJnsYVGKKlc4QJai86l/025L6z8CljOg== - dependencies: - "@typescript-eslint/scope-manager" "7.11.0" - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/typescript-estree" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" +"@typescript-eslint/parser@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/parser/-/parser-7.12.0.tgz#8761df3345528b35049353db80010b385719b1c3" + integrity sha512-dm/J2UDY3oV3TKius2OUZIFHsomQmpHtsV0FTh1WO8EKgHLQ1QCADUqscPgTpU+ih1e21FQSRjXckHn3txn6kQ== + dependencies: + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/typescript-estree" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" debug "^4.3.4" "@typescript-eslint/scope-manager@7.11.0": @@ -1453,6 +1453,14 @@ "@typescript-eslint/types" "7.11.0" "@typescript-eslint/visitor-keys" "7.11.0" +"@typescript-eslint/scope-manager@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.12.0.tgz#259c014362de72dd34f995efe6bd8dda486adf58" + integrity sha512-itF1pTnN6F3unPak+kutH9raIkL3lhH1YRPGgt7QQOh43DQKVJXmWkpb+vpc/TiDHs6RSd9CTbDsc/Y+Ygq7kg== + dependencies: + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" + "@typescript-eslint/type-utils@7.11.0": version "7.11.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.11.0.tgz#ac216697d649084fedf4a910347b9642bd0ff099" @@ -1468,6 +1476,11 @@ resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.11.0.tgz#5e9702a5e8b424b7fc690e338d359939257d6722" integrity sha512-MPEsDRZTyCiXkD4vd3zywDCifi7tatc4K37KqTprCvaXptP7Xlpdw0NR2hRJTetG5TxbWDB79Ys4kLmHliEo/w== +"@typescript-eslint/types@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981" + integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg== + "@typescript-eslint/typescript-estree@7.11.0": version "7.11.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.11.0.tgz#7cbc569bc7336c3a494ceaf8204fdee5d5dbb7fa" @@ -1482,6 +1495,20 @@ semver "^7.6.0" ts-api-utils "^1.3.0" +"@typescript-eslint/typescript-estree@7.12.0": + version "7.12.0" + resolved 
"https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9" + integrity sha512-5bwqLsWBULv1h6pn7cMW5dXX/Y2amRqLaKqsASVwbBHMZSnHqE/HN4vT4fE0aFsiwxYvr98kqOWh1a8ZKXalCQ== + dependencies: + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" + debug "^4.3.4" + globby "^11.1.0" + is-glob "^4.0.3" + minimatch "^9.0.4" + semver "^7.6.0" + ts-api-utils "^1.3.0" + "@typescript-eslint/utils@7.11.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": version "7.11.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.11.0.tgz#524f047f2209959424c3ef689b0d83b3bc09919c" @@ -1500,6 +1527,14 @@ "@typescript-eslint/types" "7.11.0" eslint-visitor-keys "^3.4.3" +"@typescript-eslint/visitor-keys@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.12.0.tgz#c053b55a996679528beeedd8e565710ce1ae1ad3" + integrity sha512-uZk7DevrQLL3vSnfFl5bj4sL75qC9D6EdjemIdbtkuUmIheWpuiiylSY01JxJE7+zGrOWDZrp1WxOuDntvKrHQ== + dependencies: + "@typescript-eslint/types" "7.12.0" + eslint-visitor-keys "^3.4.3" + "@ungap/structured-clone@^1.2.0": version "1.2.0" resolved "https://registry.yarnpkg.com/@ungap/structured-clone/-/structured-clone-1.2.0.tgz#756641adb587851b5ccb3e095daf27ae581c8406" From 7bc2452b350867b3ddc9de9ceceeef0e4d722941 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Tue, 4 Jun 2024 12:35:13 +0530 Subject: [PATCH 221/261] GH-41902: [Java] Variadic Buffer Counts Incorrect (#41930) ### Rationale for this change In the initial PR for `variadicBufferCounts` addition to Java spec, the non variadic buffer-ed vectors were assigned with 0 valued non-empty `variadicBufferCounts`. And this caused CIs to fail in Arrow Rust. ### What changes are included in this PR? This PR changes such that non variadic buffer-ed vectors would contain an empty `variadicBufferCounts` attribute in `ArrowRecordBatch` interface in Java. Also this includes upgrade to JUNIT5. ### Are these changes tested? Yes, from existing tests and a new test added. ### Are there any user-facing changes? No * GitHub Issue: #41902 Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- .../apache/arrow/c/StructVectorLoader.java | 29 +++++-- .../apache/arrow/c/StructVectorUnloader.java | 5 +- .../org/apache/arrow/c/DictionaryTest.java | 60 ++++++++++++++ .../org/apache/arrow/vector/VectorLoader.java | 20 +++-- .../apache/arrow/vector/VectorUnloader.java | 5 +- .../apache/arrow/vector/TestValueVector.java | 56 +++++++++++++ .../arrow/vector/TestVarCharViewVector.java | 80 +++++++++++++++++++ 7 files changed, 238 insertions(+), 17 deletions(-) diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index 27acf84d30157..1b0c59163a187 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -27,6 +27,7 @@ import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Collections2; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -54,7 +55,14 @@ public class StructVectorLoader { /** * Construct with a schema. - * + *
+ * <p>
+ * The schema referred to here can be obtained from the struct vector. + * The schema here should be the children of a struct vector, not a schema + * containing the struct field itself. + * For example: + * + * Schema schema = new Schema(structVector.getField().getChildren()); + * * @param schema buffers are added based on schema. */ public StructVectorLoader(Schema schema) { @@ -90,7 +98,7 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; - Iterator variadicBufferCounts = null; + Iterator variadicBufferCounts = Collections.emptyIterator(); if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); } @@ -98,9 +106,10 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); - if (nodes.hasNext() || buffers.hasNext()) { - throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + - Collections2.toList(nodes).toString() + " buffers: " + Collections2.toList(buffers).toString()); + if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { + throw new IllegalArgumentException("not all nodes, buffers and variadicBufferCounts were consumed. nodes: " + + Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers) + " variadicBufferCounts: " + + Collections2.toString(variadicBufferCounts)); } return result; } @@ -109,10 +118,14 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf CompressionCodec codec, Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + // variadicBufferLayoutCount will be 0 for vectors of a type except BaseVariableWidthViewVector long variadicBufferLayoutCount = 0; - if (variadicBufferCounts != null) { - variadicBufferLayoutCount = variadicBufferCounts.next(); + if (vector instanceof BaseVariableWidthViewVector) { + if (variadicBufferCounts.hasNext()) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } else { + throw new IllegalStateException("No variadicBufferCounts available for BaseVariableWidthViewVector"); + } } int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index 8d015157ebf38..82539acf6f292 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -109,7 +109,10 @@ private void appendNodes(FieldVector vector, List nodes, List fieldBuffers = vector.getFieldBuffers(); long variadicBufferCount = getVariadicBufferCount(vector); int expectedBufferCount = (int) 
(TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); - variadicBufferCounts.add(variadicBufferCount); + // only update variadicBufferCounts for vectors that have variadic buffers + if (variadicBufferCount > 0) { + variadicBufferCounts.add(variadicBufferCount); + } if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); diff --git a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java index d892781756ede..aa1264e4842eb 100644 --- a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java @@ -17,6 +17,8 @@ package org.apache.arrow.c; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; @@ -29,6 +31,7 @@ import org.apache.arrow.c.ArrowSchema; import org.apache.arrow.c.CDataDictionaryProvider; import org.apache.arrow.c.Data; +import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; @@ -36,13 +39,19 @@ import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.compare.VectorEqualsVisitor; +import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.dictionary.Dictionary; import org.apache.arrow.vector.dictionary.DictionaryEncoder; import org.apache.arrow.vector.dictionary.DictionaryProvider; import org.apache.arrow.vector.ipc.ArrowStreamReader; import org.apache.arrow.vector.ipc.ArrowStreamWriter; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; +import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.DictionaryEncoding; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; @@ -216,4 +225,55 @@ private ArrowStreamReader createMultiBatchReader() throws IOException { return new ArrowStreamReader(in, allocator); } + private void createStructVector(StructVector vector) { + final ViewVarCharVector child1 = vector.addOrGet("f0", + FieldType.nullable(MinorType.VIEWVARCHAR.getType()), ViewVarCharVector.class); + final IntVector child2 = vector.addOrGet("f1", + FieldType.nullable(MinorType.INT.getType()), IntVector.class); + + // Write the values to child 1 + child1.allocateNew(); + child1.set(0, "01234567890".getBytes()); + child1.set(1, "012345678901234567".getBytes()); + vector.setIndexDefined(0); + + // Write the values to child 2 + child2.allocateNew(); + child2.set(0, 10); + child2.set(1, 11); + vector.setIndexDefined(1); + + vector.setValueCount(2); + } + + @Test + public void testVectorLoadUnloadOnStructVector() { + try (final StructVector structVector1 = StructVector.empty("struct", allocator)) { + createStructVector(structVector1); + Field field1 = structVector1.getField(); + Schema schema = new Schema(field1.getChildren()); + StructVectorUnloader 
vectorUnloader = new StructVectorUnloader(structVector1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("struct", 0, Long.MAX_VALUE); + ) { + // validating recordBatch contains an output for variadicBufferCounts + assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); + assertEquals(1, recordBatch.getVariadicBufferCounts().size()); + assertEquals(1, recordBatch.getVariadicBufferCounts().get(0)); + + StructVectorLoader vectorLoader = new StructVectorLoader(schema); + try (StructVector structVector2 = vectorLoader.load(finalVectorsAllocator, recordBatch)) { + // Improve this after fixing https://github.com/apache/arrow/issues/41933 + // assertTrue(VectorEqualsVisitor.vectorEquals(structVector1, structVector2), "vectors are not equivalent"); + assertTrue(VectorEqualsVisitor.vectorEquals(structVector1.getChild("f0"), structVector2.getChild("f0")), + "vectors are not equivalent"); + assertTrue(VectorEqualsVisitor.vectorEquals(structVector1.getChild("f1"), structVector2.getChild("f1")), + "vectors are not equivalent"); + } + } + } + } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 9590e70f46770..dec536ae6cc1f 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -20,6 +20,7 @@ import static org.apache.arrow.util.Preconditions.checkArgument; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -80,7 +81,7 @@ public void load(ArrowRecordBatch recordBatch) { CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; - Iterator variadicBufferCounts = null; + Iterator variadicBufferCounts = Collections.emptyIterator(); if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); } @@ -89,9 +90,10 @@ public void load(ArrowRecordBatch recordBatch) { loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } root.setRowCount(recordBatch.getLength()); + if (nodes.hasNext() || buffers.hasNext()) { - throw new IllegalArgumentException("not all nodes and buffers were consumed. nodes: " + - Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers)); + if (nodes.hasNext() || buffers.hasNext() || variadicBufferCounts.hasNext()) { + throw new IllegalArgumentException("not all nodes, buffers and variadicBufferCounts were consumed. 
nodes: " + + Collections2.toString(nodes) + " buffers: " + Collections2.toString(buffers) + " variadicBufferCounts: " + + Collections2.toString(variadicBufferCounts)); } } @@ -104,10 +106,14 @@ private void loadBuffers( Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + // variadicBufferLayoutCount will be 0 for vectors of a type except BaseVariableWidthViewVector long variadicBufferLayoutCount = 0; - if (variadicBufferCounts != null) { - variadicBufferLayoutCount = variadicBufferCounts.next(); + if (vector instanceof BaseVariableWidthViewVector) { + if (variadicBufferCounts.hasNext()) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } else { + throw new IllegalStateException("No variadicBufferCounts available for BaseVariableWidthViewVector"); + } } int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 8528099b6d619..6e7ab34eba9de 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -103,7 +103,10 @@ private void appendNodes(FieldVector vector, List nodes, List fieldBuffers = vector.getFieldBuffers(); long variadicBufferCount = getVariadicBufferCount(vector); int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); - variadicBufferCounts.add(variadicBufferCount); + // only update variadicBufferCounts for vectors that have variadic buffers + if (variadicBufferCount > 0) { + variadicBufferCounts.add(variadicBufferCount); + } if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. 
found: %s", diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java index fda14b24a4c8b..b0d316070a335 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestValueVector.java @@ -3441,4 +3441,60 @@ public void testSplitAndTransferFixedWithVector2() { } target.close(); } + + @Test + public void testVectorLoadUnloadOnNonVariadicVectors() { + + try (final IntVector vector1 = new IntVector("myvector", allocator)) { + + setVector(vector1, 1, 2, 3, 4, 5, 6); + vector1.setValueCount(15); + + /* Check the vector output */ + assertEquals(1, vector1.get(0)); + assertEquals(2, vector1.get(1)); + assertEquals(3, vector1.get(2)); + assertEquals(4, vector1.get(3)); + assertEquals(5, vector1.get(4)); + assertEquals(6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + // validating recordBatch doesn't contain an output for variadicBufferCounts + assertTrue(recordBatch.getVariadicBufferCounts().isEmpty()); + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + IntVector vector2 = (IntVector) schemaRoot2.getVector(fieldName); + vector2.setValueCount(25); + + /* Check the vector output */ + assertEquals(1, vector2.get(0)); + assertEquals(2, vector2.get(1)); + assertEquals(3, vector2.get(2)); + assertEquals(4, vector2.get(3)); + assertEquals(5, vector2.get(4)); + assertEquals(6, vector2.get(5)); + } + } + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index 1ba3bc3576fb2..817941ecb46d6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -23,6 +23,7 @@ import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; @@ -2212,6 +2213,85 @@ public void testSplitAndTransfer9() { } } + @Test + public void testVectorLoadUnloadOnMixedTypes() { + + try (final IntVector vector1 = new IntVector("myvector", allocator); + final ViewVarCharVector vector2 = new ViewVarCharVector("myviewvector", allocator)) { + + final int valueCount = 15; + + setVector(vector1, 1, 2, 3, 4, 5, 6); + vector1.setValueCount(valueCount); + + setVector(vector2, STR1, STR2, STR3, STR4, STR5, STR6); + vector1.setValueCount(valueCount); + + /* 
Check the vector output */ + assertEquals(1, vector1.get(0)); + assertEquals(2, vector1.get(1)); + assertEquals(3, vector1.get(2)); + assertEquals(4, vector1.get(3)); + assertEquals(5, vector1.get(4)); + assertEquals(6, vector1.get(5)); + + Field field1 = vector1.getField(); + String fieldName1 = field1.getName(); + + Field field2 = vector2.getField(); + String fieldName2 = field2.getName(); + + List fields = new ArrayList<>(2); + List fieldVectors = new ArrayList<>(2); + + fields.add(field1); + fields.add(field2); + fieldVectors.add(vector1); + fieldVectors.add(vector2); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, valueCount); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + // validating recordBatch contains an output for variadicBufferCounts + assertFalse(recordBatch.getVariadicBufferCounts().isEmpty()); + assertEquals(1, recordBatch.getVariadicBufferCounts().size()); + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + IntVector vector3 = (IntVector) schemaRoot2.getVector(fieldName1); + vector3.setValueCount(25); + + /* Check the vector output */ + assertEquals(1, vector3.get(0)); + assertEquals(2, vector3.get(1)); + assertEquals(3, vector3.get(2)); + assertEquals(4, vector3.get(3)); + assertEquals(5, vector3.get(4)); + assertEquals(6, vector3.get(5)); + + ViewVarCharVector vector4 = (ViewVarCharVector) schemaRoot2.getVector(fieldName2); + vector4.setValueCount(25); + + /* Check the vector output */ + assertArrayEquals(STR1, vector4.get(0)); + assertArrayEquals(STR2, vector4.get(1)); + assertArrayEquals(STR3, vector4.get(2)); + assertArrayEquals(STR4, vector4.get(3)); + assertArrayEquals(STR5, vector4.get(4)); + assertArrayEquals(STR6, vector4.get(5)); + } + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); From 524a463207fdb799b2cd784b7ef95052165882ec Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Tue, 4 Jun 2024 14:25:05 +0530 Subject: [PATCH 222/261] GH-39649: [Java][CI] Fix or suppress spurious errorprone warnings stage 2 (#39777) ### Rationale for this change This PR is a continuation of errorprone warning fixes. ### What changes are included in this PR? Fixing warnings suggested by the errorprone module. - [x] Adapter - [x] C - [x] Format - [x] Maven - [x] Memory - [x] Performance ### Are these changes tested? The existing test cases cover them. ### Are there any user-facing changes?
No * Closes: #39649 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: vibhatha Co-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: David Li Signed-off-by: David Li --- docs/source/developers/java/development.rst | 16 +++++++++++ .../arrow/adapter/avro/AvroToArrowUtils.java | 28 +++++++++---------- .../avro/AvroToArrowVectorIterator.java | 2 ++ .../adapter/avro/consumers/Consumer.java | 1 + .../arrow/adapter/avro/AvroSkipFieldTest.java | 9 +++--- .../arrow/adapter/avro/AvroTestBase.java | 4 +-- .../adapter/avro/AvroToArrowIteratorTest.java | 4 +-- .../arrow/adapter/avro/AvroToArrowTest.java | 2 -- .../adapter/jdbc/consumer/BinaryConsumer.java | 2 +- .../adapter/jdbc/consumer/ClobConsumer.java | 8 +++--- .../adapter/jdbc/consumer/JdbcConsumer.java | 1 + .../adapter/jdbc/JdbcParameterBinderTest.java | 10 +++---- .../jdbc/JdbcToArrowCommentMetadataTest.java | 9 ------ .../adapter/jdbc/JdbcToArrowConfigTest.java | 8 +++--- .../adapter/jdbc/JdbcToArrowTestHelper.java | 7 +++-- .../arrow/adapter/jdbc/ResultSetUtility.java | 23 ++++++--------- .../adapter/jdbc/UnreliableMetaDataTest.java | 3 -- .../adapter/jdbc/h2/JdbcAliasToArrowTest.java | 4 +-- .../adapter/jdbc/h2/JdbcToArrowArrayTest.java | 3 +- .../jdbc/h2/JdbcToArrowCharSetTest.java | 3 ++ .../jdbc/h2/JdbcToArrowDataTypesTest.java | 2 ++ .../jdbc/h2/JdbcToArrowMapDataTypeTest.java | 2 ++ .../adapter/jdbc/h2/JdbcToArrowNullTest.java | 2 ++ .../h2/JdbcToArrowOptionalColumnsTest.java | 2 ++ .../adapter/jdbc/h2/JdbcToArrowTest.java | 2 ++ .../jdbc/h2/JdbcToArrowTimeZoneTest.java | 2 ++ .../main/java/org/apache/arrow/c/Format.java | 3 +- .../apache/arrow/c/jni/CDataJniException.java | 4 +-- .../org/apache/arrow/c/DictionaryTest.java | 25 +++++++++-------- .../org/apache/arrow/c/NativeUtilTest.java | 2 +- .../org/apache/arrow/c/RoundtripTest.java | 4 +-- .../memory/AllocationOutcomeDetails.java | 18 ++++++------ .../arrow/memory/AllocationReservation.java | 2 +- .../apache/arrow/memory/BaseAllocator.java | 2 ++ .../org/apache/arrow/memory/BufferLedger.java | 3 +- .../apache/arrow/memory/BufferManager.java | 2 +- .../apache/arrow/memory/CheckAllocator.java | 1 + .../apache/arrow/memory/ChildAllocator.java | 2 +- .../arrow/memory/LowCostIdentityHashMap.java | 3 +- .../rounding/DefaultRoundingPolicy.java | 4 --- .../arrow/memory/util/ArrowBufPointer.java | 3 +- .../org/apache/arrow/memory/util/Float16.java | 4 +-- .../arrow/memory/util/HistoricalLog.java | 11 ++++++-- .../apache/arrow/memory/util/MemoryUtil.java | 1 + .../arrow/memory/util/hash/MurmurHasher.java | 2 +- .../arrow/memory/util/hash/SimpleHasher.java | 1 + .../org/apache/arrow/memory/TestArrowBuf.java | 2 +- .../arrow/memory/TestBaseAllocator.java | 4 +-- .../memory/TestLowCostIdentityHashMap.java | 2 +- .../memory/util/TestArrowBufPointer.java | 20 ++++++------- .../memory/util/TestByteFunctionHelpers.java | 6 ++-- .../memory/util/hash/TestArrowBufHasher.java | 12 ++++---- .../java/io/netty/buffer/NettyArrowBuf.java | 2 +- .../buffer/UnsafeDirectLittleEndian.java | 19 ++++--------- .../buffer/TestUnsafeDirectLittleEndian.java | 3 +- .../memory/netty/NettyAllocationManager.java | 4 +-- .../arrow/memory/netty/TestEndianness.java | 16 +++++------ .../memory/netty/TestNettyAllocator.java | 1 + java/pom.xml | 4 ++- 59 files changed, 187 insertions(+), 164 deletions(-) diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index 3f0ff6cdd0103..9f78eccf6c525 100644 --- 
a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -137,3 +137,19 @@ This applies the style to all pom.xml files under the current directory or from .. _conbench: https://github.com/conbench/conbench .. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml .. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention + + +Build Caching +============= + +Build caching is done through Develocity (formerly Gradle Enterprise). To force +a build without the cache, run:: + + mvn clean install -Ddevelocity.cache.local.enabled=false -Ddevelocity.cache.remote.enabled=false + +This can be useful to make sure you see all warnings from ErrorProne, for example. + +ErrorProne +========== + +ErrorProne should be disabled for generated code. diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java index 1f5ad9e768950..c949f4b1ec5b6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowUtils.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; @@ -95,7 +96,6 @@ import org.apache.avro.LogicalType; import org.apache.avro.LogicalTypes; import org.apache.avro.Schema; -import org.apache.avro.Schema.Type; import org.apache.avro.io.Decoder; /** @@ -159,7 +159,7 @@ private static Consumer createConsumer( final BufferAllocator allocator = config.getAllocator(); - final Type type = schema.getType(); + final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); final ArrowType arrowType; @@ -215,7 +215,7 @@ private static Consumer createConsumer( vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); } else { - arrowType = new ArrowType.Int(32, /*signed=*/true); + arrowType = new ArrowType.Int(32, /*isSigned=*/true); fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroIntConsumer((IntVector) vector); @@ -244,7 +244,7 @@ private static Consumer createConsumer( vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); } else { - arrowType = new ArrowType.Int(64, /*signed=*/true); + arrowType = new ArrowType.Int(64, /*isSigned=*/true); fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroLongConsumer((BigIntVector) vector); @@ -278,7 +278,7 @@ private static Consumer createConsumer( case NULL: arrowType = new ArrowType.Null(); fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); - vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallback=*/null); + vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/null); consumer = new AvroNullConsumer((NullVector) vector); break; default: @@ -305,7 +305,7 @@ private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType
private static Consumer createSkipConsumer(Schema schema) { SkipFunction skipFunction; - Type type = schema.getType(); + Schema.Type type = schema.getType(); switch (type) { case UNION: @@ -391,7 +391,7 @@ static CompositeAvroConsumer createCompositeConsumer( final Set skipFieldNames = config.getSkipFieldNames(); Schema.Type type = schema.getType(); - if (type == Type.RECORD) { + if (type == Schema.Type.RECORD) { for (Schema.Field field : schema.getFields()) { if (skipFieldNames.contains(field.name())) { consumers.add(createSkipConsumer(field.schema())); @@ -416,7 +416,7 @@ private static FieldVector createVector(FieldVector consumerVector, FieldType fi private static String getDefaultFieldName(ArrowType type) { Types.MinorType minorType = Types.getMinorTypeForArrowType(type); - return minorType.name().toLowerCase(); + return minorType.name().toLowerCase(Locale.ROOT); } private static Field avroSchemaToField(Schema schema, String name, AvroToArrowConfig config) { @@ -429,7 +429,7 @@ private static Field avroSchemaToField( AvroToArrowConfig config, Map externalProps) { - final Type type = schema.getType(); + final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); final List children = new ArrayList<>(); final FieldType fieldType; @@ -457,7 +457,7 @@ private static Field avroSchemaToField( FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null); Field structField = new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); children.add(structField); - fieldType = createFieldType(new ArrowType.Map(/*keySorted=*/false), schema, externalProps); + fieldType = createFieldType(new ArrowType.Map(/*keysSorted=*/false), schema, externalProps); break; case RECORD: final Set skipFieldNames = config.getSkipFieldNames(); @@ -509,7 +509,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimeMillis) { intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); } else { - intArrowType = new ArrowType.Int(32, /*signed=*/true); + intArrowType = new ArrowType.Int(32, /*isSigned=*/true); } fieldType = createFieldType(intArrowType, schema, externalProps); break; @@ -525,7 +525,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimestampMicros) { longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); } else { - longArrowType = new ArrowType.Int(64, /*signed=*/true); + longArrowType = new ArrowType.Int(64, /*isSigned=*/true); } fieldType = createFieldType(longArrowType, schema, externalProps); break; @@ -668,7 +668,7 @@ private static Consumer createUnionConsumer(Schema schema, String name, AvroToAr FieldVector consumerVector) { final int size = schema.getTypes().size(); - final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Type.NULL); + final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); UnionVector unionVector; if (consumerVector == null) { @@ -709,7 +709,7 @@ static VectorSchemaRoot avroToArrowVectors( final Set skipFieldNames = config.getSkipFieldNames(); Schema.Type type = schema.getType(); - if (type == Type.RECORD) { + if (type == Schema.Type.RECORD) { for (Schema.Field field : schema.getFields()) { if (skipFieldNames.contains(field.name())) { consumers.add(createSkipConsumer(field.schema())); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java 
b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 4a439ade81181..9a0cfd97a49a1 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -162,6 +162,7 @@ public boolean hasNext() { /** * Gets the next vector. The user is responsible for freeing its resources. */ + @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); VectorSchemaRoot returned = nextBatch; @@ -177,6 +178,7 @@ public VectorSchemaRoot next() { /** * Clean up resources. */ + @Override public void close() { if (nextBatch != null) { nextBatch.close(); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java index c2ae1ce77b282..8eaaf74cff68a 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java @@ -53,6 +53,7 @@ public interface Consumer extends AutoCloseable { /** * Close this consumer when occurs exception to avoid potential leak. */ + @Override void close() throws Exception; /** diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java index a37eca6514e04..54fa26afe3fa8 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertEquals; import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; @@ -223,7 +224,7 @@ public void testSkipStringField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); @@ -257,7 +258,7 @@ public void testSkipBytesField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); @@ -291,7 +292,7 @@ public void testSkipFixedField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); @@ -325,7 +326,7 @@ public void testSkipEnumField() throws Exception { ArrayList expectedData = new ArrayList<>(); for (int i = 0; i < 5; i++) { - final byte[] testBytes = ("test" + i).getBytes(); + final byte[] testBytes = ("test" + i).getBytes(StandardCharsets.UTF_8); 
GenericRecord record = new GenericData.Record(schema); GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java index 60a3a285db3aa..a91bba7b84fb4 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java @@ -145,7 +145,7 @@ protected void checkPrimitiveResult(List data, FieldVector vector) { } } - protected void checkRecordResult(Schema schema, ArrayList data, VectorSchemaRoot root) { + protected void checkRecordResult(Schema schema, List data, VectorSchemaRoot root) { assertEquals(data.size(), root.getRowCount()); assertEquals(schema.getFields().size(), root.getFieldVectors().size()); @@ -194,7 +194,7 @@ protected void checkArrayResult(List> expected, List vectors } } - protected void checkRecordResult(Schema schema, ArrayList data, List roots) { + protected void checkRecordResult(Schema schema, List data, List roots) { roots.forEach(root -> { assertEquals(schema.getFields().size(), root.getFieldVectors().size()); }); diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java index 02f7a3733734c..7f2edb08fdabc 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java @@ -181,13 +181,13 @@ public void runLargeNumberOfRows() throws Exception { } } - assertEquals(x, targetRows); + assertEquals(targetRows, x); } /** * Fake avro decoder to test large data. 
*/ - private class FakeDecoder extends Decoder { + private static class FakeDecoder extends Decoder { private int numRows; diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java index 1c64204191762..26f72173b6b7e 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java @@ -87,10 +87,8 @@ public void testFixedAttributes() throws Exception { Schema schema = getSchema("attrs/test_fixed_attr.avsc"); List data = new ArrayList<>(); - List expected = new ArrayList<>(); for (int i = 0; i < 5; i++) { byte[] value = ("value" + i).getBytes(StandardCharsets.UTF_8); - expected.add(value); GenericData.Fixed fixed = new GenericData.Fixed(schema); fixed.bytes(value); data.add(fixed); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java index 8c5f61169d405..538d161f9e9c7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java @@ -74,7 +74,7 @@ public void consume(InputStream is) throws IOException { vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); dataLength += read; } - offsetBuffer.setInt((currentIndex + 1) * VarBinaryVector.OFFSET_WIDTH, startOffset + dataLength); + offsetBuffer.setInt((currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java index a52d9b73b4db0..3ed0c2d3cbb2f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java @@ -88,7 +88,7 @@ public void consume(ResultSet resultSet) throws SQLException { ArrowBuf dataBuffer = vector.getDataBuffer(); ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4); + int startIndex = offsetBuffer.getInt(currentIndex * 4L); while (read <= length) { String str = clob.getSubString(read, readSize); byte[] bytes = str.getBytes(StandardCharsets.UTF_8); @@ -106,7 +106,7 @@ public void consume(ResultSet resultSet) throws SQLException { totalBytes += bytes.length; read += readSize; } - offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } @@ -139,7 +139,7 @@ public void consume(ResultSet resultSet) throws SQLException { ArrowBuf dataBuffer = vector.getDataBuffer(); ArrowBuf offsetBuffer = vector.getOffsetBuffer(); - int startIndex = offsetBuffer.getInt(currentIndex * 4); + int startIndex = offsetBuffer.getInt(currentIndex * 4L); while (read <= length) { String str = clob.getSubString(read, readSize); byte[] bytes = str.getBytes(StandardCharsets.UTF_8); @@ -157,7 +157,7 @@ public void consume(ResultSet 
resultSet) throws SQLException { totalBytes += bytes.length; read += readSize; } - offsetBuffer.setInt((currentIndex + 1) * 4, startIndex + totalBytes); + offsetBuffer.setInt((currentIndex + 1) * 4L, startIndex + totalBytes); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java index 480dfe3a1c57f..7c867c7ad64d3 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java @@ -37,6 +37,7 @@ public interface JdbcConsumer extends AutoCloseable { /** * Close this consumer, do some clean work such as clear reuse ArrowBuf. */ + @Override void close() throws Exception; /** diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java index 15b9ab0386159..a94f0aa454f1d 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java @@ -109,8 +109,8 @@ void bindOrder() throws SQLException { final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) - .bind(/*paramIndex=*/ 1, /*colIndex=*/ 2) - .bind(/*paramIndex=*/ 2, /*colIndex=*/ 0) + .bind(/*parameterIndex=*/ 1, /*columnIndex=*/ 2) + .bind(/*parameterIndex=*/ 2, /*columnIndex=*/ 0) .build(); assertThat(binder.next()).isFalse(); @@ -169,7 +169,7 @@ void customBinder() throws SQLException { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) .bind( - /*paramIndex=*/ 1, + /*parameterIndex=*/ 1, new ColumnBinder() { private final IntVector vector = (IntVector) root.getVector(0); @Override @@ -275,11 +275,11 @@ void time32() throws SQLException { @Test void time64() throws SQLException { testSimpleType(new ArrowType.Time(TimeUnit.MICROSECOND, 64), Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() * 1_000)), + (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000)), TimeMicroVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); testSimpleType(new ArrowType.Time(TimeUnit.NANOSECOND, 64), Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() * 1_000_000)), + (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000_000)), TimeNanoVector::setNull, Arrays.asList(new Time(-128), new Time(104), new Time(-42))); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java index 07cab0d829fed..4ee65944c3a14 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowCommentMetadataTest.java @@ -19,9 +19,6 @@ import static org.assertj.core.api.Assertions.assertThat; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; 
-import java.nio.file.Paths; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.DriverManager; @@ -34,7 +31,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Map; -import java.util.Objects; import java.util.Set; import org.apache.arrow.memory.RootAllocator; @@ -228,9 +224,4 @@ private String getColumnComment(DatabaseMetaData metaData, String tableName, Str } return null; } - - private String getExpectedSchema(String expectedResource) throws java.io.IOException, java.net.URISyntaxException { - return new String(Files.readAllBytes(Paths.get(Objects.requireNonNull( - JdbcToArrowCommentMetadataTest.class.getResource(expectedResource)).toURI())), StandardCharsets.UTF_8); - } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java index 68a681b052cd3..d4fb7c32997a7 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java @@ -89,8 +89,8 @@ public void testConfig() { JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(allocator, calendar); JdbcToArrowConfig config = builder.build(); - assertTrue(allocator == config.getAllocator()); - assertTrue(calendar == config.getCalendar()); + assertEquals(allocator, config.getAllocator()); + assertEquals(calendar, config.getCalendar()); Calendar newCalendar = Calendar.getInstance(); BufferAllocator newAllocator = new RootAllocator(Integer.SIZE); @@ -98,8 +98,8 @@ public void testConfig() { builder.setAllocator(newAllocator).setCalendar(newCalendar); config = builder.build(); - assertTrue(newAllocator == config.getAllocator()); - assertTrue(newCalendar == config.getCalendar()); + assertEquals(newAllocator, config.getAllocator()); + assertEquals(newCalendar, config.getCalendar()); } @Test diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java index 91f2f465dd989..7dd881b3f7cec 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -24,6 +24,7 @@ import java.math.BigDecimal; import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.AbstractMap; @@ -399,7 +400,7 @@ public static byte[][] getCharArray(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(); + valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } @@ -419,11 +420,12 @@ public static byte[][] getBinaryValues(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(); + valueArr[i++] = "null".equals(data.trim()) ? 
null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } + @SuppressWarnings("StringSplitter") public static String[] getValues(String[] values, String dataType) { String value = ""; for (String val : values) { @@ -440,6 +442,7 @@ public static Integer[][] getListValues(String[] values, String dataType) { return getListValues(dataArr); } + @SuppressWarnings("StringSplitter") public static Integer[][] getListValues(String[] dataArr) { Integer[][] valueArr = new Integer[dataArr.length][]; int i = 0; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java index ccc7681c5bc8b..b05a59a9a04d8 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java @@ -66,17 +66,17 @@ public static MockResultSet generateBasicResultSet(int rows) throws SQLException } public static class MockResultSet extends ThrowingResultSet { - private final ArrayList rows; + private final List rows; private int index = 0; private boolean isClosed = false; private ResultSetMetaData metadata; private boolean wasNull; - public MockResultSet(ArrayList rows) throws SQLException { + public MockResultSet(List rows) throws SQLException { this(rows, MockResultSetMetaData.fromRows(rows)); } - public MockResultSet(ArrayList rows, ResultSetMetaData metadata) { + public MockResultSet(List rows, ResultSetMetaData metadata) { this.rows = rows; this.metadata = metadata; this.wasNull = false; @@ -252,8 +252,8 @@ public Builder addDataElement(Object val, int sqlType) { return this.addDataElement(new MockDataElement(val, sqlType)); } - public Builder setMetaData(ResultSetMetaData metaData) { - this.metadata = metaData; + public Builder setMetaData(ResultSetMetaData metadata) { + this.metadata = metadata; return this; } @@ -318,7 +318,7 @@ public String getColumnTypeName(int column) throws SQLException { return columns.get(column - 1).getTypeName(); } - public static MockResultSetMetaData fromRows(ArrayList rows) throws SQLException { + public static MockResultSetMetaData fromRows(List rows) throws SQLException { // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a given result set. // If there are no rows, or the result set contains no columns, this cannot be dynamically generated and // an exception will be thrown.
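A short sketch of how these mock pieces compose, using only the constructors visible in this diff (`MockDataElement(value, sqlType)`, `MockRow(rows)`, `MockResultSet(rows)`); the wrapper class below and the assumption that `MockResultSet` exposes the derived metadata through `getMetaData()` are illustrative, not part of the patch:

```java
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.Arrays;

import org.apache.arrow.adapter.jdbc.ResultSetUtility;

public class MockResultSetSketch {
  public static void main(String[] args) throws SQLException {
    // One row with an INTEGER and a VARCHAR column.
    ResultSetUtility.MockRow row = new ResultSetUtility.MockRow(Arrays.asList(
        new ResultSetUtility.MockDataElement(42, Types.INTEGER),
        new ResultSetUtility.MockDataElement("hello", Types.VARCHAR)));

    // The single-argument constructor derives the metadata from the first
    // row via MockResultSetMetaData.fromRows, as the comment above explains;
    // an empty row list would therefore fail.
    ResultSetUtility.MockResultSet rs =
        new ResultSetUtility.MockResultSet(Arrays.asList(row));

    ResultSetMetaData metadata = rs.getMetaData();
    System.out.println(metadata.getColumnCount()); // expected: 2
  }
}
```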
@@ -338,7 +338,6 @@ public static MockResultSetMetaData fromRows(ArrayList rows) throws SQL } public static class MockColumnMetaData { - private int index; private int sqlType; private int precision; private int scale; @@ -385,7 +384,6 @@ private int getDisplaySize() { public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException { return MockColumnMetaData.builder() - .index(i) .sqlType(element.getSqlType()) .precision(element.getPrecision()) .scale(element.getScale()) @@ -403,11 +401,6 @@ public static Builder builder() { public static class Builder { private MockColumnMetaData columnMetaData = new MockColumnMetaData(); - public Builder index(int index) { - this.columnMetaData.index = index; - return this; - } - public Builder label(String label) { this.columnMetaData.label = label; return this; @@ -453,9 +446,9 @@ public MockColumnMetaData build() { } public static class MockRow { - private final ArrayList dataElements; + private final List dataElements; - public MockRow(ArrayList elements) { + public MockRow(List elements) { this.dataElements = elements; } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java index 3eb886faabc10..053604073fd66 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java @@ -171,7 +171,6 @@ public void testIncorrectNullability() throws Exception { // ARROW-17005: ResultSetMetaData may indicate a field is non-nullable even when there are nulls ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.INTEGER) .nullable(ResultSetMetaData.columnNoNulls) .build(); @@ -257,7 +256,6 @@ public void testIncorrectNullability() throws Exception { private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLException { ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.DECIMAL) .precision(0) .scale(0) @@ -277,7 +275,6 @@ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLE private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData = ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder() - .index(1) .sqlType(Types.DECIMAL) .precision(0) .scale(0) diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java index d9acfe88f4f8b..d32c2bbab91a8 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java @@ -111,8 +111,8 @@ public void testJdbcAliasToArrow() throws Exception { assertEquals(rowCount, vector.getRowCount()); Schema vectorSchema = vector.getSchema(); List vectorFields = vectorSchema.getFields(); - assertEquals(vectorFields.get(0).getName(), COLUMN_A); - assertEquals(vectorFields.get(1).getName(), COLUMN_B); + assertEquals(COLUMN_A, vectorFields.get(0).getName()); + assertEquals(COLUMN_B, 
vectorFields.get(1).getName()); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java index 377e332b43a13..eabbdc5a25e5d 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; +import java.nio.charset.StandardCharsets; import java.sql.Array; import java.sql.Connection; import java.sql.DriverManager; @@ -284,7 +285,7 @@ private void assertStringVectorEquals(ListVector listVector, int rowCount, Strin assertEquals(1, listVector.isSet(row)); assertEquals(expectedValues[row].length, offset - prevOffset); for (int i = prevOffset; i < offset; ++i) { - assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(), vector.get(i)); + assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); } prevOffset = offset; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index 422b55070aaf9..ab1b4b7fc2fea 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -76,6 +76,7 @@ public JdbcToArrowCharSetTest(Table table) { * @throws ClassNotFoundException on error */ @Before + @Override public void setUp() throws SQLException, ClassNotFoundException { String url = "jdbc:h2:mem:JdbcToArrowTest?characterEncoding=UTF-8"; String driver = "org.h2.Driver"; @@ -107,6 +108,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * the multi-byte CJK characters. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); @@ -142,6 +144,7 @@ public void testJdbcSchemaMetadata() throws SQLException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index ae4fffd0f94f0..54e7d5ffb27ed 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -145,6 +145,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. 
*/ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); @@ -185,6 +186,7 @@ public void testJdbcSchemaMetadata() throws SQLException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java index 43862a93c39c9..a5d1ffa3f64de 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java @@ -45,6 +45,7 @@ public JdbcToArrowMapDataTypeTest() throws IOException { * Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); @@ -68,6 +69,7 @@ public void testJdbcToArrowValues() throws SQLException, IOException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), getMapValues(table.getValues(), MAP)); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java index 5731f27c5b345..31d32bd648906 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -124,6 +124,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); @@ -168,6 +169,7 @@ public void testJdbcSchemaMetadata() throws SQLException { * @param isIncludeMapVector is this dataset checks includes map column. 
* Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java index eebcbe64c0e0c..4d0bbfc7a993c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java @@ -70,6 +70,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); } @@ -82,6 +83,7 @@ public void testJdbcToArrowValues() throws SQLException, IOException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java index 7641fa7f1659c..a925dd7ee32a8 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -101,6 +101,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file. */ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); @@ -149,6 +150,7 @@ public void testJdbcSchemaMetadata() throws SQLException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index 462a75da5143a..fe08db161c8ac 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -103,6 +103,7 @@ public static Collection getTestData() throws SQLException, ClassNotFo * Time and Timestamp datatype. 
*/ @Test + @Override public void testJdbcToArrowValues() throws SQLException, IOException { testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); @@ -139,6 +140,7 @@ public void testJdbcSchemaMetadata() throws SQLException { * @param isIncludeMapVector is this dataset checks includes map column. * Jdbc type to 'map' mapping declared in configuration only manually */ + @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); diff --git a/java/c/src/main/java/org/apache/arrow/c/Format.java b/java/c/src/main/java/org/apache/arrow/c/Format.java index 2875e46f749c4..a5f44859e8327 100644 --- a/java/c/src/main/java/org/apache/arrow/c/Format.java +++ b/java/c/src/main/java/org/apache/arrow/c/Format.java @@ -18,6 +18,7 @@ package org.apache.arrow.c; import java.util.Arrays; +import java.util.Locale; import java.util.stream.Collectors; import org.apache.arrow.util.Preconditions; @@ -127,7 +128,7 @@ static String asString(ArrowType arrowType) { String.format("Int type with bitwidth %d is unsupported", type.getBitWidth())); } if (type.getIsSigned()) { - format = format.toLowerCase(); + format = format.toLowerCase(Locale.ROOT); } return format; } diff --git a/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java b/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java index bebd434f3db3e..df16839c78a16 100644 --- a/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java +++ b/java/c/src/main/java/org/apache/arrow/c/jni/CDataJniException.java @@ -36,10 +36,10 @@ public int getErrno() { } @Override - public String toString() { + public String getMessage() { return "CDataJniException{" + "errno=" + errno + - ", message=" + getMessage() + + ", message=" + super.getMessage() + '}'; } } diff --git a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java index aa1264e4842eb..48fe2d87f8f3d 100644 --- a/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/DictionaryTest.java @@ -25,6 +25,7 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; +import java.nio.charset.StandardCharsets; import java.util.Collections; import org.apache.arrow.c.ArrowArray; @@ -104,9 +105,9 @@ public void testWithDictionary() throws Exception { // create dictionary and provider final VarCharVector dictVector = new VarCharVector("dict", allocator); dictVector.allocateNewSafe(); - dictVector.setSafe(0, "aa".getBytes()); - dictVector.setSafe(1, "bb".getBytes()); - dictVector.setSafe(2, "cc".getBytes()); + dictVector.setSafe(0, "aa".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); dictVector.setValueCount(3); Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(0L, false, /* indexType= */null)); @@ -115,10 +116,10 @@ public void testWithDictionary() throws Exception { // create vector and encode it final VarCharVector vector = new VarCharVector("vector", allocator); vector.allocateNewSafe(); - vector.setSafe(0, "bb".getBytes()); - vector.setSafe(1, "bb".getBytes()); - vector.setSafe(2, "cc".getBytes()); - vector.setSafe(3, "aa".getBytes()); + vector.setSafe(0, "bb".getBytes(StandardCharsets.UTF_8)); + 
vector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); + vector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); + vector.setSafe(3, "aa".getBytes(StandardCharsets.UTF_8)); vector.setValueCount(4); // get the encoded vector @@ -172,11 +173,11 @@ private ArrowStreamReader createMultiBatchReader() throws IOException { // create dictionary and provider DictionaryProvider.MapDictionaryProvider provider = new DictionaryProvider.MapDictionaryProvider(); dictVector.allocateNewSafe(); - dictVector.setSafe(0, "aa".getBytes()); - dictVector.setSafe(1, "bb".getBytes()); - dictVector.setSafe(2, "cc".getBytes()); - dictVector.setSafe(3, "dd".getBytes()); - dictVector.setSafe(4, "ee".getBytes()); + dictVector.setSafe(0, "aa".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(1, "bb".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(2, "cc".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(3, "dd".getBytes(StandardCharsets.UTF_8)); + dictVector.setSafe(4, "ee".getBytes(StandardCharsets.UTF_8)); dictVector.setValueCount(5); Dictionary dictionary = new Dictionary(dictVector, new DictionaryEncoding(0L, false, /* indexType= */null)); provider.put(dictionary); diff --git a/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java b/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java index f46a0128c8644..9a322b7637922 100644 --- a/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/NativeUtilTest.java @@ -64,7 +64,7 @@ public void testString() { @Test public void testToJavaArray() { long[] nativeArray = new long[] { 1, 2, 3 }; - try (ArrowBuf buffer = allocator.buffer(Long.BYTES * nativeArray.length, null)) { + try (ArrowBuf buffer = allocator.buffer(Long.BYTES * ((long) nativeArray.length), null)) { for (long value : nativeArray) { buffer.writeLong(value); } diff --git a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java index 768394ef7ab60..8dcd2ff9a2368 100644 --- a/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java +++ b/java/c/src/test/java/org/apache/arrow/c/RoundtripTest.java @@ -511,7 +511,7 @@ public void testNullVector() { @Test public void testVarBinaryVector() { try (final VarBinaryVector vector = new VarBinaryVector("v", allocator)) { - setVector(vector, "abc".getBytes(), "def".getBytes(), null); + setVector(vector, "abc".getBytes(StandardCharsets.UTF_8), "def".getBytes(StandardCharsets.UTF_8), null); assertTrue(roundtrip(vector, VarBinaryVector.class)); } } @@ -537,7 +537,7 @@ public void testLargeVarBinaryVector() { String str = "hello world"; try (ArrowBuf buf = allocator.buffer(16)) { - buf.setBytes(0, str.getBytes()); + buf.setBytes(0, str.getBytes(StandardCharsets.UTF_8)); binHolder.start = 0; binHolder.end = str.length(); binHolder.buffer = buf; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java index 3ceda71cce0fe..fd534b189987c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationOutcomeDetails.java @@ -119,16 +119,14 @@ public boolean isAllocationFailed() { @Override public String toString() { - return new StringBuilder() - .append("allocator[" + accountant.getName() + "]") - .append(" reservation: " + accountant.getInitReservation()) - .append(" limit: " 
+ limit) - .append(" used: " + used) - .append(" requestedSize: " + requestedSize) - .append(" allocatedSize: " + allocatedSize) - .append(" localAllocationStatus: " + (allocationFailed ? "fail" : "success")) - .append("\n") - .toString(); + return "allocator[" + accountant.getName() + "]" + + " reservation: " + accountant.getInitReservation() + + " limit: " + limit + + " used: " + used + + " requestedSize: " + requestedSize + + " allocatedSize: " + allocatedSize + + " localAllocationStatus: " + (allocationFailed ? "fail" : "success") + + "\n"; } } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java index c672dc48d79ca..cc6cbf7e6f2c1 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/AllocationReservation.java @@ -84,5 +84,5 @@ public interface AllocationReservation extends AutoCloseable { */ boolean isClosed(); - void close(); + @Override void close(); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java index 189c800ba0fe5..89b8ffd322a9b 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BaseAllocator.java @@ -532,6 +532,8 @@ public String toVerboseString() { return sb.toString(); } + /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ + @SuppressWarnings("FormatStringAnnotation") private void hist(String noteFormat, Object... args) { if (historicalLog != null) { historicalLog.recordEvent(noteFormat, args); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java index 62d268a1f4493..c610066c982bd 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferLedger.java @@ -58,6 +58,7 @@ boolean isOwningLedger() { return this == allocationManager.getOwningLedger(); } + @Override public BufferAllocator getKey() { return allocator; } @@ -419,7 +420,7 @@ public TransferResult transferOwnership(final ArrowBuf srcBuffer, final BufferAl /** * The outcome of a Transfer. */ - public class TransferResult implements OwnershipTransferResult { + public static class TransferResult implements OwnershipTransferResult { // Whether this transfer fit within the target allocator's capacity. 
final boolean allocationFit; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java index 6b622e7192789..e7877d7a3e287 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/BufferManager.java @@ -49,5 +49,5 @@ public interface BufferManager extends AutoCloseable { */ ArrowBuf getManagedBuffer(long size); - void close(); + @Override void close(); } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java index dac4a3fcff59a..bfbf1f212e69a 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/CheckAllocator.java @@ -60,6 +60,7 @@ static String check() { } } + @SuppressWarnings("URLEqualsHashCode") private static Set scanClasspath() { // LinkedHashSet appropriate here because it preserves insertion order // during iteration diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java index 67156f89d13aa..f3132cb46a21c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/ChildAllocator.java @@ -21,7 +21,7 @@ /** * Child allocator class. Only slightly different from the {@see RootAllocator}, * in that these can't be created directly, but must be obtained from - * {@see BufferAllocator#newChildAllocator(AllocatorOwner, long, long, int)}. + * {@link BufferAllocator#newChildAllocator(String, AllocationListener, long, long)}. * *

Child allocators can only be created by the root, or other children, so * this class is package private.

diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java index 740233ef411ff..251028aff0e3c 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/LowCostIdentityHashMap.java @@ -255,7 +255,7 @@ void rehash() { } private void computeMaxSize() { - threshold = (int) ((long) (elementData.length) * LOAD_FACTOR / 10000); + threshold = (int) ((long) elementData.length * LOAD_FACTOR / 10000); } /** @@ -309,7 +309,6 @@ private void computeMaxSize() { elementData[index] = null; } } - return (V) result; } diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java index 7ba231b0c2095..5939e803fdcd6 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/rounding/DefaultRoundingPolicy.java @@ -44,20 +44,16 @@ public class DefaultRoundingPolicy implements RoundingPolicy { static { int defaultPageSize = Integer.getInteger("org.apache.memory.allocator.pageSize", 8192); - Throwable pageSizeFallbackCause = null; try { validateAndCalculatePageShifts(defaultPageSize); } catch (Throwable t) { - pageSizeFallbackCause = t; defaultPageSize = 8192; } int defaultMaxOrder = Integer.getInteger("org.apache.memory.allocator.maxOrder", 11); - Throwable maxOrderFallbackCause = null; try { validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); } catch (Throwable t) { - maxOrderFallbackCause = t; defaultMaxOrder = 11; } DEFAULT_CHUNK_SIZE = validateAndCalculateChunkSize(defaultPageSize, defaultMaxOrder); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java index b41576847d6b7..5775dd794348b 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/ArrowBufPointer.java @@ -27,7 +27,7 @@ * Pointer to a memory region within an {@link ArrowBuf}. * It will be used as the basis for calculating hash code within a vector, and equality determination. */ -public final class ArrowBufPointer { +public final class ArrowBufPointer implements Comparable { /** * The hash code when the arrow buffer is null. @@ -174,6 +174,7 @@ public int hashCode() { * a positive integer if this pointer is larger; * a negative integer if this pointer is smaller. 
*/ + @Override public int compareTo(ArrowBufPointer that) { if (this.buf == null || that.buf == null) { if (this.buf == null && that.buf == null) { diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java index 8040158fd090e..5b80816d48ff7 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/Float16.java @@ -161,7 +161,7 @@ public static float toFloat(short b) { int bits = b & 0xffff; int s = bits & SIGN_MASK; int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; - int m = (bits) & SIGNIFICAND_MASK; + int m = bits & SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0) { // Denormal or 0 @@ -209,7 +209,7 @@ public static short toFloat16(float f) { int bits = Float.floatToRawIntBits(f); int s = (bits >>> FP32_SIGN_SHIFT); int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; - int m = (bits) & FP32_SIGNIFICAND_MASK; + int m = bits & FP32_SIGNIFICAND_MASK; int outE = 0; int outM = 0; if (e == 0xff) { // Infinite or NaN diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java index 21f063c939ec8..910cc1c21d72d 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/HistoricalLog.java @@ -17,8 +17,9 @@ package org.apache.arrow.memory.util; +import java.util.ArrayDeque; import java.util.Arrays; -import java.util.LinkedList; +import java.util.Deque; import org.checkerframework.checker.nullness.qual.Nullable; import org.slf4j.Logger; @@ -30,7 +31,7 @@ */ public class HistoricalLog { - private final LinkedList history = new LinkedList<>(); + private final Deque history = new ArrayDeque<>(); private final String idString; // the formatted id string private final int limit; // the limit on the number of events kept private @Nullable Event firstEvent; // the first stack trace recorded @@ -44,6 +45,8 @@ public class HistoricalLog { * associated with the object instance is best. * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("FormatStringAnnotation") + /* Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 */ public HistoricalLog(final String idStringFormat, Object... args) { this(Integer.MAX_VALUE, idStringFormat, args); } @@ -66,7 +69,9 @@ public HistoricalLog(final String idStringFormat, Object... args) { * associated with the object instance is best. * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("AnnotateFormatMethod") public HistoricalLog(final int limit, final String idStringFormat, Object... args) { + // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 this.limit = limit; this.idString = String.format(idStringFormat, args); this.firstEvent = null; @@ -80,7 +85,9 @@ public HistoricalLog(final int limit, final String idStringFormat, Object... arg * @param noteFormat {@link String#format} format string that describes the event * @param args for the format string, or nothing if none are required */ + @SuppressWarnings("AnnotateFormatMethod") public synchronized void recordEvent(final String noteFormat, Object... 
args) { + // Remove @SuppressWarnings after fixing https://github.com/apache/arrow/issues/41951 final String note = String.format(noteFormat, args); final Event event = new Event(note); if (firstEvent == null) { diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java index 2f74a985a3ff4..727e3531ee83f 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/MemoryUtil.java @@ -58,6 +58,7 @@ public class MemoryUtil { public static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; // Java 1.8, 9, 11, 17, 21 becomes 1, 9, 11, 17, and 21. + @SuppressWarnings("StringSplitter") private static final int majorVersion = Integer.parseInt(System.getProperty("java.specification.version").split("\\D+")[0]); diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java index 5de98d23bb83b..9c5d0b9086113 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/MurmurHasher.java @@ -162,7 +162,7 @@ public boolean equals(@Nullable Object o) { if (this == o) { return true; } - if (o == null || getClass() != o.getClass()) { + if (!(o instanceof MurmurHasher)) { return false; } MurmurHasher that = (MurmurHasher) o; diff --git a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java index 3bf3c2a828338..670129d3fb2a2 100644 --- a/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java +++ b/java/memory/memory-core/src/main/java/org/apache/arrow/memory/util/hash/SimpleHasher.java @@ -53,6 +53,7 @@ protected SimpleHasher() { * @param length length of the memory region. * @return the hash code. */ + @Override public int hashCode(long address, long length) { int hashValue = 0; int index = 0; diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java index b4385b72a38cf..f01d152f84bf3 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestArrowBuf.java @@ -141,7 +141,7 @@ public void testSetBytesBigEndian() { } @Test - /** + /* * Test that allocation history is not recorded even though * assertions are enabled in tests (GH-34338). 
*/ diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java index 535d5c15e8916..1f9e65831b438 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestBaseAllocator.java @@ -374,7 +374,7 @@ public void testCustomizedAllocationManager() { assertEquals(1, arrowBuf1.getInt(0)); try { - final ArrowBuf arrowBuf2 = allocator.buffer(1); + allocator.buffer(1); fail("allocated memory beyond max allowed"); } catch (OutOfMemoryException e) { // expected @@ -1077,7 +1077,7 @@ public void testMemoryLeakWithReservation() throws Exception { "child2", 1024, MAX_ALLOCATION); rootAllocator.verify(); - ArrowBuf buff = childAllocator2.buffer(256); + childAllocator2.buffer(256); Exception exception = assertThrows(IllegalStateException.class, () -> { childAllocator2.close(); diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java index 0cabc4a0571f2..234a6447ddb62 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/TestLowCostIdentityHashMap.java @@ -147,7 +147,7 @@ public void testLargeMap() throws Exception { assertTrue(hashMap.isEmpty()); } - private class StringWithKey implements ValueWithKeyIncluded { + private static class StringWithKey implements ValueWithKeyIncluded { private String myValue; private String myKey; diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java index a1d5624a7e8c0..04e588ed16fc8 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestArrowBufPointer.java @@ -55,16 +55,16 @@ public void testArrowBufPointersEqual() { try (ArrowBuf buf1 = allocator.buffer(BUFFER_LENGTH); ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setInt(i * 4, i * 1234); - buf2.setInt(i * 4, i * 1234); + buf1.setInt(i * 4L, i * 1234); + buf2.setInt(i * 4L, i * 1234); } ArrowBufPointer ptr1 = new ArrowBufPointer(null, 0, 100); ArrowBufPointer ptr2 = new ArrowBufPointer(null, 100, 5032); assertTrue(ptr1.equals(ptr2)); for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - ptr1.set(buf1, i * 4, 4); - ptr2.set(buf2, i * 4, 4); + ptr1.set(buf1, i * 4L, 4); + ptr2.set(buf2, i * 4L, 4); assertTrue(ptr1.equals(ptr2)); } } @@ -76,8 +76,8 @@ public void testArrowBufPointersHashCode() { try (ArrowBuf buf1 = allocator.buffer(vectorLength * 4); ArrowBuf buf2 = allocator.buffer(vectorLength * 4)) { for (int i = 0; i < vectorLength; i++) { - buf1.setInt(i * 4, i); - buf2.setInt(i * 4, i); + buf1.setInt(i * 4L, i); + buf2.setInt(i * 4L, i); } CounterHasher hasher1 = new CounterHasher(); @@ -90,8 +90,8 @@ public void testArrowBufPointersHashCode() { assertEquals(ArrowBufPointer.NULL_HASH_CODE, pointer2.hashCode()); for (int i = 0; i < vectorLength; i++) { - pointer1.set(buf1, i * 4, 4); - pointer2.set(buf2, i * 4, 4); + pointer1.set(buf1, i * 4L, 4); + pointer2.set(buf2, i * 4L, 4); 
assertEquals(pointer1.hashCode(), pointer2.hashCode()); @@ -188,7 +188,7 @@ public void testArrowBufPointersComparison() { * Hasher with a counter that increments each time a hash code is calculated. * This is to validate that the hash code in {@link ArrowBufPointer} is reused. */ - class CounterHasher implements ArrowBufHasher { + static class CounterHasher implements ArrowBufHasher { protected int counter = 0; @@ -211,7 +211,7 @@ public int hashCode() { @Override public boolean equals(Object o) { - return o != null && this.getClass() == o.getClass(); + return o instanceof CounterHasher; } } } diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java index 04a715962dfe9..7a44a5f2d72fd 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/TestByteFunctionHelpers.java @@ -19,6 +19,8 @@ import static org.junit.Assert.assertEquals; +import java.nio.charset.StandardCharsets; + import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -121,9 +123,9 @@ public void testStringCompare() { String rightStr = rightStrings[i]; ArrowBuf left = allocator.buffer(SIZE); - left.setBytes(0, leftStr.getBytes()); + left.setBytes(0, leftStr.getBytes(StandardCharsets.UTF_8)); ArrowBuf right = allocator.buffer(SIZE); - right.setBytes(0, rightStr.getBytes()); + right.setBytes(0, rightStr.getBytes(StandardCharsets.UTF_8)); assertEquals(leftStr.compareTo(rightStr) < 0 ? -1 : 1, ByteFunctionHelpers.compare(left, 0, leftStr.length(), right, 0, rightStr.length())); diff --git a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java index 3da0602bdfd9c..cc5ce49e54828 100644 --- a/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java +++ b/java/memory/memory-core/src/test/java/org/apache/arrow/memory/util/hash/TestArrowBufHasher.java @@ -66,8 +66,8 @@ public void testHasher() { ArrowBuf buf2 = allocator.buffer(BUFFER_LENGTH)) { // prepare data for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf1.setFloat(i * 4, i / 10.0f); - buf2.setFloat(i * 4, i / 10.0f); + buf1.setFloat(i * 4L, i / 10.0f); + buf2.setFloat(i * 4L, i / 10.0f); } verifyHashCodesEqual(buf1, 0, 100, buf2, 0, 100); @@ -95,7 +95,7 @@ public void testHasherNegative() { try (ArrowBuf buf = allocator.buffer(BUFFER_LENGTH)) { // prepare data for (int i = 0; i < BUFFER_LENGTH / 4; i++) { - buf.setFloat(i * 4, i / 10.0f); + buf.setFloat(i * 4L, i / 10.0f); } assertThrows(IllegalArgumentException.class, () -> { @@ -120,13 +120,13 @@ public void testHasherLessThanInt() { buf2.writeBytes("bar2".getBytes(StandardCharsets.UTF_8)); for (int i = 1; i <= 4; i ++) { - verifyHashCodeNotEqual(buf1, 0, i, buf2, 0, i); + verifyHashCodeNotEqual(buf1, i, buf2, i); } } } - private void verifyHashCodeNotEqual(ArrowBuf buf1, int offset1, int length1, - ArrowBuf buf2, int offset2, int length2) { + private void verifyHashCodeNotEqual(ArrowBuf buf1, int length1, + ArrowBuf buf2, int length2) { int hashCode1 = hasher.hashCode(buf1, 0, length1); int hashCode2 = hasher.hashCode(buf2, 0, length2); assertNotEquals(hashCode1, hashCode2); diff --git 
a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java index 466444c7d53e8..ae1f30a868406 100644 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java +++ b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/NettyArrowBuf.java @@ -413,7 +413,7 @@ protected int _getUnsignedMedium(int index) { protected int _getUnsignedMediumLE(int index) { this.chk(index, 3); long addr = this.addr(index); - return PlatformDependent.getByte(addr) & 255 | + return (PlatformDependent.getByte(addr) & 255) | (Short.reverseBytes(PlatformDependent.getShort(addr + 1L)) & '\uffff') << 8; } diff --git a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java index e51c6c3d48882..ab0cd0c9e6e50 100644 --- a/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java +++ b/java/memory/memory-netty-buffer-patch/src/main/java/io/netty/buffer/UnsafeDirectLittleEndian.java @@ -36,19 +36,12 @@ public class UnsafeDirectLittleEndian extends WrappedByteBuf { private final AbstractByteBuf wrapped; private final long memoryAddress; - UnsafeDirectLittleEndian(DuplicatedByteBuf buf) { - this(buf, true); - } - - UnsafeDirectLittleEndian(LargeBuffer buf) { - this(buf, true); - } - - UnsafeDirectLittleEndian(PooledUnsafeDirectByteBuf buf) { - this(buf, true); - } - - private UnsafeDirectLittleEndian(AbstractByteBuf buf, boolean fake) { + /** + * Constructs a new instance. + * + * @param buf The buffer to wrap + */ + public UnsafeDirectLittleEndian(AbstractByteBuf buf) { super(buf); this.wrapped = buf; diff --git a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java b/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java index 043c2c1605a63..4717e48f27bef 100644 --- a/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java +++ b/java/memory/memory-netty-buffer-patch/src/test/java/io/netty/buffer/TestUnsafeDirectLittleEndian.java @@ -23,7 +23,6 @@ import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; -import java.nio.ByteOrder; import java.nio.charset.StandardCharsets; import org.junit.Test; @@ -34,9 +33,9 @@ import io.netty.buffer.UnsafeDirectLittleEndian; public class TestUnsafeDirectLittleEndian { - private static final boolean LITTLE_ENDIAN = ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN; @Test + @SuppressWarnings("CatchAndPrintStackTrace") public void testPrimitiveGetSet() { ByteBuf byteBuf = Unpooled.directBuffer(64); UnsafeDirectLittleEndian unsafeDirect = new UnsafeDirectLittleEndian(new LargeBuffer(byteBuf)); diff --git a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java index 58354d0c2eebd..1e4e06df7e9ac 100644 --- a/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java +++ b/java/memory/memory-netty/src/main/java/org/apache/arrow/memory/netty/NettyAllocationManager.java @@ -68,11 +68,9 @@ public ArrowBuf empty() { /** * The cut-off value for switching allocation strategies. 
 */
-  private final int allocationCutOffValue;
 
   NettyAllocationManager(BufferAllocator accountingAllocator, long requestedSize,
       int allocationCutOffValue) {
     super(accountingAllocator);
-    this.allocationCutOffValue = allocationCutOffValue;
 
     if (requestedSize > allocationCutOffValue) {
       this.memoryChunk = null;
@@ -92,7 +90,7 @@ public ArrowBuf empty() {
   /**
    * Get the underlying memory chunk managed by this AllocationManager.
    * @return the underlying memory chunk if the request size is not greater than the
-   *   {@link NettyAllocationManager#allocationCutOffValue}, or null otherwise.
+   *   cutoff value provided in the constructor, or null otherwise.
    *
    * @deprecated this method will be removed in a future release.
    */
diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java
index a782523cbc6d6..0c99062021f39 100644
--- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java
+++ b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestEndianness.java
@@ -36,15 +36,15 @@ public void testNativeEndian() {
     final ByteBuf b = NettyArrowBuf.unwrapBuffer(a.buffer(4));
     b.setInt(0, 35);
     if (ByteOrder.nativeOrder() == ByteOrder.LITTLE_ENDIAN) {
-      assertEquals(b.getByte(0), 35);
-      assertEquals(b.getByte(1), 0);
-      assertEquals(b.getByte(2), 0);
-      assertEquals(b.getByte(3), 0);
+      assertEquals(35, b.getByte(0));
+      assertEquals(0, b.getByte(1));
+      assertEquals(0, b.getByte(2));
+      assertEquals(0, b.getByte(3));
     } else {
-      assertEquals(b.getByte(0), 0);
-      assertEquals(b.getByte(1), 0);
-      assertEquals(b.getByte(2), 0);
-      assertEquals(b.getByte(3), 35);
+      assertEquals(0, b.getByte(0));
+      assertEquals(0, b.getByte(1));
+      assertEquals(0, b.getByte(2));
+      assertEquals(35, b.getByte(3));
     }
     b.release();
     a.close();
diff --git a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java
index a6da36bb35aa7..792ae53a9404d 100644
--- a/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java
+++ b/java/memory/memory-netty/src/test/java/org/apache/arrow/memory/netty/TestNettyAllocator.java
@@ -39,6 +39,7 @@ public class TestNettyAllocator {
 
   @Test
+  @SuppressWarnings("SynchronizeOnNonFinalField")
   public void testMemoryUsage() {
     ListAppender memoryLogsAppender = new ListAppender<>();
     memoryLogsAppender.list = Collections.synchronizedList(memoryLogsAppender.list);
diff --git a/java/pom.xml b/java/pom.xml
index 9be9d431d4776..a59d29c576398 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -95,6 +95,8 @@
     true
     9+181-r4173-1
     2.24.0
+    2.24.0
+    2.10.0
     3.12.1
     5.11.0
     5.2.0
@@ -995,7 +997,7 @@
                   UTF-8
                   -XDcompilePolicy=simple
-                  -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.*
+                  -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-source|format/src/main/java/org/apache/arrow/flatbuf)/.*
                   -J--add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED
                   -J--add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED
                   -J--add-exports=jdk.compiler/com.sun.tools.javac.main=ALL-UNNAMED

From d02a91b390465dbf530bfba4d100421922b3edda Mon Sep 17 00:00:00 2001
From: mwish
Date: Tue, 4 Jun 2024 22:41:32 +0800
Subject: [PATCH 223/261] GH-41608: [C++][Python] Extends the add_key_value to
 parquet::arrow and PyArrow (#41633)

### Rationale for this change

The previous PR (
https://github.com/apache/arrow/pull/34889 ) added an `AddKeyValueMetadata`
method to FileWriter. Now we should export it to the parquet::arrow and
Python APIs.

### What changes are included in this PR?

1. Add `AddKeyValueMetadata` in parquet::arrow
2. Add `add_key_value_metadata` in pyarrow
3. Testing

### Are these changes tested?

Yes

### Are there any user-facing changes?

A new API that allows adding key-value metadata to a Parquet file

* GitHub Issue: #41608

Authored-by: mwish
Signed-off-by: mwish
---
 cpp/src/parquet/CMakeLists.txt               |   1 +
 cpp/src/parquet/arrow/arrow_metadata_test.cc |  97 +++++++++++++++++++
 cpp/src/parquet/arrow/writer.cc              |   8 ++
 cpp/src/parquet/arrow/writer.h               |  10 ++
 cpp/src/parquet/file_writer.h                |   2 +-
 python/pyarrow/_parquet.pxd                  |   1 +
 python/pyarrow/_parquet.pyx                  |  12 ++-
 python/pyarrow/parquet/core.py               |  13 +++
 .../tests/parquet/test_parquet_writer.py     |  15 +++
 9 files changed, 157 insertions(+), 2 deletions(-)
 create mode 100644 cpp/src/parquet/arrow/arrow_metadata_test.cc

diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 5ac5085a694c8..dc80f08e72cfe 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -397,6 +397,7 @@ add_parquet_test(writer-test

 add_parquet_test(arrow-test
                  SOURCES
+                 arrow/arrow_metadata_test.cc
                  arrow/arrow_reader_writer_test.cc
                  arrow/arrow_schema_test.cc
                  arrow/arrow_statistics_test.cc)
diff --git a/cpp/src/parquet/arrow/arrow_metadata_test.cc b/cpp/src/parquet/arrow/arrow_metadata_test.cc
new file mode 100644
index 0000000000000..6f512227708b9
--- /dev/null
+++ b/cpp/src/parquet/arrow/arrow_metadata_test.cc
@@ -0,0 +1,97 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "gtest/gtest.h"
+
+#include "arrow/table.h"
+#include "arrow/testing/gtest_util.h"
+#include "arrow/util/key_value_metadata.h"
+
+#include "parquet/api/writer.h"
+
+#include "parquet/arrow/reader.h"
+#include "parquet/arrow/schema.h"
+#include "parquet/arrow/writer.h"
+#include "parquet/file_writer.h"
+#include "parquet/test_util.h"
+
+namespace parquet::arrow {
+
+TEST(Metadata, AppendMetadata) {
+  // A sample table; its type and structure do not matter in this test case
+  auto schema = ::arrow::schema({::arrow::field("f", ::arrow::utf8())});
+  auto table = ::arrow::Table::Make(
+      schema, {::arrow::ArrayFromJSON(::arrow::utf8(), R"(["a", "b", "c"])")});
+
+  auto sink = CreateOutputStream();
+  ArrowWriterProperties::Builder builder;
+  builder.store_schema();
+  ASSERT_OK_AND_ASSIGN(auto writer,
+                       parquet::arrow::FileWriter::Open(
+                           *schema, ::arrow::default_memory_pool(), sink,
+                           parquet::default_writer_properties(), builder.build()));
+
+  auto kv_meta = std::make_shared<KeyValueMetadata>();
+  kv_meta->Append("test_key_1", "test_value_1");
+  // Will be overwritten later.
+ kv_meta->Append("test_key_2", "test_value_2_temp"); + ASSERT_OK(writer->AddKeyValueMetadata(kv_meta)); + + // Key value metadata that will be added to the file. + auto kv_meta_added = std::make_shared<::arrow::KeyValueMetadata>(); + kv_meta_added->Append("test_key_2", "test_value_2"); + kv_meta_added->Append("test_key_3", "test_value_3"); + + ASSERT_OK(writer->AddKeyValueMetadata(kv_meta_added)); + ASSERT_OK(writer->Close()); + + // return error if the file is closed + ASSERT_RAISES(IOError, writer->AddKeyValueMetadata(kv_meta_added)); + + auto verify_key_value_metadata = + [&](const std::shared_ptr& key_value_metadata) { + ASSERT_TRUE(nullptr != key_value_metadata); + + // Verify keys that were added before file writer was closed are present. + for (int i = 1; i <= 3; ++i) { + auto index = std::to_string(i); + PARQUET_ASSIGN_OR_THROW(auto value, + key_value_metadata->Get("test_key_" + index)); + EXPECT_EQ("test_value_" + index, value); + } + EXPECT_TRUE(key_value_metadata->Contains("ARROW:schema")); + }; + // verify the metadata in writer + verify_key_value_metadata(writer->metadata()->key_value_metadata()); + + ASSERT_OK(writer->Close()); + + ASSERT_OK_AND_ASSIGN(auto buffer, sink->Finish()); + // verify the metadata in reader + { + std::unique_ptr reader; + FileReaderBuilder reader_builder; + ASSERT_OK_NO_THROW( + reader_builder.Open(std::make_shared<::arrow::io::BufferReader>(buffer))); + ASSERT_OK( + reader_builder.properties(default_arrow_reader_properties())->Build(&reader)); + + verify_key_value_metadata(reader->parquet_reader()->metadata()->key_value_metadata()); + } +} + +} // namespace parquet::arrow diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index bd6f542d11c72..4fd7ef1b47b39 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -482,6 +482,14 @@ class FileWriterImpl : public FileWriter { return writer_->metadata(); } + /// \brief Append the key-value metadata to the file metadata + ::arrow::Status AddKeyValueMetadata( + const std::shared_ptr& key_value_metadata) + override { + PARQUET_CATCH_NOT_OK(writer_->AddKeyValueMetadata(key_value_metadata)); + return Status::OK(); + } + private: friend class FileWriter; diff --git a/cpp/src/parquet/arrow/writer.h b/cpp/src/parquet/arrow/writer.h index 1decafedc97fd..4a1a033a7b7b8 100644 --- a/cpp/src/parquet/arrow/writer.h +++ b/cpp/src/parquet/arrow/writer.h @@ -143,6 +143,16 @@ class PARQUET_EXPORT FileWriter { virtual ~FileWriter(); virtual MemoryPool* memory_pool() const = 0; + /// \brief Add key-value metadata to the file. + /// \param[in] key_value_metadata the metadata to add. + /// \note This will overwrite any existing metadata with the same key. + /// \return Error if Close() has been called. + /// + /// WARNING: If `store_schema` is enabled, `ARROW:schema` would be stored + /// in the key-value metadata. Overwriting this key would result in + /// `store_schema` being unusable during read. + virtual ::arrow::Status AddKeyValueMetadata( + const std::shared_ptr& key_value_metadata) = 0; /// \brief Return the file metadata, only available after calling Close(). virtual const std::shared_ptr metadata() const = 0; }; diff --git a/cpp/src/parquet/file_writer.h b/cpp/src/parquet/file_writer.h index 31706af86dbde..d5ea1d7c98a0e 100644 --- a/cpp/src/parquet/file_writer.h +++ b/cpp/src/parquet/file_writer.h @@ -202,7 +202,7 @@ class PARQUET_EXPORT ParquetFileWriter { /// \brief Add key-value metadata to the file. /// \param[in] key_value_metadata the metadata to add. 
- /// \note This will overwrite any existing metadata with the same key. + /// \note This will overwrite any existing metadata with the same key(s). /// \throw ParquetException if Close() has been called. void AddKeyValueMetadata( const std::shared_ptr& key_value_metadata); diff --git a/python/pyarrow/_parquet.pxd b/python/pyarrow/_parquet.pxd index ae4094d8b4b5f..1bfa505c54470 100644 --- a/python/pyarrow/_parquet.pxd +++ b/python/pyarrow/_parquet.pxd @@ -554,6 +554,7 @@ cdef extern from "parquet/arrow/writer.h" namespace "parquet::arrow" nogil: CStatus WriteTable(const CTable& table, int64_t chunk_size) CStatus NewRowGroup(int64_t chunk_size) CStatus Close() + CStatus AddKeyValueMetadata(const shared_ptr[const CKeyValueMetadata]& key_value_metadata) const shared_ptr[CFileMetaData] metadata() const diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index f7724b9b1fdc7..414f0cef4e52b 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -29,9 +29,10 @@ from pyarrow.includes.libarrow_python cimport * from pyarrow.lib cimport (_Weakrefable, Buffer, Schema, check_status, MemoryPool, maybe_unbox_memory_pool, - Table, NativeFile, + Table, KeyValueMetadata, pyarrow_wrap_chunked_array, pyarrow_wrap_schema, + pyarrow_unwrap_metadata, pyarrow_unwrap_schema, pyarrow_wrap_table, pyarrow_wrap_batch, @@ -2206,6 +2207,15 @@ cdef class ParquetWriter(_Weakrefable): check_status(self.writer.get() .WriteTable(deref(ctable), c_row_group_size)) + def add_key_value_metadata(self, key_value_metadata): + cdef: + shared_ptr[const CKeyValueMetadata] c_metadata + + c_metadata = pyarrow_unwrap_metadata(KeyValueMetadata(key_value_metadata)) + with nogil: + check_status(self.writer.get() + .AddKeyValueMetadata(c_metadata)) + @property def metadata(self): cdef: diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index 81798b1544474..eaff79c8b137c 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -1108,6 +1108,19 @@ def close(self): if self.file_handle is not None: self.file_handle.close() + def add_key_value_metadata(self, key_value_metadata): + """ + Add key-value metadata to the file. + This will overwrite any existing metadata with the same key. + + Parameters + ---------- + key_value_metadata : dict + Keys and values must be string-like / coercible to bytes. 
+        """
+        assert self.is_open
+        self.writer.add_key_value_metadata(key_value_metadata)
+

 def _get_pandas_index_columns(keyvalues):
     return (json.loads(keyvalues[b'pandas'].decode('utf8'))
diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py
index f4ee7529ae87d..bc3714a6232b1 100644
--- a/python/pyarrow/tests/parquet/test_parquet_writer.py
+++ b/python/pyarrow/tests/parquet/test_parquet_writer.py
@@ -346,3 +346,18 @@ def test_parquet_writer_store_schema(tempdir):

     meta = pq.read_metadata(path2)
     assert meta.metadata is None
+
+
+def test_parquet_writer_append_key_value_metadata(tempdir):
+    table = pa.Table.from_arrays([pa.array([], type='int32')], ['f0'])
+    path = tempdir / 'metadata.parquet'
+
+    with pq.ParquetWriter(path, table.schema) as writer:
+        writer.write_table(table)
+        writer.add_key_value_metadata({'key1': '1', 'key2': 'x'})
+        writer.add_key_value_metadata({'key2': '2', 'key3': '3'})
+    reader = pq.ParquetFile(path)
+    metadata = reader.metadata.metadata
+    assert metadata[b'key1'] == b'1'
+    assert metadata[b'key2'] == b'2'
+    assert metadata[b'key3'] == b'3'

From a44b5372c3933180935e7fbd462fc15a1c298335 Mon Sep 17 00:00:00 2001
From: Haocheng Liu <30446009+HaochengLIU@users.noreply.github.com>
Date: Tue, 4 Jun 2024 11:56:13 -0400
Subject: [PATCH 224/261] GH-41493: [C++][S3] Add a new option to check
 existence before CreateDir (#41822)

### Rationale for this change

I have a use case where thousands of jobs are writing hive-partitioned parquet files daily to the same bucket via the S3FS filesystem. The gist is that a lot of keys are being created at the same time, hence jobs hit `AWS Error SLOW_DOWN. during Put Object operation: The object exceeded the rate limit for object mutation operations(create, update, and delete). Please reduce your rate request error.` frequently throughout the day, since the code creates directories pessimistically.

### What changes are included in this PR?

Add a new S3Option to check the existence of a directory before creating it in `CreateDir`. It's disabled by default. When it's enabled, the CreateDir function first checks whether the directory exists before creating it. This ensures that the create operation is only performed when necessary. Though it issues more I/O calls, it avoids hitting the cloud vendor's put-object rate limit.

### Are these changes tested?

Test cases are added for when the flag is set to true. Off the top of my head I do not know how to verify the rate-limit behavior in these tests, but in our production environment we have very similar code and it has worked well.

### Are there any user-facing changes?
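Yes. A new `check_directory_existence_before_creation` field is added to
`S3Options`. A minimal usage sketch (the partition path below is an
illustrative placeholder, not part of this PR):

```cpp
#include <arrow/filesystem/s3fs.h>
#include <arrow/result.h>
#include <arrow/status.h>

// Sketch: enable the existence check so many concurrent jobs creating the
// same hive-style partition directories issue fewer object-mutation requests.
arrow::Status CreatePartitionDir() {
  ARROW_RETURN_NOT_OK(arrow::fs::EnsureS3Initialized());
  arrow::fs::S3Options options = arrow::fs::S3Options::Defaults();
  // New option from this PR: look before creating instead of always creating.
  options.check_directory_existence_before_creation = true;
  ARROW_ASSIGN_OR_RAISE(auto fs, arrow::fs::S3FileSystem::Make(options));
  // "bucket/year=2024/month=06" is a placeholder partition path.
  return fs->CreateDir("bucket/year=2024/month=06", /*recursive=*/true);
}
```

The tradeoff is the one described above: extra `GetFileInfo` calls in exchange
for avoiding rate-limited object mutation operations.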
* GitHub Issue: #41493

Lead-authored-by: Haocheng Liu
Co-authored-by: Antoine Pitrou
Signed-off-by: Antoine Pitrou
---
 cpp/src/arrow/filesystem/s3fs.cc      | 43 +++++++++++++++++++++++----
 cpp/src/arrow/filesystem/s3fs.h       | 11 +++++++
 cpp/src/arrow/filesystem/s3fs_test.cc | 31 ++++++++++++++++++-
 3 files changed, 78 insertions(+), 7 deletions(-)

diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc
index 43666f32b3da6..78e02c31a35a3 100644
--- a/cpp/src/arrow/filesystem/s3fs.cc
+++ b/cpp/src/arrow/filesystem/s3fs.cc
@@ -2860,6 +2860,7 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
     return impl_->CreateBucket(path.bucket);
   }

+  FileInfo file_info;
   // Create object
   if (recursive) {
     // Ensure bucket exists
@@ -2867,10 +2868,33 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
     if (!bucket_exists) {
       RETURN_NOT_OK(impl_->CreateBucket(path.bucket));
     }
+
+    auto key_i = path.key_parts.begin();
+    std::string parent_key{};
+    if (options().check_directory_existence_before_creation) {
+      // Walk up the directory hierarchy to find the first existing parent
+      for (const auto& part : path.key_parts) {
+        parent_key += part;
+        parent_key += kSep;
+      }
+      for (key_i = path.key_parts.end(); key_i-- != path.key_parts.begin();) {
+        ARROW_ASSIGN_OR_RAISE(file_info,
+                              this->GetFileInfo(path.bucket + kSep + parent_key));
+        if (file_info.type() != FileType::NotFound) {
+          // Found!
+          break;
+        } else {
+          // Remove the trailing kSep and this part
+          parent_key.pop_back();
+          parent_key.erase(parent_key.end() - key_i->size(), parent_key.end());
+        }
+      }
+      key_i++;  // The loop above leaves key_i one position before the first missing part
+    }
     // Ensure that all parents exist, then the directory itself
-    std::string parent_key;
-    for (const auto& part : path.key_parts) {
-      parent_key += part;
+    // Create all missing directories
+    for (; key_i < path.key_parts.end(); ++key_i) {
+      parent_key += *key_i;
       parent_key += kSep;
       RETURN_NOT_OK(impl_->CreateEmptyDir(path.bucket, parent_key));
     }
@@ -2888,11 +2912,18 @@ Status S3FileSystem::CreateDir(const std::string& s, bool recursive) {
                               "': parent directory does not exist");
       }
     }
+  }

-    // XXX Should we check that no non-directory entry exists?
-    // Minio does it for us, not sure about other S3 implementations.
-    return impl_->CreateEmptyDir(path.bucket, path.key);
+  // Check if the directory exists already
+  if (options().check_directory_existence_before_creation) {
+    ARROW_ASSIGN_OR_RAISE(file_info, this->GetFileInfo(path.full_path));
+    if (file_info.type() != FileType::NotFound) {
+      return Status::OK();
+    }
   }
+  // XXX Should we check that no non-directory entry exists?
+  // Minio does it for us, not sure about other S3 implementations.
+  return impl_->CreateEmptyDir(path.bucket, path.key);
 }

 Status S3FileSystem::DeleteDir(const std::string& s) {
diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h
index 82d08bc5ea89a..fbbe9d0b3f42b 100644
--- a/cpp/src/arrow/filesystem/s3fs.h
+++ b/cpp/src/arrow/filesystem/s3fs.h
@@ -166,6 +166,17 @@ struct ARROW_EXPORT S3Options {
   /// Whether to allow deletion of buckets
   bool allow_bucket_deletion = false;

+  /// Whether CreateDir should check the existence of a directory before creating it
+  ///
+  /// By default, the CreateDir function tries to create the directory without checking
+  /// whether it already exists. It's an optimization: attempting the creation and
+  /// catching the error avoids issuing two dependent I/O calls.
+  /// Though for key/value storage like Google Cloud Storage, too many creation calls
+  /// will breach the rate limit for object mutation operations and cause serious
+  /// consequences. It's also possible you don't have creation access for the parent
+  /// directory. Set this option to true to address these scenarios.
+  bool check_directory_existence_before_creation = false;
+
   /// \brief Default metadata for OpenOutputStream.
   ///
   /// This will be ignored if non-empty metadata is passed to OpenOutputStream.
diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc
index 88cc96956e34c..7bfa120eda678 100644
--- a/cpp/src/arrow/filesystem/s3fs_test.cc
+++ b/cpp/src/arrow/filesystem/s3fs_test.cc
@@ -922,9 +922,13 @@ TEST_F(TestS3FS, CreateDir) {

   // New "directory"
   AssertFileInfo(fs_.get(), "bucket/newdir", FileType::NotFound);
-  ASSERT_OK(fs_->CreateDir("bucket/newdir"));
+  ASSERT_OK(fs_->CreateDir("bucket/newdir", /*recursive=*/false));
   AssertFileInfo(fs_.get(), "bucket/newdir", FileType::Directory);

+  // By default CreateDir uses recursive mode; set it explicitly to false here
+  ASSERT_RAISES(IOError,
+                fs_->CreateDir("bucket/newdir/newsub/newsubsub", /*recursive=*/false));
+
   // New "directory", recursive
   ASSERT_OK(fs_->CreateDir("bucket/newdir/newsub/newsubsub", /*recursive=*/true));
   AssertFileInfo(fs_.get(), "bucket/newdir/newsub", FileType::Directory);
@@ -939,6 +943,31 @@
   // Extraneous slashes
   ASSERT_RAISES(Invalid, fs_->CreateDir("bucket//somedir"));
   ASSERT_RAISES(Invalid, fs_->CreateDir("bucket/somedir//newdir"));
+
+  // Check existence before creation
+  options_.check_directory_existence_before_creation = true;
+  MakeFileSystem();
+  // New "directory" again
+  AssertFileInfo(fs_.get(), "bucket/checknewdir", FileType::NotFound);
+  ASSERT_OK(fs_->CreateDir("bucket/checknewdir"));
+  AssertFileInfo(fs_.get(), "bucket/checknewdir", FileType::Directory);
+
+  ASSERT_RAISES(IOError, fs_->CreateDir("bucket/checknewdir/newsub/newsubsub/newsubsub/",
+                                        /*recursive=*/false));
+
+  // New "directory" again, recursive
+  ASSERT_OK(fs_->CreateDir("bucket/checknewdir/newsub/newsubsub", /*recursive=*/true));
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub", FileType::Directory);
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub", FileType::Directory);
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub/newsubsub",
+                 FileType::NotFound);
+  // Try creation with the same name
+  ASSERT_OK(fs_->CreateDir("bucket/checknewdir/newsub/newsubsub/newsubsub/",
+                           /*recursive=*/true));
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub", FileType::Directory);
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub", FileType::Directory);
+  AssertFileInfo(fs_.get(), "bucket/checknewdir/newsub/newsubsub/newsubsub",
+                 FileType::Directory);
 }

 TEST_F(TestS3FS, DeleteFile) {

From ac1eadb5e0291314430583f99727d8c0f6d9b901 Mon Sep 17 00:00:00 2001
From: Tom Scott-Coombes <62209801+tscottcoombes1@users.noreply.github.com>
Date: Tue, 4 Jun 2024 23:31:43 +0100
Subject: [PATCH 225/261] GH-40494: [Go] add support for protobuf messages
 (#40496)

### Rationale for this change

Support for protobuf messages.

### What changes are included in this PR?

Ability to create a schema from a protobuf message
Ability to create a record from a protobuf message
Some customisations

### Are these changes tested?

Yes, a couple of unit tests are included; a short usage sketch follows below.
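A minimal sketch of the new API, mirroring the unit tests (`ExampleMessage` is the test message generated from `messages/types.proto` in this PR; the surrounding program is illustrative):

```go
package main

import (
	"fmt"

	"github.com/apache/arrow/go/v17/arrow/util"
	"github.com/apache/arrow/go/v17/arrow/util/util_message"
)

func main() {
	msg := &util_message.ExampleMessage{Field1: "hello"}

	// Reflect over the protobuf message once, then derive both an Arrow
	// schema and a record populated from the message's values.
	pmr := util.NewProtobufMessageReflection(msg)
	fmt.Println(pmr.Schema()) // field1: type=utf8, nullable

	rec := pmr.Record(nil) // a nil allocator falls back to memory.NewGoAllocator()
	defer rec.Release()
	fmt.Println(rec)
}
```

### Are there any user-facing changes?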
No * GitHub Issue: #40494 Lead-authored-by: Tom Scott-Coombes Co-authored-by: Tom Scott-Coombes <62209801+tscottcoombes1@users.noreply.github.com> Co-authored-by: Matt Topol Co-authored-by: tscottcoombes1 <62209801+tscottcoombes1@users.noreply.github.com> Signed-off-by: Matt Topol --- dev/release/rat_exclude_files.txt | 1 + go/arrow/datatype_nested.go | 2 +- go/arrow/util/messages/README.md | 25 + go/arrow/util/messages/types.proto | 56 ++ go/arrow/util/protobuf_reflect.go | 865 +++++++++++++++++++++++++ go/arrow/util/protobuf_reflect_test.go | 311 +++++++++ go/arrow/util/util_message/types.pb.go | 539 +++++++++++++++ go/go.mod | 2 + go/go.sum | 2 + 9 files changed, 1802 insertions(+), 1 deletion(-) create mode 100644 go/arrow/util/messages/README.md create mode 100644 go/arrow/util/messages/types.proto create mode 100644 go/arrow/util/protobuf_reflect.go create mode 100644 go/arrow/util/protobuf_reflect_test.go create mode 100644 go/arrow/util/util_message/types.pb.go diff --git a/dev/release/rat_exclude_files.txt b/dev/release/rat_exclude_files.txt index 0cc1348f50b95..ef325090f2f4b 100644 --- a/dev/release/rat_exclude_files.txt +++ b/dev/release/rat_exclude_files.txt @@ -63,6 +63,7 @@ go.work.sum go/go.sum go/arrow/Gopkg.lock go/arrow/flight/gen/flight/*.pb.go +go/arrow/util/util_message/*.pb.go go/arrow/internal/cpu/* go/arrow/type_string.go go/arrow/cdata/test/go.sum diff --git a/go/arrow/datatype_nested.go b/go/arrow/datatype_nested.go index 1e65ccd4594ca..b38983b7f2e5d 100644 --- a/go/arrow/datatype_nested.go +++ b/go/arrow/datatype_nested.go @@ -877,7 +877,7 @@ func DenseUnionFromArrays(children []Array, fields []string, codes []UnionTypeCo } // DenseUnionOf is equivalent to UnionOf(arrow.DenseMode, fields, typeCodes), -// constructing a SparseUnionType from a list of fields and type codes. +// constructing a DenseUnionType from a list of fields and type codes. // // If len(fields) != len(typeCodes) this will panic. They are allowed to be // of length 0. diff --git a/go/arrow/util/messages/README.md b/go/arrow/util/messages/README.md new file mode 100644 index 0000000000000..312484f701a46 --- /dev/null +++ b/go/arrow/util/messages/README.md @@ -0,0 +1,25 @@ + + +How to generate the .pb.go files + +``` +cd go/arrow/util/ +protoc -I ./ --go_out=./messages ./messages/types.proto +``` diff --git a/go/arrow/util/messages/types.proto b/go/arrow/util/messages/types.proto new file mode 100644 index 0000000000000..c085273ca35e0 --- /dev/null +++ b/go/arrow/util/messages/types.proto @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; +import "google/protobuf/any.proto"; + +option go_package = "../util_message"; + +message ExampleMessage { + string field1 = 1; +} + +message AllTheTypes { + string str = 1; + int32 int32 = 2; + int64 int64 = 3; + sint32 sint32 = 4; + sint64 sin64 = 5; + uint32 uint32 = 6; + uint64 uint64 = 7; + fixed32 fixed32 = 8; + fixed64 fixed64 = 9; + sfixed32 sfixed32 = 10; + bool bool = 11; + bytes bytes = 12; + double double = 13; + ExampleEnum enum = 14; + ExampleMessage message = 15; + oneof oneof { + string oneofstring = 16; + ExampleMessage oneofmessage = 17; + } + google.protobuf.Any any = 18; + map simple_map = 19; + map complex_map = 20; + repeated string simple_list = 21; + repeated ExampleMessage complex_list = 22; + + enum ExampleEnum { + OPTION_0 = 0; + OPTION_1 = 1; + } +} diff --git a/go/arrow/util/protobuf_reflect.go b/go/arrow/util/protobuf_reflect.go new file mode 100644 index 0000000000000..b4c8d68db8b0d --- /dev/null +++ b/go/arrow/util/protobuf_reflect.go @@ -0,0 +1,865 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package util + +import ( + "fmt" + "reflect" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/huandu/xstrings" + "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/reflect/protoreflect" + "google.golang.org/protobuf/types/known/anypb" +) + +// ProtobufTypeHandler provides options on how protobuf fields should be handled in the conversion to arrow +type ProtobufTypeHandler int + +const ( + // OneOfNull means do not wrap oneOfs in a union, they are treated as separate fields + OneOfNull ProtobufTypeHandler = iota + // OneOfDenseUnion maps the protobuf OneOf to an arrow.DENSE_UNION + OneOfDenseUnion + // EnumNumber uses the Enum numeric value + EnumNumber + // EnumValue uses the Enum string value + EnumValue + // EnumDictionary uses both the numeric and string value and maps to an arrow.Dictionary + EnumDictionary +) + +type schemaOptions struct { + exclusionPolicy func(pfr *ProtobufFieldReflection) bool + fieldNameFormatter func(str string) string + oneOfHandler ProtobufTypeHandler + enumHandler ProtobufTypeHandler +} + +// ProtobufFieldReflection represents the metadata and values of a protobuf field +type ProtobufFieldReflection struct { + parent *ProtobufMessageReflection + descriptor protoreflect.FieldDescriptor + prValue protoreflect.Value + rValue reflect.Value + schemaOptions + arrow.Field +} + +func (pfr *ProtobufFieldReflection) isNull() bool { + for pfr.rValue.Kind() == reflect.Ptr { + if pfr.rValue.IsNil() { + return true + } + pfr.rValue = pfr.rValue.Elem() + } + + if !pfr.rValue.IsValid() || !pfr.prValue.IsValid() { + return true + } + return false +} + +func (pfr *ProtobufFieldReflection) arrowField() arrow.Field { + return arrow.Field{ + Name: pfr.name(), + Type: pfr.getDataType(), + Nullable: true, + } +} + +func (pfr *ProtobufFieldReflection) protoreflectValue() protoreflect.Value { + return pfr.prValue +} + +func (pfr *ProtobufFieldReflection) reflectValue() reflect.Value { + return pfr.rValue +} + +func (pfr *ProtobufFieldReflection) GetDescriptor() protoreflect.FieldDescriptor { + return pfr.descriptor +} + +func (pfr *ProtobufFieldReflection) name() string { + if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler != OneOfNull { + return pfr.fieldNameFormatter(string(pfr.descriptor.ContainingOneof().Name())) + } + return pfr.fieldNameFormatter(string(pfr.descriptor.Name())) +} + +func (pfr *ProtobufFieldReflection) arrowType() arrow.Type { + if pfr.isOneOf() && pfr.schemaOptions.oneOfHandler == OneOfDenseUnion { + return arrow.DENSE_UNION + } + if pfr.isEnum() { + switch pfr.enumHandler { + case EnumNumber: + return arrow.INT32 + case EnumValue: + return arrow.STRING + case EnumDictionary: + return arrow.DICTIONARY + } + } + if pfr.isStruct() { + return arrow.STRUCT + } + if pfr.isMap() { + return arrow.MAP + } + if pfr.isList() { + return arrow.LIST + } + switch pfr.descriptor.Kind() { + case protoreflect.Int32Kind: + return arrow.INT32 + case protoreflect.Int64Kind: + return arrow.INT64 + case protoreflect.Sint32Kind: + return arrow.INT32 + case protoreflect.Sint64Kind: + return arrow.INT64 + case protoreflect.Uint32Kind: + return arrow.UINT32 + case protoreflect.Uint64Kind: + return arrow.UINT64 + case protoreflect.Fixed32Kind: + return arrow.UINT32 + case protoreflect.Fixed64Kind: + return arrow.UINT64 + case protoreflect.Sfixed32Kind: + return arrow.INT32 + case protoreflect.Sfixed64Kind: + return arrow.INT64 + case protoreflect.FloatKind: + return 
arrow.FLOAT32 + case protoreflect.DoubleKind: + return arrow.FLOAT64 + case protoreflect.StringKind: + return arrow.STRING + case protoreflect.BytesKind: + return arrow.BINARY + case protoreflect.BoolKind: + return arrow.BOOL + } + return arrow.NULL +} + +func (pfr *ProtobufFieldReflection) isOneOf() bool { + return pfr.descriptor.ContainingOneof() != nil +} + +func (pfr *ProtobufFieldReflection) isEnum() bool { + return pfr.descriptor.Kind() == protoreflect.EnumKind +} + +func (pfr *ProtobufFieldReflection) isStruct() bool { + return pfr.descriptor.Kind() == protoreflect.MessageKind && !pfr.descriptor.IsMap() && pfr.rValue.Kind() != reflect.Slice +} + +func (pfr *ProtobufFieldReflection) isMap() bool { + return pfr.descriptor.Kind() == protoreflect.MessageKind && pfr.descriptor.IsMap() +} + +func (pfr *ProtobufFieldReflection) isList() bool { + return pfr.descriptor.IsList() && pfr.rValue.Kind() == reflect.Slice +} + +// ProtobufMessageReflection represents the metadata and values of a protobuf message +type ProtobufMessageReflection struct { + descriptor protoreflect.MessageDescriptor + message protoreflect.Message + rValue reflect.Value + schemaOptions + fields []ProtobufMessageFieldReflection +} + +func (psr ProtobufMessageReflection) unmarshallAny() ProtobufMessageReflection { + if psr.descriptor.FullName() == "google.protobuf.Any" && psr.rValue.IsValid() { + for psr.rValue.Type().Kind() == reflect.Ptr { + psr.rValue = reflect.Indirect(psr.rValue) + } + fieldValueAsAny, _ := psr.rValue.Interface().(anypb.Any) + msg, _ := fieldValueAsAny.UnmarshalNew() + + v := reflect.ValueOf(msg) + for v.Kind() == reflect.Ptr { + v = reflect.Indirect(v) + } + + return ProtobufMessageReflection{ + descriptor: msg.ProtoReflect().Descriptor(), + message: msg.ProtoReflect(), + rValue: v, + schemaOptions: psr.schemaOptions, + } + } else { + return psr + } +} + +func (psr ProtobufMessageReflection) getArrowFields() []arrow.Field { + var fields []arrow.Field + + for pfr := range psr.generateStructFields() { + fields = append(fields, arrow.Field{ + Name: pfr.name(), + Type: pfr.getDataType(), + Nullable: true, + }) + } + + return fields +} + +type protobufListReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asList() protobufListReflection { + return protobufListReflection{*pfr} +} + +func (plr protobufListReflection) getDataType() arrow.DataType { + for li := range plr.generateListItems() { + return arrow.ListOf(li.getDataType()) + } + pfr := ProtobufFieldReflection{ + descriptor: plr.descriptor, + schemaOptions: plr.schemaOptions, + } + return arrow.ListOf(pfr.getDataType()) +} + +type protobufUnionReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asUnion() protobufUnionReflection { + return protobufUnionReflection{*pfr} +} + +func (pur protobufUnionReflection) isThisOne() bool { + for pur.rValue.Kind() == reflect.Ptr || pur.rValue.Kind() == reflect.Interface { + pur.rValue = pur.rValue.Elem() + } + return pur.rValue.Field(0).String() == pur.prValue.String() +} + +func (pur protobufUnionReflection) whichOne() arrow.UnionTypeCode { + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + if pfr.asUnion().isThisOne() { + return pur.getUnionTypeCode(int32(pfr.descriptor.Number())) + } + } + // i.e. 
all null + return -1 +} + +func (pur protobufUnionReflection) getField() *ProtobufFieldReflection { + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + if pfr.asUnion().isThisOne() { + return pfr + } + } + // i.e. all null + return nil +} + +func (pur protobufUnionReflection) getUnionTypeCode(n int32) arrow.UnionTypeCode { + //We use the index of the field number as there is a limit on the arrow.UnionTypeCode (127) + //which a protobuf Number could realistically exceed + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + if n == int32(fds.Get(i).Number()) { + return int8(i) + } + } + return -1 +} + +func (pur protobufUnionReflection) generateUnionFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + go func() { + defer close(out) + fds := pur.descriptor.ContainingOneof().Fields() + for i := 0; i < fds.Len(); i++ { + pfr := pur.parent.getFieldByName(string(fds.Get(i).Name())) + // Do not get stuck in a recursion loop + pfr.oneOfHandler = OneOfNull + if pfr.exclusionPolicy(pfr) { + continue + } + out <- pfr + } + }() + + return out +} + +func (pur protobufUnionReflection) getArrowFields() []arrow.Field { + var fields []arrow.Field + + for pfr := range pur.generateUnionFields() { + fields = append(fields, pfr.arrowField()) + } + + return fields +} + +func (pur protobufUnionReflection) getDataType() arrow.DataType { + fds := pur.getArrowFields() + typeCodes := make([]arrow.UnionTypeCode, len(fds)) + for i := 0; i < len(fds); i++ { + typeCodes[i] = arrow.UnionTypeCode(i) + } + return arrow.DenseUnionOf(fds, typeCodes) +} + +type protobufDictReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asDictionary() protobufDictReflection { + return protobufDictReflection{*pfr} +} + +func (pdr protobufDictReflection) getDataType() arrow.DataType { + return &arrow.DictionaryType{ + IndexType: arrow.PrimitiveTypes.Int32, + ValueType: arrow.BinaryTypes.String, + Ordered: false, + } +} + +func (pdr protobufDictReflection) getDictValues(mem memory.Allocator) arrow.Array { + enumValues := pdr.descriptor.Enum().Values() + bldr := array.NewStringBuilder(mem) + for i := 0; i < enumValues.Len(); i++ { + bldr.Append(string(enumValues.Get(i).Name())) + } + return bldr.NewArray() +} + +type protobufMapReflection struct { + ProtobufFieldReflection +} + +func (pfr *ProtobufFieldReflection) asMap() protobufMapReflection { + return protobufMapReflection{*pfr} +} + +func (pmr protobufMapReflection) getDataType() arrow.DataType { + for kvp := range pmr.generateKeyValuePairs() { + return kvp.getDataType() + } + return protobufMapKeyValuePairReflection{ + k: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapKey(), + schemaOptions: pmr.schemaOptions, + }, + v: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapValue(), + schemaOptions: pmr.schemaOptions, + }, + }.getDataType() +} + +type protobufMapKeyValuePairReflection struct { + k ProtobufFieldReflection + v ProtobufFieldReflection +} + +func (pmr protobufMapKeyValuePairReflection) getDataType() arrow.DataType { + return arrow.MapOf(pmr.k.getDataType(), pmr.v.getDataType()) +} + +func (pmr protobufMapReflection) generateKeyValuePairs() chan protobufMapKeyValuePairReflection { + out := make(chan protobufMapKeyValuePairReflection) + + go func() { + defer close(out) + for _, k := range pmr.rValue.MapKeys() { + kvp := 
protobufMapKeyValuePairReflection{ + k: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapKey(), + prValue: getMapKey(k), + rValue: k, + schemaOptions: pmr.schemaOptions, + }, + v: ProtobufFieldReflection{ + parent: pmr.parent, + descriptor: pmr.descriptor.MapValue(), + prValue: pmr.prValue.Map().Get(protoreflect.MapKey(getMapKey(k))), + rValue: pmr.rValue.MapIndex(k), + schemaOptions: pmr.schemaOptions, + }, + } + out <- kvp + } + }() + + return out +} + +func getMapKey(v reflect.Value) protoreflect.Value { + switch v.Kind() { + case reflect.String: + return protoreflect.ValueOf(v.String()) + case reflect.Int32, reflect.Int64: + return protoreflect.ValueOf(v.Int()) + case reflect.Bool: + return protoreflect.ValueOf(v.Bool()) + case reflect.Uint32, reflect.Uint64: + return protoreflect.ValueOf(v.Uint()) + default: + panic("Unmapped protoreflect map key type") + } +} + +func (psr ProtobufMessageReflection) generateStructFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + + go func() { + defer close(out) + fds := psr.descriptor.Fields() + for i := 0; i < fds.Len(); i++ { + pfr := psr.getFieldByName(string(fds.Get(i).Name())) + if psr.exclusionPolicy(pfr) { + continue + } + if pfr.arrowType() == arrow.DENSE_UNION { + if pfr.descriptor.Number() != pfr.descriptor.ContainingOneof().Fields().Get(0).Number() { + continue + } + } + out <- pfr + } + }() + + return out +} + +func (psr ProtobufMessageReflection) generateFields() chan *ProtobufFieldReflection { + out := make(chan *ProtobufFieldReflection) + + go func() { + defer close(out) + fds := psr.descriptor.Fields() + for i := 0; i < fds.Len(); i++ { + pfr := psr.getFieldByName(string(fds.Get(i).Name())) + if psr.exclusionPolicy(pfr) { + continue + } + if pfr.arrowType() == arrow.DENSE_UNION { + if pfr.descriptor.Number() != pfr.descriptor.ContainingOneof().Fields().Get(0).Number() { + continue + } + } + out <- pfr + } + }() + + return out +} + +func (pfr *ProtobufFieldReflection) asStruct() ProtobufMessageReflection { + psr := ProtobufMessageReflection{ + descriptor: pfr.descriptor.Message(), + rValue: pfr.rValue, + schemaOptions: pfr.schemaOptions, + } + if pfr.prValue.IsValid() { + psr.message = pfr.prValue.Message() + } + psr = psr.unmarshallAny() + return psr +} + +func (psr ProtobufMessageReflection) getDataType() arrow.DataType { + return arrow.StructOf(psr.getArrowFields()...) 
+} + +func (psr ProtobufMessageReflection) getFieldByName(n string) *ProtobufFieldReflection { + fd := psr.descriptor.Fields().ByTextName(xstrings.ToSnakeCase(n)) + fv := psr.rValue + if fv.IsValid() { + if !fv.IsZero() { + for fv.Kind() == reflect.Ptr || fv.Kind() == reflect.Interface { + fv = fv.Elem() + } + if fd.ContainingOneof() != nil { + n = string(fd.ContainingOneof().Name()) + } + fv = fv.FieldByName(xstrings.ToCamelCase(n)) + for fv.Kind() == reflect.Ptr { + fv = fv.Elem() + } + } + } + pfr := ProtobufFieldReflection{ + parent: &psr, + descriptor: fd, + rValue: fv, + schemaOptions: psr.schemaOptions, + } + if psr.message != nil { + pfr.prValue = psr.message.Get(fd) + } + return &pfr +} + +func (plr protobufListReflection) generateListItems() chan ProtobufFieldReflection { + out := make(chan ProtobufFieldReflection) + + go func() { + defer close(out) + for i := 0; i < plr.prValue.List().Len(); i++ { + out <- ProtobufFieldReflection{ + descriptor: plr.descriptor, + prValue: plr.prValue.List().Get(i), + rValue: plr.rValue.Index(i), + schemaOptions: plr.schemaOptions, + } + } + }() + + return out +} + +func (pfr *ProtobufFieldReflection) getDataType() arrow.DataType { + switch pfr.arrowType() { + case arrow.DENSE_UNION: + return pfr.asUnion().getDataType() + case arrow.DICTIONARY: + return pfr.asDictionary().getDataType() + case arrow.LIST: + return pfr.asList().getDataType() + case arrow.MAP: + return pfr.asMap().getDataType() + case arrow.STRUCT: + return pfr.asStruct().getDataType() + case arrow.INT32: + return arrow.PrimitiveTypes.Int32 + case arrow.INT64: + return arrow.PrimitiveTypes.Int64 + case arrow.UINT32: + return arrow.PrimitiveTypes.Uint32 + case arrow.UINT64: + return arrow.PrimitiveTypes.Uint64 + case arrow.FLOAT32: + return arrow.PrimitiveTypes.Float32 + case arrow.FLOAT64: + return arrow.PrimitiveTypes.Float64 + case arrow.STRING: + return arrow.BinaryTypes.String + case arrow.BINARY: + return arrow.BinaryTypes.Binary + case arrow.BOOL: + return arrow.FixedWidthTypes.Boolean + } + return nil +} + +type protobufReflection interface { + name() string + arrowType() arrow.Type + protoreflectValue() protoreflect.Value + reflectValue() reflect.Value + GetDescriptor() protoreflect.FieldDescriptor + isNull() bool + isEnum() bool + asDictionary() protobufDictReflection + isList() bool + asList() protobufListReflection + isMap() bool + asMap() protobufMapReflection + isStruct() bool + asStruct() ProtobufMessageReflection + isOneOf() bool + asUnion() protobufUnionReflection +} + +// ProtobufMessageFieldReflection links together the message and it's fields +type ProtobufMessageFieldReflection struct { + parent *ProtobufMessageReflection + protobufReflection + arrow.Field +} + +// Schema returns an arrow.Schema representing a protobuf message +func (msg ProtobufMessageReflection) Schema() *arrow.Schema { + var fields []arrow.Field + for _, f := range msg.fields { + fields = append(fields, f.Field) + } + return arrow.NewSchema(fields, nil) +} + +// Record returns an arrow.Record for a protobuf message +func (msg ProtobufMessageReflection) Record(mem memory.Allocator) arrow.Record { + if mem == nil { + mem = memory.NewGoAllocator() + } + + schema := msg.Schema() + + recordBuilder := array.NewRecordBuilder(mem, schema) + + var fieldNames []string + for i, f := range msg.fields { + f.AppendValueOrNull(recordBuilder.Field(i), mem) + fieldNames = append(fieldNames, f.protobufReflection.name()) + } + + var arrays []arrow.Array + for _, bldr := range recordBuilder.Fields() { + a := 
bldr.NewArray() + arrays = append(arrays, a) + } + + structArray, _ := array.NewStructArray(arrays, fieldNames) + + return array.RecordFromStructArray(structArray, schema) +} + +// NewProtobufMessageReflection initialises a ProtobufMessageReflection +// can be used to convert a protobuf message into an arrow Record +func NewProtobufMessageReflection(msg proto.Message, options ...option) *ProtobufMessageReflection { + v := reflect.ValueOf(msg) + for v.Kind() == reflect.Ptr { + v = v.Elem() + } + includeAll := func(pfr *ProtobufFieldReflection) bool { + return false + } + noFormatting := func(str string) string { + return str + } + psr := &ProtobufMessageReflection{ + descriptor: msg.ProtoReflect().Descriptor(), + message: msg.ProtoReflect(), + rValue: v, + schemaOptions: schemaOptions{ + exclusionPolicy: includeAll, + fieldNameFormatter: noFormatting, + oneOfHandler: OneOfNull, + enumHandler: EnumDictionary, + }, + } + + for _, opt := range options { + opt(psr) + } + + var fields []ProtobufMessageFieldReflection + + for pfr := range psr.generateFields() { + fields = append(fields, ProtobufMessageFieldReflection{ + parent: psr, + protobufReflection: pfr, + Field: pfr.arrowField(), + }) + } + + psr.fields = fields + + return psr +} + +type option func(*ProtobufMessageReflection) + +// WithExclusionPolicy is an option for a ProtobufMessageReflection +// WithExclusionPolicy acts as a deny filter on the fields of a protobuf message +// i.e. prevents them from being included in the schema. +// A use case for this is to exclude fields containing PII. +func WithExclusionPolicy(ex func(pfr *ProtobufFieldReflection) bool) option { + return func(psr *ProtobufMessageReflection) { + psr.exclusionPolicy = ex + } +} + +// WithFieldNameFormatter is an option for a ProtobufMessageReflection +// WithFieldNameFormatter enables customisation of the field names in the arrow schema +// By default, the field names are taken from the protobuf message (.proto file) +func WithFieldNameFormatter(formatter func(str string) string) option { + return func(psr *ProtobufMessageReflection) { + psr.fieldNameFormatter = formatter + } +} + +// WithOneOfHandler is an option for a ProtobufMessageReflection +// WithOneOfHandler enables customisation of the protobuf oneOf type in the arrow schema +// By default, the oneOfs are mapped to separate columns +func WithOneOfHandler(oneOfHandler ProtobufTypeHandler) option { + return func(psr *ProtobufMessageReflection) { + psr.oneOfHandler = oneOfHandler + } +} + +// WithEnumHandler is an option for a ProtobufMessageReflection +// WithEnumHandler enables customisation of the protobuf Enum type in the arrow schema +// By default, the Enums are mapped to arrow.Dictionary +func WithEnumHandler(enumHandler ProtobufTypeHandler) option { + return func(psr *ProtobufMessageReflection) { + psr.enumHandler = enumHandler + } +} + +// AppendValueOrNull add the value of a protobuf field to an arrow array builder +func (f ProtobufMessageFieldReflection) AppendValueOrNull(b array.Builder, mem memory.Allocator) error { + pv := f.protoreflectValue() + fd := f.GetDescriptor() + + if f.isNull() { + b.AppendNull() + return nil + } + + switch b.Type().ID() { + case arrow.STRING: + if f.protobufReflection.isEnum() { + b.(*array.StringBuilder).Append(string(fd.Enum().Values().ByNumber(pv.Enum()).Name())) + } else { + b.(*array.StringBuilder).Append(pv.String()) + } + case arrow.BINARY: + b.(*array.BinaryBuilder).Append(pv.Bytes()) + case arrow.INT32: + if f.protobufReflection.isEnum() { + 
b.(*array.Int32Builder).Append(int32(f.reflectValue().Int())) + } else { + b.(*array.Int32Builder).Append(int32(pv.Int())) + } + case arrow.INT64: + b.(*array.Int64Builder).Append(pv.Int()) + case arrow.FLOAT64: + b.(*array.Float64Builder).Append(pv.Float()) + case arrow.UINT32: + b.(*array.Uint32Builder).Append(uint32(pv.Uint())) + case arrow.UINT64: + b.(*array.Uint64Builder).Append(pv.Uint()) + case arrow.BOOL: + b.(*array.BooleanBuilder).Append(pv.Bool()) + case arrow.DENSE_UNION: + ub := b.(array.UnionBuilder) + pur := f.asUnion() + if pur.whichOne() == -1 { + ub.AppendNull() + break + } + ub.Append(pur.whichOne()) + cb := ub.Child(int(pur.whichOne())) + err := ProtobufMessageFieldReflection{ + parent: f.parent, + protobufReflection: pur.getField(), + Field: pur.arrowField(), + }.AppendValueOrNull(cb, mem) + if err != nil { + return err + } + case arrow.DICTIONARY: + pdr := f.asDictionary() + db := b.(*array.BinaryDictionaryBuilder) + err := db.InsertStringDictValues(pdr.getDictValues(mem).(*array.String)) + if err != nil { + return err + } + enumNum := int(f.reflectValue().Int()) + enumVal := fd.Enum().Values().ByNumber(protoreflect.EnumNumber(enumNum)).Name() + err = db.AppendValueFromString(string(enumVal)) + if err != nil { + return err + } + case arrow.STRUCT: + sb := b.(*array.StructBuilder) + sb.Append(true) + child := ProtobufMessageFieldReflection{ + parent: f.parent, + } + for i, field := range f.Field.Type.(*arrow.StructType).Fields() { + child.protobufReflection = f.asStruct().getFieldByName(field.Name) + child.Field = field + err := child.AppendValueOrNull(sb.FieldBuilder(i), mem) + if err != nil { + return err + } + } + case arrow.LIST: + lb := b.(*array.ListBuilder) + l := pv.List().Len() + if l == 0 { + lb.AppendEmptyValue() + break + } + lb.ValueBuilder().Reserve(l) + lb.Append(true) + child := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.ListType).ElemField(), + } + for li := range f.asList().generateListItems() { + child.protobufReflection = &li + err := child.AppendValueOrNull(lb.ValueBuilder(), mem) + if err != nil { + return err + } + } + case arrow.MAP: + mb := b.(*array.MapBuilder) + l := pv.Map().Len() + if l == 0 { + mb.AppendEmptyValue() + break + } + mb.KeyBuilder().Reserve(l) + mb.ItemBuilder().Reserve(l) + mb.Append(true) + k := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.MapType).KeyField(), + } + v := ProtobufMessageFieldReflection{ + parent: f.parent, + Field: f.Field.Type.(*arrow.MapType).ItemField(), + } + for kvp := range f.asMap().generateKeyValuePairs() { + k.protobufReflection = &kvp.k + err := k.AppendValueOrNull(mb.KeyBuilder(), mem) + if err != nil { + return err + } + v.protobufReflection = &kvp.v + err = v.AppendValueOrNull(mb.ItemBuilder(), mem) + if err != nil { + return err + } + } + default: + return fmt.Errorf("not able to appendValueOrNull for type %s", b.Type().ID()) + } + return nil +} diff --git a/go/arrow/util/protobuf_reflect_test.go b/go/arrow/util/protobuf_reflect_test.go new file mode 100644 index 0000000000000..ab3cbdf9a6b13 --- /dev/null +++ b/go/arrow/util/protobuf_reflect_test.go @@ -0,0 +1,311 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package util + +import ( + "strings" + "testing" + + "github.com/apache/arrow/go/v17/arrow" + "github.com/apache/arrow/go/v17/arrow/array" + "github.com/apache/arrow/go/v17/arrow/memory" + "github.com/apache/arrow/go/v17/arrow/util/util_message" + "github.com/huandu/xstrings" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/anypb" +) + +func SetupTest() util_message.AllTheTypes { + msg := util_message.ExampleMessage{ + Field1: "Example", + } + + anyMsg, _ := anypb.New(&msg) + + return util_message.AllTheTypes{ + Str: "Hello", + Int32: 10, + Int64: 100, + Sint32: -10, + Sin64: -100, + Uint32: 10, + Uint64: 100, + Fixed32: 10, + Fixed64: 1000, + Sfixed32: 10, + Bool: false, + Bytes: []byte("Hello, world!"), + Double: 1.1, + Enum: util_message.AllTheTypes_OPTION_1, + Message: &msg, + Oneof: &util_message.AllTheTypes_Oneofstring{Oneofstring: "World"}, + Any: anyMsg, + //Breaks the test as the Golang maps have a non-deterministic order + //SimpleMap: map[int32]string{99: "Hello", 100: "World", 98: "How", 101: "Are", 1: "You"}, + SimpleMap: map[int32]string{99: "Hello"}, + ComplexMap: map[string]*util_message.ExampleMessage{"complex": &msg}, + SimpleList: []string{"Hello", "World"}, + ComplexList: []*util_message.ExampleMessage{&msg}, + } +} + +func TestGetSchema(t *testing.T) { + msg := SetupTest() + + got := NewProtobufMessageReflection(&msg).Schema().String() + want := `schema: + fields: 22 + - str: type=utf8, nullable + - int32: type=int32, nullable + - int64: type=int64, nullable + - sint32: type=int32, nullable + - sin64: type=int64, nullable + - uint32: type=uint32, nullable + - uint64: type=uint64, nullable + - fixed32: type=uint32, nullable + - fixed64: type=uint64, nullable + - sfixed32: type=int32, nullable + - bool: type=bool, nullable + - bytes: type=binary, nullable + - double: type=float64, nullable + - enum: type=dictionary, nullable + - message: type=struct, nullable + - oneofstring: type=utf8, nullable + - oneofmessage: type=struct, nullable + - any: type=struct, nullable + - simple_map: type=map, nullable + - complex_map: type=map, items_nullable>, nullable + - simple_list: type=list, nullable + - complex_list: type=list, nullable>, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) + + got = NewProtobufMessageReflection(&msg, WithOneOfHandler(OneOfDenseUnion)).Schema().String() + want = `schema: + fields: 21 + - str: type=utf8, nullable + - int32: type=int32, nullable + - int64: type=int64, nullable + - sint32: type=int32, nullable + - sin64: type=int64, nullable + - uint32: type=uint32, nullable + - uint64: type=uint64, nullable + - fixed32: type=uint32, nullable + - fixed64: type=uint64, nullable + - sfixed32: type=int32, nullable + - bool: type=bool, nullable + - bytes: type=binary, nullable + - double: type=float64, nullable + - enum: type=dictionary, nullable + - message: type=struct, nullable + - 
oneof: type=dense_union, nullable=1>, nullable + - any: type=struct, nullable + - simple_map: type=map, nullable + - complex_map: type=map, items_nullable>, nullable + - simple_list: type=list, nullable + - complex_list: type=list, nullable>, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) + + excludeComplex := func(pfr *ProtobufFieldReflection) bool { + return pfr.isMap() || pfr.isList() || pfr.isStruct() + } + + got = NewProtobufMessageReflection(&msg, WithExclusionPolicy(excludeComplex)).Schema().String() + want = `schema: + fields: 15 + - str: type=utf8, nullable + - int32: type=int32, nullable + - int64: type=int64, nullable + - sint32: type=int32, nullable + - sin64: type=int64, nullable + - uint32: type=uint32, nullable + - uint64: type=uint64, nullable + - fixed32: type=uint32, nullable + - fixed64: type=uint64, nullable + - sfixed32: type=int32, nullable + - bool: type=bool, nullable + - bytes: type=binary, nullable + - double: type=float64, nullable + - enum: type=dictionary, nullable + - oneofstring: type=utf8, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) + + got = NewProtobufMessageReflection( + &msg, + WithExclusionPolicy(excludeComplex), + WithFieldNameFormatter(xstrings.ToCamelCase), + ).Schema().String() + want = `schema: + fields: 15 + - Str: type=utf8, nullable + - Int32: type=int32, nullable + - Int64: type=int64, nullable + - Sint32: type=int32, nullable + - Sin64: type=int64, nullable + - Uint32: type=uint32, nullable + - Uint64: type=uint64, nullable + - Fixed32: type=uint32, nullable + - Fixed64: type=uint64, nullable + - Sfixed32: type=int32, nullable + - Bool: type=bool, nullable + - Bytes: type=binary, nullable + - Double: type=float64, nullable + - Enum: type=dictionary, nullable + - Oneofstring: type=utf8, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) + + onlyEnum := func(pfr *ProtobufFieldReflection) bool { + return !pfr.isEnum() + } + got = NewProtobufMessageReflection( + &msg, + WithExclusionPolicy(onlyEnum), + WithEnumHandler(EnumNumber), + ).Schema().String() + want = `schema: + fields: 1 + - enum: type=int32, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) + + got = NewProtobufMessageReflection( + &msg, + WithExclusionPolicy(onlyEnum), + WithEnumHandler(EnumValue), + ).Schema().String() + want = `schema: + fields: 1 + - enum: type=utf8, nullable` + + require.Equal(t, want, got, "got: %s\nwant: %s", got, want) +} + +func TestRecordFromProtobuf(t *testing.T) { + msg := SetupTest() + + pmr := NewProtobufMessageReflection(&msg, WithOneOfHandler(OneOfDenseUnion)) + schema := pmr.Schema() + got := pmr.Record(nil) + jsonStr := `[ + { + "str":"Hello", + "int32":10, + "int64":100, + "sint32":-10, + "sin64":-100, + "uint32":10, + "uint64":100, + "fixed32":10, + "fixed64":1000, + "sfixed32":10, + "bool":false, + "bytes":"SGVsbG8sIHdvcmxkIQ==", + "double":1.1, + "enum":"OPTION_1", + "message":{"field1":"Example"}, + "oneof": [0, "World"], + "any":{"field1":"Example"}, + "simple_map":[{"key":99,"value":"Hello"}], + "complex_map":[{"key":"complex","value":{"field1":"Example"}}], + "simple_list":["Hello","World"], + "complex_list":[{"field1":"Example"}] + } + ]` + want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema, strings.NewReader(jsonStr)) + + require.NoError(t, err) + require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got, want) + + onlyEnum := func(pfr *ProtobufFieldReflection) bool { return !pfr.isEnum() } + pmr = 
NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumValue)) + got = pmr.Record(nil) + jsonStr = `[ { "enum":"OPTION_1" } ]` + want, _, err = array.RecordFromJSON(memory.NewGoAllocator(), pmr.Schema(), strings.NewReader(jsonStr)) + require.NoError(t, err) + require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got, want) + + pmr = NewProtobufMessageReflection(&msg, WithExclusionPolicy(onlyEnum), WithEnumHandler(EnumNumber)) + got = pmr.Record(nil) + jsonStr = `[ { "enum":"1" } ]` + want, _, err = array.RecordFromJSON(memory.NewGoAllocator(), pmr.Schema(), strings.NewReader(jsonStr)) + require.NoError(t, err) + require.True(t, array.RecordEqual(got, want), "got: %s\nwant: %s", got, want) +} + +func TestNullRecordFromProtobuf(t *testing.T) { + pmr := NewProtobufMessageReflection(&util_message.AllTheTypes{}) + schema := pmr.Schema() + got := pmr.Record(nil) + _, _ = got.MarshalJSON() + jsonStr := `[ + { + "str":"", + "int32":0, + "int64":0, + "sint32":0, + "sin64":0, + "uint32":0, + "uint64":0, + "fixed32":0, + "fixed64":0, + "sfixed32":0, + "bool":false, + "bytes":"", + "double":0, + "enum":"OPTION_0", + "message":null, + "oneofmessage":{"field1":""}, + "oneofstring":"", + "any":null, + "simple_map":[], + "complex_map":[], + "simple_list":[], + "complex_list":[] + } + ]` + + want, _, err := array.RecordFromJSON(memory.NewGoAllocator(), schema, strings.NewReader(jsonStr)) + + require.NoError(t, err) + require.EqualExportedValues(t, got, want, "got: %s\nwant: %s", got, want) +} + +type testProtobufReflection struct { + ProtobufFieldReflection +} + +func (tpr testProtobufReflection) isNull() bool { + return false +} + +func TestAppendValueOrNull(t *testing.T) { + unsupportedField := arrow.Field{Name: "Test", Type: arrow.FixedWidthTypes.Time32s} + schema := arrow.NewSchema([]arrow.Field{unsupportedField}, nil) + mem := memory.NewGoAllocator() + recordBuilder := array.NewRecordBuilder(mem, schema) + pmfr := ProtobufMessageFieldReflection{ + protobufReflection: &testProtobufReflection{}, + Field: arrow.Field{Name: "Test", Type: arrow.FixedWidthTypes.Time32s}, + } + got := pmfr.AppendValueOrNull(recordBuilder.Field(0), mem) + want := "not able to appendValueOrNull for type TIME32" + assert.EqualErrorf(t, got, want, "Error is: %v, want: %v", got, want) +} diff --git a/go/arrow/util/util_message/types.pb.go b/go/arrow/util/util_message/types.pb.go new file mode 100644 index 0000000000000..80e18847c1970 --- /dev/null +++ b/go/arrow/util/util_message/types.pb.go @@ -0,0 +1,539 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. 
+// versions: +// protoc-gen-go v1.31.0 +// protoc v4.24.4 +// source: messages/types.proto + +package util_message + +import ( + reflect "reflect" + sync "sync" + + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + anypb "google.golang.org/protobuf/types/known/anypb" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type AllTheTypes_ExampleEnum int32 + +const ( + AllTheTypes_OPTION_0 AllTheTypes_ExampleEnum = 0 + AllTheTypes_OPTION_1 AllTheTypes_ExampleEnum = 1 +) + +// Enum value maps for AllTheTypes_ExampleEnum. +var ( + AllTheTypes_ExampleEnum_name = map[int32]string{ + 0: "OPTION_0", + 1: "OPTION_1", + } + AllTheTypes_ExampleEnum_value = map[string]int32{ + "OPTION_0": 0, + "OPTION_1": 1, + } +) + +func (x AllTheTypes_ExampleEnum) Enum() *AllTheTypes_ExampleEnum { + p := new(AllTheTypes_ExampleEnum) + *p = x + return p +} + +func (x AllTheTypes_ExampleEnum) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (AllTheTypes_ExampleEnum) Descriptor() protoreflect.EnumDescriptor { + return file_messages_types_proto_enumTypes[0].Descriptor() +} + +func (AllTheTypes_ExampleEnum) Type() protoreflect.EnumType { + return &file_messages_types_proto_enumTypes[0] +} + +func (x AllTheTypes_ExampleEnum) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use AllTheTypes_ExampleEnum.Descriptor instead. +func (AllTheTypes_ExampleEnum) EnumDescriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{1, 0} +} + +type ExampleMessage struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Field1 string `protobuf:"bytes,1,opt,name=field1,proto3" json:"field1,omitempty"` +} + +func (x *ExampleMessage) Reset() { + *x = ExampleMessage{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_types_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ExampleMessage) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ExampleMessage) ProtoMessage() {} + +func (x *ExampleMessage) ProtoReflect() protoreflect.Message { + mi := &file_messages_types_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ExampleMessage.ProtoReflect.Descriptor instead. 
+func (*ExampleMessage) Descriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{0} +} + +func (x *ExampleMessage) GetField1() string { + if x != nil { + return x.Field1 + } + return "" +} + +type AllTheTypes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Str string `protobuf:"bytes,1,opt,name=str,proto3" json:"str,omitempty"` + Int32 int32 `protobuf:"varint,2,opt,name=int32,proto3" json:"int32,omitempty"` + Int64 int64 `protobuf:"varint,3,opt,name=int64,proto3" json:"int64,omitempty"` + Sint32 int32 `protobuf:"zigzag32,4,opt,name=sint32,proto3" json:"sint32,omitempty"` + Sin64 int64 `protobuf:"zigzag64,5,opt,name=sin64,proto3" json:"sin64,omitempty"` + Uint32 uint32 `protobuf:"varint,6,opt,name=uint32,proto3" json:"uint32,omitempty"` + Uint64 uint64 `protobuf:"varint,7,opt,name=uint64,proto3" json:"uint64,omitempty"` + Fixed32 uint32 `protobuf:"fixed32,8,opt,name=fixed32,proto3" json:"fixed32,omitempty"` + Fixed64 uint64 `protobuf:"fixed64,9,opt,name=fixed64,proto3" json:"fixed64,omitempty"` + Sfixed32 int32 `protobuf:"fixed32,10,opt,name=sfixed32,proto3" json:"sfixed32,omitempty"` + Bool bool `protobuf:"varint,11,opt,name=bool,proto3" json:"bool,omitempty"` + Bytes []byte `protobuf:"bytes,12,opt,name=bytes,proto3" json:"bytes,omitempty"` + Double float64 `protobuf:"fixed64,13,opt,name=double,proto3" json:"double,omitempty"` + Enum AllTheTypes_ExampleEnum `protobuf:"varint,14,opt,name=enum,proto3,enum=AllTheTypes_ExampleEnum" json:"enum,omitempty"` + Message *ExampleMessage `protobuf:"bytes,15,opt,name=message,proto3" json:"message,omitempty"` + // Types that are assignable to Oneof: + // + // *AllTheTypes_Oneofstring + // *AllTheTypes_Oneofmessage + Oneof isAllTheTypes_Oneof `protobuf_oneof:"oneof"` + Any *anypb.Any `protobuf:"bytes,18,opt,name=any,proto3" json:"any,omitempty"` + SimpleMap map[int32]string `protobuf:"bytes,19,rep,name=simple_map,json=simpleMap,proto3" json:"simple_map,omitempty" protobuf_key:"varint,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + ComplexMap map[string]*ExampleMessage `protobuf:"bytes,20,rep,name=complex_map,json=complexMap,proto3" json:"complex_map,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + SimpleList []string `protobuf:"bytes,21,rep,name=simple_list,json=simpleList,proto3" json:"simple_list,omitempty"` + ComplexList []*ExampleMessage `protobuf:"bytes,22,rep,name=complex_list,json=complexList,proto3" json:"complex_list,omitempty"` +} + +func (x *AllTheTypes) Reset() { + *x = AllTheTypes{} + if protoimpl.UnsafeEnabled { + mi := &file_messages_types_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *AllTheTypes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*AllTheTypes) ProtoMessage() {} + +func (x *AllTheTypes) ProtoReflect() protoreflect.Message { + mi := &file_messages_types_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use AllTheTypes.ProtoReflect.Descriptor instead. 
+func (*AllTheTypes) Descriptor() ([]byte, []int) { + return file_messages_types_proto_rawDescGZIP(), []int{1} +} + +func (x *AllTheTypes) GetStr() string { + if x != nil { + return x.Str + } + return "" +} + +func (x *AllTheTypes) GetInt32() int32 { + if x != nil { + return x.Int32 + } + return 0 +} + +func (x *AllTheTypes) GetInt64() int64 { + if x != nil { + return x.Int64 + } + return 0 +} + +func (x *AllTheTypes) GetSint32() int32 { + if x != nil { + return x.Sint32 + } + return 0 +} + +func (x *AllTheTypes) GetSin64() int64 { + if x != nil { + return x.Sin64 + } + return 0 +} + +func (x *AllTheTypes) GetUint32() uint32 { + if x != nil { + return x.Uint32 + } + return 0 +} + +func (x *AllTheTypes) GetUint64() uint64 { + if x != nil { + return x.Uint64 + } + return 0 +} + +func (x *AllTheTypes) GetFixed32() uint32 { + if x != nil { + return x.Fixed32 + } + return 0 +} + +func (x *AllTheTypes) GetFixed64() uint64 { + if x != nil { + return x.Fixed64 + } + return 0 +} + +func (x *AllTheTypes) GetSfixed32() int32 { + if x != nil { + return x.Sfixed32 + } + return 0 +} + +func (x *AllTheTypes) GetBool() bool { + if x != nil { + return x.Bool + } + return false +} + +func (x *AllTheTypes) GetBytes() []byte { + if x != nil { + return x.Bytes + } + return nil +} + +func (x *AllTheTypes) GetDouble() float64 { + if x != nil { + return x.Double + } + return 0 +} + +func (x *AllTheTypes) GetEnum() AllTheTypes_ExampleEnum { + if x != nil { + return x.Enum + } + return AllTheTypes_OPTION_0 +} + +func (x *AllTheTypes) GetMessage() *ExampleMessage { + if x != nil { + return x.Message + } + return nil +} + +func (m *AllTheTypes) GetOneof() isAllTheTypes_Oneof { + if m != nil { + return m.Oneof + } + return nil +} + +func (x *AllTheTypes) GetOneofstring() string { + if x, ok := x.GetOneof().(*AllTheTypes_Oneofstring); ok { + return x.Oneofstring + } + return "" +} + +func (x *AllTheTypes) GetOneofmessage() *ExampleMessage { + if x, ok := x.GetOneof().(*AllTheTypes_Oneofmessage); ok { + return x.Oneofmessage + } + return nil +} + +func (x *AllTheTypes) GetAny() *anypb.Any { + if x != nil { + return x.Any + } + return nil +} + +func (x *AllTheTypes) GetSimpleMap() map[int32]string { + if x != nil { + return x.SimpleMap + } + return nil +} + +func (x *AllTheTypes) GetComplexMap() map[string]*ExampleMessage { + if x != nil { + return x.ComplexMap + } + return nil +} + +func (x *AllTheTypes) GetSimpleList() []string { + if x != nil { + return x.SimpleList + } + return nil +} + +func (x *AllTheTypes) GetComplexList() []*ExampleMessage { + if x != nil { + return x.ComplexList + } + return nil +} + +type isAllTheTypes_Oneof interface { + isAllTheTypes_Oneof() +} + +type AllTheTypes_Oneofstring struct { + Oneofstring string `protobuf:"bytes,16,opt,name=oneofstring,proto3,oneof"` +} + +type AllTheTypes_Oneofmessage struct { + Oneofmessage *ExampleMessage `protobuf:"bytes,17,opt,name=oneofmessage,proto3,oneof"` +} + +func (*AllTheTypes_Oneofstring) isAllTheTypes_Oneof() {} + +func (*AllTheTypes_Oneofmessage) isAllTheTypes_Oneof() {} + +var File_messages_types_proto protoreflect.FileDescriptor + +var file_messages_types_proto_rawDesc = []byte{ + 0x0a, 0x14, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x73, 0x2f, 0x74, 0x79, 0x70, 0x65, 0x73, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x19, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x61, 0x6e, 0x79, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x22, 0x28, 0x0a, 0x0e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 
0x65, 0x73, 0x73, + 0x61, 0x67, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x31, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x31, 0x22, 0xa9, 0x07, 0x0a, 0x0b, + 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x73, + 0x74, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x73, 0x74, 0x72, 0x12, 0x14, 0x0a, + 0x05, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x18, 0x02, 0x20, 0x01, 0x28, 0x05, 0x52, 0x05, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x12, 0x14, 0x0a, 0x05, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x18, 0x03, 0x20, 0x01, + 0x28, 0x03, 0x52, 0x05, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x69, 0x6e, + 0x74, 0x33, 0x32, 0x18, 0x04, 0x20, 0x01, 0x28, 0x11, 0x52, 0x06, 0x73, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x69, 0x6e, 0x36, 0x34, 0x18, 0x05, 0x20, 0x01, 0x28, 0x12, + 0x52, 0x05, 0x73, 0x69, 0x6e, 0x36, 0x34, 0x12, 0x16, 0x0a, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x33, + 0x32, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0d, 0x52, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x33, 0x32, 0x12, + 0x16, 0x0a, 0x06, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x18, 0x07, 0x20, 0x01, 0x28, 0x04, 0x52, + 0x06, 0x75, 0x69, 0x6e, 0x74, 0x36, 0x34, 0x12, 0x18, 0x0a, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, + 0x33, 0x32, 0x18, 0x08, 0x20, 0x01, 0x28, 0x07, 0x52, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, + 0x32, 0x12, 0x18, 0x0a, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x36, 0x34, 0x18, 0x09, 0x20, 0x01, + 0x28, 0x06, 0x52, 0x07, 0x66, 0x69, 0x78, 0x65, 0x64, 0x36, 0x34, 0x12, 0x1a, 0x0a, 0x08, 0x73, + 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, 0x32, 0x18, 0x0a, 0x20, 0x01, 0x28, 0x0f, 0x52, 0x08, 0x73, + 0x66, 0x69, 0x78, 0x65, 0x64, 0x33, 0x32, 0x12, 0x12, 0x0a, 0x04, 0x62, 0x6f, 0x6f, 0x6c, 0x18, + 0x0b, 0x20, 0x01, 0x28, 0x08, 0x52, 0x04, 0x62, 0x6f, 0x6f, 0x6c, 0x12, 0x14, 0x0a, 0x05, 0x62, + 0x79, 0x74, 0x65, 0x73, 0x18, 0x0c, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x05, 0x62, 0x79, 0x74, 0x65, + 0x73, 0x12, 0x16, 0x0a, 0x06, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x18, 0x0d, 0x20, 0x01, 0x28, + 0x01, 0x52, 0x06, 0x64, 0x6f, 0x75, 0x62, 0x6c, 0x65, 0x12, 0x2c, 0x0a, 0x04, 0x65, 0x6e, 0x75, + 0x6d, 0x18, 0x0e, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, + 0x54, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x45, 0x6e, 0x75, + 0x6d, 0x52, 0x04, 0x65, 0x6e, 0x75, 0x6d, 0x12, 0x29, 0x0a, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x18, 0x0f, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, + 0x6c, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x07, 0x6d, 0x65, 0x73, 0x73, 0x61, + 0x67, 0x65, 0x12, 0x22, 0x0a, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x73, 0x74, 0x72, 0x69, 0x6e, + 0x67, 0x18, 0x10, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x0b, 0x6f, 0x6e, 0x65, 0x6f, 0x66, + 0x73, 0x74, 0x72, 0x69, 0x6e, 0x67, 0x12, 0x35, 0x0a, 0x0c, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x6d, + 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x18, 0x11, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, + 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x48, 0x00, 0x52, + 0x0c, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x12, 0x26, 0x0a, + 0x03, 0x61, 0x6e, 0x79, 0x18, 0x12, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x41, 0x6e, 0x79, + 0x52, 0x03, 0x61, 0x6e, 0x79, 0x12, 0x3a, 0x0a, 0x0a, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x5f, + 0x6d, 
0x61, 0x70, 0x18, 0x13, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x41, 0x6c, 0x6c, 0x54, + 0x68, 0x65, 0x54, 0x79, 0x70, 0x65, 0x73, 0x2e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x09, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, 0x61, + 0x70, 0x12, 0x3d, 0x0a, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x5f, 0x6d, 0x61, 0x70, + 0x18, 0x14, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x41, 0x6c, 0x6c, 0x54, 0x68, 0x65, 0x54, + 0x79, 0x70, 0x65, 0x73, 0x2e, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, 0x70, 0x45, + 0x6e, 0x74, 0x72, 0x79, 0x52, 0x0a, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, 0x70, + 0x12, 0x1f, 0x0a, 0x0b, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x18, + 0x15, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x73, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4c, 0x69, 0x73, + 0x74, 0x12, 0x32, 0x0a, 0x0c, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x5f, 0x6c, 0x69, 0x73, + 0x74, 0x18, 0x16, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x0b, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65, + 0x78, 0x4c, 0x69, 0x73, 0x74, 0x1a, 0x3c, 0x0a, 0x0e, 0x53, 0x69, 0x6d, 0x70, 0x6c, 0x65, 0x4d, + 0x61, 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x05, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x1a, 0x4e, 0x0a, 0x0f, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x78, 0x4d, 0x61, + 0x70, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x25, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, + 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0f, 0x2e, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, + 0x65, 0x4d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, + 0x02, 0x38, 0x01, 0x22, 0x29, 0x0a, 0x0b, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x45, 0x6e, + 0x75, 0x6d, 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x30, 0x10, 0x00, + 0x12, 0x0c, 0x0a, 0x08, 0x4f, 0x50, 0x54, 0x49, 0x4f, 0x4e, 0x5f, 0x31, 0x10, 0x01, 0x42, 0x07, + 0x0a, 0x05, 0x6f, 0x6e, 0x65, 0x6f, 0x66, 0x42, 0x11, 0x5a, 0x0f, 0x2e, 0x2e, 0x2f, 0x75, 0x74, + 0x69, 0x6c, 0x5f, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_messages_types_proto_rawDescOnce sync.Once + file_messages_types_proto_rawDescData = file_messages_types_proto_rawDesc +) + +func file_messages_types_proto_rawDescGZIP() []byte { + file_messages_types_proto_rawDescOnce.Do(func() { + file_messages_types_proto_rawDescData = protoimpl.X.CompressGZIP(file_messages_types_proto_rawDescData) + }) + return file_messages_types_proto_rawDescData +} + +var file_messages_types_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_messages_types_proto_msgTypes = make([]protoimpl.MessageInfo, 4) +var file_messages_types_proto_goTypes = []interface{}{ + (AllTheTypes_ExampleEnum)(0), // 0: AllTheTypes.ExampleEnum + (*ExampleMessage)(nil), // 1: ExampleMessage + (*AllTheTypes)(nil), // 2: AllTheTypes + nil, // 3: AllTheTypes.SimpleMapEntry + nil, // 4: AllTheTypes.ComplexMapEntry + (*anypb.Any)(nil), // 5: google.protobuf.Any +} +var file_messages_types_proto_depIdxs = []int32{ + 0, // 0: AllTheTypes.enum:type_name -> 
AllTheTypes.ExampleEnum + 1, // 1: AllTheTypes.message:type_name -> ExampleMessage + 1, // 2: AllTheTypes.oneofmessage:type_name -> ExampleMessage + 5, // 3: AllTheTypes.any:type_name -> google.protobuf.Any + 3, // 4: AllTheTypes.simple_map:type_name -> AllTheTypes.SimpleMapEntry + 4, // 5: AllTheTypes.complex_map:type_name -> AllTheTypes.ComplexMapEntry + 1, // 6: AllTheTypes.complex_list:type_name -> ExampleMessage + 1, // 7: AllTheTypes.ComplexMapEntry.value:type_name -> ExampleMessage + 8, // [8:8] is the sub-list for method output_type + 8, // [8:8] is the sub-list for method input_type + 8, // [8:8] is the sub-list for extension type_name + 8, // [8:8] is the sub-list for extension extendee + 0, // [0:8] is the sub-list for field type_name +} + +func init() { file_messages_types_proto_init() } +func file_messages_types_proto_init() { + if File_messages_types_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_messages_types_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ExampleMessage); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_messages_types_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*AllTheTypes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_messages_types_proto_msgTypes[1].OneofWrappers = []interface{}{ + (*AllTheTypes_Oneofstring)(nil), + (*AllTheTypes_Oneofmessage)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_messages_types_proto_rawDesc, + NumEnums: 1, + NumMessages: 4, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_messages_types_proto_goTypes, + DependencyIndexes: file_messages_types_proto_depIdxs, + EnumInfos: file_messages_types_proto_enumTypes, + MessageInfos: file_messages_types_proto_msgTypes, + }.Build() + File_messages_types_proto = out.File + file_messages_types_proto_rawDesc = nil + file_messages_types_proto_goTypes = nil + file_messages_types_proto_depIdxs = nil +} diff --git a/go/go.mod b/go/go.mod index b6a3ed207c6ad..b6fccf6735254 100644 --- a/go/go.mod +++ b/go/go.mod @@ -46,7 +46,9 @@ require ( ) require ( + github.com/golang/protobuf v1.5.4 github.com/google/uuid v1.6.0 + github.com/huandu/xstrings v1.4.0 github.com/hamba/avro/v2 v2.22.1 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 diff --git a/go/go.sum b/go/go.sum index 79350f4a1cf27..d963493108d86 100644 --- a/go/go.sum +++ b/go/go.sum @@ -49,6 +49,8 @@ github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= +github.com/huandu/xstrings v1.4.0 h1:D17IlohoQq4UcpqD7fDk80P7l+lwAmlFaBHgOipl2FU= +github.com/huandu/xstrings v1.4.0/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/klauspost/asmfmt v1.3.2 h1:4Ri7ox3EwapiOjCki+hw14RyKk201CN4rzyCJRFLpK4= From 
e4baf6be2167eb6ccbda90275304336f49998eac Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 5 Jun 2024 01:41:30 +0200 Subject: [PATCH 226/261] GH-38553 : [C++] Replace null_count with MayHaveNulls in ListArrayFromArray and MapArray (#41957) ### Rationale for this change Offsets could have `null_count() == -1` (`kUnknownNullCount`) meaning that offsets might contain nulls that are not accounted for which can produce failures (https://github.com/apache/arrow/issues/38553) when working with `ListArray` or `MapArray`. `null_count()` should be replaced with `MayHaveNulls()`. ### What changes are included in this PR? `null_count` is replaced with `MayHaveNulls` in `ListArrayFromArray`, `MapArray::FromArraysInternal` and `MapArray::ValidateChildData`. Some tests had to be updated. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #38553 Authored-by: AlenkaF Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/array_list_test.cc | 2 +- cpp/src/arrow/array/array_nested.cc | 8 ++++---- python/pyarrow/tests/test_array.py | 14 +++++++++++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cpp/src/arrow/array/array_list_test.cc b/cpp/src/arrow/array/array_list_test.cc index 55f91dc34167b..063b68706b313 100644 --- a/cpp/src/arrow/array/array_list_test.cc +++ b/cpp/src/arrow/array/array_list_test.cc @@ -1383,7 +1383,7 @@ TEST_F(TestMapArray, FromArrays) { // Null bitmap and offset with offset ASSERT_RAISES(NotImplemented, - MapArray::FromArrays(offsets3->Slice(2), keys, items, pool_, + MapArray::FromArrays(offsets1->Slice(2), keys, items, pool_, offsets3->data()->buffers[0])); } diff --git a/cpp/src/arrow/array/array_nested.cc b/cpp/src/arrow/array/array_nested.cc index bb5c6bf018006..2f6bca3d571ed 100644 --- a/cpp/src/arrow/array/array_nested.cc +++ b/cpp/src/arrow/array/array_nested.cc @@ -115,7 +115,7 @@ Result::ArrayType>> ListArrayFromArray return Status::TypeError("List offsets must be ", OffsetArrowType::type_name()); } - if (null_bitmap != nullptr && offsets.null_count() > 0) { + if (null_bitmap != nullptr && offsets.data()->MayHaveNulls()) { return Status::Invalid( "Ambiguous to specify both validity map and offsets with nulls"); } @@ -827,7 +827,7 @@ Result> MapArray::FromArraysInternal( return Status::Invalid("Map key and item arrays must be equal length"); } - if (null_bitmap != nullptr && offsets->null_count() > 0) { + if (null_bitmap != nullptr && offsets->data()->MayHaveNulls()) { return Status::Invalid( "Ambiguous to specify both validity map and offsets with nulls"); } @@ -893,13 +893,13 @@ Status MapArray::ValidateChildData( if (pair_data->type->id() != Type::STRUCT) { return Status::Invalid("Map array child array should have struct type"); } - if (pair_data->null_count != 0) { + if (pair_data->MayHaveNulls()) { return Status::Invalid("Map array child array should have no nulls"); } if (pair_data->child_data.size() != 2) { return Status::Invalid("Map array child array should have two fields"); } - if (pair_data->child_data[0]->null_count != 0) { + if (pair_data->child_data[0]->MayHaveNulls()) { return Status::Invalid("Map array keys array should have no nulls"); } return Status::OK(); diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index 49a00517fca9f..88394c77e429d 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -1013,6 +1013,18 @@ def test_list_array_types_from_arrays_fail(list_array_type, list_type_factory): arr_slice.offsets, 
arr_slice.values, mask=arr_slice.is_null()) +def test_map_cast(): + # GH-38553 + t = pa.map_(pa.int64(), pa.int64()) + arr = pa.array([{1: 2}], type=t) + result = arr.cast(pa.map_(pa.int32(), pa.int64())) + + t_expected = pa.map_(pa.int32(), pa.int64()) + expected = pa.array([{1: 2}], type=t_expected) + + assert result.equals(expected) + + def test_map_labelled(): # ARROW-13735 t = pa.map_(pa.field("name", "string", nullable=False), "int64") @@ -1105,7 +1117,7 @@ def test_map_from_arrays(): # error if null bitmap passed to sliced offset msg2 = 'Null bitmap with offsets slice not supported.' - offsets = pa.array(offsets, pa.int32()) + offsets = pa.array([0, 2, 2, 6], pa.int32()) with pytest.raises(pa.ArrowNotImplementedError, match=msg2): pa.MapArray.from_arrays(offsets.slice(2), keys, items, pa.map_( keys.type, From fb0773cfcc7d7c3d8b86de022c4d0dc1ae709f91 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 5 Jun 2024 10:38:53 +0900 Subject: [PATCH 227/261] MINOR: [JS] Bump eslint-plugin-unicorn from 52.0.0 to 53.0.0 in /js (#41916) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [eslint-plugin-unicorn](https://github.com/sindresorhus/eslint-plugin-unicorn) from 52.0.0 to 53.0.0.
Release notes

Sourced from eslint-plugin-unicorn's releases.

v53.0.0

New rules

Breaking

  • Require Node.js 18.18 (#2250) 598f57b

Meta

Improvements

  • Support ESLint 9 (#2250) 598f57b
  • no-array-method-this-argument: Check Array.fromAsync() (#2330) 99489b9
  • prefer-number-properties: Add checkNaN option (#2315) d30de50
  • template-indent: Support member expression paths in tags and functions (#2346) aabcf1d
  • prefer-number-properties: Don't require by default for Infinity/-Infinity to be written as Number.POSITIVE_INFINITY/Number.NEGATIVE_INFINITY (#2312) e0dfed2
  • escape-case: Ignore String.raw (#2342) 45bd444
  • no-hex-escape: Ignore String.raw (#2343) cc02a7f
  • prefer-dom-node-dataset: Ignore awaited getAttribute call (#2334) 45f23d5
  • prevent-abbreviations: Support non-ASCII filenames (#2308) 28762c8
  • throw-new-error: Check all call expressions instead of just argument of ThrowStatement (#2332) 1626852

https://github.com/sindresorhus/eslint-plugin-unicorn/compare/v52.0.0...v53.0.0

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=eslint-plugin-unicorn&package-manager=npm_and_yarn&previous-version=52.0.0&new-version=53.0.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores) Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@ dependabot rebase`. [//]: # (dependabot-automerge-start) [//]: # (dependabot-automerge-end) ---
Dependabot commands and options
You can trigger Dependabot actions by commenting on this PR: - `@ dependabot rebase` will rebase this PR - `@ dependabot recreate` will recreate this PR, overwriting any edits that have been made to it - `@ dependabot merge` will merge this PR after your CI passes on it - `@ dependabot squash and merge` will squash and merge this PR after your CI passes on it - `@ dependabot cancel merge` will cancel a previously requested merge and block automerging - `@ dependabot reopen` will reopen this PR if it is closed - `@ dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually - `@ dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency - `@ dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself) - `@ dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Signed-off-by: Sutou Kouhei --- js/package.json | 2 +- js/yarn.lock | 78 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 56 insertions(+), 24 deletions(-) diff --git a/js/package.json b/js/package.json index b55b637e2750d..e7be3c80d82c9 100644 --- a/js/package.json +++ b/js/package.json @@ -83,7 +83,7 @@ "esbuild-plugin-alias": "0.2.1", "eslint": "8.57.0", "eslint-plugin-jest": "28.5.0", - "eslint-plugin-unicorn": "52.0.0", + "eslint-plugin-unicorn": "53.0.0", "esm": "https://github.com/jsg2021/esm/releases/download/v3.x.x-pr883/esm-3.x.x-pr883.tgz", "gulp": "4.0.2", "glob": "10.4.1", diff --git a/js/yarn.lock b/js/yarn.lock index ec311730b8918..3cf3284a9f306 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -174,10 +174,10 @@ resolved "https://registry.yarnpkg.com/@babel/helper-string-parser/-/helper-string-parser-7.24.1.tgz#f99c36d3593db9540705d0739a1f10b5e20c696e" integrity sha512-2ofRCjnnA9y+wk8b9IAREroeUP02KHp431N2mhKniy2yKIDKpbrHv9eXwm8cBeWQYcJmzv5qKCu65P47eCF7CQ== -"@babel/helper-validator-identifier@^7.22.20": - version "7.22.20" - resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.22.20.tgz#c4ae002c61d2879e724581d96665583dbc1dc0e0" - integrity sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A== +"@babel/helper-validator-identifier@^7.22.20", "@babel/helper-validator-identifier@^7.24.5": + version "7.24.6" + resolved "https://registry.yarnpkg.com/@babel/helper-validator-identifier/-/helper-validator-identifier-7.24.6.tgz#08bb6612b11bdec78f3feed3db196da682454a5e" + integrity sha512-4yA7s865JHaqUdRbnaxarZREuPTHrjpDT+pXoAZ1yhyo6uFnIEpS8VMu16siFOHDpZNKYv5BObhsB//ycbICyw== "@babel/helper-validator-option@^7.23.5": version "7.23.5" @@ -614,6 +614,21 @@ minimatch "^3.1.2" strip-json-comments "^3.1.1" +"@eslint/eslintrc@^3.0.2": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@eslint/eslintrc/-/eslintrc-3.1.0.tgz#dbd3482bfd91efa663cbe7aa1f506839868207b6" + integrity sha512-4Bfj15dVJdoy3RfZmmo86RK1Fwzn6SstsvK9JS+BaVKqC6QQQQyXekNaC+g+LKNgkQ+2VhGAzm6hO40AhMR3zQ== + dependencies: + ajv "^6.12.4" + debug "^4.3.2" + espree "^10.0.1" + globals "^14.0.0" + ignore "^5.2.0" + import-fresh "^3.2.1" + js-yaml "^4.1.0" + minimatch "^3.1.2" + strip-json-comments "^3.1.1" + "@eslint/js@8.57.0": version "8.57.0" resolved "https://registry.yarnpkg.com/@eslint/js/-/js-8.57.0.tgz#a5417ae8427873f1dd08b70b3574b453e67b5f7f" @@ -1691,7 +1706,7 @@ acorn@^6.4.1: resolved "https://registry.yarnpkg.com/acorn/-/acorn-6.4.2.tgz#35866fd710528e92de10cf06016498e47e39e1e6" integrity sha512-XtGIhXwF8YM8bJhGxG5kXgjkEuNGLTkoYqVE+KMR+aspr4KGYmKYg7yUe3KghyQ9yheNwLnjmzh/7+gfDBmHCQ== -acorn@^8.0.4, acorn@^8.4.1, acorn@^8.7.1, acorn@^8.8.2, acorn@^8.9.0: +acorn@^8.0.4, acorn@^8.11.3, acorn@^8.4.1, acorn@^8.7.1, acorn@^8.8.2, acorn@^8.9.0: version "8.11.3" resolved "https://registry.yarnpkg.com/acorn/-/acorn-8.11.3.tgz#71e0b14e13a4ec160724b38fb7b0f233b1b81d7a" integrity sha512-Y9rRfJG5jcKOE0CLisYbojUjIrIEE7AGMzA/Sm4BslANhbS+cDMpgBdcPT91oJ7OuJ9hYJBx59RjbhxVnrF8Xg== @@ -2547,10 +2562,10 @@ copy-props@^2.0.1: each-props "^1.3.2" is-plain-object "^5.0.0" -core-js-compat@^3.34.0: - version "3.36.1" - resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.36.1.tgz#1818695d72c99c25d621dca94e6883e190cea3c8" - integrity sha512-Dk997v9ZCt3X/npqzyGdTlq6t7lDBhZwGvV94PKzDArjp7BTRm7WlDAXYd/OWdeFHO8OChQYRJNJvUCqCbrtKA== 
+core-js-compat@^3.37.0: + version "3.37.1" + resolved "https://registry.yarnpkg.com/core-js-compat/-/core-js-compat-3.37.1.tgz#c844310c7852f4bdf49b8d339730b97e17ff09ee" + integrity sha512-9TNiImhKvQqSUkOvk/mMRZzOANTiEVC7WaBNhHcKM7x+/5E1l5NvsysR19zuDQScE8k+kfQXWRN3AtS/eOSHpg== dependencies: browserslist "^4.23.0" @@ -3022,17 +3037,17 @@ eslint-plugin-jest@28.5.0: dependencies: "@typescript-eslint/utils" "^6.0.0 || ^7.0.0" -eslint-plugin-unicorn@52.0.0: - version "52.0.0" - resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-52.0.0.tgz#c7a559edd52e3932cf2b3a05c3b0efc604c1eeb8" - integrity sha512-1Yzm7/m+0R4djH0tjDjfVei/ju2w3AzUGjG6q8JnuNIL5xIwsflyCooW5sfBvQp2pMYQFSWWCFONsjCax1EHng== +eslint-plugin-unicorn@53.0.0: + version "53.0.0" + resolved "https://registry.yarnpkg.com/eslint-plugin-unicorn/-/eslint-plugin-unicorn-53.0.0.tgz#df3a5c9ecabeb759e6fd867b2d84198466ac8c4d" + integrity sha512-kuTcNo9IwwUCfyHGwQFOK/HjJAYzbODHN3wP0PgqbW+jbXqpNWxNVpVhj2tO9SixBwuAdmal8rVcWKBxwFnGuw== dependencies: - "@babel/helper-validator-identifier" "^7.22.20" + "@babel/helper-validator-identifier" "^7.24.5" "@eslint-community/eslint-utils" "^4.4.0" - "@eslint/eslintrc" "^2.1.4" + "@eslint/eslintrc" "^3.0.2" ci-info "^4.0.0" clean-regexp "^1.0.0" - core-js-compat "^3.34.0" + core-js-compat "^3.37.0" esquery "^1.5.0" indent-string "^4.0.0" is-builtin-module "^3.2.1" @@ -3041,7 +3056,7 @@ eslint-plugin-unicorn@52.0.0: read-pkg-up "^7.0.1" regexp-tree "^0.1.27" regjsparser "^0.10.0" - semver "^7.5.4" + semver "^7.6.1" strip-indent "^3.0.0" eslint-scope@5.1.1: @@ -3065,6 +3080,11 @@ eslint-visitor-keys@^3.3.0, eslint-visitor-keys@^3.4.1, eslint-visitor-keys@^3.4 resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz#0cd72fe8550e3c2eae156a96a4dddcd1c8ac5800" integrity sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag== +eslint-visitor-keys@^4.0.0: + version "4.0.0" + resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-4.0.0.tgz#e3adc021aa038a2a8e0b2f8b0ce8f66b9483b1fb" + integrity sha512-OtIRv/2GyiF6o/d8K7MYKKbXrOUBIK6SfkIRM4Z0dY3w+LiQ0vy3F57m0Z71bjbyeiWFiHJ8brqnmE6H6/jEuw== + eslint@8.57.0: version "8.57.0" resolved "https://registry.yarnpkg.com/eslint/-/eslint-8.57.0.tgz#c786a6fd0e0b68941aaf624596fb987089195668" @@ -3123,6 +3143,15 @@ esniff@^2.0.1: event-emitter "^0.3.5" type "^2.7.2" +espree@^10.0.1: + version "10.0.1" + resolved "https://registry.yarnpkg.com/espree/-/espree-10.0.1.tgz#600e60404157412751ba4a6f3a2ee1a42433139f" + integrity sha512-MWkrWZbJsL2UwnjxTX3gG8FneachS/Mwg7tdGXce011sJd5b0JG54vat5KHnfSBODZ3Wvzd2WnjxyzsRoVv+ww== + dependencies: + acorn "^8.11.3" + acorn-jsx "^5.3.2" + eslint-visitor-keys "^4.0.0" + espree@^9.6.0, espree@^9.6.1: version "9.6.1" resolved "https://registry.yarnpkg.com/espree/-/espree-9.6.1.tgz#a2a17b8e434690a5432f2f8018ce71d331a48c6f" @@ -3690,6 +3719,11 @@ globals@^13.19.0: dependencies: type-fest "^0.20.2" +globals@^14.0.0: + version "14.0.0" + resolved "https://registry.yarnpkg.com/globals/-/globals-14.0.0.tgz#898d7413c29babcf6bafe56fcadded858ada724e" + integrity sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ== + globby@^11.1.0: version "11.1.0" resolved "https://registry.yarnpkg.com/globby/-/globby-11.1.0.tgz#bd4be98bb042f83d796f7e3811991fbe82a0d34b" @@ -6248,12 +6282,10 @@ semver@^6.3.0, semver@^6.3.1: resolved 
"https://registry.yarnpkg.com/semver/-/semver-6.3.1.tgz#556d2ef8689146e46dcea4bfdd095f3434dffcb4" integrity sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA== -semver@^7.3.4, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0: - version "7.6.0" - resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.0.tgz#1a46a4db4bffcccd97b743b5005c8325f23d4e2d" - integrity sha512-EnwXhrlwXMk9gKu5/flx5sv/an57AkRplG3hTK68W7FRDN+k+OWBj65M7719OkA82XLBxrcX0KSHj+X5COhOVg== - dependencies: - lru-cache "^6.0.0" +semver@^7.3.4, semver@^7.5.3, semver@^7.5.4, semver@^7.6.0, semver@^7.6.1: + version "7.6.2" + resolved "https://registry.yarnpkg.com/semver/-/semver-7.6.2.tgz#1e3b34759f896e8f14d6134732ce798aeb0c6e13" + integrity sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w== serialize-javascript@^6.0.1: version "6.0.2" From ad897bb843b06bb7d54c5913396ac54f736e4e7c Mon Sep 17 00:00:00 2001 From: Anja Kefala Date: Tue, 4 Jun 2024 19:00:05 -0700 Subject: [PATCH 228/261] GH-37929: [Python] begin moving static settings to pyproject.toml (#41041) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change To migrate Arrow to modern Python packaging standards, see [PEP-517](https://peps.python.org/pep-0517/) and [PEP-518](https://peps.python.org/pep-0518/). * GitHub Issue: #37929 This PR focuses on migrating the static settings, the metadata and version, to pyproject.toml. Future PRs will migrate more of the build process to pyproject.toml. Lead-authored-by: anjakefala Co-authored-by: Raúl Cumplido Co-authored-by: Joris Van den Bossche Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/dev.yml | 2 +- ci/conda_env_python.txt | 4 +- dev/release/01-prepare-test.rb | 6 +- dev/release/post-11-bump-versions-test.rb | 6 +- dev/release/utils-prepare.sh | 10 ++- .../python-minimal-build/github.linux.yml | 2 +- .../examples/minimal_build/Dockerfile.ubuntu | 3 +- python/examples/minimal_build/build_conda.sh | 9 +- python/examples/minimal_build/build_venv.sh | 11 +-- python/pyproject.toml | 62 +++++++++++++- python/requirements-build.txt | 4 +- python/setup.py | 85 ------------------- 12 files changed, 89 insertions(+), 115 deletions(-) diff --git a/.github/workflows/dev.yml b/.github/workflows/dev.yml index 6111d1d2e5fe3..1ea12b0a4d23d 100644 --- a/.github/workflows/dev.yml +++ b/.github/workflows/dev.yml @@ -120,7 +120,7 @@ jobs: shell: bash run: | gem install test-unit - pip install "cython>=0.29.31" setuptools six pytest jira + pip install "cython>=0.29.31" setuptools six pytest jira setuptools-scm - name: Run Release Test env: ARROW_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/ci/conda_env_python.txt b/ci/conda_env_python.txt index 4366e30010389..bf915493de302 100644 --- a/ci/conda_env_python.txt +++ b/ci/conda_env_python.txt @@ -26,5 +26,5 @@ numpy>=1.16.6 pytest pytest-faulthandler s3fs>=2023.10.0 -setuptools -setuptools_scm +setuptools>=64 +setuptools_scm>=8 diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index bf6cfede15c81..fbd0b2996077c 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -254,10 +254,10 @@ def test_version_pre_tag ], }, { - path: "python/setup.py", + path: "python/pyproject.toml", hunks: [ - ["-default_version = '#{@snapshot_version}'", - "+default_version = '#{@release_version}'"], + ["-fallback_version = '#{@release_version}a0'", + "+fallback_version = '#{@release_version}'"], ], 
}, { diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 966c723f70adf..8ad404ef33202 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -179,10 +179,10 @@ def test_version_post_tag ], }, { - path: "python/setup.py", + path: "python/pyproject.toml", hunks: [ - ["-default_version = '#{@snapshot_version}'", - "+default_version = '#{@next_snapshot_version}'"], + ["-fallback_version = '#{@release_version}a0'", + "+fallback_version = '#{@next_version}a0'"], ], }, { diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index dfe9b052b09fa..c255e728a335b 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -26,10 +26,12 @@ update_versions() { release) local version=${base_version} local r_version=${base_version} + local python_version=${base_version} ;; snapshot) local version=${next_version}-SNAPSHOT local r_version=${base_version}.9000 + local python_version=${next_version}a0 ;; esac local major_version=${version%%.*} @@ -126,10 +128,10 @@ update_versions() { pushd "${ARROW_DIR}/python" sed -i.bak -E -e \ - "s/^default_version = '.+'/default_version = '${version}'/" \ - setup.py - rm -f setup.py.bak - git add setup.py + "s/^fallback_version = '.+'/fallback_version = '${python_version}'/" \ + pyproject.toml + rm -f pyproject.toml.bak + git add pyproject.toml sed -i.bak -E -e \ "s/^set\(PYARROW_VERSION \".+\"\)/set(PYARROW_VERSION \"${version}\")/" \ CMakeLists.txt diff --git a/dev/tasks/python-minimal-build/github.linux.yml b/dev/tasks/python-minimal-build/github.linux.yml index e776312b93f95..d97968b86b362 100644 --- a/dev/tasks/python-minimal-build/github.linux.yml +++ b/dev/tasks/python-minimal-build/github.linux.yml @@ -26,7 +26,7 @@ jobs: runs-on: ubuntu-latest {{ macros.github_set_env(env) }} steps: - {{ macros.github_checkout_arrow(submodules=false)|indent }} + {{ macros.github_checkout_arrow(fetch_depth=0, submodules=false)|indent }} - name: Run minimal build example run: | diff --git a/python/examples/minimal_build/Dockerfile.ubuntu b/python/examples/minimal_build/Dockerfile.ubuntu index ebea4b045e592..07cd69c082461 100644 --- a/python/examples/minimal_build/Dockerfile.ubuntu +++ b/python/examples/minimal_build/Dockerfile.ubuntu @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -FROM ubuntu:focal +FROM ubuntu:jammy ENV DEBIAN_FRONTEND=noninteractive @@ -32,6 +32,7 @@ RUN apt-get update -y -q && \ python3-dev \ python3-pip \ python3-venv \ + tzdata \ && \ apt-get clean && rm -rf /var/lib/apt/lists* diff --git a/python/examples/minimal_build/build_conda.sh b/python/examples/minimal_build/build_conda.sh index 72c3a5f9ea2cd..e90c800ae2eb1 100755 --- a/python/examples/minimal_build/build_conda.sh +++ b/python/examples/minimal_build/build_conda.sh @@ -97,9 +97,8 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export PYARROW_BUILD_TYPE=Debug export PYARROW_CMAKE_GENERATOR=Ninja -# You can run either "develop" or "build_ext --inplace". Your pick - -# python setup.py build_ext --inplace -python setup.py develop +# Use the same command that we use on python_build.sh +python -m pip install --no-deps --no-build-isolation -vv . 
+popd -py.test pyarrow +pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/examples/minimal_build/build_venv.sh b/python/examples/minimal_build/build_venv.sh index 3bd641d0e72c9..f462c4e9b9d0a 100755 --- a/python/examples/minimal_build/build_venv.sh +++ b/python/examples/minimal_build/build_venv.sh @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. -set -e +set -ex #---------------------------------------------------------------------- # Change this to whatever makes sense for your system @@ -35,6 +35,7 @@ source $WORKDIR/venv/bin/activate git config --global --add safe.directory $ARROW_ROOT pip install -r $ARROW_ROOT/python/requirements-build.txt +pip install wheel #---------------------------------------------------------------------- # Build C++ library @@ -68,11 +69,11 @@ export CMAKE_PREFIX_PATH=${ARROW_HOME}${CMAKE_PREFIX_PATH:+:${CMAKE_PREFIX_PATH} export PYARROW_BUILD_TYPE=Debug export PYARROW_CMAKE_GENERATOR=Ninja -# You can run either "develop" or "build_ext --inplace". Your pick +# Use the same command that we use on python_build.sh +python -m pip install --no-deps --no-build-isolation -vv . -# python setup.py build_ext --inplace -python setup.py develop +popd pip install -r $ARROW_ROOT/python/requirements-test.txt -py.test pyarrow +pytest -vv -r s ${PYTEST_ARGS} --pyargs pyarrow diff --git a/python/pyproject.toml b/python/pyproject.toml index 1588e690a7247..f72c3a91eb436 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -24,7 +24,63 @@ requires = [ # continue using oldest-support-numpy. "oldest-supported-numpy>=0.14; python_version<'3.9'", "numpy>=1.25; python_version>='3.9'", - "setuptools_scm", - "setuptools >= 40.1.0", - "wheel" + # configuring setuptools_scm in pyproject.toml requires + # versions released after 2022 + "setuptools_scm[toml]>=8", + "setuptools>=64", ] +build-backend = "setuptools.build_meta" + +[project] +name = "pyarrow" +dynamic = ["version"] +requires-python = ">=3.8" +dependencies = [ + "numpy >= 1.16.6" +] +description = "Python library for Apache Arrow" +readme = {file = "README.md", content-type = "text/markdown"} +license = {text = "Apache Software License"} +classifiers = [ + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Langauge :: Python :: 3.12', +] +maintainers = [ + {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} +] + +[project.urls] +Homepage = "https://arrow.apache.org/" +Documentation = "https://arrow.apache.org/docs/python" +Repository = "https://github.com/apache/arrow" +Issues = "https://github.com/apache/arrow/issues" + +[project.optional-dependencies] +test = [ + 'pytest', + 'hypothesis', + 'cffi', + 'pytz', + 'pandas' +] + +[tool.setuptools] +zip-safe=false +include-package-data=true + +[tool.setuptools.packages.find] +where = ["."] + +[tool.setuptools.package-data] +pyarrow = ["*.pxd", "*.pyx", "includes/*.pxd"] + +[tool.setuptools_scm] +root = '..' 
+version_file = 'pyarrow/_generated_version.py' +version_scheme = 'guess-next-dev' +git_describe_command = 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' +fallback_version = '17.0.0a0' diff --git a/python/requirements-build.txt b/python/requirements-build.txt index 87dcc148ad161..c150c842a0cc6 100644 --- a/python/requirements-build.txt +++ b/python/requirements-build.txt @@ -1,5 +1,5 @@ cython>=0.29.31 oldest-supported-numpy>=0.14; python_version<'3.9' numpy>=1.25; python_version>='3.9' -setuptools_scm -setuptools>=38.6.0 +setuptools_scm>=8 +setuptools>=64 diff --git a/python/setup.py b/python/setup.py index ed2b7961e5fbb..b738b2f77290e 100755 --- a/python/setup.py +++ b/python/setup.py @@ -352,61 +352,11 @@ def get_outputs(self): for name in self.get_names()] -# If the event of not running from a git clone (e.g. from a git archive -# or a Python sdist), see if we can set the version number ourselves -default_version = '17.0.0-SNAPSHOT' -if (not os.path.exists('../.git') and - not os.environ.get('SETUPTOOLS_SCM_PRETEND_VERSION')): - os.environ['SETUPTOOLS_SCM_PRETEND_VERSION'] = \ - default_version.replace('-SNAPSHOT', 'a0') - - -# See https://github.com/pypa/setuptools_scm#configuration-parameters -scm_version_write_to_prefix = os.environ.get( - 'SETUPTOOLS_SCM_VERSION_WRITE_TO_PREFIX', setup_dir) - - -def parse_git(root, **kwargs): - """ - Parse function for setuptools_scm that ignores tags for non-C++ - subprojects, e.g. apache-arrow-js-XXX tags. - """ - from setuptools_scm.git import parse - kwargs['describe_command'] =\ - 'git describe --dirty --tags --long --match "apache-arrow-[0-9]*.*"' - return parse(root, **kwargs) - - -def guess_next_dev_version(version): - if version.exact: - return version.format_with('{tag}') - else: - def guess_next_version(tag_version): - return default_version.replace('-SNAPSHOT', '') - return version.format_next_version(guess_next_version) - - -with open('README.md') as f: - long_description = f.read() - - class BinaryDistribution(Distribution): def has_ext_modules(foo): return True -install_requires = ( - 'numpy >= 1.16.6', -) - - -# Only include pytest-runner in setup_requires if we're invoking tests -if {'pytest', 'test', 'ptr'}.intersection(sys.argv): - setup_requires = ['pytest-runner'] -else: - setup_requires = [] - - if strtobool(os.environ.get('PYARROW_INSTALL_TESTS', '1')): packages = find_namespace_packages(include=['pyarrow*']) exclude_package_data = {} @@ -420,11 +370,7 @@ def has_ext_modules(foo): setup( - name='pyarrow', packages=packages, - zip_safe=False, - package_data={'pyarrow': ['*.pxd', '*.pyx', 'includes/*.pxd']}, - include_package_data=True, exclude_package_data=exclude_package_data, distclass=BinaryDistribution, # Dummy extension to trigger build_ext @@ -432,35 +378,4 @@ def has_ext_modules(foo): cmdclass={ 'build_ext': build_ext }, - use_scm_version={ - 'root': os.path.dirname(setup_dir), - 'parse': parse_git, - 'write_to': os.path.join(scm_version_write_to_prefix, - 'pyarrow/_generated_version.py'), - 'version_scheme': guess_next_dev_version - }, - setup_requires=['setuptools_scm', 'cython >= 0.29.31'] + setup_requires, - install_requires=install_requires, - tests_require=['pytest', 'pandas', 'hypothesis'], - python_requires='>=3.8', - description='Python library for Apache Arrow', - long_description=long_description, - long_description_content_type='text/markdown', - classifiers=[ - 'License :: OSI Approved :: Apache Software License', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: 
Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', - ], - license='Apache License, Version 2.0', - maintainer='Apache Arrow Developers', - maintainer_email='dev@arrow.apache.org', - test_suite='pyarrow.tests', - url='https://arrow.apache.org/', - project_urls={ - 'Documentation': 'https://arrow.apache.org/docs/python', - 'Source': 'https://github.com/apache/arrow', - }, ) From 5a8644156f55254e5852f94533bb5578b04d7a25 Mon Sep 17 00:00:00 2001 From: Alenka Frim Date: Wed, 5 Jun 2024 14:14:52 +0200 Subject: [PATCH 229/261] GH-40062: [C++] Move RecordBatch::ToTensor code from record_batch.cc to tensor.cc (#41932) ### Rationale for this change This is a precursor PR to https://github.com/apache/arrow/pull/41870 with the purpose to make the review of #41870 easier (the diff of the code will be visible as it currently isn't because the code was moved to table.cc. I should also live in tensor.cc). ### What changes are included in this PR? The code from `RecordBatch::ToTensor` in record_batch.cc is moved to `RecordBatchToTensor` in tensor.cc. ### Are these changes tested? Existing tests should pass. ### Are there any user-facing changes? No. **This PR does not close the linked issue yet, it is just a precursor!** * GitHub Issue: #40062 Authored-by: AlenkaF Signed-off-by: AlenkaF --- cpp/src/arrow/record_batch.cc | 198 +------------------------------- cpp/src/arrow/tensor.cc | 206 ++++++++++++++++++++++++++++++++++ cpp/src/arrow/tensor.h | 4 + 3 files changed, 212 insertions(+), 196 deletions(-) diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 351f72f52365b..e3a8c0d710cb8 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -35,7 +35,6 @@ #include "arrow/type.h" #include "arrow/util/iterator.h" #include "arrow/util/logging.h" -#include "arrow/util/unreachable.h" #include "arrow/util/vector.h" #include "arrow/visit_type_inline.h" @@ -286,204 +285,11 @@ Result> RecordBatch::ToStructArray() const { /*offset=*/0); } -template -struct ConvertColumnsToTensorVisitor { - Out*& out_values; - const ArrayData& in_data; - - template - Status Visit(const T&) { - if constexpr (is_numeric(T::type_id)) { - using In = typename T::c_type; - auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); - - if (in_data.null_count == 0) { - if constexpr (std::is_same_v) { - memcpy(out_values, in_values.data(), in_values.size_bytes()); - out_values += in_values.size(); - } else { - for (In in_value : in_values) { - *out_values++ = static_cast(in_value); - } - } - } else { - for (int64_t i = 0; i < in_data.length; ++i) { - *out_values++ = - in_data.IsNull(i) ? static_cast(NAN) : static_cast(in_values[i]); - } - } - return Status::OK(); - } - Unreachable(); - } -}; - -template -struct ConvertColumnsToTensorRowMajorVisitor { - Out*& out_values; - const ArrayData& in_data; - int num_cols; - int col_idx; - - template - Status Visit(const T&) { - if constexpr (is_numeric(T::type_id)) { - using In = typename T::c_type; - auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); - - if (in_data.null_count == 0) { - for (int64_t i = 0; i < in_data.length; ++i) { - out_values[i * num_cols + col_idx] = static_cast(in_values[i]); - } - } else { - for (int64_t i = 0; i < in_data.length; ++i) { - out_values[i * num_cols + col_idx] = - in_data.IsNull(i) ? 
static_cast(NAN) : static_cast(in_values[i]); - } - } - return Status::OK(); - } - Unreachable(); - } -}; - -template -inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out, - bool row_major) { - using CType = typename arrow::TypeTraits::CType; - auto* out_values = reinterpret_cast(out); - - int i = 0; - for (const auto& column : batch.columns()) { - if (row_major) { - ConvertColumnsToTensorRowMajorVisitor visitor{out_values, *column->data(), - batch.num_columns(), i++}; - DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); - } else { - ConvertColumnsToTensorVisitor visitor{out_values, *column->data()}; - DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); - } - } -} - Result> RecordBatch::ToTensor(bool null_to_nan, bool row_major, MemoryPool* pool) const { - if (num_columns() == 0) { - return Status::TypeError( - "Conversion to Tensor for RecordBatches without columns/schema is not " - "supported."); - } - // Check for no validity bitmap of each field - // if null_to_nan conversion is set to false - for (int i = 0; i < num_columns(); ++i) { - if (column(i)->null_count() > 0 && !null_to_nan) { - return Status::TypeError( - "Can only convert a RecordBatch with no nulls. Set null_to_nan to true to " - "convert nulls to NaN"); - } - } - - // Check for supported data types and merge fields - // to get the resulting uniform data type - if (!is_integer(column(0)->type()->id()) && !is_floating(column(0)->type()->id())) { - return Status::TypeError("DataType is not supported: ", - column(0)->type()->ToString()); - } - std::shared_ptr result_field = schema_->field(0); - std::shared_ptr result_type = result_field->type(); - - Field::MergeOptions options; - options.promote_integer_to_float = true; - options.promote_integer_sign = true; - options.promote_numeric_width = true; - - if (num_columns() > 1) { - for (int i = 1; i < num_columns(); ++i) { - if (!is_numeric(column(i)->type()->id())) { - return Status::TypeError("DataType is not supported: ", - column(i)->type()->ToString()); - } - - // Casting of float16 is not supported, throw an error in this case - if ((column(i)->type()->id() == Type::HALF_FLOAT || - result_field->type()->id() == Type::HALF_FLOAT) && - column(i)->type()->id() != result_field->type()->id()) { - return Status::NotImplemented("Casting from or to halffloat is not supported."); - } - - ARROW_ASSIGN_OR_RAISE( - result_field, result_field->MergeWith( - schema_->field(i)->WithName(result_field->name()), options)); - } - result_type = result_field->type(); - } - - // Check if result_type is signed or unsigned integer and null_to_nan is set to true - // Then all columns should be promoted to float type - if (is_integer(result_type->id()) && null_to_nan) { - ARROW_ASSIGN_OR_RAISE( - result_field, - result_field->MergeWith(field(result_field->name(), float32()), options)); - result_type = result_field->type(); - } - - // Allocate memory - ARROW_ASSIGN_OR_RAISE( - std::shared_ptr result, - AllocateBuffer(result_type->bit_width() * num_columns() * num_rows(), pool)); - // Copy data - switch (result_type->id()) { - case Type::UINT8: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT16: - case Type::HALF_FLOAT: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT32: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::UINT64: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT8: - 
ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT16: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT32: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::INT64: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::FLOAT: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - case Type::DOUBLE: - ConvertColumnsToTensor(*this, result->mutable_data(), row_major); - break; - default: - return Status::TypeError("DataType is not supported: ", result_type->ToString()); - } - - // Construct Tensor object - const auto& fixed_width_type = - internal::checked_cast(*result_type); - std::vector shape = {num_rows(), num_columns()}; - std::vector strides; std::shared_ptr tensor; - - if (row_major) { - ARROW_RETURN_NOT_OK( - internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides)); - } else { - ARROW_RETURN_NOT_OK( - internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides)); - } - ARROW_ASSIGN_OR_RAISE(tensor, - Tensor::Make(result_type, std::move(result), shape, strides)); + ARROW_RETURN_NOT_OK( + internal::RecordBatchToTensor(*this, null_to_nan, row_major, pool, &tensor)); return tensor; } diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 77ccedbde15c6..b47f1a1075b37 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -18,6 +18,7 @@ #include "arrow/tensor.h" #include +#include #include #include #include @@ -27,12 +28,14 @@ #include #include +#include "arrow/record_batch.h" #include "arrow/status.h" #include "arrow/type.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging.h" +#include "arrow/util/unreachable.h" #include "arrow/visit_type_inline.h" namespace arrow { @@ -220,6 +223,209 @@ Status ValidateTensorParameters(const std::shared_ptr& type, return Status::OK(); } +template +struct ConvertColumnsToTensorVisitor { + Out*& out_values; + const ArrayData& in_data; + + template + Status Visit(const T&) { + if constexpr (is_numeric(T::type_id)) { + using In = typename T::c_type; + auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); + + if (in_data.null_count == 0) { + if constexpr (std::is_same_v) { + memcpy(out_values, in_values.data(), in_values.size_bytes()); + out_values += in_values.size(); + } else { + for (In in_value : in_values) { + *out_values++ = static_cast(in_value); + } + } + } else { + for (int64_t i = 0; i < in_data.length; ++i) { + *out_values++ = + in_data.IsNull(i) ? static_cast(NAN) : static_cast(in_values[i]); + } + } + return Status::OK(); + } + Unreachable(); + } +}; + +template +struct ConvertColumnsToTensorRowMajorVisitor { + Out*& out_values; + const ArrayData& in_data; + int num_cols; + int col_idx; + + template + Status Visit(const T&) { + if constexpr (is_numeric(T::type_id)) { + using In = typename T::c_type; + auto in_values = ArraySpan(in_data).GetSpan(1, in_data.length); + + if (in_data.null_count == 0) { + for (int64_t i = 0; i < in_data.length; ++i) { + out_values[i * num_cols + col_idx] = static_cast(in_values[i]); + } + } else { + for (int64_t i = 0; i < in_data.length; ++i) { + out_values[i * num_cols + col_idx] = + in_data.IsNull(i) ? 
static_cast(NAN) : static_cast(in_values[i]); + } + } + return Status::OK(); + } + Unreachable(); + } +}; + +template +inline void ConvertColumnsToTensor(const RecordBatch& batch, uint8_t* out, + bool row_major) { + using CType = typename arrow::TypeTraits::CType; + auto* out_values = reinterpret_cast(out); + + int i = 0; + for (const auto& column : batch.columns()) { + if (row_major) { + ConvertColumnsToTensorRowMajorVisitor visitor{out_values, *column->data(), + batch.num_columns(), i++}; + DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); + } else { + ConvertColumnsToTensorVisitor visitor{out_values, *column->data()}; + DCHECK_OK(VisitTypeInline(*column->type(), &visitor)); + } + } +} + +Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major, + MemoryPool* pool, std::shared_ptr* tensor) { + if (batch.num_columns() == 0) { + return Status::TypeError( + "Conversion to Tensor for RecordBatches without columns/schema is not " + "supported."); + } + // Check for no validity bitmap of each field + // if null_to_nan conversion is set to false + for (int i = 0; i < batch.num_columns(); ++i) { + if (batch.column(i)->null_count() > 0 && !null_to_nan) { + return Status::TypeError( + "Can only convert a RecordBatch with no nulls. Set null_to_nan to true to " + "convert nulls to NaN"); + } + } + + // Check for supported data types and merge fields + // to get the resulting uniform data type + if (!is_integer(batch.column(0)->type()->id()) && + !is_floating(batch.column(0)->type()->id())) { + return Status::TypeError("DataType is not supported: ", + batch.column(0)->type()->ToString()); + } + std::shared_ptr result_field = batch.schema()->field(0); + std::shared_ptr result_type = result_field->type(); + + Field::MergeOptions options; + options.promote_integer_to_float = true; + options.promote_integer_sign = true; + options.promote_numeric_width = true; + + if (batch.num_columns() > 1) { + for (int i = 1; i < batch.num_columns(); ++i) { + if (!is_numeric(batch.column(i)->type()->id())) { + return Status::TypeError("DataType is not supported: ", + batch.column(i)->type()->ToString()); + } + + // Casting of float16 is not supported, throw an error in this case + if ((batch.column(i)->type()->id() == Type::HALF_FLOAT || + result_field->type()->id() == Type::HALF_FLOAT) && + batch.column(i)->type()->id() != result_field->type()->id()) { + return Status::NotImplemented("Casting from or to halffloat is not supported."); + } + + ARROW_ASSIGN_OR_RAISE( + result_field, + result_field->MergeWith( + batch.schema()->field(i)->WithName(result_field->name()), options)); + } + result_type = result_field->type(); + } + + // Check if result_type is signed or unsigned integer and null_to_nan is set to true + // Then all columns should be promoted to float type + if (is_integer(result_type->id()) && null_to_nan) { + ARROW_ASSIGN_OR_RAISE( + result_field, + result_field->MergeWith(field(result_field->name(), float32()), options)); + result_type = result_field->type(); + } + + // Allocate memory + ARROW_ASSIGN_OR_RAISE( + std::shared_ptr result, + AllocateBuffer(result_type->bit_width() * batch.num_columns() * batch.num_rows(), + pool)); + // Copy data + switch (result_type->id()) { + case Type::UINT8: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::UINT16: + case Type::HALF_FLOAT: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::UINT32: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); 
+ break; + case Type::UINT64: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT8: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT16: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT32: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::INT64: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::FLOAT: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + case Type::DOUBLE: + ConvertColumnsToTensor(batch, result->mutable_data(), row_major); + break; + default: + return Status::TypeError("DataType is not supported: ", result_type->ToString()); + } + + // Construct Tensor object + const auto& fixed_width_type = + internal::checked_cast(*result_type); + std::vector shape = {batch.num_rows(), batch.num_columns()}; + std::vector strides; + + if (row_major) { + ARROW_RETURN_NOT_OK( + internal::ComputeRowMajorStrides(fixed_width_type, shape, &strides)); + } else { + ARROW_RETURN_NOT_OK( + internal::ComputeColumnMajorStrides(fixed_width_type, shape, &strides)); + } + ARROW_ASSIGN_OR_RAISE(*tensor, + Tensor::Make(result_type, std::move(result), shape, strides)); + return Status::OK(); +} + } // namespace internal /// Constructor with strides and dimension names diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index ff6f3735f9193..dd3a21fae495a 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -77,6 +77,10 @@ Status ValidateTensorParameters(const std::shared_ptr& type, const std::vector& strides, const std::vector& dim_names); +ARROW_EXPORT +Status RecordBatchToTensor(const RecordBatch& batch, bool null_to_nan, bool row_major, + MemoryPool* pool, std::shared_ptr* tensor); + } // namespace internal class ARROW_EXPORT Tensor { From 37d0acdccbf1228574434499ccb3a63d7a09e16f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 5 Jun 2024 16:28:27 +0200 Subject: [PATCH 230/261] GH-41983: [Dev] Run issue labeling bot only when opening an issue (not editing) (#41986) ### Rationale for this change Currently the bot will remove manually added Component labels, because at that point you are editing the issue and the workflow is run again, reinstating the labels in the "Components" section in the top post created by the issue form. Therefore, restrict this bot to only run when the issue is "opened" * GitHub Issue: #41983 Authored-by: Joris Van den Bossche Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/issue_bot.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/issue_bot.yml b/.github/workflows/issue_bot.yml index ec614ca1e7c56..2725825b56952 100644 --- a/.github/workflows/issue_bot.yml +++ b/.github/workflows/issue_bot.yml @@ -21,7 +21,6 @@ on: issues: types: - opened - - edited permissions: contents: read From 0b5f0a2af191078cb86d467035b1f19560e2e93a Mon Sep 17 00:00:00 2001 From: Jaap Versteegh Date: Wed, 5 Jun 2024 16:53:56 +0200 Subject: [PATCH 231/261] GH-41502: [Python] Fix reading column index with decimal values (#41503) Fix for #41502 Convert pandas "decimal" to "object" in numpy. 
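A minimal reproduction sketch of the failure this fixes (hypothetical code, not part of the patch; it only assumes `pandas` and `pyarrow` are importable):

```python
# Sketch of the GH-41502 failure mode: round-tripping a DataFrame whose
# column index holds decimal.Decimal values. Before this fix, rebuilding
# the column index from the stored pandas metadata raised KeyError: 'decimal'
# because the inferred pandas type "decimal" had no numpy mapping.
import decimal

import pandas as pd
import pyarrow as pa

df = pd.DataFrame(
    [[1, 2]],
    columns=pd.Index([decimal.Decimal("1.5"), decimal.Decimal("2.5")]),
)
table = pa.Table.from_pandas(df, preserve_index=True)
restored = table.to_pandas()  # raised KeyError('decimal') before this fix
assert all(isinstance(c, decimal.Decimal) for c in restored.columns)
```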
* GitHub Issue: #41502

Authored-by: Jaap Versteegh
Signed-off-by: Joris Van den Bossche
---
 python/pyarrow/pandas_compat.py     |  5 +++++
 python/pyarrow/tests/test_pandas.py | 11 +++++++++++
 2 files changed, 16 insertions(+)

diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 00fa19604e5c3..e246f1263d20d 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -23,6 +23,7 @@
 # module bug (ARROW-11983)
 import concurrent.futures.thread  # noqa
 from copy import deepcopy
+import decimal
 from itertools import zip_longest
 import json
 import operator
@@ -1027,6 +1028,7 @@ def _is_generated_index_name(name):
     'string': np.str_,
     'integer': np.int64,
     'floating': np.float64,
+    'decimal': np.object_,
     'empty': np.object_,
 }

@@ -1105,6 +1107,9 @@ def _reconstruct_columns_from_metadata(columns, column_indexes):
             tz = pa.lib.string_to_tzinfo(
                 column_indexes[0]['metadata']['timezone'])
             level = pd.to_datetime(level, utc=True).tz_convert(tz)
+        # GH-41503: if the column index was decimal, restore to decimal
+        elif pandas_dtype == "decimal":
+            level = _pandas_api.pd.Index([decimal.Decimal(i) for i in level])
         elif level.dtype != dtype:
             level = level.astype(dtype)
         # ARROW-9096: if original DataFrame was upcast we keep that
diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py
index 3678b4e57a9a8..be2c5b14e68b0 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -221,6 +221,17 @@ def test_column_index_names_with_tz(self):
         )
         _check_pandas_roundtrip(df, preserve_index=True)

+    def test_column_index_names_with_decimal(self):
+        # GH-41503: Test valid roundtrip with decimal value in column index
+        df = pd.DataFrame(
+            [[decimal.Decimal(5), decimal.Decimal(6)]],
+            columns=pd.MultiIndex.from_product(
+                [[decimal.Decimal(1)], [decimal.Decimal(2), decimal.Decimal(3)]]
+            ),
+            index=[decimal.Decimal(4)],
+        )
+        _check_pandas_roundtrip(df, preserve_index=True)
+
     def test_range_index_shortcut(self):
         # ARROW-1639
         index_name = 'foo'

From cd7ebc0b47668339f315b4ba224ce271c46c6cf5 Mon Sep 17 00:00:00 2001
From: mwish
Date: Wed, 5 Jun 2024 23:21:14 +0800
Subject: [PATCH 232/261] GH-41953: [C++] Minor enhance code style for FixedShapeTensorType (#41954)

### Rationale for this change

Minor code-style enhancements for FixedShapeTensorType.

### What changes are included in this PR?

1. Remove some `shared_ptr` temp variables
2. Change some interfaces to return references

### Are these changes tested?

Covered by existing tests.

### Are there any user-facing changes?
no * GitHub Issue: #41953 Authored-by: mwish Signed-off-by: Antoine Pitrou --- cpp/src/arrow/extension/fixed_shape_tensor.cc | 66 ++++++++++--------- cpp/src/arrow/extension/fixed_shape_tensor.h | 4 +- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.cc b/cpp/src/arrow/extension/fixed_shape_tensor.cc index 1101b08307332..944a134a707b1 100644 --- a/cpp/src/arrow/extension/fixed_shape_tensor.cc +++ b/cpp/src/arrow/extension/fixed_shape_tensor.cc @@ -207,44 +207,44 @@ std::shared_ptr FixedShapeTensorType::MakeArray( Result> FixedShapeTensorType::MakeTensor( const std::shared_ptr& scalar) { - const auto ext_scalar = internal::checked_pointer_cast(scalar); - const auto ext_type = - internal::checked_pointer_cast(scalar->type); - if (!is_fixed_width(*ext_type->value_type())) { + const auto& ext_scalar = internal::checked_cast(*scalar); + const auto& ext_type = + internal::checked_cast(*scalar->type); + if (!is_fixed_width(*ext_type.value_type())) { return Status::TypeError("Cannot convert non-fixed-width values to Tensor."); } - const auto array = - internal::checked_pointer_cast(ext_scalar->value)->value; + const auto& array = + internal::checked_cast(ext_scalar.value.get())->value; if (array->null_count() > 0) { return Status::Invalid("Cannot convert data with nulls to Tensor."); } - const auto value_type = - internal::checked_pointer_cast(ext_type->value_type()); - const auto byte_width = value_type->byte_width(); + const auto& value_type = + internal::checked_cast(*ext_type.value_type()); + const auto byte_width = value_type.byte_width(); - std::vector permutation = ext_type->permutation(); + std::vector permutation = ext_type.permutation(); if (permutation.empty()) { - permutation.resize(ext_type->ndim()); + permutation.resize(ext_type.ndim()); std::iota(permutation.begin(), permutation.end(), 0); } - std::vector shape = ext_type->shape(); + std::vector shape = ext_type.shape(); internal::Permute(permutation, &shape); - std::vector dim_names = ext_type->dim_names(); + std::vector dim_names = ext_type.dim_names(); if (!dim_names.empty()) { internal::Permute(permutation, &dim_names); } std::vector strides; - RETURN_NOT_OK(ComputeStrides(*value_type.get(), shape, permutation, &strides)); + RETURN_NOT_OK(ComputeStrides(value_type, shape, permutation, &strides)); const auto start_position = array->offset() * byte_width; const auto size = std::accumulate(shape.begin(), shape.end(), static_cast(1), std::multiplies<>()); const auto buffer = SliceBuffer(array->data()->buffers[1], start_position, size * byte_width); - return Tensor::Make(ext_type->value_type(), buffer, shape, strides, dim_names); + return Tensor::Make(ext_type.value_type(), buffer, shape, strides, dim_names); } Result> FixedShapeTensorArray::FromTensor( @@ -257,12 +257,14 @@ Result> FixedShapeTensorArray::FromTensor permutation.erase(permutation.begin()); std::vector cell_shape; + cell_shape.reserve(permutation.size()); for (auto i : permutation) { cell_shape.emplace_back(tensor->shape()[i]); } std::vector dim_names; if (!tensor->dim_names().empty()) { + dim_names.reserve(permutation.size()); for (auto i : permutation) { dim_names.emplace_back(tensor->dim_names()[i]); } @@ -337,9 +339,9 @@ const Result> FixedShapeTensorArray::ToTensor() const { // To convert an array of n dimensional tensors to a n+1 dimensional tensor we // interpret the array's length as the first dimension the new tensor. 
- const auto ext_type = - internal::checked_pointer_cast(this->type()); - const auto value_type = ext_type->value_type(); + const auto& ext_type = + internal::checked_cast(*this->type()); + const auto& value_type = ext_type.value_type(); ARROW_RETURN_IF( !is_fixed_width(*value_type), Status::TypeError(value_type->ToString(), " is not valid data type for a tensor")); @@ -350,35 +352,35 @@ const Result> FixedShapeTensorArray::ToTensor() const { // will get permutation index 0 and remaining values from ext_type->permutation() need // to be shifted to fill the [1, ndim+1) range. Computed permutation will be used to // generate the new tensor's shape, strides and dim_names. - std::vector permutation = ext_type->permutation(); + std::vector permutation = ext_type.permutation(); if (permutation.empty()) { - permutation.resize(ext_type->ndim() + 1); + permutation.resize(ext_type.ndim() + 1); std::iota(permutation.begin(), permutation.end(), 0); } else { - for (auto i = 0; i < static_cast(ext_type->ndim()); i++) { + for (auto i = 0; i < static_cast(ext_type.ndim()); i++) { permutation[i] += 1; } permutation.insert(permutation.begin(), 1, 0); } - std::vector dim_names = ext_type->dim_names(); + std::vector dim_names = ext_type.dim_names(); if (!dim_names.empty()) { dim_names.insert(dim_names.begin(), 1, ""); internal::Permute(permutation, &dim_names); } - std::vector shape = ext_type->shape(); + std::vector shape = ext_type.shape(); auto cell_size = std::accumulate(shape.begin(), shape.end(), static_cast(1), std::multiplies<>()); shape.insert(shape.begin(), 1, this->length()); internal::Permute(permutation, &shape); std::vector tensor_strides; - const auto fw_value_type = internal::checked_pointer_cast(value_type); + const auto* fw_value_type = internal::checked_cast(value_type.get()); ARROW_RETURN_NOT_OK( - ComputeStrides(*fw_value_type.get(), shape, permutation, &tensor_strides)); + ComputeStrides(*fw_value_type, shape, permutation, &tensor_strides)); - const auto raw_buffer = this->storage()->data()->child_data[0]->buffers[1]; + const auto& raw_buffer = this->storage()->data()->child_data[0]->buffers[1]; ARROW_ASSIGN_OR_RAISE( const auto buffer, SliceBufferSafe(raw_buffer, this->offset() * cell_size * value_type->byte_width())); @@ -389,7 +391,7 @@ const Result> FixedShapeTensorArray::ToTensor() const { Result> FixedShapeTensorType::Make( const std::shared_ptr& value_type, const std::vector& shape, const std::vector& permutation, const std::vector& dim_names) { - const auto ndim = shape.size(); + const size_t ndim = shape.size(); if (!permutation.empty() && ndim != permutation.size()) { return Status::Invalid("permutation size must match shape size. 
                           " Got: ", permutation.size());
@@ -402,18 +404,18 @@ Result<std::shared_ptr<DataType>> FixedShapeTensorType::Make(
     RETURN_NOT_OK(internal::IsPermutationValid(permutation));
   }

-  const auto size = std::accumulate(shape.begin(), shape.end(), static_cast<int64_t>(1),
-                                    std::multiplies<>());
+  const int64_t size = std::accumulate(shape.begin(), shape.end(),
+                                       static_cast<int64_t>(1), std::multiplies<>());

   return std::make_shared<FixedShapeTensorType>(value_type, static_cast<int32_t>(size),
                                                 shape, permutation, dim_names);
 }

 const std::vector<int64_t>& FixedShapeTensorType::strides() {
   if (strides_.empty()) {
-    auto value_type = internal::checked_pointer_cast<FixedWidthType>(this->value_type_);
+    auto value_type = internal::checked_cast<const FixedWidthType*>(this->value_type_.get());
     std::vector<int64_t> tensor_strides;
-    ARROW_CHECK_OK(ComputeStrides(*value_type.get(), this->shape(), this->permutation(),
-                                  &tensor_strides));
+    ARROW_CHECK_OK(
+        ComputeStrides(*value_type, this->shape(), this->permutation(), &tensor_strides));
     strides_ = tensor_strides;
   }
   return strides_;
diff --git a/cpp/src/arrow/extension/fixed_shape_tensor.h b/cpp/src/arrow/extension/fixed_shape_tensor.h
index 3fec79b5c2a3c..20ec20a64c2d4 100644
--- a/cpp/src/arrow/extension/fixed_shape_tensor.h
+++ b/cpp/src/arrow/extension/fixed_shape_tensor.h
@@ -67,10 +67,10 @@ class ARROW_EXPORT FixedShapeTensorType : public ExtensionType {
   size_t ndim() const { return shape_.size(); }

   /// Shape of tensor elements
-  const std::vector<int64_t> shape() const { return shape_; }
+  const std::vector<int64_t>& shape() const { return shape_; }

   /// Value type of tensor elements
-  const std::shared_ptr<DataType> value_type() const { return value_type_; }
+  const std::shared_ptr<DataType>& value_type() const { return value_type_; }

   /// Strides of tensor elements. Strides state offset in bytes between adjacent
   /// elements along each dimension. In case permutation is non-empty strides are

From 51bc2a61c90a89a29dacacbada190aa06f232271 Mon Sep 17 00:00:00 2001
From: Antoine Pitrou
Date: Wed, 5 Jun 2024 17:57:30 +0200
Subject: [PATCH 233/261] GH-41797: [C++][S3] Remove GetBucketRegion hack for newer AWS SDK versions (#41798)

### Rationale for this change

To get the region an S3 bucket resides in, one must issue a HeadBucket request and parse the response headers for a specific header value. Unfortunately, the AWS SDK doesn't let us access arbitrary headers on successful responses for S3 model requests, which forced us to implement a workaround by calling lower-level SDK APIs.

However, the SDK recently added a `GetBucketRegion` method on `HeadBucketResult`, which removes the need for this workaround. We now use this method if the available AWS SDK version is recent enough.

### Are these changes tested?

By existing tests on the various CI configurations.

### Are there any user-facing changes?

No.
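As an editor's illustration of this rationale (not part of the patch): with AWS SDK >= 1.11.212 the region can be read straight off the parsed HeadBucket result instead of being fished out of raw response headers. A minimal sketch against the plain SDK API, assuming an already-configured `Aws::S3::S3Client`; the helper name `ResolveBucketRegion` is hypothetical:

```cpp
#include <string>

#include <aws/s3/S3Client.h>
#include <aws/s3/model/HeadBucketRequest.h>

// Hypothetical helper (not Arrow API): resolve a bucket's region, or return
// an empty string when it cannot be determined.
std::string ResolveBucketRegion(const Aws::S3::S3Client& client,
                                const std::string& bucket) {
  Aws::S3::Model::HeadBucketRequest req;
  req.SetBucket(Aws::String(bucket.c_str(), bucket.size()));
  const auto outcome = client.HeadBucket(req);
  if (outcome.IsSuccess()) {
    // SDK >= 1.11.212 parses the region into the result for us.
    const auto& region = outcome.GetResult().GetBucketRegion();
    return std::string(region.c_str(), region.size());
  }
  // Even on failure (e.g. a redirect from the wrong endpoint), the
  // "x-amz-bucket-region" response header usually carries the region.
  const auto& headers = outcome.GetError().GetResponseHeaders();
  const auto it = headers.find(Aws::String("x-amz-bucket-region"));
  if (it != headers.end()) {
    return std::string(it->second.c_str(), it->second.size());
  }
  return "";
}
```
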
* GitHub Issue: #41797 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- cpp/src/arrow/filesystem/s3fs.cc | 81 +++++++++++++++++++++++--------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3fs.cc b/cpp/src/arrow/filesystem/s3fs.cc index 78e02c31a35a3..c456be2d0d3cd 100644 --- a/cpp/src/arrow/filesystem/s3fs.cc +++ b/cpp/src/arrow/filesystem/s3fs.cc @@ -601,44 +601,81 @@ class S3Client : public Aws::S3::S3Client { public: using Aws::S3::S3Client::S3Client; + static inline constexpr auto kBucketRegionHeaderName = "x-amz-bucket-region"; + + std::string GetBucketRegionFromHeaders( + const Aws::Http::HeaderValueCollection& headers) { + const auto it = headers.find(ToAwsString(kBucketRegionHeaderName)); + if (it != headers.end()) { + return std::string(FromAwsString(it->second)); + } + return std::string(); + } + + template + Result GetBucketRegionFromError( + const std::string& bucket, const Aws::Client::AWSError& error) { + std::string region = GetBucketRegionFromHeaders(error.GetResponseHeaders()); + if (!region.empty()) { + return region; + } else if (error.GetResponseCode() == Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::IOError("Bucket '", bucket, "' not found"); + } else { + return ErrorToStatus( + std::forward_as_tuple("When resolving region for bucket '", bucket, "': "), + "HeadBucket", error); + } + } + +#if ARROW_AWS_SDK_VERSION_CHECK(1, 11, 212) + // HeadBucketResult::GetBucketRegion appeared in AWS SDK 1.11.212 + Result GetBucketRegion(const std::string& bucket, + const S3Model::HeadBucketRequest& request) { + auto outcome = this->HeadBucket(request); + if (!outcome.IsSuccess()) { + return GetBucketRegionFromError(bucket, outcome.GetError()); + } + auto&& region = std::move(outcome).GetResult().GetBucketRegion(); + if (region.empty()) { + return Status::IOError("When resolving region for bucket '", request.GetBucket(), + "': missing 'x-amz-bucket-region' header in response"); + } + return region; + } +#else // To get a bucket's region, we must extract the "x-amz-bucket-region" header // from the response to a HEAD bucket request. // Unfortunately, the S3Client APIs don't let us access the headers of successful // responses. So we have to cook a AWS request and issue it ourselves. - - Result GetBucketRegion(const S3Model::HeadBucketRequest& request) { + Result GetBucketRegion(const std::string& bucket, + const S3Model::HeadBucketRequest& request) { auto uri = GeneratePresignedUrl(request.GetBucket(), /*key=*/"", Aws::Http::HttpMethod::HTTP_HEAD); // NOTE: The signer region argument isn't passed here, as there's no easy // way of computing it (the relevant method is private). auto outcome = MakeRequest(uri, request, Aws::Http::HttpMethod::HTTP_HEAD, Aws::Auth::SIGV4_SIGNER); - const auto code = outcome.IsSuccess() ? outcome.GetResult().GetResponseCode() - : outcome.GetError().GetResponseCode(); - const auto& headers = outcome.IsSuccess() - ? 
outcome.GetResult().GetHeaderValueCollection() - : outcome.GetError().GetResponseHeaders(); - - const auto it = headers.find(ToAwsString("x-amz-bucket-region")); - if (it == headers.end()) { - if (code == Aws::Http::HttpResponseCode::NOT_FOUND) { - return Status::IOError("Bucket '", request.GetBucket(), "' not found"); - } else if (!outcome.IsSuccess()) { - return ErrorToStatus(std::forward_as_tuple("When resolving region for bucket '", - request.GetBucket(), "': "), - "HeadBucket", outcome.GetError()); - } else { - return Status::IOError("When resolving region for bucket '", request.GetBucket(), - "': missing 'x-amz-bucket-region' header in response"); - } + if (!outcome.IsSuccess()) { + return GetBucketRegionFromError(bucket, outcome.GetError()); + } + std::string region = + GetBucketRegionFromHeaders(outcome.GetResult().GetHeaderValueCollection()); + if (!region.empty()) { + return region; + } else if (outcome.GetResult().GetResponseCode() == + Aws::Http::HttpResponseCode::NOT_FOUND) { + return Status::IOError("Bucket '", request.GetBucket(), "' not found"); + } else { + return Status::IOError("When resolving region for bucket '", request.GetBucket(), + "': missing 'x-amz-bucket-region' header in response"); } - return std::string(FromAwsString(it->second)); } +#endif Result GetBucketRegion(const std::string& bucket) { S3Model::HeadBucketRequest req; req.SetBucket(ToAwsString(bucket)); - return GetBucketRegion(req); + return GetBucketRegion(bucket, req); } S3Model::CompleteMultipartUploadOutcome CompleteMultipartUploadWithErrorFixup( From 9ee6ea701e20d1b47934f977d87811624061d597 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 5 Jun 2024 19:06:46 +0200 Subject: [PATCH 234/261] MINOR: [C++][CI] Work around bug in conda-forge benchmark package (#41987) ### Rationale for this change Work around bug in version 1.8.4 of the benchmark package: https://github.com/conda-forge/benchmark-feedstock/issues/36 ### Are these changes tested? By regular CI jobs. ### Are there any user-facing changes? No. Authored-by: Antoine Pitrou Signed-off-by: Jacob Wujciak-Jens --- .github/workflows/cpp.yml | 2 ++ ci/conda_env_cpp.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index dd5abbe1b4b1b..e539fadb859fe 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -21,6 +21,7 @@ on: push: paths: - '.github/workflows/cpp.yml' + - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' @@ -35,6 +36,7 @@ on: pull_request: paths: - '.github/workflows/cpp.yml' + - 'ci/conda_env_*' - 'ci/docker/**' - 'ci/scripts/cpp_*' - 'ci/scripts/install_azurite.sh' diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index 52e456eaab0cc..f28a24cac8d2d 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -21,7 +21,7 @@ azure-identity-cpp>=1.6.0 azure-storage-blobs-cpp>=12.10.0 azure-storage-common-cpp>=12.5.0 azure-storage-files-datalake-cpp>=12.9.0 -benchmark>=1.6.0 +benchmark>=1.6.0,!=1.8.4 boost-cpp>=1.68.0 brotli bzip2 From 3d1120551737787b3e2008389b67b451a045fb10 Mon Sep 17 00:00:00 2001 From: abandy Date: Wed, 5 Jun 2024 22:58:04 -0400 Subject: [PATCH 235/261] GH-41999: [Swift] Add methods for adding array and vargs to arrow array (#42000) ### Rationale for this change Would be nice to have methods for adding an array of values or variable args to when constructing an arrow array. ### Are these changes tested? 
Yes, tests are included * GitHub Issue: #41999 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- .../Sources/Arrow/ArrowArrayBuilder.swift | 12 ++++++ swift/Arrow/Tests/ArrowTests/ArrayTests.swift | 40 ++++++++++++++++++- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift index 4865b8a791256..40f9628d8f162 100644 --- a/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowArrayBuilder.swift @@ -35,6 +35,18 @@ public class ArrowArrayBuilder> self.bufferBuilder = try T() } + public func append(_ vals: T.ItemType?...) { + for val in vals { + self.bufferBuilder.append(val) + } + } + + public func append(_ vals: [T.ItemType?]) { + for val in vals { + self.bufferBuilder.append(val) + } + } + public func append(_ val: T.ItemType?) { self.bufferBuilder.append(val) } diff --git a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift index 10ffc4f96d83e..ed0cb1148e871 100644 --- a/swift/Arrow/Tests/ArrowTests/ArrayTests.swift +++ b/swift/Arrow/Tests/ArrowTests/ArrayTests.swift @@ -18,7 +18,7 @@ import XCTest @testable import Arrow -final class ArrayTests: XCTestCase { +final class ArrayTests: XCTestCase { // swiftlint:disable:this type_body_length func testPrimitiveArray() throws { // This is an example of a functional test case. // Use XCTAssert and related functions to verify your tests produce the correct @@ -271,4 +271,40 @@ final class ArrayTests: XCTestCase { XCTAssertEqual(stringHolder.nullCount, 10) XCTAssertEqual(stringHolder.length, 100) } - } + + func testAddVArgs() throws { + let arrayBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + arrayBuilder.append(0, 1, 2, 3, 4, 5, 6, 7, 8, 9) + XCTAssertEqual(arrayBuilder.length, 10) + XCTAssertEqual(try arrayBuilder.finish()[2], 2) + let doubleBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + doubleBuilder.append(0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8) + XCTAssertEqual(doubleBuilder.length, 9) + XCTAssertEqual(try doubleBuilder.finish()[4], 4.4) + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + stringBuilder.append("0", "1", "2", "3", "4", "5", "6") + XCTAssertEqual(stringBuilder.length, 7) + XCTAssertEqual(try stringBuilder.finish()[4], "4") + let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() + boolBuilder.append(true, false, true, false) + XCTAssertEqual(try boolBuilder.finish()[2], true) + } + + func testAddArray() throws { + let arrayBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + arrayBuilder.append([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + XCTAssertEqual(arrayBuilder.length, 10) + XCTAssertEqual(try arrayBuilder.finish()[2], 2) + let doubleBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + doubleBuilder.append([0, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8]) + XCTAssertEqual(doubleBuilder.length, 9) + XCTAssertEqual(try doubleBuilder.finish()[4], 4.4) + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + stringBuilder.append(["0", "1", "2", "3", "4", "5", "6"]) + XCTAssertEqual(stringBuilder.length, 7) + XCTAssertEqual(try stringBuilder.finish()[4], "4") + let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() + boolBuilder.append([true, false, true, false]) + XCTAssertEqual(try boolBuilder.finish()[2], true) + } +} From dbcce63b925118bb1ba35455a684a21620a250ac Mon Sep 17 
00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Thu, 6 Jun 2024 10:42:47 +0530 Subject: [PATCH 236/261] MINOR: [Java] Bump com.google.errorprone:error_prone_core from 2.10.0 to 2.28.0 in /java - Manual Fix (#41996) ### Rationale for this change Based on the [dependabot PR ](https://github.com/apache/arrow/pull/41944), making correct amendments to reflect the necessary error-prone library upgrade. ### What changes are included in this PR? - [X] Upgrade error-prone version for JDK 11+ - [X] Keeping the existing error-prone version for JDK 8, and added notes and references as reasoning. ### Are these changes tested? Tested by existing build. ### Are there any user-facing changes? No Authored-by: Vibhatha Abeykoon Signed-off-by: David Li --- java/pom.xml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/java/pom.xml b/java/pom.xml index a59d29c576398..0e9b7f0e25a34 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -94,9 +94,7 @@ 2 true 9+181-r4173-1 - 2.24.0 - 2.24.0 - 2.10.0 + 2.28.0 3.12.1 5.11.0 5.2.0 @@ -946,6 +944,9 @@ Do not activate Error Prone while running with Eclipse/M2E as it causes incompatibilities with other annotation processors. See https://github.com/jbosstools/m2e-apt/issues/62 for details + + Note: Maintaining error-prone version with JDK8 + See https://github.com/google/error-prone/blob/f8e33bc460be82ab22256a7ef8b979d7a2cacaba/docs/installation.md#jdk-8 --> 1.8 @@ -969,6 +970,13 @@ com.google.errorprone error_prone_core + 2.10.0 From 41810749e086278aea541240a16e9cf1e32eab80 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 6 Jun 2024 16:15:52 +0900 Subject: [PATCH 237/261] MINOR: [Java] Bump org.apache.calcite.avatica:avatica from 1.24.0 to 1.25.0 in /java (#41212) Bumps [org.apache.calcite.avatica:avatica](https://github.com/apache/calcite-avatica) from 1.24.0 to 1.25.0.
Commits:

- 62b0fdd [CALCITE-6334] Release Avatica 1.25.0
- c0cb4b7 [CALCITE-6248] Illegal dates are accepted by casts
- 8c36e01 [CALCITE-6282] Avatica ignores time precision when returning TIME results
- ef9a5a6 [CALCITE-6137] Upgrade Gradle from 8.1.1 to 8.4, support jdk21
- c12c3a3 Apply same vcs.xml as for Calcite
- 4b9c823 [CALCITE-6209] Long queries are failing with "java.net.SocketTimeoutException...
- 275a082 [CALCITE-6280] Jetty version number leaked by Avatica http server
- bc7ba9e Disable JIRA worklog notifications for GitHub PRs
- 178ff82 Add Calcite CLI tool to list of Avatica Clients on website
- 07c6b8d [CALCITE-6212] Config locale = en_US for javadoc task
- Additional commits viewable in compare view
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/flight/flight-sql-jdbc-core/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml
index fbab69df3b305..459412e0f8d8b 100644
--- a/java/flight/flight-sql-jdbc-core/pom.xml
+++ b/java/flight/flight-sql-jdbc-core/pom.xml
@@ -125,7 +125,7 @@
       <groupId>org.apache.calcite.avatica</groupId>
       <artifactId>avatica</artifactId>
-      <version>1.24.0</version>
+      <version>1.25.0</version>

From 8b2c7e2893fec41c0b6ed51f5e05a7510b427f64 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 6 Jun 2024 16:16:42 +0900
Subject: [PATCH 238/261] MINOR: [Java] Bump com.github.luben:zstd-jni from 1.5.5-11 to 1.5.6-3 in /java (#41521)

Bumps [com.github.luben:zstd-jni](https://github.com/luben/zstd-jni) from 1.5.5-11 to 1.5.6-3.
Commits:

- c77a765 v1.5.6-3
- 2d33a1e Fix missing Native free of Dict: Even when using a by-reference buffer, the d...
- 1ff8933 Use cross-compile for i386
- b833326 Adding a getByReferenceBuffer() method.
- 2a262bf Add new constructor to ZstdDictCompress and ZstdDictDecompress that
- a516a43 Add back some inspection on MacOS, bump version
- 2e00ab1 Enable tests on M1 MacOS
- c76455c Add debugging in MacOS CI
- ec1ddeb Use the M1 MacOS runner to compile the aarch64 binary
- fb6a35d Update also checkout and setup-qemu actions
- Additional commits viewable in compare view
Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/compression/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/compression/pom.xml b/java/compression/pom.xml
index 6ed0be6815ca3..a3d6c0ac558dd 100644
--- a/java/compression/pom.xml
+++ b/java/compression/pom.xml
@@ -47,7 +47,7 @@
       <groupId>com.github.luben</groupId>
       <artifactId>zstd-jni</artifactId>
-      <version>1.5.5-11</version>
+      <version>1.5.6-3</version>

From cea7323772beb5be6e5002624be36e6575a36418 Mon Sep 17 00:00:00 2001
From: Vibhatha Lakmal Abeykoon
Date: Thu, 6 Jun 2024 13:07:11 +0530
Subject: [PATCH 239/261] GH-41968: [Java] Implement TransferPair functionality for BinaryView (#41980)

### Rationale for this change

This PR contains the transferPair functionality for BinaryView vectors.

### What changes are included in this PR?

This includes the addition of transferPair functionality in `ViewVarBinaryVector` and corresponding test cases.

### Are these changes tested?

Yes

### Are there any user-facing changes?

No

* GitHub Issue: #41968

Authored-by: Vibhatha Abeykoon
Signed-off-by: David Li
---
 .../arrow/vector/ViewVarBinaryVector.java     |  45 +-
 .../arrow/vector/TestSplitAndTransfer.java    | 392 +++++----
 .../arrow/vector/TestVarCharViewVector.java   | 814 ++++++++++--------
 3 files changed, 756 insertions(+), 495 deletions(-)

diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
index 393df96b2969e..0a043b51067ef 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/ViewVarBinaryVector.java
@@ -205,14 +205,12 @@ public void setSafe(int index, NullableViewVarBinaryHolder holder) {
    */
   @Override
   public TransferPair getTransferPair(String ref, BufferAllocator allocator) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl(ref, allocator);
   }

   @Override
   public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl(field, allocator);
   }

   /**
@@ -223,7 +221,42 @@ public TransferPair getTransferPair(Field field, BufferAllocator allocator) {
    */
   @Override
   public TransferPair makeTransferPair(ValueVector to) {
-    // TODO: https://github.com/apache/arrow/issues/40932
-    throw new UnsupportedOperationException("Unsupported operation");
+    return new TransferImpl((ViewVarBinaryVector) to);
+  }
+
+  private class TransferImpl implements TransferPair {
+    ViewVarBinaryVector to;
+
+    public TransferImpl(String ref, BufferAllocator allocator) {
+      to = new ViewVarBinaryVector(ref, field.getFieldType(), allocator);
+    }
+
+    public TransferImpl(Field field, BufferAllocator allocator) {
+      to = new ViewVarBinaryVector(field, allocator);
+    }
+
+    public TransferImpl(ViewVarBinaryVector to) {
+      this.to = to;
+    }
+
+    @Override
+    public ViewVarBinaryVector getTo() {
+      return to;
+    }
+
+    @Override
+    public void transfer() {
+      transferTo(to);
+    }
+
+    @Override
+    public void splitAndTransfer(int startIndex, int length) {
+      splitAndTransferTo(startIndex, length, to);
+    }
+
+    @Override
+    public void copyValueSafe(int fromIndex, int toIndex) {
+      to.copyFromSafe(fromIndex, toIndex,
ViewVarBinaryVector.this); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java index d2c03930ca37a..fece93de9bf14 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestSplitAndTransfer.java @@ -67,7 +67,8 @@ private void populateVarcharVector(final VarCharVector vector, int valueCount, S vector.setValueCount(valueCount); } - private void populateViewVarcharVector(final ViewVarCharVector vector, int valueCount, String[] compareArray) { + private void populateBaseVariableWidthViewVector(final BaseVariableWidthViewVector vector, int valueCount, + String[] compareArray) { for (int i = 0; i < valueCount; i += 3) { final String s = String.format("%010d", i); vector.set(i, s.getBytes(StandardCharsets.UTF_8)); @@ -120,11 +121,16 @@ public void testWithEmptyVector() { transferPair = varCharVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); - // BaseVariableWidthViewVector + // BaseVariableWidthViewVector: ViewVarCharVector ViewVarCharVector viewVarCharVector = new ViewVarCharVector("", allocator); transferPair = viewVarCharVector.getTransferPair(allocator); transferPair.splitAndTransfer(0, 0); assertEquals(0, transferPair.getTo().getValueCount()); + // BaseVariableWidthVector: ViewVarBinaryVector + ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("", allocator); + transferPair = viewVarBinaryVector.getTransferPair(allocator); + transferPair.splitAndTransfer(0, 0); + assertEquals(0, transferPair.getTo().getValueCount()); // BaseLargeVariableWidthVector LargeVarCharVector largeVarCharVector = new LargeVarCharVector("", allocator); transferPair = largeVarCharVector.getTransferPair(allocator); @@ -225,36 +231,46 @@ public void test() throws Exception { } } - @Test - public void testView() throws Exception { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(10000, 1000); - - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; + private void testView(BaseVariableWidthViewVector vector) { + vector.allocateNew(10000, 1000); + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; - populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo();; + final int[][] startLengths = {{0, 201}, {201, 0}, {201, 200}, {401, 99}}; - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - for (int i = 0; i < length; i++) { - final boolean expectedSet = ((start + i) % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); - assertFalse(newViewVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); - } else { - 
assertTrue(newViewVarCharVector.isNull(i)); - } + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + for (int i = 0; i < length; i++) { + final boolean expectedSet = ((start + i) % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[start + i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); } - newViewVarCharVector.clear(); } + newVector.clear(); + } + } + + @Test + public void testUtf8View() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testView(viewVarCharVector); + } + } + + @Test + public void testBinaryView() throws Exception { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testView(viewVarBinaryVector); } } @@ -282,35 +298,47 @@ public void testMemoryConstrainedTransfer() { } } - @Test - public void testMemoryConstrainedTransferInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - // Here we have the target vector being transferred with a long string - // hence, the data buffer will be allocated. - // The default data buffer allocation takes - // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE - // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - // BaseVariableWidthViewVector.ELEMENT_SIZE - final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * - BaseVariableWidthViewVector.ELEMENT_SIZE; - allocator.setLimit(setLimit); - - viewVarCharVector.allocateNew(16000, 1000); + private void testMemoryConstrainedTransferInViews(BaseVariableWidthViewVector vector) { + // Here we have the target vector being transferred with a long string + // hence, the data buffer will be allocated. 
+ // The default data buffer allocation takes + // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + // set limit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + // BaseVariableWidthViewVector.ELEMENT_SIZE + final int setLimit = BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * + BaseVariableWidthViewVector.ELEMENT_SIZE; + allocator.setLimit(setLimit); - final int valueCount = 1000; + vector.allocateNew(16000, 1000); - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final int valueCount = 1000; - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - final int[][] startLengths = {{0, 700}, {700, 299}}; + populateBaseVariableWidthViewVector(vector, valueCount, null); - for (final int[] startLength : startLengths) { - final int start = startLength[0]; - final int length = startLength[1]; - tp.splitAndTransfer(start, length); - newViewVarCharVector.clear(); - } + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + + final int[][] startLengths = {{0, 700}, {700, 299}}; + + for (final int[] startLength : startLengths) { + final int start = startLength[0]; + final int length = startLength[1]; + tp.splitAndTransfer(start, length); + newVector.clear(); + } + } + + @Test + public void testMemoryConstrainedTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarCharVector); + } + } + + @Test + public void testMemoryConstrainedTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testMemoryConstrainedTransferInViews(viewVarBinaryVector); } } @@ -345,34 +373,45 @@ public void testTransfer() { } } - @Test - public void testTransferInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { - viewVarCharVector.allocateNew(16000, 1000); + private void testTransferInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); - final int valueCount = 500; - final String[] compareArray = new String[valueCount]; - populateViewVarcharVector(viewVarCharVector, valueCount, compareArray); + final int valueCount = 500; + final String[] compareArray = new String[valueCount]; + populateBaseVariableWidthViewVector(vector, valueCount, compareArray); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - final ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); - tp.transfer(); + final TransferPair tp = vector.getTransferPair(allocator); + final BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); + tp.transfer(); - assertEquals(0, viewVarCharVector.valueCount); - assertEquals(valueCount, newViewVarCharVector.valueCount); + assertEquals(0, vector.valueCount); + assertEquals(valueCount, newVector.valueCount); - for (int i = 0; i < valueCount; i++) { - final boolean expectedSet = (i % 3) == 0; - if (expectedSet) { - final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); - assertFalse(newViewVarCharVector.isNull(i)); - assertArrayEquals(expectedValue, newViewVarCharVector.get(i)); - } else { - assertTrue(newViewVarCharVector.isNull(i)); - } + for (int i = 0; i < valueCount; i++) { + 
final boolean expectedSet = (i % 3) == 0; + if (expectedSet) { + final byte[] expectedValue = compareArray[i].getBytes(StandardCharsets.UTF_8); + assertFalse(newVector.isNull(i)); + assertArrayEquals(expectedValue, newVector.get(i)); + } else { + assertTrue(newVector.isNull(i)); } + } + + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testTransferInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testTransferInViews(viewVarCharVector); + } + } + + @Test + public void testTransferInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testTransferInViews(viewVarBinaryVector); } } @@ -424,21 +463,31 @@ public void testSplitAndTransferNon() { } } - @Test - public void testSplitAndTransferNonInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + private void testSplitAndTransferNonInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newVector = (BaseVariableWidthViewVector) tp.getTo(); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + tp.splitAndTransfer(0, 0); + assertEquals(0, newVector.getValueCount()); - tp.splitAndTransfer(0, 0); - assertEquals(0, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testSplitAndTransferNonInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferNonInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferNonInViews(viewVarBinaryVector); } } @@ -460,21 +509,31 @@ public void testSplitAndTransferAll() { } } - @Test - public void testSplitAndTransferAllInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + private void testSplitAndTransferAllInViews(BaseVariableWidthViewVector vector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.getTransferPair(allocator); + BaseVariableWidthViewVector newViewVarCharVector = (BaseVariableWidthViewVector) tp.getTo(); - final TransferPair tp = viewVarCharVector.getTransferPair(allocator); - ViewVarCharVector newViewVarCharVector = (ViewVarCharVector) tp.getTo(); + tp.splitAndTransfer(0, valueCount); + assertEquals(valueCount, newViewVarCharVector.getValueCount()); - tp.splitAndTransfer(0, valueCount); - assertEquals(valueCount, newViewVarCharVector.getValueCount()); + newViewVarCharVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testSplitAndTransferAllInUtf8Views() { + try (final ViewVarCharVector 
viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarCharVector); + } + } + + @Test + public void testSplitAndTransferAllInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator)) { + testSplitAndTransferAllInViews(viewVarBinaryVector); } } @@ -499,24 +558,35 @@ public void testInvalidStartIndex() { } } - @Test - public void testInvalidStartIndexInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testInvalidStartIndexInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(valueCount, 10)); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> tp.splitAndTransfer(valueCount, 10)); + assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); - assertEquals("Invalid parameters startIndex: 500, length: 10 for valueCount: 500", e.getMessage()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testInvalidStartIndexInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidStartIndexInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidStartIndexInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -541,24 +611,35 @@ public void testInvalidLength() { } } - @Test - public void testInvalidLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testInvalidLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + IllegalArgumentException e = assertThrows( + IllegalArgumentException.class, + () -> tp.splitAndTransfer(0, valueCount * 2)); - IllegalArgumentException e = assertThrows( - IllegalArgumentException.class, - () -> tp.splitAndTransfer(0, valueCount * 2)); + assertEquals("Invalid parameters startIndex: 0, 
length: 1000 for valueCount: 500", e.getMessage()); - assertEquals("Invalid parameters startIndex: 0, length: 1000 for valueCount: 500", e.getMessage()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testInvalidLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testInvalidLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testInvalidLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -580,21 +661,33 @@ public void testZeroStartIndexAndLength() { } } - @Test - public void testZeroStartIndexAndLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testZeroStartIndexAndLengthInViews(BaseVariableWidthViewVector vector, + BaseVariableWidthViewVector newVector) { + vector.allocateNew(0, 0); + final int valueCount = 0; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(0, 0); - final int valueCount = 0; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + tp.splitAndTransfer(0, 0); + assertEquals(valueCount, newVector.getValueCount()); - tp.splitAndTransfer(0, 0); - assertEquals(valueCount, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testZeroStartIndexAndLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroStartIndexAndLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroStartIndexAndLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } @@ -616,21 +709,32 @@ public void testZeroLength() { } } - @Test - public void testZeroLengthInViews() { - try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); - final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + private void testZeroLengthInViews(BaseVariableWidthViewVector vector, BaseVariableWidthViewVector newVector) { + vector.allocateNew(16000, 1000); + final int valueCount = 500; + populateBaseVariableWidthViewVector(vector, valueCount, null); - viewVarCharVector.allocateNew(16000, 1000); - final int valueCount = 500; - populateViewVarcharVector(viewVarCharVector, valueCount, null); + final TransferPair tp = vector.makeTransferPair(newVector); - final TransferPair tp = viewVarCharVector.makeTransferPair(newViewVarCharVector); + tp.splitAndTransfer(500, 0); + 
assertEquals(0, newVector.getValueCount()); - tp.splitAndTransfer(500, 0); - assertEquals(0, newViewVarCharVector.getValueCount()); + newVector.clear(); + } - newViewVarCharVector.clear(); + @Test + public void testZeroLengthInUtf8Views() { + try (final ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator); + final ViewVarCharVector newViewVarCharVector = new ViewVarCharVector("newvector", allocator)) { + testZeroLengthInViews(viewVarCharVector, newViewVarCharVector); + } + } + + @Test + public void testZeroLengthInBinaryViews() { + try (final ViewVarBinaryVector viewVarBinaryVector = new ViewVarBinaryVector("myvector", allocator); + final ViewVarBinaryVector newViewVarBinaryVector = new ViewVarBinaryVector("newvector", allocator)) { + testZeroLengthInViews(viewVarBinaryVector, newViewVarBinaryVector); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index 817941ecb46d6..ebf9b58da7b40 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -37,7 +37,9 @@ import java.util.List; import java.util.Objects; import java.util.Random; +import java.util.function.BiConsumer; import java.util.function.Function; +import java.util.stream.IntStream; import java.util.stream.Stream; import org.apache.arrow.memory.ArrowBuf; @@ -86,9 +88,12 @@ public class TestVarCharViewVector { private BufferAllocator allocator; + private Random random; + @BeforeEach public void prepare() { allocator = new RootAllocator(Integer.MAX_VALUE); + random = new Random(); } @AfterEach @@ -1717,6 +1722,44 @@ public void testCopyFromSafeWithNulls(Function validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); } } @@ -1760,36 +1803,58 @@ public void testSplitAndTransfer1() { * With a long string included. */ @Test - public void testSplitAndTransfer2() { + public void testSplitAndTransferWithLongStringsOnSlicedBuffer() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR2); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR2, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 18); + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnSlicedBufferHelper(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); + } + } + + private void testSplitAndTransferOnSlicedVectorHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. + // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. + assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < length ; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); } } @@ -1800,35 +1865,31 @@ public void testSplitAndTransfer2() { * With short strings. */ @Test - public void testSplitAndTransfer3() { + public void testSplitAndTransferWithShortStringsOnSlicedVector() { + byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. 
- // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); } - assertArrayEquals(STR4, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + validateVector.accept(sourceVector, binaryData); } } @@ -1839,35 +1900,77 @@ public void testSplitAndTransfer3() { * With a long string included. */ @Test - public void testSplitAndTransfer4() { + public void testSplitAndTransferWithLongStringsOnSlicedVector() { + final byte [][] data = new byte[][]{STR2, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (sourceVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], sourceVector.get(i))); + }; + try (final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, STR2); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - // The validity buffer is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Therefore, the refcnt of the validity buffer is increased once since the startIndex is 0. - assertEquals(validityRefCnt + 1, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(sourceVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 20); + try (final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator)) { + testSplitAndTransferOnSlicedVectorHelper(sourceVector, targetVector, + startIndex, length, binaryData); + } + validateVector.accept(sourceVector, binaryData); + } + } + + private void testSplitAndTransferOnValiditySplitHelper( + BaseVariableWidthViewVector targetVector, BaseVariableWidthViewVector sourceVector, + int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(1024 * 10, 1024); + + sourceVector.set(0, new byte[0]); + sourceVector.setNull(1); + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + sourceVector.setNull(i); + } else { + sourceVector.set(i, data[i]); + } + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // the allocation only consists in the size needed for the validity buffer + final long validitySize = + DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( + BaseValueVector.getValidityBufferSizeFromCount(2)); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); + // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. + // Since values up to the startIndex are empty/null validity refcnt should not change. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = startIndex; i < startIndex + length; i++) { + assertArrayEquals(data[i], targetVector.get(i - startIndex)); + } + + for (int i = 0; i < data.length; i++) { + if (data[i] == null) { + assertTrue(sourceVector.isNull(i)); + } else { + assertArrayEquals(data[i], sourceVector.get(i)); } - assertArrayEquals(STR2, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); } } @@ -1878,43 +1981,24 @@ public void testSplitAndTransfer4() { * With short strings. 
*/ @Test - public void testSplitAndTransfer5() { + public void testSplitAndTransferWithShortStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR4, STR5, STR6}; + final int startIndex = 2; + final int length = 2; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - sourceVector.set(2, STR4); - sourceVector.set(3, STR5); - sourceVector.set(4, STR6); - sourceVector.setValueCount(5); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(2, 2, targetVector); - // the allocation only consists in the size needed for the validity buffer - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); - // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Since values up to the startIndex are empty/null validity refcnt should not change. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); - - assertArrayEquals(new byte[0], sourceVector.get(0)); - assertTrue(sourceVector.isNull(1)); - assertArrayEquals(STR4, sourceVector.get(2)); - assertArrayEquals(STR5, sourceVector.get(3)); - assertArrayEquals(STR6, sourceVector.get(4)); + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 10); + binaryData[0] = new byte[0]; + binaryData[1] = null; + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, binaryData); } } @@ -1925,44 +2009,59 @@ public void testSplitAndTransfer5() { * With long strings. 
*/ @Test - public void testSplitAndTransfer6() { + public void testSplitAndTransferWithLongStringsOnValiditySplit() { + final byte [][] data = new byte[][]{new byte[0], null, STR1, STR2, STR3}; + final int startIndex = 2; + final int length = 2; + try (final ViewVarCharVector targetVector = newViewVarCharVector("split-target", allocator); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - sourceVector.allocateNew(1024 * 10, 1024); - - sourceVector.set(0, new byte[0]); - sourceVector.setNull(1); - sourceVector.set(2, STR1); - sourceVector.set(3, STR2); - sourceVector.set(4, STR3); - sourceVector.setValueCount(5); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(2, 2, targetVector); - // the allocation consists in the size needed for the validity buffer and the long string - // allocation - final long validitySize = - DefaultRoundingPolicy.DEFAULT_ROUNDING_POLICY.getRoundedSize( - BaseValueVector.getValidityBufferSizeFromCount(2)); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem + validitySize < allocator.getAllocatedMemory()); - // The validity is sliced from the same buffer.See BaseFixedWidthViewVector#allocateBytes. - // Since values up to the startIndex are empty/null validity refcnt should not change. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); - - assertArrayEquals(new byte[0], sourceVector.get(0)); - assertTrue(sourceVector.isNull(1)); - assertArrayEquals(STR1, sourceVector.get(2)); - assertArrayEquals(STR2, sourceVector.get(3)); - assertArrayEquals(STR3, sourceVector.get(4)); + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + + final byte [][] binaryData = generateBinaryDataArray(5, 18); + binaryData[0] = new byte[0]; + binaryData[1] = null; + + try (final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", allocator); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testSplitAndTransferOnValiditySplitHelper(targetVector, sourceVector, + startIndex, length, data); + } + } + + private void testSplitAndTransferOnAllocatorToAllocator(BaseVariableWidthViewVector targetVector, + BaseVariableWidthViewVector sourceVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(50, data.length); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); + + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + + if (sourceVector.getDataBuffers().isEmpty()) { + // no extra allocation as strings are all inline + assertEquals(allocatedMem, allocator.getAllocatedMemory()); + } else { + // extra allocation as some strings are not inline + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + } + + // the refcnts of each buffer for 
this test should be the same as what + // the source allocator ended up with. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. + assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); } } @@ -1973,39 +2072,36 @@ public void testSplitAndTransfer6() { * With short strings. */ @Test - public void testSplitAndTransfer7() { + public void testSplitAndTransferWithShortStringsOnAllocatorToAllocator() { final int maxAllocation = 512; + final byte [][] data = new byte[][]{STR4, STR5, STR6}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); final ViewVarCharVector targetVector = newViewVarCharVector("split-target", targetAllocator)) { try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - sourceVector.allocateNew(50, 3); - - sourceVector.set(0, STR4); - sourceVector.set(1, STR5); - sourceVector.set(2, STR6); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // no extra allocation as strings are all inline - assertEquals(allocatedMem, allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR4, sourceVector.get(0)); - assertArrayEquals(STR5, sourceVector.get(1)); - assertArrayEquals(STR6, sourceVector.get(2)); + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR4, targetVector.get(0)); - assertArrayEquals(STR5, targetVector.get(1)); + validateVector.accept(targetVector, data); + } + + final byte [][] binaryData = generateBinaryDataArray(3, 10); + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", 256, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", 256, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); + } + validateVector.accept(targetVector, binaryData); } } @@ -2016,12 +2112,21 @@ public void testSplitAndTransfer7() { * With long strings. 
*/ @Test - public void testSplitAndTransfer8() { + public void testSplitAndTransferWithLongStringsOnAllocatorToAllocator() { final int initialReservation = 1024; // Here we have the target vector being transferred with a long string // hence, the data buffer will be allocated. // The default data buffer allocation takes // BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE + final byte [][] data = new byte[][]{STR1, STR2, STR3}; + final int startIndex = 0; + final int length = 2; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + final int maxAllocation = initialReservation + BaseVariableWidthViewVector.INITIAL_VIEW_VALUE_ALLOCATION * BaseVariableWidthViewVector.ELEMENT_SIZE; try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", @@ -2030,136 +2135,169 @@ public void testSplitAndTransfer8() { try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", initialReservation, maxAllocation); final ViewVarCharVector sourceVector = newViewVarCharVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { - sourceVector.allocateNew(48, 3); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.setValueCount(3); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - sourceVector.splitAndTransferTo(0, 2, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. 
- assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, sourceVector.get(0)); - assertArrayEquals(STR2, sourceVector.get(1)); - assertArrayEquals(STR3, sourceVector.get(2)); + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, data); } - assertArrayEquals(STR1, targetVector.get(0)); - assertArrayEquals(STR2, targetVector.get(1)); + validateVector.accept(targetVector, data); } - } - @Test - public void testReallocAfterVectorTransfer1() { - try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { - /* 4096 values with 16 bytes per record */ - final int bytesPerRecord = 32; - vector.allocateNew(4096 * bytesPerRecord, 4096); - int valueCapacity = vector.getValueCapacity(); - assertTrue(valueCapacity >= 4096); - - /* populate the vector */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + final byte [][] binaryData = generateBinaryDataArray(3, 18); + + try (final BufferAllocator targetAllocator = allocator.newChildAllocator("target-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector targetVector = newViewVarBinaryVector("split-target", targetAllocator)) { + try (final BufferAllocator sourceAllocator = allocator.newChildAllocator("source-alloc", + initialReservation, maxAllocation); + final ViewVarBinaryVector sourceVector = newViewVarBinaryVector(EMPTY_SCHEMA_PATH, sourceAllocator)) { + testSplitAndTransferOnAllocatorToAllocator(targetVector, sourceVector, + startIndex, length, binaryData); } + validateVector.accept(targetVector, binaryData); + } + } - /* Check the vector output */ - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + private void testReallocAfterVectorTransferHelper(BaseVariableWidthViewVector vector, + byte[] str1, byte[] str2) { + /* 4096 values with 16 bytes per record */ + final int bytesPerRecord = 32; + vector.allocateNew(4096 * bytesPerRecord, 4096); + int valueCapacity = vector.getValueCapacity(); + assertTrue(valueCapacity >= 4096); + + /* populate the vector */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* trigger first realloc */ - vector.setSafe(valueCapacity, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); + /* Check the vector output */ + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + /* trigger first realloc */ + vector.setSafe(valueCapacity, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } + + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* 
Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } + + /* trigger second realloc */ + vector.setSafe(valueCapacity + bytesPerRecord, str2, 0, str2.length); + assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); + while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { + vector.reallocViewBuffer(); + vector.reallocViewDataBuffer(); + } - /* trigger second realloc */ - vector.setSafe(valueCapacity + bytesPerRecord, STR2, 0, STR2.length); - assertTrue(vector.getValueCapacity() >= 2 * valueCapacity); - while (vector.getByteCapacity() < bytesPerRecord * vector.getValueCapacity()) { - vector.reallocViewBuffer(); - vector.reallocViewDataBuffer(); + /* populate the remaining vector */ + for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { + if ((i & 1) == 1) { + vector.set(i, str1); + } else { + vector.set(i, str2); } + } - /* populate the remaining vector */ - for (int i = valueCapacity; i < vector.getValueCapacity(); i++) { - if ((i & 1) == 1) { - vector.set(i, STR1); - } else { - vector.set(i, STR2); - } + /* Check the vector output */ + valueCapacity = vector.getValueCapacity(); + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, vector.get(i)); + } else { + assertArrayEquals(str2, vector.get(i)); } + } - /* Check the vector output */ - valueCapacity = vector.getValueCapacity(); - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, vector.get(i)); - } else { - assertArrayEquals(STR2, vector.get(i)); - } + /* We are potentially working with 4x the size of vector buffer + * that we initially started with. + * Now let's transfer the vector. + */ + + TransferPair transferPair = vector.getTransferPair(allocator); + transferPair.transfer(); + BaseVariableWidthViewVector toVector = (BaseVariableWidthViewVector) transferPair.getTo(); + valueCapacity = toVector.getValueCapacity(); + + for (int i = 0; i < valueCapacity; i++) { + if ((i & 1) == 1) { + assertArrayEquals(str1, toVector.get(i)); + } else { + assertArrayEquals(str2, toVector.get(i)); } + } + toVector.close(); + } - /* We are potentially working with 4x the size of vector buffer - * that we initially started with. - * Now let's transfer the vector. 
- */ + @Test + public void testReallocAfterVectorTransfer() { + try (final ViewVarCharVector vector = new ViewVarCharVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, STR1, STR2); + } - TransferPair transferPair = vector.getTransferPair(allocator); - transferPair.transfer(); - ViewVarCharVector toVector = (ViewVarCharVector) transferPair.getTo(); - valueCapacity = toVector.getValueCapacity(); + try (final ViewVarBinaryVector vector = new ViewVarBinaryVector(EMPTY_SCHEMA_PATH, allocator)) { + testReallocAfterVectorTransferHelper(vector, generateRandomBinaryData(12), + generateRandomBinaryData(13)); + } + } - for (int i = 0; i < valueCapacity; i++) { - if ((i & 1) == 1) { - assertArrayEquals(STR1, toVector.get(i)); - } else { - assertArrayEquals(STR2, toVector.get(i)); - } - } + private void testSplitAndTransferWithMultipleDataBuffersHelper(BaseVariableWidthViewVector sourceVector, + BaseVariableWidthViewVector targetVector, int startIndex, int length, byte[][] data) { + sourceVector.allocateNew(48, 4); + + for (int i = 0; i < data.length; i++) { + sourceVector.set(i, data[i]); + } + sourceVector.setValueCount(data.length); + + // we should have multiple data buffers + assertTrue(sourceVector.getDataBuffers().size() > 1); + + final long allocatedMem = allocator.getAllocatedMemory(); + final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); + final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - toVector.close(); + // split and transfer with slice starting at the beginning: + // this should not allocate anything new + sourceVector.splitAndTransferTo(startIndex, length, targetVector); + // we allocate view and data buffers for the target vector + assertTrue(allocatedMem < allocator.getAllocatedMemory()); + + // the refcnts of each buffer for this test should be the same as what + // the source allocator ended up with. + assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); + // since the new view buffer is allocated, the refcnt is the same as the source vector. 
+ assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); + + for (int i = 0; i < data.length; i++) { + assertArrayEquals(data[i], sourceVector.get(i)); } } @@ -2171,45 +2309,31 @@ public void testReallocAfterVectorTransfer1() { * Check multi-data buffer source copying */ @Test - public void testSplitAndTransfer9() { + public void testSplitAndTransferWithMultipleDataBuffers() { + final String str4 = generateRandomString(35); + final byte[][] data = new byte[][]{STR1, STR2, STR3, str4.getBytes(StandardCharsets.UTF_8)}; + final int startIndex = 1; + final int length = 3; + + BiConsumer validateVector = (targetVector, expectedData) -> { + IntStream.range(startIndex, length).forEach(i -> + assertArrayEquals(expectedData[i], targetVector.get(i - startIndex))); + }; + try (final ViewVarCharVector targetVector = new ViewVarCharVector("target", allocator)) { - String str4 = generateRandomString(35); try (final ViewVarCharVector sourceVector = new ViewVarCharVector("source", allocator)) { - sourceVector.allocateNew(48, 4); - - sourceVector.set(0, STR1); - sourceVector.set(1, STR2); - sourceVector.set(2, STR3); - sourceVector.set(3, str4.getBytes(StandardCharsets.UTF_8)); - sourceVector.setValueCount(4); - - // we should have multiple data buffers - assertTrue(sourceVector.getDataBuffers().size() > 1); - - final long allocatedMem = allocator.getAllocatedMemory(); - final int validityRefCnt = sourceVector.getValidityBuffer().refCnt(); - final int dataRefCnt = sourceVector.getDataBuffer().refCnt(); - - // split and transfer with slice starting at the beginning: - // this should not allocate anything new - sourceVector.splitAndTransferTo(1, 3, targetVector); - // we allocate view and data buffers for the target vector - assertTrue(allocatedMem < allocator.getAllocatedMemory()); - - // the refcnts of each buffer for this test should be the same as what - // the source allocator ended up with. - assertEquals(validityRefCnt, sourceVector.getValidityBuffer().refCnt()); - // since the new view buffer is allocated, the refcnt is the same as the source vector. - assertEquals(dataRefCnt, sourceVector.getDataBuffer().refCnt()); - - assertArrayEquals(STR1, sourceVector.get(0)); - assertArrayEquals(STR2, sourceVector.get(1)); - assertArrayEquals(STR3, sourceVector.get(2)); - assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), sourceVector.get(3)); + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); + } + validateVector.accept(targetVector, data); + } + + try (final ViewVarBinaryVector targetVector = new ViewVarBinaryVector("target", allocator)) { + try (final ViewVarBinaryVector sourceVector = new ViewVarBinaryVector("source", allocator)) { + testSplitAndTransferWithMultipleDataBuffersHelper(sourceVector, targetVector, + startIndex, length, data); } - assertArrayEquals(STR2, targetVector.get(0)); - assertArrayEquals(STR3, targetVector.get(1)); - assertArrayEquals(str4.getBytes(StandardCharsets.UTF_8), targetVector.get(2)); + validateVector.accept(targetVector, data); } } From 67c6df1f2addd39b77e0e66e999164acff3d2ae3 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 6 Jun 2024 17:40:49 +0900 Subject: [PATCH 240/261] GH-41964: [CI][C++] Clear cache for mamba on AppVeyor (#41977) ### Rationale for this change It seems that mamba may use invalid download URL when there are invalid caches. ### What changes are included in this PR? Clear caches explicitly. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #41964

Authored-by: Sutou Kouhei
Signed-off-by: Antoine Pitrou
---
 ci/appveyor-cpp-setup.bat | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ci/appveyor-cpp-setup.bat b/ci/appveyor-cpp-setup.bat
index 5c4a11832d5ee..5a9dffa166fb7 100644
--- a/ci/appveyor-cpp-setup.bat
+++ b/ci/appveyor-cpp-setup.bat
@@ -66,6 +66,9 @@ set CONDA_PACKAGES=%CONDA_PACKAGES% --file=ci\conda_env_cpp.txt
 @rem Force conda to use conda-forge
 conda config --add channels conda-forge
 conda config --remove channels defaults
+@rem Ensure using the latest information. If there are invalid caches,
+@rem mamba may use invalid download URL.
+mamba clean --all -y
 @rem Arrow conda environment
 mamba create -n arrow -y -c conda-forge ^
       --file=ci\conda_env_python.txt ^

From 374b8f6ddec3b7614408ea874ffb29981c2a295d Mon Sep 17 00:00:00 2001
From: Sutou Kouhei
Date: Thu, 6 Jun 2024 18:40:30 +0900
Subject: [PATCH 241/261] GH-41903: [CI][GLib] Use the latest Ruby to use OpenSSL 3 (#42001)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

### Rationale for this change

Old Ruby ships OpenSSL 1 but google-cloud-cpp requires OpenSSL 3. We need to use Ruby that ships OpenSSL 3.

### What changes are included in this PR?

Use the latest Ruby.

### Are these changes tested?

Yes.

### Are there any user-facing changes?

No.
* GitHub Issue: #41903

Authored-by: Sutou Kouhei
Signed-off-by: Raúl Cumplido
---
 .github/workflows/ruby.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml
index 35c4460d47bc6..eb00bc5f92a8d 100644
--- a/.github/workflows/ruby.yml
+++ b/.github/workflows/ruby.yml
@@ -197,9 +197,7 @@ jobs:
         mingw-n-bits:
           - 64
         ruby-version:
-          # TODO: Use the latest Ruby again when we fix GH-39130.
-          # - ruby
-          - "3.1"
+          - ruby
     env:
       ARROW_BUILD_STATIC: OFF
       ARROW_BUILD_TESTS: OFF

From cbb0e4cb06fe2cc93295a6b6a9d81dad047feb61 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Thu, 6 Jun 2024 11:18:46 -0400
Subject: [PATCH 242/261] MINOR: [JS] Bump @typescript-eslint/eslint-plugin from 7.11.0 to 7.12.0 in /js (#41950)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bumps [@typescript-eslint/eslint-plugin](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/eslint-plugin) from 7.11.0 to 7.12.0.
Release notes

Sourced from @typescript-eslint/eslint-plugin's releases.

v7.12.0

7.12.0 (2024-06-03)

🚀 Features

  • eslint-plugin: [no-useless-template-literals] rename to no-useless-template-expression (deprecate no-useless-template-literals) (#8821)
  • eslint-plugin: [no-floating-promises] add option 'allowForKnownSafePromises' (#9186)
  • rule-tester: check for parsing errors in suggestion fixes (#9052)
  • rule-tester: port checkDuplicateTestCases from ESLint (#9026)

🩹 Fixes

  • no-useless-template-expression -> no-unnecessary-template-expression (#9174)
  • eslint-plugin: [no-unnecessary-type-assertion] combine template literal check with const variable check (#8820)
  • eslint-plugin: [dot-notation] fix false positive when accessing private/protected property with optional chaining (#8851)
  • eslint-plugin: [explicit-member-accessibility] refine report locations (#8869)
  • eslint-plugin: [no-unnecessary-type-assertion] declares are always defined, so always check declares (#8901)
  • eslint-plugin: [prefer-literal-enum-member] allow using member it self on allowBitwiseExpressions (#9114)
  • eslint-plugin: [return-await] clean up in-try-catch detection and make autofixes safe (#9031)
  • eslint-plugin: [member-ordering] also TSMethodSignature can be get/set (#9193)
  • types: correct typing ParserOptions (#9202)

❤️ Thank You

You can read about our versioning strategy and releases on our website.

Changelog

Sourced from @typescript-eslint/eslint-plugin's changelog.

7.12.0 (2024-06-03)

🚀 Features

  • eslint-plugin: [no-useless-template-literals] rename to no-useless-template-expression (deprecate no-useless-template-literals)
  • rule-tester: check for parsing errors in suggestion fixes
  • rule-tester: port checkDuplicateTestCases from ESLint
  • eslint-plugin: [no-floating-promises] add option 'allowForKnownSafePromises'

🩹 Fixes

  • no-useless-template-expression -> no-unnecessary-template-expression
  • eslint-plugin: [no-unnecessary-type-assertion] combine template literal check with const variable check
  • eslint-plugin: [dot-notation] fix false positive when accessing private/protected property with optional chaining
  • eslint-plugin: [explicit-member-accessibility] refine report locations
  • eslint-plugin: [no-unnecessary-type-assertion] declares are always defined, so always check declares
  • eslint-plugin: [prefer-literal-enum-member] allow using member it self on allowBitwiseExpressions
  • eslint-plugin: [return-await] clean up in-try-catch detection and make autofixes safe
  • eslint-plugin: [member-ordering] also TSMethodSignature can be get/set

❤️ Thank You

  • Abraham Guo
  • Han Yeong-woo
  • Joshua Chen
  • Kim Sang Du
  • Kirk Waiblinger
  • YeonJuan

You can read about our versioning strategy and releases on our website.

Commits
  • 7e93b28 chore(release): publish 7.12.0
  • d0adcf1 docs: clarify what require-await does (#9200)
  • 04990d5 feat(eslint-plugin): [no-floating-promises] add option 'allowForKnownSafeProm...
  • ad85249 docs: mention related ESLint rules in no-unused-vars page (#9198)
  • e80a8d6 docs: improve description for no-dynamic-delete (#9195)
  • 9f92b30 docs: explicitly mention unbound-method limitation with thisArg (#9197)
  • 08a9448 docs: add example with PascalCase function components (#9196)
  • 5ca7f6e feat(rule-tester): port checkDuplicateTestCases from ESLint (#9026)
  • a9dd526 fix(eslint-plugin): [member-ordering] also TSMethodSignature can be get/set (...
  • 2619c3b fix(eslint-plugin): [return-await] clean up in-try-catch detection and make a...
  • Additional commits viewable in compare view

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=@typescript-eslint/eslint-plugin&package-manager=npm_and_yarn&previous-version=7.11.0&new-version=7.12.0)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- js/package.json | 2 +- js/yarn.lock | 77 ++++++++++++++----------------------------------- 2 files changed, 22 insertions(+), 57 deletions(-) diff --git a/js/package.json b/js/package.json index e7be3c80d82c9..ecb6d3a366f7e 100644 --- a/js/package.json +++ b/js/package.json @@ -72,7 +72,7 @@ "@types/glob": "8.1.0", "@types/jest": "29.5.12", "@types/multistream": "4.1.3", - "@typescript-eslint/eslint-plugin": "7.11.0", + "@typescript-eslint/eslint-plugin": "7.12.0", "@typescript-eslint/parser": "7.12.0", "async-done": "2.0.0", "benny": "3.7.1", diff --git a/js/yarn.lock b/js/yarn.lock index 3cf3284a9f306..5ab52beaf8f15 100644 --- a/js/yarn.lock +++ b/js/yarn.lock @@ -1434,16 +1434,16 @@ dependencies: "@types/yargs-parser" "*" -"@typescript-eslint/eslint-plugin@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.11.0.tgz#f90f0914657ead08e1c75f66939c926edeab42dd" - integrity sha512-P+qEahbgeHW4JQ/87FuItjBj8O3MYv5gELDzr8QaQ7fsll1gSMTYb6j87MYyxwf3DtD7uGFB9ShwgmCJB5KmaQ== +"@typescript-eslint/eslint-plugin@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.12.0.tgz#f87a32e8972b8a60024f2f8f12205e7c8108bc41" + integrity sha512-7F91fcbuDf/d3S8o21+r3ZncGIke/+eWk0EpO21LXhDfLahriZF9CGj4fbAetEjlaBdjdSm9a6VeXbpbT6Z40Q== dependencies: "@eslint-community/regexpp" "^4.10.0" - "@typescript-eslint/scope-manager" "7.11.0" - "@typescript-eslint/type-utils" "7.11.0" - "@typescript-eslint/utils" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/type-utils" "7.12.0" + "@typescript-eslint/utils" "7.12.0" + "@typescript-eslint/visitor-keys" "7.12.0" graphemer "^1.4.0" ignore "^5.3.1" natural-compare "^1.4.0" @@ -1460,14 +1460,6 @@ "@typescript-eslint/visitor-keys" "7.12.0" debug "^4.3.4" -"@typescript-eslint/scope-manager@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.11.0.tgz#cf5619b01de62a226a59add15a02bde457335d1d" - integrity sha512-27tGdVEiutD4POirLZX4YzT180vevUURJl4wJGmm6TrQoiYwuxTIY98PBp6L2oN+JQxzE0URvYlzJaBHIekXAw== - dependencies: - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" - "@typescript-eslint/scope-manager@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/scope-manager/-/scope-manager-7.12.0.tgz#259c014362de72dd34f995efe6bd8dda486adf58" @@ -1476,40 +1468,21 @@ "@typescript-eslint/types" "7.12.0" "@typescript-eslint/visitor-keys" "7.12.0" -"@typescript-eslint/type-utils@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.11.0.tgz#ac216697d649084fedf4a910347b9642bd0ff099" - integrity sha512-WmppUEgYy+y1NTseNMJ6mCFxt03/7jTOy08bcg7bxJJdsM4nuhnchyBbE8vryveaJUf62noH7LodPSo5Z0WUCg== +"@typescript-eslint/type-utils@7.12.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/type-utils/-/type-utils-7.12.0.tgz#9dfaaa1972952f395ec5be4f5bbfc4d3cdc63908" + integrity sha512-lib96tyRtMhLxwauDWUp/uW3FMhLA6D0rJ8T7HmH7x23Gk1Gwwu8UZ94NMXBvOELn6flSPiBrCKlehkiXyaqwA== dependencies: - "@typescript-eslint/typescript-estree" "7.11.0" - "@typescript-eslint/utils" "7.11.0" + "@typescript-eslint/typescript-estree" "7.12.0" + "@typescript-eslint/utils" "7.12.0" debug "^4.3.4" 
ts-api-utils "^1.3.0" -"@typescript-eslint/types@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.11.0.tgz#5e9702a5e8b424b7fc690e338d359939257d6722" - integrity sha512-MPEsDRZTyCiXkD4vd3zywDCifi7tatc4K37KqTprCvaXptP7Xlpdw0NR2hRJTetG5TxbWDB79Ys4kLmHliEo/w== - "@typescript-eslint/types@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/types/-/types-7.12.0.tgz#bf208f971a8da1e7524a5d9ae2b5f15192a37981" integrity sha512-o+0Te6eWp2ppKY3mLCU+YA9pVJxhUJE15FV7kxuD9jgwIAa+w/ycGJBMrYDTpVGUM/tgpa9SeMOugSabWFq7bg== -"@typescript-eslint/typescript-estree@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.11.0.tgz#7cbc569bc7336c3a494ceaf8204fdee5d5dbb7fa" - integrity sha512-cxkhZ2C/iyi3/6U9EPc5y+a6csqHItndvN/CzbNXTNrsC3/ASoYQZEt9uMaEp+xFNjasqQyszp5TumAVKKvJeQ== - dependencies: - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/visitor-keys" "7.11.0" - debug "^4.3.4" - globby "^11.1.0" - is-glob "^4.0.3" - minimatch "^9.0.4" - semver "^7.6.0" - ts-api-utils "^1.3.0" - "@typescript-eslint/typescript-estree@7.12.0": version "7.12.0" resolved "https://registry.yarnpkg.com/@typescript-eslint/typescript-estree/-/typescript-estree-7.12.0.tgz#e6c1074f248b3db6573ab6a7c47a39c4cd498ff9" @@ -1524,23 +1497,15 @@ semver "^7.6.0" ts-api-utils "^1.3.0" -"@typescript-eslint/utils@7.11.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.11.0.tgz#524f047f2209959424c3ef689b0d83b3bc09919c" - integrity sha512-xlAWwPleNRHwF37AhrZurOxA1wyXowW4PqVXZVUNCLjB48CqdPJoJWkrpH2nij9Q3Lb7rtWindtoXwxjxlKKCA== +"@typescript-eslint/utils@7.12.0", "@typescript-eslint/utils@^6.0.0 || ^7.0.0": + version "7.12.0" + resolved "https://registry.yarnpkg.com/@typescript-eslint/utils/-/utils-7.12.0.tgz#c6e58fd7f724cdccc848f71e388ad80cbdb95dd0" + integrity sha512-Y6hhwxwDx41HNpjuYswYp6gDbkiZ8Hin9Bf5aJQn1bpTs3afYY4GX+MPYxma8jtoIV2GRwTM/UJm/2uGCVv+DQ== dependencies: "@eslint-community/eslint-utils" "^4.4.0" - "@typescript-eslint/scope-manager" "7.11.0" - "@typescript-eslint/types" "7.11.0" - "@typescript-eslint/typescript-estree" "7.11.0" - -"@typescript-eslint/visitor-keys@7.11.0": - version "7.11.0" - resolved "https://registry.yarnpkg.com/@typescript-eslint/visitor-keys/-/visitor-keys-7.11.0.tgz#2c50cd292e67645eec05ac0830757071b4a4d597" - integrity sha512-7syYk4MzjxTEk0g/w3iqtgxnFQspDJfn6QKD36xMuuhTzjcxY7F8EmBLnALjVyaOF1/bVocu3bS/2/F7rXrveQ== - dependencies: - "@typescript-eslint/types" "7.11.0" - eslint-visitor-keys "^3.4.3" + "@typescript-eslint/scope-manager" "7.12.0" + "@typescript-eslint/types" "7.12.0" + "@typescript-eslint/typescript-estree" "7.12.0" "@typescript-eslint/visitor-keys@7.12.0": version "7.12.0" From 93712bfc71a5013231b950b2b655d77b14f83fa7 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Thu, 6 Jun 2024 17:38:48 +0200 Subject: [PATCH 243/261] GH-41691: [Doc] Remove notion of "logical type" (#41958) In several places in the Arrow specification and documentation we use the term "logical types", but we don't use it consistently and we don't actually have physical types (only physical layouts) to contrast it with. This creates confusion for readers as it is not immediately clear whether all data types are "logical" and if there is a meaningful distinction behind our usage of this term. 
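To make this concrete, here is a minimal sketch using the Arrow C++ API (an illustration for this rationale, not part of the patch): every data type, "logical" or not, is simply a possibly parametric type description mapped onto a physical layout.

```cpp
#include <arrow/api.h>

#include <iostream>

int main() {
  // Timestamp is parametric: it needs a unit and a timezone.
  std::shared_ptr<arrow::DataType> ts =
      arrow::timestamp(arrow::TimeUnit::MICRO, "UTC");
  // Decimal128 is parametric too; its values are stored as 16 bytes
  // in a fixed-size binary layout.
  std::shared_ptr<arrow::DataType> dec =
      arrow::decimal128(/*precision=*/38, /*scale=*/9);
  std::cout << ts->ToString() << "\n" << dec->ToString() << std::endl;
  return 0;
}
```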
Also address GH-14752 by adding a table of data types with their respective parameters and the corresponding layouts. * GitHub Issue: #41691 Authored-by: Antoine Pitrou Signed-off-by: Antoine Pitrou --- docs/source/format/Columnar.rst | 160 ++++++++++++++++++++----- docs/source/format/Versioning.rst | 2 +- docs/source/python/data.rst | 14 +-- docs/source/python/extending_types.rst | 2 +- 4 files changed, 138 insertions(+), 40 deletions(-) diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index 7c853de7829be..c4dc772808af4 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -70,21 +70,131 @@ concepts, here is a small glossary to help disambiguate. without taking into account any value semantics. For example, a 32-bit signed integer array and 32-bit floating point array have the same layout. -* **Parent** and **child arrays**: names to express relationships - between physical value arrays in a nested type structure. For - example, a ``List``-type parent array has a T-type array as its - child (see more on lists below). +* **Data type**: An application-facing semantic value type that is + implemented using some physical layout. For example, Decimal128 + values are stored as 16 bytes in a fixed-size binary + layout. A timestamp may be stored as 64-bit fixed-size layout. * **Primitive type**: a data type having no child types. This includes such types as fixed bit-width, variable-size binary, and null types. * **Nested type**: a data type whose full structure depends on one or more other child types. Two fully-specified nested types are equal if and only if their child types are equal. For example, ``List`` is distinct from ``List`` iff U and V are different types. -* **Logical type**: An application-facing semantic value type that is - implemented using some physical layout. For example, Decimal - values are stored as 16 bytes in a fixed-size binary - layout. Similarly, strings can be stored as ``List<1-byte>``. A - timestamp may be stored as 64-bit fixed-size layout. +* **Parent** and **child arrays**: names to express relationships + between physical value arrays in a nested type structure. For + example, a ``List``-type parent array has a T-type array as its + child (see more on lists below). +* **Parametric type**: a type which requires additional parameters + for full determination of its semantics. For example, all nested types + are parametric by construction. A timestamp is also parametric as it needs + a unit (such as microseconds) and a timezone. + +Data Types +========== + +The file `Schema.fbs`_ defines built-in data types supported by the +Arrow columnar format. Each data type uses a well-defined physical layout. + +`Schema.fbs`_ is the authoritative source for the description of the +standard Arrow data types. 
However, we also provide the below table for +convenience: + ++--------------------+------------------------------+------------------------------------------------------------+ +| Type | Type Parameters *(1)* | Physical Memory Layout | ++====================+==============================+============================================================+ +| Null | | Null | ++--------------------+------------------------------+------------------------------------------------------------+ +| Boolean | | Fixed-size Primitive | ++--------------------+------------------------------+------------------------------------------------------------+ +| Int | * bit width | *" (same as above)* | +| | * signedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Floating Point | * precision | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Decimal | * bit width | *"* | +| | * scale | | +| | * precision | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Date | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Time | * bit width *(2)* | *"* | +| | * unit | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Timestamp | * unit | *"* | +| | * timezone | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Interval | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Duration | * unit | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Fixed-Size Binary | * byte width | Fixed-size Binary | ++--------------------+------------------------------+------------------------------------------------------------+ +| Binary | | Variable-size Binary with 32-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Utf8 | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large Binary | | Variable-size Binary with 64-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large Utf8 | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Binary View | | Variable-size Binary View | ++--------------------+------------------------------+------------------------------------------------------------+ +| Utf8 View | | *"* | ++--------------------+------------------------------+------------------------------------------------------------+ +| Fixed-Size List | * *value type* | Fixed-size List | +| | * list size | | ++--------------------+------------------------------+------------------------------------------------------------+ +| List | * *value type* | Variable-size List with 32-bit offsets | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large List | * *value type* | Variable-size List with 64-bit offsets | 
++--------------------+------------------------------+------------------------------------------------------------+ +| List View | * *value type* | Variable-size List View with 32-bit offsets and sizes | ++--------------------+------------------------------+------------------------------------------------------------+ +| Large List View | * *value type* | Variable-size List View with 64-bit offsets and sizes | ++--------------------+------------------------------+------------------------------------------------------------+ +| Struct | * *children* | Struct | ++--------------------+------------------------------+------------------------------------------------------------+ +| Map | * *children* | Variable-size List of Structs | +| | * keys sortedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Union | * *children* | Dense or Sparse Union *(3)* | +| | * mode | | +| | * type ids | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Dictionary | * *index type* *(4)* | Dictionary Encoded | +| | * *value type* | | +| | * orderedness | | ++--------------------+------------------------------+------------------------------------------------------------+ +| Run-End Encoded | * *run end type* *(5)* | Run-End Encoded | +| | * *value type* | | ++--------------------+------------------------------+------------------------------------------------------------+ + +* \(1) Type parameters listed in *italics* denote a data type's child types. + +* \(2) The *bit width* parameter of a Time type is technically redundant as + each *unit* mandates a single bit width. + +* \(3) Whether a Union type uses the Sparse or Dense layout is denoted by its + *mode* parameter. + +* \(4) The *index type* of a Dictionary type can only be an integer type, + preferably signed, with width 8 to 64 bits. + +* \(5) The *run end type* of a Run-End Encoded type can only be a signed integer type + with width 16 to 64 bits. + +.. note:: + Sometimes the term "logical type" is used to denote the Arrow data types + and distinguish them from their respective physical layouts. However, + unlike other type systems such as `Apache Parquet `__'s, + the Arrow type system doesn't have separate notions of physical types and + logical types. + + The Arrow type system separately provides + :ref:`extension types `, which allow + annotating standard Arrow data types with richer application-facing semantics + (for example defining a "JSON" type laid upon the standard String data type). + .. _format_layout: @@ -93,7 +203,7 @@ Physical Memory Layout Arrays are defined by a few pieces of metadata and data: -* A logical data type. +* A data type. * A sequence of buffers. * A length as a 64-bit signed integer. Implementations are permitted to be limited to 32-bit lengths, see more on this below. @@ -103,8 +213,8 @@ Arrays are defined by a few pieces of metadata and data: Nested arrays additionally have a sequence of one or more sets of these items, called the **child arrays**. -Each logical data type has a well-defined physical layout. Here are -the different physical layouts defined by Arrow: +Each data type has a well-defined physical layout. 
Here are the different +physical layouts defined by Arrow: * **Primitive (fixed-size)**: a sequence of values each having the same byte or bit width @@ -138,7 +248,7 @@ the different physical layouts defined by Arrow: * **Run-End Encoded (REE)**: a nested layout consisting of two child arrays, one representing values, and one representing the logical index where the run of a corresponding value ends. -* **Null**: a sequence of all null values, having null logical type +* **Null**: a sequence of all null values. The Arrow columnar memory layout only applies to *data* and not *metadata*. Implementations are free to represent metadata in-memory @@ -313,7 +423,7 @@ arrays have a single values buffer, variable-size binary have an **offsets** buffer and **data** buffer. The offsets buffer contains ``length + 1`` signed integers (either -32-bit or 64-bit, depending on the logical type), which encode the +32-bit or 64-bit, depending on the data type), which encode the start position of each slot in the data buffer. The length of the value in each slot is computed using the difference between the offset at that slot's index and the subsequent offset. For example, the @@ -1070,17 +1180,6 @@ of memory buffers for each layout. "Dictionary-encoded",validity,data (indices),, "Run-end encoded",,,, -Logical Types -============= - -The `Schema.fbs`_ defines built-in logical types supported by the -Arrow columnar format. Each logical type uses one of the above -physical layouts. Nested logical types may have different physical -layouts depending on the particular realization of the type. - -We do not go into detail about the logical types definitions in this -document as we consider `Schema.fbs`_ to be authoritative. - .. _format-ipc: Serialization and Interprocess Communication (IPC) @@ -1160,17 +1259,16 @@ Schema message -------------- The Flatbuffers files `Schema.fbs`_ contains the definitions for all -built-in logical data types and the ``Schema`` metadata type which -represents the schema of a given record batch. A schema consists of -an ordered sequence of fields, each having a name and type. A -serialized ``Schema`` does not contain any data buffers, only type -metadata. +built-in data types and the ``Schema`` metadata type which represents +the schema of a given record batch. A schema consists of an ordered +sequence of fields, each having a name and type. A serialized ``Schema`` +does not contain any data buffers, only type metadata. The ``Field`` Flatbuffers type contains the metadata for a single array. This includes: * The field's name -* The field's logical type +* The field's data type * Whether the field is semantically nullable. While this has no bearing on the array's physical layout, many systems distinguish nullable and non-nullable fields and we want to allow them to diff --git a/docs/source/format/Versioning.rst b/docs/source/format/Versioning.rst index 7ba01107074d0..8fcf11b21f0cc 100644 --- a/docs/source/format/Versioning.rst +++ b/docs/source/format/Versioning.rst @@ -51,7 +51,7 @@ data. An increase in the **minor** version of the format version, such as 1.0.0 to 1.1.0, indicates that 1.1.0 contains new features not available in 1.0.0. So long as these features are not used (such as a -new logical data type), forward compatibility is preserved. +new data type), forward compatibility is preserved. 
Long-Term Stability =================== diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 598c8c125fb83..4a0f2af6d4868 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -26,8 +26,8 @@ with memory buffers, like the ones explained in the documentation on :ref:`Memory and IO `. These data structures are exposed in Python through a series of interrelated classes: -* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe a logical - array type +* **Type Metadata**: Instances of ``pyarrow.DataType``, which describe the + type of an array and govern how its values are interpreted * **Schemas**: Instances of ``pyarrow.Schema``, which describe a named collection of types. These can be thought of as the column types in a table-like object. @@ -55,8 +55,8 @@ array data. These include: * **Nested types**: list, map, struct, and union * **Dictionary type**: An encoded categorical type (more on this later) -Each logical data type in Arrow has a corresponding factory function for -creating an instance of that type object in Python: +Each data type in Arrow has a corresponding factory function for creating +an instance of that type object in Python: .. ipython:: python @@ -72,9 +72,9 @@ creating an instance of that type object in Python: print(t4) print(t5) -We use the name **logical type** because the **physical** storage may be the -same for one or more types. For example, ``int64``, ``float64``, and -``timestamp[ms]`` all occupy 64 bits per value. +.. note:: + Different data types might use a given physical storage. For example, + ``int64``, ``float64``, and ``timestamp[ms]`` all occupy 64 bits per value. These objects are ``metadata``; they are used for describing the data in arrays, schemas, and record batches. In Python, they can be used in functions where the diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index 83fce84f47c08..d746505348157 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -118,7 +118,7 @@ Defining extension types ("user-defined types") Arrow has the notion of extension types in the metadata specification as a possibility to extend the built-in types. This is done by annotating any of the -built-in Arrow logical types (the "storage type") with a custom type name and +built-in Arrow data types (the "storage type") with a custom type name and optional serialized representation ("ARROW:extension:name" and "ARROW:extension:metadata" keys in the Field’s custom_metadata of an IPC message). From 164be4882176c5f84eb7cde52b98d69a72fe7ea8 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Thu, 6 Jun 2024 14:24:47 -0300 Subject: [PATCH 244/261] GH-41994 [C++]: kernel.cc: Remove defaults on switch so that compiler can check full enum coverage for us (#41995) ### Rationale for this change To let the compiler warn us about missing cases and make the non-handled cases more obvious. ### What changes are included in this PR? Removal of `default` in the switches and improving some dchecks with a message. ### Are these changes tested? By existing tests. 
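As a standalone sketch of the pattern (hypothetical enum, not Arrow's actual code): with no `default:` branch, compilers can warn about unhandled enumerators, e.g. via GCC/Clang's `-Wswitch`, while the remaining case is handled after the switch.

```cpp
enum class OutputKind { kFixed, kComputed };

const char* ToString(OutputKind kind) {
  switch (kind) {
    case OutputKind::kFixed:
      return "fixed";
    case OutputKind::kComputed:
      break;
  }
  // Adding a new OutputKind enumerator now triggers a -Wswitch warning
  // above instead of silently falling into a default branch.
  return "computed";
}
```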
* GitHub Issue: #41994 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/compute/kernel.cc | 42 ++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/cpp/src/arrow/compute/kernel.cc b/cpp/src/arrow/compute/kernel.cc index 9cc5cc10917ee..5c87ef2cd0561 100644 --- a/cpp/src/arrow/compute/kernel.cc +++ b/cpp/src/arrow/compute/kernel.cc @@ -361,7 +361,8 @@ size_t InputType::Hash() const { case InputType::EXACT_TYPE: hash_combine(result, type_->Hash()); break; - default: + case InputType::ANY_TYPE: + case InputType::USE_TYPE_MATCHER: break; } return result; @@ -378,10 +379,8 @@ std::string InputType::ToString() const { break; case InputType::USE_TYPE_MATCHER: { ss << type_matcher_->ToString(); - } break; - default: - DCHECK(false); break; + } } return ss.str(); } @@ -400,9 +399,8 @@ bool InputType::Equals(const InputType& other) const { return type_->Equals(*other.type_); case InputType::USE_TYPE_MATCHER: return type_matcher_->Equals(*other.type_matcher_); - default: - return false; } + return false; } bool InputType::Matches(const DataType& type) const { @@ -411,21 +409,23 @@ bool InputType::Matches(const DataType& type) const { return type_->Equals(type); case InputType::USE_TYPE_MATCHER: return type_matcher_->Matches(type); - default: - // ANY_TYPE + case InputType::ANY_TYPE: return true; } + return false; } bool InputType::Matches(const Datum& value) const { switch (value.kind()) { + case Datum::NONE: + case Datum::RECORD_BATCH: + case Datum::TABLE: + DCHECK(false) << "Matches expects ARRAY, CHUNKED_ARRAY or SCALAR"; + return false; case Datum::ARRAY: case Datum::CHUNKED_ARRAY: case Datum::SCALAR: break; - default: - DCHECK(false); - return false; } return Matches(*value.type()); } @@ -445,11 +445,13 @@ const TypeMatcher& InputType::type_matcher() const { Result OutputType::Resolve(KernelContext* ctx, const std::vector& types) const { - if (kind_ == OutputType::FIXED) { - return type_.get(); - } else { - return resolver_(ctx, types); + switch (kind_) { + case OutputType::FIXED: + return type_; + case OutputType::COMPUTED: + break; } + return resolver_(ctx, types); } const std::shared_ptr& OutputType::type() const { @@ -463,11 +465,13 @@ const OutputType::Resolver& OutputType::resolver() const { } std::string OutputType::ToString() const { - if (kind_ == OutputType::FIXED) { - return type_->ToString(); - } else { - return "computed"; + switch (kind_) { + case OutputType::FIXED: + return type_->ToString(); + case OutputType::COMPUTED: + break; } + return "computed"; } // ---------------------------------------------------------------------- From 41ae29ebd98e66d1502d6a830f88d6da056c670e Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Fri, 7 Jun 2024 10:15:50 +0900 Subject: [PATCH 245/261] GH-42005: [Java][Integration][CI] Fix ARROW_BUILD_ROOT Path to find pom.xml (#42008) ### Rationale for this change This PR aims to fix the issue where the integration tests are failing due to the missing `/java/pom.xml` file. It appears that the current code incorrectly determines the path to `ARROW_BUILD_ROOT`, leading to the failure in locating the `pom.xml` file. ### What changes are included in this PR? - Updating the `ARROW_BUILD_ROOT` path determination logic in `tester_java.py` to correctly reference the project root. ### Are these changes tested? Maybe, Yes. ### Are there any user-facing changes? No. 
* GitHub Issue: #42005 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- dev/archery/archery/integration/tester_java.py | 2 +- dev/archery/archery/integration/tester_js.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/archery/archery/integration/tester_java.py b/dev/archery/archery/integration/tester_java.py index ccc807410a848..9b14c6939cde8 100644 --- a/dev/archery/archery/integration/tester_java.py +++ b/dev/archery/archery/integration/tester_java.py @@ -28,7 +28,7 @@ ARROW_BUILD_ROOT = os.environ.get( 'ARROW_BUILD_ROOT', - Path(__file__).resolve().parents[5] + Path(__file__).resolve().parents[4] ) diff --git a/dev/archery/archery/integration/tester_js.py b/dev/archery/archery/integration/tester_js.py index 3d1a229931cde..dcf56f9a5ab6b 100644 --- a/dev/archery/archery/integration/tester_js.py +++ b/dev/archery/archery/integration/tester_js.py @@ -24,7 +24,7 @@ ARROW_BUILD_ROOT = os.environ.get( 'ARROW_BUILD_ROOT', - Path(__file__).resolve().parents[5] + Path(__file__).resolve().parents[4] ) ARROW_JS_ROOT = os.path.join(ARROW_BUILD_ROOT, 'js') _EXE_PATH = os.path.join(ARROW_JS_ROOT, 'bin') From a708fabfe6f90a890978d8f026c70cdf18caf251 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 6 Jun 2024 19:25:36 -0700 Subject: [PATCH 246/261] MINOR: [C++] Include `` before using `std::string` (#42004) ### Rationale for this change I work on MSVC's STL, and we regularly build popular open-source projects, including yours, with development builds of the MSVC toolset. This allows us to find and fix toolset regressions before they affect users, and also allows us to provide advance notice of breaking changes, which is the case here. We recently merged https://github.com/microsoft/STL/pull/4633 which will ship in VS 2022 17.11 Preview 3. This improved build throughput by refactoring `` so that it no longer drags in `std::string`. It's also a source-breaking change for code that wasn't properly including ``. Your `cpp/src/arrow/json/object_writer.h` declares `std::string Serialize();` without including ``. When built with our updated STL, this will emit a compiler error: ``` C:\gitP\apache\arrow\cpp\src\arrow/json/object_writer.h(39): error C2039: 'string': is not a member of 'std' ``` ### What changes are included in this PR? The fix is simple and portable: include the necessary header. ### Are these changes tested? Nope, I'm totally YOLOing it. If it builds, it's good. (This will be tested in MSVC's internal "Real World Code" test infrastructure. Also, after VS 2022 17.11 ships, your existing build/test coverage will ensure that this keeps compiling.) ### Are there any user-facing changes? No. Authored-by: Stephan T. Lavavej Signed-off-by: Sutou Kouhei --- cpp/src/arrow/json/object_writer.h | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/arrow/json/object_writer.h b/cpp/src/arrow/json/object_writer.h index b15b09dbdacfc..cf1ce62194fb8 100644 --- a/cpp/src/arrow/json/object_writer.h +++ b/cpp/src/arrow/json/object_writer.h @@ -18,6 +18,7 @@ #pragma once #include +#include #include #include "arrow/util/visibility.h" From 290e606c4dd937cd34dbccd6f6801ff1ac1d8b9b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 7 Jun 2024 11:34:22 +0900 Subject: [PATCH 247/261] GH-41652: [C++][CMake][Windows] Don't build needless object libraries (#41658) ### Rationale for this change * We don't need an object library for a shared library with `ARROW_BUILD_SHARED=OFF`. * We don't need an object library for a static library with `ARROW_BUILD_STATIC=OFF`. 
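A reduced sketch of the resulting pattern (the option and target names here are invented; the real macro is `arrow_add_object_library` in `cpp/src/arrow/CMakeLists.txt`):

```
option(DEMO_BUILD_SHARED "Build the shared flavor" ON)
option(DEMO_BUILD_STATIC "Build the static flavor" OFF)

set(targets)
if(DEMO_BUILD_SHARED)
  # The object library is only materialized when the shared library is requested.
  add_library(demo_shared OBJECT demo.cc)
  target_compile_definitions(demo_shared PRIVATE DEMO_EXPORTING)
  list(APPEND targets demo_shared)
endif()
if(DEMO_BUILD_STATIC)
  add_library(demo_static OBJECT demo.cc)
  target_compile_definitions(demo_static PRIVATE DEMO_STATIC)
  list(APPEND targets demo_static)
endif()
```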
### What changes are included in this PR? Don't build needless object libraries based on `ARROW_BUILD_SHARED`/`ARROW_BUILD_STATIC`. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. * GitHub Issue: #41652 Authored-by: Sutou Kouhei Signed-off-by: Sutou Kouhei --- cpp/src/arrow/CMakeLists.txt | 37 +++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 150a304975cad..5bcd4625b3b67 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -200,22 +200,29 @@ function(arrow_add_object_library PREFIX) set(SOURCES ${ARGN}) string(TOLOWER "${PREFIX}" prefix) if(WIN32) - add_library(${prefix}_shared OBJECT ${SOURCES}) - add_library(${prefix}_static OBJECT ${SOURCES}) - set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) - set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) - target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) - target_compile_features(${prefix}_shared PRIVATE cxx_std_17) - target_compile_features(${prefix}_static PRIVATE cxx_std_17) - set(${PREFIX}_TARGET_SHARED - ${prefix}_shared - PARENT_SCOPE) - set(${PREFIX}_TARGET_STATIC - ${prefix}_static - PARENT_SCOPE) + set(targets) + if(ARROW_BUILD_SHARED) + add_library(${prefix}_shared OBJECT ${SOURCES}) + set_target_properties(${prefix}_shared PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_shared PRIVATE ARROW_EXPORTING) + target_compile_features(${prefix}_shared PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_SHARED + ${prefix}_shared + PARENT_SCOPE) + list(APPEND targets ${prefix}_shared) + endif() + if(ARROW_BUILD_STATIC) + add_library(${prefix}_static OBJECT ${SOURCES}) + set_target_properties(${prefix}_static PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(${prefix}_static PRIVATE ARROW_STATIC) + target_compile_features(${prefix}_static PRIVATE cxx_std_17) + set(${PREFIX}_TARGET_STATIC + ${prefix}_static + PARENT_SCOPE) + list(APPEND targets ${prefix}_static) + endif() set(${PREFIX}_TARGETS - ${prefix}_shared ${prefix}_static + ${targets} PARENT_SCOPE) else() add_library(${prefix} OBJECT ${SOURCES}) From 01d2fa0d461869a07b2ffeee517beb8116bd0ce2 Mon Sep 17 00:00:00 2001 From: Laurent Goujon Date: Thu, 6 Jun 2024 23:19:06 -0700 Subject: [PATCH 248/261] GH-41307: [Java] Use org.apache:apache parent pom version 31 (#41772) Use/update Maven modules to `org.apache:apache:31` and clean up Maven modules to remove unnecessary configuration or outdated workarounds * Add `org.apache:apache:31` to `org.apache.arrow:arrow-bom` and `org.apache.arrow.maven.plugins:arrow-maven-plugins` to make them conformant with ASF standards * Update `org.apache.arrow:arrow-java-root` parent to `org.apache:parent:31` * Use `version.*` and other properties to override plugin versions defined by `org.apache:parent` * Move standalone plugin versions under pluginManagement at the top level * Cleanup redundant plugin version or configuration declaration * Update `maven-dependency-plugin` to 3.6.1 and add the required overrides when necessary * Update `maven-shade-plugin` to 3.5.1 (via `org.apache:parent`) - disable reduced dependency pom creation for non-terminal modules * Remove enforcer check for java and maven version (handled by `org.apache:parent`) * Remove unnecessary `mvnrepository` link comments * Remove 
`m2e.version` property check in profiles (only needed for errorprone plugin configuration which is incompatible with M2E) * Cleanup `argLine` overrides for surefire/failsafe plugins * Remove unnecessary `../pom.xml` `` directives * Remove source/target/encoding configuration properties for `maven-compiler-plugin`, `maven-javadoc-plugin` and `maven-resources-plugin` as it is handled by `org.apache:parent` and plugin themselves * Remove unnecessary copy of codegen templates in `arrow-vector` module * Remove unnecessary junit jupiter engine dependencies for surefire/failsafe plugins. * GitHub Issue: #41307 Lead-authored-by: Laurent Goujon Co-authored-by: Laurent Goujon Signed-off-by: David Li --- ci/scripts/java_full_build.sh | 10 +- dev/tasks/tasks.yml | 2 - java/adapter/avro/pom.xml | 9 - java/adapter/jdbc/pom.xml | 7 - java/adapter/orc/pom.xml | 17 ++ java/bom/pom.xml | 44 +++- java/c/pom.xml | 1 - java/flight/flight-core/pom.xml | 29 +-- java/flight/flight-integration-tests/pom.xml | 2 - java/flight/flight-sql-jdbc-core/pom.xml | 10 - java/flight/flight-sql-jdbc-driver/pom.xml | 1 - java/flight/flight-sql/pom.xml | 5 - java/format/pom.xml | 2 - java/gandiva/pom.xml | 19 +- .../module-info-compiler-maven-plugin/pom.xml | 28 +-- java/maven/pom.xml | 120 +++++----- java/memory/memory-core/pom.xml | 22 +- java/performance/pom.xml | 41 +--- java/pom.xml | 207 +++++++----------- java/tools/pom.xml | 22 +- java/vector/pom.xml | 94 +------- 21 files changed, 226 insertions(+), 466 deletions(-) diff --git a/ci/scripts/java_full_build.sh b/ci/scripts/java_full_build.sh index 2734f3e9dbec2..d914aa2d8472e 100755 --- a/ci/scripts/java_full_build.sh +++ b/ci/scripts/java_full_build.sh @@ -49,21 +49,13 @@ fi # build the entire project mvn clean \ install \ - assembly:single \ - source:jar \ - javadoc:jar \ -Papache-release \ -Parrow-c-data \ -Parrow-jni \ -Darrow.cpp.build.dir=$dist_dir \ - -Darrow.c.jni.dist.dir=$dist_dir \ - -DdescriptorId=source-release + -Darrow.c.jni.dist.dir=$dist_dir # copy all jar, zip and pom files to the distribution folder -find . 
\ - "(" -name "*-javadoc.jar" -o -name "*-sources.jar" ")" \ - -exec echo {} ";" \ - -exec cp {} $dist_dir ";" find ~/.m2/repository/org/apache/arrow \ "(" \ -name "*.jar" -o \ diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml index d8e09ec2070bb..2d84751d0f363 100644 --- a/dev/tasks/tasks.yml +++ b/dev/tasks/tasks.yml @@ -747,7 +747,6 @@ tasks: - arrow-jdbc-{no_rc_snapshot_version}.pom - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.json - arrow-maven-plugins-{no_rc_snapshot_version}-cyclonedx.xml - - arrow-maven-plugins-{no_rc_snapshot_version}-src.zip - arrow-maven-plugins-{no_rc_snapshot_version}.pom - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.json - arrow-memory-core-{no_rc_snapshot_version}-cyclonedx.xml @@ -851,7 +850,6 @@ tasks: - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-cyclonedx.xml - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-javadoc.jar - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-sources.jar - - module-info-compiler-maven-plugin-{no_rc_snapshot_version}-src.zip - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.jar - module-info-compiler-maven-plugin-{no_rc_snapshot_version}.pom diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 0046fcac62a22..f9bf29596796f 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,36 +25,27 @@ http://maven.apache.org - - org.apache.arrow arrow-memory-core - - org.apache.arrow arrow-memory-netty runtime - - org.apache.arrow arrow-vector - org.immutables value-annotations - org.apache.avro avro ${dep.avro.version} - diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 17681538ac97e..2f2911dd9da95 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -26,20 +26,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -51,7 +48,6 @@ value-annotations - com.h2database h2 @@ -94,9 +90,6 @@ jdk11+ [11,] - - !m2e.version - diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index ca817510bf3e3..bc89c4698eecf 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -134,5 +134,22 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + + + + org.apache.arrow:arrow-format + + + + + + diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 12b9950ad80fc..77aed2d0f6a37 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -27,6 +27,19 @@ + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 @@ -138,11 +151,9 @@ ${project.version} -
- @@ -156,12 +167,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -188,13 +197,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/c/pom.xml b/java/c/pom.xml index bfb233315a839..afb6e0cd8b890 100644 --- a/java/c/pom.xml +++ b/java/c/pom.xml @@ -83,5 +83,4 @@ - diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index b565572b383ab..f2070d4ff7cba 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-core @@ -151,13 +150,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 shade-main @@ -166,6 +158,7 @@ package + false true shaded @@ -192,6 +185,7 @@ package + false true shaded-ext @@ -244,7 +238,6 @@ org.apache.maven.plugins maven-dependency-plugin - 3.3.0 analyze @@ -264,7 +257,6 @@ org.codehaus.mojo build-helper-maven-plugin - 1.9.1 add-generated-sources-to-classpath @@ -282,7 +274,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -299,13 +290,6 @@ - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -313,18 +297,14 @@ jdk11+ [11,] - - !m2e.version - org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - false + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED ${project.basedir}/../../../testing/data @@ -334,5 +314,4 @@ - diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index 74016d81e91e5..cd2c28ba8959f 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-integration-tests @@ -63,7 +62,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index 459412e0f8d8b..50d7b2617a5a9 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-core @@ -47,20 +46,17 @@ - org.apache.arrow arrow-memory-core - org.apache.arrow arrow-memory-netty runtime - org.apache.arrow arrow-vector @@ -136,11 +132,6 @@ - - - src/main/resources - - maven-surefire-plugin @@ -154,7 +145,6 @@ org.codehaus.mojo properties-maven-plugin - 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index b3afbe1defdba..4456270e7b347 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT - ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index e6d703c673ad5..14fde34c3b4f3 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,7 +15,6 @@ org.apache.arrow arrow-flight 
17.0.0-SNAPSHOT - ../pom.xml flight-sql @@ -119,9 +118,6 @@ jdk11+ [11,] - - !m2e.version - @@ -136,5 +132,4 @@ - diff --git a/java/format/pom.xml b/java/format/pom.xml index e9eded79de660..4483047e20960 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,7 +31,6 @@ - @@ -42,6 +41,5 @@ - diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index a87f26028ba86..1c17023e5c8ad 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,13 +22,12 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. + - 1.8 - 1.8 - 3.25.1 true ../../../cpp/release-build + org.apache.arrow @@ -51,7 +50,6 @@ com.google.protobuf protobuf-java - ${protobuf.version} com.google.guava @@ -62,6 +60,7 @@ slf4j-api + @@ -88,14 +87,6 @@ - - - - kr.motd.maven - os-maven-plugin - 1.7.1 - - @@ -105,7 +96,6 @@ org.apache.maven.plugins maven-source-plugin - 2.2.1 attach-sources @@ -118,7 +108,6 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 attach-javadocs @@ -131,7 +120,6 @@ org.apache.maven.plugins maven-gpg-plugin - 3.2.4 sign-artifacts @@ -146,5 +134,4 @@ - diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 57ba7933ea1c6..9f0cd7b1039dd 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,39 +64,14 @@ org.apache.maven.plugin-tools maven-plugin-annotations - 3.11.0 + ${maven.plugin.tools.version} provided - - - maven-clean-plugin - 3.3.2 - - - maven-plugin-plugin - 3.12.0 - - - maven-jar-plugin - 3.3.0 - - - maven-install-plugin - 3.1.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-invoker-plugin - 3.1.0 - com.gradle develocity-maven-extension @@ -118,7 +93,6 @@ org.apache.maven.plugins maven-plugin-plugin - 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 470e198caebc1..72140dd6570d0 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,6 +15,13 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. 
--> + + org.apache + apache + 31 + + + org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -27,25 +34,38 @@ true + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + 3.2.2 + 3.6.3 + 3.5.0 - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 - org.apache.maven.plugins - maven-site-plugin - 3.12.1 + pl.project13.maven + git-commit-id-plugin + 4.0.5 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 @@ -119,11 +139,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -143,43 +158,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - UTF-8 - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -205,8 +194,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -248,7 +235,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 ../dev/checkstyle/checkstyle.xml ../dev/checkstyle/checkstyle.license @@ -288,7 +274,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -298,28 +283,6 @@ - - - org.apache.maven.plugins - maven-assembly-plugin - - - src - - - - - - single - - package - - - - org.apache.maven.plugins maven-project-info-reports-plugin @@ -353,13 +316,34 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 + + + + apache-release + + + + org.apache.maven.plugins + maven-assembly-plugin + + + source-release-assembly + + + true + + + + + + + + diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 1e29ccf8ab9db..783a13a6fb0ad 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -61,9 +61,6 @@ jdk11+ [11,] - - !m2e.version - @@ -92,7 +89,6 @@ org.apache.maven.plugins maven-surefire-plugin - opens-tests @@ -101,12 +97,9 @@ test - - -Dfoo=bar - - - **/TestArrowBuf.java - + + + **/TestOpens.java @@ -129,9 +122,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -Xmaxerrs @@ -150,12 +140,6 @@ ${checker.framework.version} - - - org.immutables.value.internal.$processor$.$Processor - - org.checkerframework.checker.nullness.NullnessChecker - diff --git a/java/performance/pom.xml b/java/performance/pom.xml index f01e8d9a4e0e4..07ca8d1e61d48 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,9 +22,7 @@ JMH Performance benchmarks for other Arrow libraries. 
- UTF-8 1.37 - 1.8 benchmarks true .* @@ -83,42 +81,6 @@ - - - - maven-clean-plugin - 3.3.2 - - - maven-deploy-plugin - 3.1.1 - - - maven-install-plugin - 3.1.2 - - - maven-jar-plugin - 3.3.0 - - - maven-javadoc-plugin - 3.6.3 - - - maven-resources-plugin - 3.3.1 - - - maven-source-plugin - 2.2.1 - - - maven-surefire-plugin - 3.2.5 - - - org.apache.maven.plugins @@ -144,6 +106,7 @@ package ${uberjar.name} + false org.openjdk.jmh.Main @@ -166,7 +129,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 ${skip.perf.benchmarks} test @@ -203,5 +165,4 @@ - diff --git a/java/pom.xml b/java/pom.xml index 0e9b7f0e25a34..9624444cf422d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 18 + 31 org.apache.arrow @@ -85,7 +85,7 @@ 33.2.1-jre 4.1.108.Final 1.63.0 - 3.23.1 + 3.25.1 2.17.0 3.4.0 23.5.26 @@ -95,10 +95,28 @@ true 9+181-r4173-1 2.28.0 - 3.12.1 5.11.0 5.2.0 3.43.0 + none + -Xdoclint:none + + 1.8 + 1.8 + 3.12.0 + 3.2.5 + 0.16.1 + 3.7.1 + 3.12.1 + 3.6.1 + 3.2.4 + + 3.2.2 + 3.6.3 + 3.5.0 @@ -269,40 +287,16 @@ 8.3.0 test - - - - - - org.apache.maven.plugins - maven-dependency-plugin - - 3.1.2 - - - org.apache.rat - apache-rat-plugin - 0.16.1 - - - org.apache.maven.plugins - maven-resources-plugin - 3.3.1 - org.apache.maven.plugins maven-compiler-plugin - ${maven-compiler-plugin.version} + true **/module-info.java **/module-info.java false @@ -315,18 +309,8 @@ - - maven-enforcer-plugin - 3.4.1 - - - org.apache.maven.plugins - maven-shade-plugin - 3.5.1 - maven-surefire-plugin - 3.2.5 true true @@ -341,22 +325,9 @@ 1048576 - - - org.junit.jupiter - junit-jupiter-engine - ${dep.junit.jupiter.version} - - - org.apache.maven.surefire - surefire-junit-platform - 3.2.5 - - maven-failsafe-plugin - 3.2.5 ${project.build.directory} @@ -445,6 +416,22 @@ + + + org.apache.drill.tools + drill-fmpp-maven-plugin + [1.0,) + + generate + + + + + false + true + + + @@ -452,9 +439,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 - 8 **/module-info.java @@ -465,16 +450,6 @@ module-info-compiler-maven-plugin ${project.version} - - org.apache.maven.plugins - maven-project-info-reports-plugin - 3.5.0 - - - org.apache.maven.plugins - maven-site-plugin - 3.12.1 - com.gradle develocity-maven-extension @@ -522,6 +497,36 @@ spotless-maven-plugin 2.30.0 + + org.codehaus.mojo + build-helper-maven-plugin + 1.9.1 + + + org.codehaus.mojo + properties-maven-plugin + 1.2.1 + + + org.codehaus.mojo + exec-maven-plugin + 3.2.0 + + + pl.project13.maven + git-commit-id-plugin + 4.0.5 + + + org.cyclonedx + cyclonedx-maven-plugin + 2.7.11 + + + org.apache.drill.tools + drill-fmpp-maven-plugin + 1.21.1 + @@ -595,11 +600,6 @@ **/logback.xml - true - - true - true - org.apache.arrow ${username} @@ -619,42 +619,17 @@ - - org.apache.maven.plugins - maven-resources-plugin - - UTF-8 - - org.apache.maven.plugins maven-compiler-plugin - 1.8 - 1.8 2048m - false true maven-enforcer-plugin - - validate_java_and_maven_version - - enforce - - verify - false - - - - [3.3.0,4) - - - - avoid_bad_dependencies @@ -683,8 +658,6 @@ pl.project13.maven git-commit-id-plugin - 4.0.5 - dd.MM.yyyy '@' HH:mm:ss z false @@ -726,7 +699,6 @@ org.apache.maven.plugins maven-checkstyle-plugin - 3.1.0 **/module-info.java dev/checkstyle/checkstyle.xml @@ -789,7 +761,6 @@ org.cyclonedx cyclonedx-maven-plugin - 2.7.11 @@ -820,12 +791,10 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 com.diffplug.spotless @@ -860,7 +829,6 @@ org.apache.maven.plugins 
maven-javadoc-plugin - 3.6.3 **/module-info.java @@ -888,28 +856,15 @@ org.apache.maven.plugins maven-project-info-reports-plugin - 3.5.0 org.apache.maven.plugins maven-site-plugin - 3.12.1 - - java-nodoclint - - [1.8,) - - - none - -Xdoclint:none - - - arrow-c-data @@ -960,7 +915,6 @@ org.apache.maven.plugins maven-compiler-plugin - true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -1000,9 +954,6 @@ org.apache.maven.plugins maven-compiler-plugin - 8 - 8 - UTF-8 -XDcompilePolicy=simple -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-source|format/src/main/java/org/apache/arrow/flatbuf)/.* @@ -1026,6 +977,16 @@ + + + + + jdk11+ + + [11,] + + + org.apache.maven.plugins maven-surefire-plugin @@ -1033,6 +994,13 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + org.apache.maven.plugins + maven-failsafe-plugin + + --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + @@ -1073,7 +1041,6 @@ org.jacoco jacoco-maven-plugin - 0.8.11 @@ -1119,7 +1086,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 cdata-cmake @@ -1176,7 +1142,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1283,7 +1248,6 @@ org.codehaus.mojo exec-maven-plugin - 3.2.0 jni-cpp-cmake @@ -1373,5 +1337,4 @@ - diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 5d9db75e525bd..53dcd51771054 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -54,6 +54,11 @@ 1.3.14 test + com.fasterxml.jackson.core jackson-core @@ -85,7 +90,6 @@ maven-assembly-plugin - 3.7.1 jar-with-dependencies @@ -101,7 +105,21 @@ + + org.apache.maven.plugins + maven-dependency-plugin + + + analyze + verify + + + com.fasterxml.jackson.core:* + + + + + - diff --git a/java/vector/pom.xml b/java/vector/pom.xml index c39504df2b207..6ff869ee21aff 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -76,64 +76,7 @@ - - - - true - - - false - - apache - apache - https://repo.maven.apache.org/maven2/ - - - - - - - codegen - - ${basedir}/src/main/codegen - - - - - - - org.eclipse.m2e - lifecycle-mapping - 1.0.0 - - - - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - - - - - - - - org.apache.maven.plugins @@ -163,33 +106,10 @@ - - maven-resources-plugin - - - - copy-fmpp-resources - - copy-resources - - initialize - - ${project.build.directory}/codegen - - - src/main/codegen - false - - - - - - org.apache.drill.tools drill-fmpp-maven-plugin - 1.21.1 generate-fmpp @@ -200,7 +120,7 @@ src/main/codegen/config.fmpp ${project.build.directory}/generated-sources/fmpp - ${project.build.directory}/codegen/templates + src/main/codegen/templates @@ -208,13 +128,6 @@ org.apache.maven.plugins maven-shade-plugin - - 3.2.4 @@ -228,10 +141,9 @@ com.google.flatbuffers:* + false true shade-format-flatbuffers - true - true com.google.flatbuffers @@ -243,7 +155,6 @@ - @@ -276,5 +187,4 @@ - From b51e997df7dcde843befffed2d63d6a8e741beef Mon Sep 17 00:00:00 2001 From: Haocheng Liu <30446009+HaochengLIU@users.noreply.github.com> Date: Fri, 7 Jun 2024 04:03:15 -0400 Subject: [PATCH 249/261] GH-41960: Expose new S3 option check_directory_existence_before_creation (#41972) ### Rationale for this change Expose new S3 option `check_directory_existence_before_creation` from GH-41493 ### What changes are included in this PR? Expose new S3 option `check_directory_existence_before_creation` from GH-41493 ### Are these changes tested? yes ### Are there any user-facing changes? Yes. Python function documentation is updated. 
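A usage sketch for the exposed option (the bucket, path, and region below are placeholders, and running it requires valid AWS credentials):

```
from pyarrow.fs import S3FileSystem

fs = S3FileSystem(
    region="us-east-1",
    check_directory_existence_before_creation=True,
)
# With the flag enabled, the directory is only created when it does not
# already exist, at the cost of an extra existence check per call.
fs.create_dir("example-bucket/nested/path")
```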
* GitHub Issue: #41960 Lead-authored-by: Haocheng Liu Co-authored-by: Joris Van den Bossche Signed-off-by: Joris Van den Bossche --- python/pyarrow/_s3fs.pyx | 20 ++++++++++++++++---- python/pyarrow/includes/libarrow_fs.pxd | 1 + python/pyarrow/tests/test_fs.py | 5 +++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/python/pyarrow/_s3fs.pyx b/python/pyarrow/_s3fs.pyx index f5bab99a49f7a..ba6603322838d 100644 --- a/python/pyarrow/_s3fs.pyx +++ b/python/pyarrow/_s3fs.pyx @@ -185,7 +185,7 @@ cdef class S3FileSystem(FileSystem): session_token : str, default None AWS Session Token. An optional session token, required if access_key and secret_key are temporary credentials from STS. - anonymous : boolean, default False + anonymous : bool, default False Whether to connect anonymously if access_key and secret_key are None. If true, will not attempt to look up credentials using standard AWS configuration methods. @@ -217,7 +217,7 @@ cdef class S3FileSystem(FileSystem): S3 connection transport scheme. endpoint_override : str, default None Override region with a connect string such as "localhost:9000" - background_writes : boolean, default True + background_writes : bool, default True Whether file writes will be issued in the background, without blocking. default_metadata : mapping or pyarrow.KeyValueMetadata, default None @@ -237,11 +237,20 @@ cdef class S3FileSystem(FileSystem): 'port': 8020, 'username': 'username', 'password': 'password'}) allow_bucket_creation : bool, default False - Whether to allow CreateDir at the bucket-level. This option may also be + Whether to allow directory creation at the bucket-level. This option may also be passed in a URI query parameter. allow_bucket_deletion : bool, default False - Whether to allow DeleteDir at the bucket-level. This option may also be + Whether to allow directory deletion at the bucket-level. This option may also be passed in a URI query parameter. + check_directory_existence_before_creation : bool, default false + Whether to check the directory existence before creating it. + If false, when creating a directory the code will not check if it already + exists or not. It's an optimization to try directory creation and catch the error, + rather than issue two dependent I/O calls. + If true, when creating a directory the code will only create the directory when necessary + at the cost of extra I/O calls. This can be used for key/value cloud storage which has + a hard rate limit to number of object mutation operations or scenerios such as + the directories already exist and you do not have creation access. retry_strategy : S3RetryStrategy, default AwsStandardS3RetryStrategy(max_attempts=3) The retry strategy to use with S3; fail after max_attempts. Available strategies are AwsStandardS3RetryStrategy, AwsDefaultS3RetryStrategy. 
@@ -273,6 +282,7 @@ cdef class S3FileSystem(FileSystem): role_arn=None, session_name=None, external_id=None, load_frequency=900, proxy_options=None, allow_bucket_creation=False, allow_bucket_deletion=False, + check_directory_existence_before_creation=False, retry_strategy: S3RetryStrategy = AwsStandardS3RetryStrategy( max_attempts=3), force_virtual_addressing=False): @@ -387,6 +397,7 @@ cdef class S3FileSystem(FileSystem): options.value().allow_bucket_creation = allow_bucket_creation options.value().allow_bucket_deletion = allow_bucket_deletion + options.value().check_directory_existence_before_creation = check_directory_existence_before_creation options.value().force_virtual_addressing = force_virtual_addressing if isinstance(retry_strategy, AwsStandardS3RetryStrategy): @@ -447,6 +458,7 @@ cdef class S3FileSystem(FileSystem): background_writes=opts.background_writes, allow_bucket_creation=opts.allow_bucket_creation, allow_bucket_deletion=opts.allow_bucket_deletion, + check_directory_existence_before_creation=opts.check_directory_existence_before_creation, default_metadata=pyarrow_wrap_metadata(opts.default_metadata), proxy_options={'scheme': frombytes(opts.proxy_options.scheme), 'host': frombytes(opts.proxy_options.host), diff --git a/python/pyarrow/includes/libarrow_fs.pxd b/python/pyarrow/includes/libarrow_fs.pxd index f1f2985f65394..cc260b80c7779 100644 --- a/python/pyarrow/includes/libarrow_fs.pxd +++ b/python/pyarrow/includes/libarrow_fs.pxd @@ -157,6 +157,7 @@ cdef extern from "arrow/filesystem/api.h" namespace "arrow::fs" nogil: c_bool background_writes c_bool allow_bucket_creation c_bool allow_bucket_deletion + c_bool check_directory_existence_before_creation c_bool force_virtual_addressing shared_ptr[const CKeyValueMetadata] default_metadata c_string role_arn diff --git a/python/pyarrow/tests/test_fs.py b/python/pyarrow/tests/test_fs.py index 845f1eccecc72..58380f1652558 100644 --- a/python/pyarrow/tests/test_fs.py +++ b/python/pyarrow/tests/test_fs.py @@ -1226,6 +1226,11 @@ def test_s3_options(pickle_module): assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(allow_bucket_creation=True, allow_bucket_deletion=True, + check_directory_existence_before_creation=True) + assert isinstance(fs, S3FileSystem) + assert pickle_module.loads(pickle_module.dumps(fs)) == fs + fs = S3FileSystem(request_timeout=0.5, connect_timeout=0.25) assert isinstance(fs, S3FileSystem) assert pickle_module.loads(pickle_module.dumps(fs)) == fs From 1dde3995238d4a771c9525e1e5189c1db4a8a95a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Fri, 7 Jun 2024 17:12:53 +0900 Subject: [PATCH 250/261] GH-42017: [CI][Python][C++] Fix utf8proc detection for wheel on Windows (#42022) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change utf8proc in vcpkg provides CMake package. If we use it, we don't need to care about static library name (`utf8proc.lib` or `utf8proc_static.lib`). ### What changes are included in this PR? Use `unofficial-utf8proc` CMake package with vcpkg. ### Are these changes tested? Yes. ### Are there any user-facing changes? No. 
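For reference, this is roughly how a consumer project links the package directly from vcpkg; the `unofficial-utf8proc` package name matches this PR, while the plain `utf8proc` target name is an assumption taken from vcpkg's usage notes:

```
# Sketch of a vcpkg consumer; target name is assumed from vcpkg usage notes.
find_package(unofficial-utf8proc CONFIG REQUIRED)
add_executable(demo main.cc)
target_link_libraries(demo PRIVATE utf8proc)
```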
* GitHub Issue: #42017 Authored-by: Sutou Kouhei Signed-off-by: Raúl Cumplido --- cpp/cmake_modules/Findutf8proc.cmake | 2 +- cpp/cmake_modules/ThirdpartyToolchain.cmake | 12 +++++++----- cpp/cmake_modules/Usevcpkg.cmake | 3 --- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/cmake_modules/Findutf8proc.cmake b/cpp/cmake_modules/Findutf8proc.cmake index e347414090549..9721f76f0631b 100644 --- a/cpp/cmake_modules/Findutf8proc.cmake +++ b/cpp/cmake_modules/Findutf8proc.cmake @@ -19,7 +19,7 @@ if(utf8proc_FOUND) return() endif() -if(ARROW_PACKAGE_KIND STREQUAL "vcpkg") +if(ARROW_PACKAGE_KIND STREQUAL "vcpkg" OR VCPKG_TOOLCHAIN) set(find_package_args "") if(utf8proc_FIND_VERSION) list(APPEND find_package_args ${utf8proc_FIND_VERSION}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index f102c7bb81683..3c58ba649c4dd 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -2819,11 +2819,13 @@ macro(build_utf8proc) endmacro() if(ARROW_WITH_UTF8PROC) - resolve_dependency(utf8proc - PC_PACKAGE_NAMES - libutf8proc - REQUIRED_VERSION - "2.2.0") + set(utf8proc_resolve_dependency_args utf8proc PC_PACKAGE_NAMES libutf8proc) + if(NOT VCPKG_TOOLCHAIN) + # utf8proc in vcpkg doesn't provide version information: + # https://github.com/microsoft/vcpkg/issues/39176 + list(APPEND utf8proc_resolve_dependency_args REQUIRED_VERSION "2.2.0") + endif() + resolve_dependency(${utf8proc_resolve_dependency_args}) endif() macro(build_cares) diff --git a/cpp/cmake_modules/Usevcpkg.cmake b/cpp/cmake_modules/Usevcpkg.cmake index 37a732f4b85a0..b6192468da342 100644 --- a/cpp/cmake_modules/Usevcpkg.cmake +++ b/cpp/cmake_modules/Usevcpkg.cmake @@ -237,9 +237,6 @@ set(LZ4_ROOT CACHE STRING "") if(CMAKE_HOST_WIN32) - set(utf8proc_MSVC_STATIC_LIB_SUFFIX - "" - CACHE STRING "") set(LZ4_MSVC_LIB_PREFIX "" CACHE STRING "") From a045770b94972bb4063bde13cb95f1c5b5c8bbe8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Cumplido?= Date: Fri, 7 Jun 2024 10:16:50 +0200 Subject: [PATCH 251/261] GH-42006: [CI][Python] Use pip install -e instead of setup.py build_ext --inplace for installing pyarrow on verification script (#42007) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### Rationale for this change Due to https://github.com/apache/arrow/issues/37929 we require a higher version of setuptools and setuptools_scm to be installed otherwise the job fails with setuptools_scm failing with ` TypeError: Configuration.__init__() got an unexpected keyword argument 'version_file'` ### What changes are included in this PR? Remove the dependencies for the environment and let installation handle those using pip install -e instead of setup.py build_ext --inplace for installing pyarrow on verification script ### Are these changes tested? Via Archery ### Are there any user-facing changes? 
No * GitHub Issue: #42006 Lead-authored-by: Raúl Cumplido Co-authored-by: Joris Van den Bossche Signed-off-by: Raúl Cumplido --- dev/release/verify-release-candidate.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 3ed871bd5305b..fcaaa423a4c75 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -756,7 +756,7 @@ test_python() { show_header "Build and test Python libraries" # Build and test Python - maybe_setup_virtualenv "cython>=0.29.31" numpy "setuptools_scm<8.0.0" setuptools + maybe_setup_virtualenv maybe_setup_conda --file ci/conda_env_python.txt if [ "${USE_CONDA}" -gt 0 ]; then @@ -788,7 +788,7 @@ test_python() { pushd python # Build pyarrow - python setup.py build_ext --inplace + python -m pip install -e . # Check mandatory and optional imports python -c " From fe4d04f081e55ca2de7b1b67b10ad7dca96cfd9e Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Fri, 7 Jun 2024 22:26:34 +0900 Subject: [PATCH 252/261] GH-42002: [Java] Update Unit Tests for Vector Module (#42019) ### Rationale for this change Update package from JUnit 4(`org.junit`) to JUnit 5(`org.junit.jupiter`). ### What changes are included in this PR? - [x] Replacing `org.junit` with `org.junit.jupiter.api`. - [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports. - [x] Updating annotations such as `@ Before`, `@ BeforeClass`, `@ After`, `@ AfterClass`. - `@ Before` -> `@ BeforeEach` - `@ BeforeClass` -> `@ BeforeAll` - `@ After` -> `@ AfterEach` - `@ AfterClass` -> `@ AfterAll` - `@ Test` -> `@ Test` with `org.junit.jupiter` - [x] Removing unused `@ Rule` Annotation - [x] Updating `Parameterized` test - [x] Doing self review ### Are these changes tested? Yes, existing tests have passed. ### Are there any user-facing changes? No. 
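The mechanical shape of the migration, shown on a made-up test class (note that JUnit 5 moves the failure message from the first to the last `assertEquals` argument):

```
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;

class ExampleMigratedTest {
  private StringBuilder buffer;

  @BeforeEach          // JUnit 4: @Before
  void init() {
    buffer = new StringBuilder("abc");
  }

  @Test
  void lengthMatches() {
    // JUnit 4: assertEquals("unexpected length", 3, buffer.length());
    assertEquals(3, buffer.length(), "unexpected length");
  }

  @AfterEach           // JUnit 4: @After
  void terminate() {
    buffer = null;
  }
}
```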
* GitHub Issue: #42002 Authored-by: Hyunseok Seo Signed-off-by: David Li --- .../arrow/vector/ITTestLargeVector.java | 8 +- .../apache/arrow/vector/TestBitVector.java | 73 +++--- .../arrow/vector/TestBitVectorHelper.java | 6 +- .../vector/TestBufferOwnershipTransfer.java | 8 +- .../org/apache/arrow/vector/TestCopyFrom.java | 85 +++--- .../arrow/vector/TestDecimal256Vector.java | 18 +- .../arrow/vector/TestDecimalVector.java | 20 +- .../arrow/vector/TestDenseUnionVector.java | 25 +- .../arrow/vector/TestDictionaryVector.java | 28 +- .../arrow/vector/TestDurationVector.java | 16 +- .../vector/TestFixedSizeBinaryVector.java | 16 +- .../arrow/vector/TestFixedSizeListVector.java | 120 ++++----- .../TestIntervalMonthDayNanoVector.java | 13 +- .../arrow/vector/TestIntervalYearVector.java | 14 +- .../arrow/vector/TestLargeListVector.java | 35 ++- .../vector/TestLargeVarBinaryVector.java | 18 +- .../arrow/vector/TestLargeVarCharVector.java | 92 +++---- .../apache/arrow/vector/TestListVector.java | 35 ++- .../arrow/vector/TestNullCheckingForGet.java | 10 +- .../vector/TestOutOfMemoryForValueVector.java | 52 ++-- ...TestOversizedAllocationForValueVector.java | 155 +++++------ .../arrow/vector/TestPeriodDuration.java | 6 +- .../apache/arrow/vector/TestStructVector.java | 23 +- .../apache/arrow/vector/TestUnionVector.java | 25 +- .../arrow/vector/TestVarCharListVector.java | 17 +- .../apache/arrow/vector/TestVectorAlloc.java | 14 +- .../arrow/vector/TestVectorReAlloc.java | 70 ++--- .../apache/arrow/vector/TestVectorReset.java | 14 +- .../arrow/vector/TestVectorSchemaRoot.java | 43 +-- .../arrow/vector/TestVectorUnloadLoad.java | 35 ++- .../vector/compare/TestTypeEqualsVisitor.java | 14 +- .../complex/TestDenseUnionBufferSize.java | 3 +- .../complex/impl/TestComplexCopier.java | 13 +- .../complex/impl/TestPromotableWriter.java | 42 +-- .../complex/writer/TestComplexWriter.java | 244 +++++++++--------- .../complex/writer/TestSimpleWriter.java | 38 +-- .../apache/arrow/vector/ipc/BaseFileTest.java | 209 +++++++-------- .../ipc/ITTestIPCWithLargeArrowBuffers.java | 10 +- .../vector/ipc/MessageSerializerTest.java | 14 +- .../arrow/vector/ipc/TestArrowFile.java | 6 +- .../arrow/vector/ipc/TestArrowFooter.java | 4 +- .../vector/ipc/TestArrowReaderWriter.java | 37 ++- .../arrow/vector/ipc/TestArrowStream.java | 15 +- .../arrow/vector/ipc/TestArrowStreamPipe.java | 20 +- .../apache/arrow/vector/ipc/TestJSONFile.java | 11 +- .../arrow/vector/ipc/TestRoundTrip.java | 158 +++++++----- .../ipc/TestUIntDictionaryRoundTrip.java | 88 +++---- .../message/TestMessageMetadataResult.java | 4 +- .../apache/arrow/vector/pojo/TestConvert.java | 6 +- .../testing/TestValueVectorPopulator.java | 12 +- .../testing/ValueVectorDataPopulator.java | 2 +- .../vector/types/pojo/TestExtensionType.java | 67 +++-- .../arrow/vector/types/pojo/TestField.java | 8 +- .../arrow/vector/types/pojo/TestSchema.java | 10 +- .../arrow/vector/util/DecimalUtilityTest.java | 19 +- .../vector/util/TestDataSizeRoundingUtil.java | 4 +- .../TestElementAddressableVectorIterator.java | 14 +- .../arrow/vector/util/TestMapWithOrdinal.java | 12 +- .../vector/util/TestMultiMapWithOrdinal.java | 43 +-- .../vector/util/TestReusableByteArray.java | 20 +- .../arrow/vector/util/TestSchemaUtil.java | 4 +- .../arrow/vector/util/TestValidator.java | 6 +- .../arrow/vector/util/TestVectorAppender.java | 14 +- .../vector/util/TestVectorBatchAppender.java | 12 +- .../util/TestVectorSchemaRootAppender.java | 12 +- .../vector/validate/TestValidateVector.java | 12 +- 
.../validate/TestValidateVectorFull.java | 12 +- .../TestValidateVectorSchemaRoot.java | 12 +- .../TestValidateVectorTypeVisitor.java | 10 +- 69 files changed, 1191 insertions(+), 1144 deletions(-) diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java index 8596399e7e08c..b65e6fd36c158 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ITTestLargeVector.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -28,7 +28,7 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.holders.NullableDecimalHolder; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java index 075a05c04b641..cebd70fcc5a71 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVector.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.stream.IntStream; @@ -29,22 +29,21 @@ import org.apache.arrow.memory.util.hash.MurmurHasher; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestBitVector { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -124,8 +123,8 @@ public void testSplitAndTransfer() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -167,8 +166,8 @@ public void testSplitAndTransfer1() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values 
not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -218,8 +217,8 @@ public void testSplitAndTransfer2() throws Exception { for (int i = 0; i < length; i++) { int actual = toVector.get(i); int expected = sourceVector.get(start + i); - assertEquals("different data values not expected --> sourceVector index: " + (start + i) + - " toVector index: " + i, expected, actual); + assertEquals(expected, actual, + "different data values not expected --> sourceVector index: " + (start + i) + " toVector index: " + i); } } } @@ -241,9 +240,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -259,9 +258,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -277,9 +276,9 @@ public void testReallocAfterVectorTransfer1() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertEquals("unexpected cleared bit at index: " + i, 1, vector.get(i)); + assertEquals(1, vector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -297,12 +296,12 @@ public void testReallocAfterVectorTransfer1() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertEquals("unexpected cleared bit at index: " + i, 1, toVector.get(i)); + assertEquals(1, toVector.get(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -325,9 +324,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity; i++) { if ((i & 1) == 1) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -343,9 +342,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 2; i++) { if (((i & 1) == 1) || (i == valueCapacity)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, 
vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -361,9 +360,9 @@ public void testReallocAfterVectorTransfer2() { for (int i = 0; i < valueCapacity * 4; i++) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2)) { - assertFalse("unexpected cleared bit at index: " + i, vector.isNull(i)); + assertFalse(vector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, vector.isNull(i)); + assertTrue(vector.isNull(i), "unexpected set bit at index: " + i); } } @@ -381,12 +380,12 @@ public void testReallocAfterVectorTransfer2() { if (i <= valueCapacity * 4) { if (((i & 1) == 1) || (i == valueCapacity) || (i == valueCapacity * 2) || (i == valueCapacity * 4)) { - assertFalse("unexpected cleared bit at index: " + i, toVector.isNull(i)); + assertFalse(toVector.isNull(i), "unexpected cleared bit at index: " + i); } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } else { - assertTrue("unexpected set bit at index: " + i, toVector.isNull(i)); + assertTrue(toVector.isNull(i), "unexpected set bit at index: " + i); } } @@ -500,13 +499,13 @@ private void validateRange(int length, int start, int count) { bitVector.allocateNew(length); bitVector.setRangeToOne(start, count); for (int i = 0; i < start; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } for (int i = start; i < start + count; i++) { - Assert.assertEquals(desc + i, 1, bitVector.get(i)); + assertEquals(1, bitVector.get(i), desc + i); } for (int i = start + count; i < length; i++) { - Assert.assertTrue(desc + i, bitVector.isNull(i)); + assertTrue(bitVector.isNull(i), desc + i); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java index 1da4a4c4914b9..b1ef45c918b72 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBitVectorHelper.java @@ -17,16 +17,16 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBitVectorHelper { @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java index 056b6bdd2b787..b38e046659669 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestBufferOwnershipTransfer.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -30,7 +30,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.CallBack; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestBufferOwnershipTransfer { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java index 97de27bec8237..7d4d08636d740 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestCopyFrom.java @@ -18,9 +18,10 @@ package org.apache.arrow.vector; import static org.apache.arrow.vector.TestUtils.newVector; -import static org.junit.Assert.*; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; @@ -31,9 +32,9 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.Types.MinorType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /* * Tested field types: @@ -60,12 +61,12 @@ public class TestCopyFrom { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -99,10 +100,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -116,10 +114,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -133,10 +128,7 @@ public void testCopyFromWithNulls() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i); } } } @@ -171,10 +163,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector.getObject(i).toString()); + assertEquals(Integer.toString(i), vector.getObject(i).toString(), "unexpected value at index: " + i); } } @@ -192,10 +181,7 @@ public void testCopyFromWithNulls1() { if (i % 3 == 0) { assertNull(vector2.getObject(i)); } else { - assertEquals( - "unexpected value at index: " + i, - Integer.toString(i), - vector2.getObject(i).toString()); + 
assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i);
       }
     }
@@ -209,10 +195,7 @@ public void testCopyFromWithNulls1() {
       if (i % 3 == 0) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals(
-            "unexpected value at index: " + i,
-            Integer.toString(i),
-            vector2.getObject(i).toString());
+        assertEquals(Integer.toString(i), vector2.getObject(i).toString(), "unexpected value at index: " + i);
       }
     }
   }
@@ -247,7 +230,7 @@ public void testCopyFromWithNulls2() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 1000 + i, vector1.get(i));
+        assertEquals(1000 + i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -274,7 +257,7 @@ public void testCopyFromWithNulls2() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 1000 + i, vector2.get(i));
+        assertEquals(1000 + i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
@@ -309,7 +292,7 @@ public void testCopyFromWithNulls3() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector1.get(i));
+        assertEquals(10000000000L + (long) i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -336,7 +319,7 @@ public void testCopyFromWithNulls3() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 10000000000L + (long) i, vector2.get(i));
+        assertEquals(10000000000L + (long) i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
@@ -450,7 +433,7 @@ public void testCopyFromWithNulls5() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 100.25f + (float) i, vector1.get(i), 0);
+        assertEquals(100.25f + (float) i, vector1.get(i), 0, "unexpected value at index: " + i);
       }
     }
 
@@ -477,7 +460,7 @@ public void testCopyFromWithNulls5() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, 100.25f + i * 1.0f, vector2.get(i), 0);
+        assertEquals(100.25f + i * 1.0f, vector2.get(i), 0, "unexpected value at index: " + i);
       }
     }
   }
@@ -512,8 +495,7 @@ public void testCopyFromWithNulls6() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals(
-            "unexpected value at index: " + i, 123456.7865 + (double) i, vector1.get(i), 0);
+        assertEquals(123456.7865 + (double) i, vector1.get(i), 0, "unexpected value at index: " + i);
       }
     }
 
@@ -540,8 +522,7 @@ public void testCopyFromWithNulls6() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals(
-            "unexpected value at index: " + i, 123456.7865 + (double) i, vector2.get(i), 0);
+        assertEquals(123456.7865 + (double) i, vector2.get(i), 0, "unexpected value at index: " + i);
       }
     }
   }
@@ -715,7 +696,7 @@ public void testCopyFromWithNulls9() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (short) i, vector1.get(i));
+        assertEquals(val + (short) i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -742,7 +723,7 @@ public void testCopyFromWithNulls9() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (short) i, vector2.get(i));
+        assertEquals(val + (short) i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
@@ -778,7 +759,7 @@ public void testCopyFromWithNulls10() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+        assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -805,7 +786,7 @@ public void testCopyFromWithNulls10() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+        assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
@@ -841,7 +822,7 @@ public void testCopyFromWithNulls11() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + i, vector1.get(i));
+        assertEquals(val + i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -868,7 +849,7 @@ public void testCopyFromWithNulls11() {
      if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + i, vector2.get(i));
+        assertEquals(val + i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
@@ -906,7 +887,7 @@ public void testCopyFromWithNulls12() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val, vector1.get(i));
+        assertEquals(val, vector1.get(i), "unexpected value at index: " + i);
         val++;
       }
     }
@@ -934,7 +915,7 @@ public void testCopyFromWithNulls12() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val, vector2.get(i));
+        assertEquals(val, vector2.get(i), "unexpected value at index: " + i);
         val++;
       }
     }
@@ -1039,7 +1020,7 @@ public void testCopyFromWithNulls14() {
       if ((i & 1) == 0) {
         assertNull(vector1.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (long) i, vector1.get(i));
+        assertEquals(val + (long) i, vector1.get(i), "unexpected value at index: " + i);
       }
     }
 
@@ -1066,7 +1047,7 @@ public void testCopyFromWithNulls14() {
       if (((i & 1) == 0) || (i >= initialCapacity)) {
         assertNull(vector2.getObject(i));
       } else {
-        assertEquals("unexpected value at index: " + i, val + (long) i, vector2.get(i));
+        assertEquals(val + (long) i, vector2.get(i), "unexpected value at index: " + i);
       }
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
index fc5dfc38587a4..6886abcc63cdf 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimal256Vector.java
@@ -17,10 +17,10 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
@@ -29,9 +29,9 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestDecimal256Vector {
@@ -49,12 +49,12 @@ public class TestDecimal256Vector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -80,7 +80,7 @@ public void testValuesWriteRead() {
 
     for (int i = 0; i < intValues.length; i++) {
       BigDecimal value = decimalVector.getObject(i);
-      assertEquals("unexpected data at index: " + i, values[i], value);
+      assertEquals(values[i], value, "unexpected data at index: " + i);
     }
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
index 572f13fea1ed1..c7a12fd6ac87c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDecimalVector.java
@@ -17,10 +17,10 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.math.BigDecimal;
 import java.math.BigInteger;
@@ -29,9 +29,9 @@
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestDecimalVector {
@@ -49,12 +49,12 @@ public class TestDecimalVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -80,7 +80,7 @@ public void testValuesWriteRead() {
 
     for (int i = 0; i < intValues.length; i++) {
       BigDecimal value = decimalVector.getObject(i);
-      assertEquals("unexpected data at index: " + i, values[i], value);
+      assertEquals(values[i], value, "unexpected data at index: " + i);
     }
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
index 0621fd4527520..0b74f760d2941 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDenseUnionVector.java
@@ -17,10 +17,10 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -47,21 +47,21 @@
 import org.apache.arrow.vector.util.JsonStringHashMap;
 import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestDenseUnionVector {
   private static final String EMPTY_SCHEMA_PATH = "";
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -262,8 +262,8 @@ public void testSplitAndTransfer() throws Exception {
 
         /* check the toVector output after doing the splitAndTransfer */
         for (int i = 0; i < length; i++) {
-          assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
-              toVector.getObject(i));
+          assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+              "Different data at indexes: " + (start + i) + " and " + i);
         }
       }
     }
@@ -356,7 +356,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception {
 
         /* check the toVector output after doing the splitAndTransfer */
         for (int i = 0; i < length; i++) {
-          assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+          assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+              "Different values at index: " + i);
         }
       }
     }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
index 9ffa79470eeb8..caccc2360e85c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDictionaryVector.java
@@ -19,7 +19,11 @@
 import static org.apache.arrow.vector.TestUtils.*;
 import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector;
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -49,9 +53,9 @@
 import org.apache.arrow.vector.types.pojo.DictionaryEncoding;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.Text;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestDictionaryVector {
@@ -63,12 +67,12 @@ public class TestDictionaryVector {
 
   byte[][] data = new byte[][] {zero, one, two};
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -896,7 +900,7 @@ public void testNoMemoryLeak() {
         assertEquals("Dictionary encoding not defined for value:" + new Text(two), e.getMessage());
       }
     }
-    assertEquals("encode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "encode memory leak");
 
     // test no memory leak when decode
     try (final IntVector indices = newVector(IntVector.class, "", Types.MinorType.INT, allocator);
@@ -914,7 +918,7 @@ public void testNoMemoryLeak() {
         assertEquals("Provided dictionary does not contain value for index 3", e.getMessage());
       }
     }
-    assertEquals("decode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "decode memory leak");
   }
 
   @Test
@@ -942,7 +946,7 @@ public void testListNoMemoryLeak() {
        assertEquals("Dictionary encoding not defined for value:20", e.getMessage());
      }
    }
-    assertEquals("list encode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "list encode memory leak");
 
    try (final ListVector indices = ListVector.empty("indices", allocator);
        final ListVector dictionaryVector = ListVector.empty("dict", allocator)) {
@@ -966,7 +970,7 @@ public void testListNoMemoryLeak() {
        assertEquals("Provided dictionary does not contain value for index 3", e.getMessage());
      }
    }
-    assertEquals("list decode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "list decode memory leak");
  }
 
  @Test
@@ -1003,7 +1007,7 @@ public void testStructNoMemoryLeak() {
        assertEquals("Dictionary encoding not defined for value:baz", e.getMessage());
      }
    }
-    assertEquals("struct encode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "struct encode memory leak");
 
    try (final StructVector indices = StructVector.empty("indices", allocator);
        final VarCharVector dictVector1 = new VarCharVector("f0", allocator);
@@ -1040,7 +1044,7 @@ public void testStructNoMemoryLeak() {
        assertEquals("Provided dictionary does not contain value for index 3", e.getMessage());
      }
    }
-    assertEquals("struct decode memory leak", 0, allocator.getAllocatedMemory());
+    assertEquals(0, allocator.getAllocatedMemory(), "struct decode memory leak");
  }
 
  private void testDictionary(Dictionary dictionary, ToIntBiFunction valGetter) {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
index c5d4d296cc024..6ed44be849726 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestDurationVector.java
@@ -17,9 +17,9 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
 
 import java.time.Duration;
 
@@ -28,19 +28,19 @@
 import org.apache.arrow.vector.types.TimeUnit;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestDurationVector {
   RootAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
index b9cd89e4ad731..4b52c7a41ff07 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeBinaryVector.java
@@ -17,8 +17,12 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import org.apache.arrow.memory.ArrowBuf;
 import org.apache.arrow.memory.BufferAllocator;
@@ -26,9 +30,9 @@
 import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder;
 import org.apache.arrow.vector.util.ReusableByteArray;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestFixedSizeBinaryVector {
   private static final int numValues = 123;
@@ -85,7 +89,7 @@ private static void failWithException(String message) throws Exception {
   }
 
-  @Before
+  @BeforeEach
   public void init() throws Exception {
     allocator = new DirtyRootAllocator(Integer.MAX_VALUE, (byte) 100);
     vector = new FixedSizeBinaryVector("fixedSizeBinary", allocator, typeWidth);
@@ -128,7 +132,7 @@ public void init() throws Exception {
     largeNullableHolder.buffer = largeBuf;
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     for (int i = 0; i < numValues; i++) {
       bufs[i].close();
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
index bde6dd491dd71..54ce8e2ae0e7c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestFixedSizeListVector.java
@@ -17,11 +17,12 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.math.BigDecimal;
 import java.nio.ByteBuffer;
@@ -41,21 +42,20 @@
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestFixedSizeListVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -76,12 +76,12 @@ public void testIntType() {
       UnionFixedSizeListReader reader = vector.getReader();
       for (int i = 0; i < 10; i++) {
         reader.setPosition(i);
-        Assert.assertTrue(reader.isSet());
-        Assert.assertTrue(reader.next());
+        assertTrue(reader.isSet());
+        assertTrue(reader.next());
         assertEquals(i, reader.reader().readInteger().intValue());
-        Assert.assertTrue(reader.next());
+        assertTrue(reader.next());
         assertEquals(i + 10, reader.reader().readInteger().intValue());
-        Assert.assertFalse(reader.next());
+        assertFalse(reader.next());
         assertEquals(Arrays.asList(i, i + 10), reader.readObject());
       }
     }
@@ -107,16 +107,16 @@ public void testFloatTypeNullable() {
       for (int i = 0; i < 10; i++) {
         reader.setPosition(i);
         if (i % 2 == 0) {
-          Assert.assertTrue(reader.isSet());
-          Assert.assertTrue(reader.next());
+          assertTrue(reader.isSet());
+          assertTrue(reader.next());
           assertEquals(i + 0.1f, reader.reader().readFloat(), 0.00001);
-          Assert.assertTrue(reader.next());
+          assertTrue(reader.next());
           assertEquals(i + 10.1f, reader.reader().readFloat(), 0.00001);
-          Assert.assertFalse(reader.next());
+          assertFalse(reader.next());
           assertEquals(Arrays.asList(i + 0.1f, i + 10.1f), reader.readObject());
         } else {
-          Assert.assertFalse(reader.isSet());
-          Assert.assertNull(reader.readObject());
+          assertFalse(reader.isSet());
+          assertNull(reader.readObject());
         }
       }
     }
@@ -149,18 +149,18 @@ public void testNestedInList() {
         reader.setPosition(i);
         if (i % 2 == 0) {
           for (int j = 0; j < i % 7; j++) {
-            Assert.assertTrue(reader.next());
+            assertTrue(reader.next());
             FieldReader innerListReader = reader.reader();
             for (int k = 0; k < 2; k++) {
-              Assert.assertTrue(innerListReader.next());
+              assertTrue(innerListReader.next());
               assertEquals(k + j, innerListReader.reader().readInteger().intValue());
             }
-            Assert.assertFalse(innerListReader.next());
+            assertFalse(innerListReader.next());
           }
-          Assert.assertFalse(reader.next());
+          assertFalse(reader.next());
         } else {
-          Assert.assertFalse(reader.isSet());
-          Assert.assertNull(reader.readObject());
+          assertFalse(reader.isSet());
+          assertNull(reader.readObject());
         }
       }
     }
@@ -196,40 +196,40 @@ public void testTransferPair() {
       UnionFixedSizeListReader reader = to.getReader();
 
       reader.setPosition(0);
-      Assert.assertFalse(reader.isSet());
-      Assert.assertNull(reader.readObject());
+      assertFalse(reader.isSet());
+      assertNull(reader.readObject());
 
       reader.setPosition(1);
-      Assert.assertTrue(reader.isSet());
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.isSet());
+      assertTrue(reader.next());
       assertEquals(0.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.next());
       assertEquals(10.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertFalse(reader.next());
+      assertFalse(reader.next());
       assertEquals(Arrays.asList(0.1f, 10.1f), reader.readObject());
 
       reader.setPosition(2);
-      Assert.assertTrue(reader.isSet());
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.isSet());
+      assertTrue(reader.next());
       assertEquals(2.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.next());
       assertEquals(12.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertFalse(reader.next());
+      assertFalse(reader.next());
       assertEquals(Arrays.asList(2.1f, 12.1f), reader.readObject());
 
       reader.setPosition(3);
-      Assert.assertTrue(reader.isSet());
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.isSet());
+      assertTrue(reader.next());
       assertEquals(4.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertTrue(reader.next());
+      assertTrue(reader.next());
       assertEquals(14.1f, reader.reader().readFloat(), 0.00001);
-      Assert.assertFalse(reader.next());
+      assertFalse(reader.next());
       assertEquals(Arrays.asList(4.1f, 14.1f), reader.readObject());
 
       for (int i = 4; i < 10; i++) {
         reader.setPosition(i);
-        Assert.assertFalse(reader.isSet());
-        Assert.assertNull(reader.readObject());
+        assertFalse(reader.isSet());
+        assertNull(reader.readObject());
       }
     }
   }
@@ -238,11 +238,11 @@ public void testTransferPair() {
   public void testConsistentChildName() throws Exception {
     try (FixedSizeListVector listVector = FixedSizeListVector.empty("sourceVector", /*size=*/2, allocator)) {
       String emptyListStr = listVector.getField().toString();
-      Assert.assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
+      assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME));
 
       listVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType()));
       String emptyVectorStr = listVector.getField().toString();
-      Assert.assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
+      assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME));
     }
   }
@@ -354,27 +354,29 @@ public void testDecimalIndexCheck() throws Exception {
   }
 
-  @Test(expected = IllegalStateException.class)
+  @Test
   public void testWriteIllegalData() throws Exception {
-    try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) {
+    assertThrows(IllegalStateException.class, () -> {
+      try (final FixedSizeListVector vector1 = FixedSizeListVector.empty("vector", /*size=*/3, allocator)) {
 
-      UnionFixedSizeListWriter writer1 = vector1.getWriter();
-      writer1.allocate();
+        UnionFixedSizeListWriter writer1 = vector1.getWriter();
+        writer1.allocate();
 
-      int[] values1 = new int[] {1, 2, 3};
-      int[] values2 = new int[] {4, 5, 6, 7, 8};
+        int[] values1 = new int[]{1, 2, 3};
+        int[] values2 = new int[]{4, 5, 6, 7, 8};
 
-      //set some values
-      writeListVector(vector1, writer1, values1);
-      writeListVector(vector1, writer1, values2);
-      writer1.setValueCount(3);
+        //set some values
+        writeListVector(vector1, writer1, values1);
+        writeListVector(vector1, writer1, values2);
+        writer1.setValueCount(3);
 
-      assertEquals(3, vector1.getValueCount());
-      int[] realValue1 = convertListToIntArray(vector1.getObject(0));
-      assertTrue(Arrays.equals(values1, realValue1));
-      int[] realValue2 = convertListToIntArray(vector1.getObject(1));
-      assertTrue(Arrays.equals(values2, realValue2));
-    }
+        assertEquals(3, vector1.getValueCount());
+        int[] realValue1 = convertListToIntArray(vector1.getObject(0));
+        assertTrue(Arrays.equals(values1, realValue1));
+        int[] realValue2 = convertListToIntArray(vector1.getObject(1));
+        assertTrue(Arrays.equals(values2, realValue2));
+      }
+    });
   }
 
   @Test
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
index 681897b93c12c..82bf1dd423b5e 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalMonthDayNanoVector.java
@@ -17,8 +17,7 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import java.time.Duration;
 import java.time.Period;
@@ -29,20 +28,20 @@
 import org.apache.arrow.vector.types.IntervalUnit;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.ArrowType;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestIntervalMonthDayNanoVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
index 4b2ae2eb3d49b..6cb72f38307df 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestIntervalYearVector.java
@@ -17,28 +17,28 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertSame;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.vector.types.IntervalUnit;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.ArrowType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestIntervalYearVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
index ffd87c99d508d..d4bb3d4c97bcf 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java
@@ -17,11 +17,11 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -39,21 +39,20 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestLargeListVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -91,11 +90,11 @@ public void testCopyFrom() throws Exception {
 
       // assert the output vector is correct
       FieldReader reader = outVector.getReader();
-      Assert.assertTrue("shouldn't be null", reader.isSet());
+      assertTrue(reader.isSet(), "shouldn't be null");
       reader.setPosition(1);
-      Assert.assertFalse("should be null", reader.isSet());
+      assertFalse(reader.isSet(), "should be null");
       reader.setPosition(2);
-      Assert.assertTrue("shouldn't be null", reader.isSet());
+      assertTrue(reader.isSet(), "shouldn't be null");
 
       /* index 0 */
@@ -433,15 +432,15 @@ public void testSplitAndTransfer() throws Exception {
           dataLength2 = (int) toOffsetBuffer.getLong((i + 1) * LargeListVector.OFFSET_WIDTH) -
              (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
 
-          assertEquals("Different data lengths at index: " + i + " and start: " + start,
-              dataLength1, dataLength2);
+          assertEquals(dataLength1, dataLength2,
+              "Different data lengths at index: " + i + " and start: " + start);
 
           offset1 = (int) offsetBuffer.getLong((start + i) * LargeListVector.OFFSET_WIDTH);
           offset2 = (int) toOffsetBuffer.getLong(i * LargeListVector.OFFSET_WIDTH);
 
           for (int j = 0; j < dataLength1; j++) {
-            assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
-                dataVector.getObject(offset1), dataVector1.getObject(offset2));
+            assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2),
+                "Different data at indexes: " + offset1 + " and " + offset2);
 
             offset1++;
             offset2++;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
index 36607903b01a2..3a51cca51706c 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarBinaryVector.java
@@ -17,10 +17,10 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.nio.charset.StandardCharsets;
 import java.util.Arrays;
@@ -32,20 +32,20 @@
 import org.apache.arrow.vector.holders.NullableLargeVarBinaryHolder;
 import org.apache.arrow.vector.util.ReusableByteArray;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestLargeVarBinaryVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
index 06b27a9eba156..aa9c7fed38a6b 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestLargeVarCharVector.java
@@ -17,12 +17,14 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertArrayEquals;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
@@ -41,11 +43,9 @@
 import org.apache.arrow.vector.util.OversizedAllocationException;
 import org.apache.arrow.vector.util.Text;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestLargeVarCharVector {
@@ -58,12 +58,12 @@ public class TestLargeVarCharVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void prepare() {
     allocator = new RootAllocator(Integer.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void shutdown() {
     allocator.close();
   }
@@ -162,7 +162,7 @@ public void testInvalidStartIndex() {
 
       final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
 
-      IllegalArgumentException e = Assertions.assertThrows(
+      IllegalArgumentException e = assertThrows(
           IllegalArgumentException.class,
           () -> tp.splitAndTransfer(valueCount, 10));
 
@@ -181,7 +181,7 @@ public void testInvalidLength() {
 
       final TransferPair tp = largeVarCharVector.makeTransferPair(newLargeVarCharVector);
 
-      IllegalArgumentException e = Assertions.assertThrows(
+      IllegalArgumentException e = assertThrows(
          IllegalArgumentException.class,
          () -> tp.splitAndTransfer(0, valueCount * 2));
 
@@ -298,39 +298,43 @@ public void testSetLastSetUsage() {
     }
   }
 
-  @Test(expected = OutOfMemoryException.class)
+  @Test
   public void testVectorAllocateNew() {
-    try (RootAllocator smallAllocator = new RootAllocator(200);
-        LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) {
-      vector.allocateNew();
-    }
+    assertThrows(OutOfMemoryException.class, () -> {
+      try (RootAllocator smallAllocator = new RootAllocator(200);
+          LargeVarCharVector vector = new LargeVarCharVector("vec", smallAllocator)) {
+        vector.allocateNew();
+      }
+    });
   }
 
-  @Test(expected = OversizedAllocationException.class)
+  @Test
   public void testLargeVariableVectorReallocation() {
-    final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator);
-    // edge case 1: value count = MAX_VALUE_ALLOCATION
-    final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
-    final int expectedOffsetSize = 10;
-    try {
-      vector.allocateNew(expectedAllocationInBytes, 10);
-      assertTrue(expectedOffsetSize <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
-      vector.reAlloc();
-      assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final LargeVarCharVector vector = new LargeVarCharVector("vector", allocator);
+      // edge case 1: value count = MAX_VALUE_ALLOCATION
+      final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+      final int expectedOffsetSize = 10;
+      try {
+        vector.allocateNew(expectedAllocationInBytes, 10);
+        assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+        vector.reAlloc();
+        assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+      } finally {
+        vector.close();
+      }
 
-    // common: value count < MAX_VALUE_ALLOCATION
-    try {
-      vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
-      vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
-      vector.reAlloc(); // this tests if it overflows
-    } finally {
-      vector.close();
-    }
+      // common: value count < MAX_VALUE_ALLOCATION
+      try {
+        vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+        vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
+        vector.reAlloc(); // this tests if it overflows
+      } finally {
+        vector.close();
+      }
+    });
   }
 
   @Test
@@ -784,7 +788,7 @@ public void testNullableType() {
 
     try {
       vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
-      Assert.fail("Expected out of bounds exception");
+      fail("Expected out of bounds exception");
     } catch (Exception e) {
       // ok
     }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
index 97f2d9fd6def1..cbcb6cf9d7963 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListVector.java
@@ -17,12 +17,12 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertSame;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -44,21 +44,20 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestListVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -96,11 +95,11 @@ public void testCopyFrom() throws Exception {
 
       // assert the output vector is correct
      FieldReader reader = outVector.getReader();
-      Assert.assertTrue("shouldn't be null", reader.isSet());
+      assertTrue(reader.isSet(), "shouldn't be null");
      reader.setPosition(1);
-      Assert.assertFalse("should be null", reader.isSet());
+      assertFalse(reader.isSet(), "should be null");
      reader.setPosition(2);
-      Assert.assertTrue("shouldn't be null", reader.isSet());
+      assertTrue(reader.isSet(), "shouldn't be null");
 
      /* index 0 */
@@ -439,15 +438,15 @@ public void testSplitAndTransfer() throws Exception {
           dataLength2 = toOffsetBuffer.getInt((i + 1) * ListVector.OFFSET_WIDTH) -
              toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
 
-          assertEquals("Different data lengths at index: " + i + " and start: " + start,
-              dataLength1, dataLength2);
+          assertEquals(dataLength1, dataLength2,
+              "Different data lengths at index: " + i + " and start: " + start);
 
           offset1 = offsetBuffer.getInt((start + i) * ListVector.OFFSET_WIDTH);
           offset2 = toOffsetBuffer.getInt(i * ListVector.OFFSET_WIDTH);
 
           for (int j = 0; j < dataLength1; j++) {
-            assertEquals("Different data at indexes: " + offset1 + " and " + offset2,
-                dataVector.getObject(offset1), dataVector1.getObject(offset2));
+            assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2),
+                "Different data at indexes: " + offset1 + " and " + offset2);
 
             offset1++;
             offset2++;
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
index f1345e88ab8b9..51ad470bb6417 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestNullCheckingForGet.java
@@ -17,11 +17,13 @@
 package org.apache.arrow.vector;
 
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
 import java.lang.reflect.Field;
 import java.net.URLClassLoader;
 
-import org.junit.Assert;
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 /**
  * Test cases for {@link NullCheckingForGet}.
@@ -63,7 +65,7 @@ public void testDefaultValue() throws Exception {
     ClassLoader classLoader = copyClassLoader();
     if (classLoader != null) {
       boolean nullCheckingEnabled = getFlagValue(classLoader);
-      Assert.assertTrue(nullCheckingEnabled);
+      assertTrue(nullCheckingEnabled);
     }
   }
 
@@ -79,7 +81,7 @@ public void testEnableSysProperty() throws Exception {
     ClassLoader classLoader = copyClassLoader();
     if (classLoader != null) {
       boolean nullCheckingEnabled = getFlagValue(classLoader);
-      Assert.assertFalse(nullCheckingEnabled);
+      assertFalse(nullCheckingEnabled);
     }
 
     // restore system property
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
index 7f26b5c1b79f6..200786f54a92d 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOutOfMemoryForValueVector.java
@@ -17,12 +17,14 @@
 package org.apache.arrow.vector;
 
+import static org.junit.jupiter.api.Assertions.assertThrows;
+
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.OutOfMemoryException;
 import org.apache.arrow.memory.RootAllocator;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * This class tests cases where we expect to receive {@link OutOfMemoryException}.
@@ -33,40 +35,48 @@ public class TestOutOfMemoryForValueVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(200); // Start with low memory limit
   }
 
-  @Test(expected = OutOfMemoryException.class)
+  @Test
   public void variableWidthVectorAllocateNew() {
-    try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
-      vector.allocateNew();
-    }
+    assertThrows(OutOfMemoryException.class, () -> {
+      try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+        vector.allocateNew();
+      }
+    });
  }
 
-  @Test(expected = OutOfMemoryException.class)
+  @Test
  public void variableWidthVectorAllocateNewCustom() {
-    try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
-      vector.allocateNew(2342, 234);
-    }
+    assertThrows(OutOfMemoryException.class, () -> {
+      try (VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator)) {
+        vector.allocateNew(2342, 234);
+      }
+    });
  }
 
-  @Test(expected = OutOfMemoryException.class)
+  @Test
  public void fixedWidthVectorAllocateNew() {
-    try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
-      vector.allocateNew();
-    }
+    assertThrows(OutOfMemoryException.class, () -> {
+      try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+        vector.allocateNew();
+      }
+    });
  }
 
-  @Test(expected = OutOfMemoryException.class)
+  @Test
  public void fixedWidthVectorAllocateNewCustom() {
-    try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
-      vector.allocateNew(2342);
-    }
+    assertThrows(OutOfMemoryException.class, () -> {
+      try (IntVector vector = new IntVector(EMPTY_SCHEMA_PATH, allocator)) {
+        vector.allocateNew(2342);
+      }
+    });
  }
 
-  @After
+  @AfterEach
  public void terminate() {
    allocator.close();
  }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
index 23414e9f5df1c..f89828e4ceeb2 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestOversizedAllocationForValueVector.java
@@ -18,15 +18,16 @@
 package org.apache.arrow.vector;
 
 import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.util.OversizedAllocationException;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 /**
  * This class tests that OversizedAllocationException occurs when a large memory is allocated for a vector.
@@ -39,94 +40,100 @@ public class TestOversizedAllocationForValueVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
  public void init() {
    allocator = new RootAllocator(Long.MAX_VALUE);
  }
 
-  @After
+  @AfterEach
  public void terminate() throws Exception {
    allocator.close();
  }
 
-  @Test(expected = OversizedAllocationException.class)
+  @Test
  public void testFixedVectorReallocation() {
-    final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator);
-    // edge case 1: buffer size = max value capacity
-    final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4);
-    try {
-      vector.allocateNew(expectedValueCapacity);
-      assertEquals(expectedValueCapacity, vector.getValueCapacity());
-      vector.reAlloc();
-      assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final UInt4Vector vector = new UInt4Vector(EMPTY_SCHEMA_PATH, allocator);
+      // edge case 1: buffer size = max value capacity
+      final int expectedValueCapacity = checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 4);
+      try {
+        vector.allocateNew(expectedValueCapacity);
+        assertEquals(expectedValueCapacity, vector.getValueCapacity());
+        vector.reAlloc();
+        assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+      } finally {
+        vector.close();
+      }
 
-    // common case: value count < max value capacity
-    try {
-      vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8));
-      vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
-      vector.reAlloc(); // this should throw an IOOB
-    } finally {
-      vector.close();
-    }
+      // common case: value count < max value capacity
+      try {
+        vector.allocateNew(checkedCastToInt(BaseValueVector.MAX_ALLOCATION_SIZE / 8));
+        vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
+        vector.reAlloc(); // this should throw an OversizedAllocationException
+      } finally {
+        vector.close();
+      }
+    });
  }
 
-  @Test(expected = OversizedAllocationException.class)
+  @Test
  public void testBitVectorReallocation() {
-    final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
-    // edge case 1: buffer size ~ max value capacity
-    final int expectedValueCapacity = 1 << 29;
-    try {
-      vector.allocateNew(expectedValueCapacity);
-      assertEquals(expectedValueCapacity, vector.getValueCapacity());
-      vector.reAlloc();
-      assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final BitVector vector = new BitVector(EMPTY_SCHEMA_PATH, allocator);
+      // edge case 1: buffer size ~ max value capacity
+      final int expectedValueCapacity = 1 << 29;
+      try {
+        vector.allocateNew(expectedValueCapacity);
+        assertEquals(expectedValueCapacity, vector.getValueCapacity());
+        vector.reAlloc();
+        assertEquals(expectedValueCapacity * 2, vector.getValueCapacity());
+      } finally {
+        vector.close();
+      }
 
-    // common: value count < MAX_VALUE_ALLOCATION
-    try {
-      vector.allocateNew(expectedValueCapacity);
-      for (int i = 0; i < 3; i++) {
-        vector.reAlloc(); // expand buffer size
+      // common: value count < MAX_VALUE_ALLOCATION
+      try {
+        vector.allocateNew(expectedValueCapacity);
+        for (int i = 0; i < 3; i++) {
+          vector.reAlloc(); // expand buffer size
+        }
+        assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+        vector.reAlloc(); // buffer size ~ max allocation
+        assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
+        vector.reAlloc(); // overflow
+      } finally {
+        vector.close();
       }
-      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
-      vector.reAlloc(); // buffer size ~ max allocation
-      assertEquals(Integer.MAX_VALUE, vector.getValueCapacity());
-      vector.reAlloc(); // overflow
-    } finally {
-      vector.close();
-    }
+    });
  }
 
-  @Test(expected = OversizedAllocationException.class)
+  @Test
  public void testVariableVectorReallocation() {
-    final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
-    // edge case 1: value count = MAX_VALUE_ALLOCATION
-    final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
-    final int expectedOffsetSize = 10;
-    try {
-      vector.allocateNew(expectedAllocationInBytes, 10);
-      assertTrue(expectedOffsetSize <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
-      vector.reAlloc();
-      assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
-      assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
-    } finally {
-      vector.close();
-    }
+    assertThrows(OversizedAllocationException.class, () -> {
+      final VarCharVector vector = new VarCharVector(EMPTY_SCHEMA_PATH, allocator);
+      // edge case 1: value count = MAX_VALUE_ALLOCATION
+      final long expectedAllocationInBytes = BaseValueVector.MAX_ALLOCATION_SIZE;
+      final int expectedOffsetSize = 10;
+      try {
+        vector.allocateNew(expectedAllocationInBytes, 10);
+        assertTrue(expectedOffsetSize <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes <= vector.getDataBuffer().capacity());
+        vector.reAlloc();
+        assertTrue(expectedOffsetSize * 2 <= vector.getValueCapacity());
+        assertTrue(expectedAllocationInBytes * 2 <= vector.getDataBuffer().capacity());
+      } finally {
+        vector.close();
+      }
 
-    // common: value count < MAX_VALUE_ALLOCATION
-    try {
-      vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
-      vector.reAlloc(); // value allocation reaches to MAX_VALUE_ALLOCATION
-      vector.reAlloc(); // this tests if it overflows
-    } finally {
-      vector.close();
-    }
+      // common: value count < MAX_VALUE_ALLOCATION
+      try {
+        vector.allocateNew(BaseValueVector.MAX_ALLOCATION_SIZE / 2, 0);
+        vector.reAlloc(); // value allocation reaches MAX_VALUE_ALLOCATION
+        vector.reAlloc(); // this tests if it overflows
+      } finally {
+        vector.close();
+      }
+    });
  }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
index 2b9f4cca8c22f..bf4cda6b4271a 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestPeriodDuration.java
@@ -17,8 +17,8 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
 
 import java.time.Duration;
 import java.time.LocalDate;
@@ -26,7 +26,7 @@
 import java.time.Period;
 import java.time.temporal.ChronoUnit;
 
-import org.junit.Test;
+import org.junit.jupiter.api.Test;
 
 public class TestPeriodDuration {
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
index 68f5e14dabb9b..ccb2890863314 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestStructVector.java
@@ -17,7 +17,11 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertSame;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -39,21 +43,20 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestStructVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -64,7 +67,7 @@ public void testFieldMetadata() throws Exception {
     metadata.put("k1", "v1");
     FieldType type = new FieldType(true, Struct.INSTANCE, null, metadata);
     try (StructVector vector = new StructVector("struct", allocator, type, null)) {
-      Assert.assertEquals(vector.getField().getMetadata(), type.getMetadata());
+      assertEquals(vector.getField().getMetadata(), type.getMetadata());
     }
   }
@@ -108,8 +111,8 @@ public void testAllocateAfterReAlloc() throws Exception {
       /*
        * Verify that the buffer sizes haven't changed.
        */
-      Assert.assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity);
-      Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity);
+      assertEquals(vector.getValidityBuffer().capacity(), savedValidityBufferCapacity);
+      assertEquals(vector.getValueCapacity(), savedValueCapacity);
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
index 1b0387feb73ff..10298112ddc98 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestUnionVector.java
@@ -17,11 +17,11 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
 import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -44,21 +44,21 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.TransferPair;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestUnionVector {
   private static final String EMPTY_SCHEMA_PATH = "";
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -283,8 +283,8 @@ public void testSplitAndTransfer() throws Exception {
 
         /* check the toVector output after doing the splitAndTransfer */
         for (int i = 0; i < length; i++) {
-          assertEquals("Different data at indexes: " + (start + i) + "and " + i, sourceVector.getObject(start + i),
-              toVector.getObject(i));
+          assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+              "Different data at indexes: " + (start + i) + " and " + i);
         }
       }
     }
@@ -373,7 +373,8 @@ public void testSplitAndTransferWithMixedVectors() throws Exception {
 
        /* check the toVector output after doing the splitAndTransfer */
       for (int i = 0; i < length; i++) {
-        assertEquals("Different values at index: " + i, sourceVector.getObject(start + i), toVector.getObject(i));
+        assertEquals(sourceVector.getObject(start + i), toVector.getObject(i),
+            "Different values at index: " + i);
       }
     }
   }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
index bfe489fa5af4e..6d4e64837adbc 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharListVector.java
@@ -17,6 +17,8 @@
 package org.apache.arrow.vector;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
 import java.nio.charset.StandardCharsets;
 
 import org.apache.arrow.memory.ArrowBuf;
@@ -25,21 +27,20 @@
 import org.apache.arrow.vector.complex.impl.UnionListWriter;
 import org.apache.arrow.vector.types.Types;
 import org.apache.arrow.vector.types.pojo.FieldType;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestVarCharListVector {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -72,8 +73,8 @@ public void testVarCharListWithNulls() {
 
       writer.setValueCount(2);
 
-      Assert.assertEquals(2, vector.getValueCount());
-      Assert.assertEquals(2, vector.getDataVector().getValueCount());
+      assertEquals(2, vector.getValueCount());
+      assertEquals(2, vector.getDataVector().getValueCount());
     }
   }
 }
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
index b96f6ab6afedd..02a85faa20cd6 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorAlloc.java
@@ -17,8 +17,8 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.util.Arrays;
 import java.util.Collections;
@@ -39,23 +39,23 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.types.pojo.Schema;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestVectorAlloc {
   private BufferAllocator rootAllocator;
 
   private BufferAllocator policyAllocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     rootAllocator = new RootAllocator(Long.MAX_VALUE);
     policyAllocator =
        new RootAllocator(AllocationListener.NOOP, Integer.MAX_VALUE, new CustomPolicy());
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     rootAllocator.close();
     policyAllocator.close();
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
index 9043bd4f8f2d4..21cbefae45161 100644
--- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReAlloc.java
@@ -17,7 +17,10 @@
 package org.apache.arrow.vector;
 
-import static org.junit.Assert.*;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.nio.charset.StandardCharsets;
 
@@ -37,22 +40,21 @@
 import org.apache.arrow.vector.types.pojo.Field;
 import org.apache.arrow.vector.types.pojo.FieldType;
 import org.apache.arrow.vector.util.DataSizeRoundingUtil;
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
 
 public class TestVectorReAlloc {
 
   private BufferAllocator allocator;
 
-  @Before
+  @BeforeEach
   public void init() {
     allocator = new RootAllocator(Long.MAX_VALUE);
   }
 
-  @After
+  @AfterEach
   public void terminate() throws Exception {
     allocator.close();
   }
@@ -68,7 +70,7 @@ public void testFixedType() {
 
     try {
       vector.set(initialCapacity, 0);
-      Assert.fail("Expected out of bounds exception");
+      fail("Expected out of bounds exception");
     } catch (Exception e) {
       // ok
     }
@@ -92,7 +94,7 @@ public void testNullableType() {
 
     try {
       vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
-      Assert.fail("Expected out of bounds exception");
+      fail("Expected out of bounds exception");
     } catch (Exception e) {
       // ok
     }
@@ -101,7 +103,7 @@ public void testNullableType() {
     assertTrue(vector.getValueCapacity() >= 2 * initialCapacity);
     vector.set(initialCapacity, "foo".getBytes(StandardCharsets.UTF_8));
 
-    assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
+    assertEquals("foo", new String(vector.get(initialCapacity), StandardCharsets.UTF_8));
   }
@@ -117,7 +119,7 @@ public void testListType() {
 
     try {
       vector.getInnerValueCountAt(2014);
-      Assert.fail("Expected out of bounds exception");
+      fail("Expected out of bounds exception");
     } catch (Exception e) {
       // ok
     }
@@ -140,7 +142,7 @@ public void testStructType() {
 
     try {
       vector.getObject(513);
-      Assert.fail("Expected out of bounds exception");
+      fail("Expected out of bounds exception");
     } catch (Exception e) {
       // ok
     }
@@ -161,7 +163,7 @@ public void testVariableWidthTypeSetNullValues() {
      for (int i = 0; i < numNullValues1; i++) {
        v1.setNull(i);
      }
-      Assert.assertTrue(v1.getBufferSizeFor(numNullValues1) > 0);
+      assertTrue(v1.getBufferSizeFor(numNullValues1) > 0);
    }
 
    try (final BaseLargeVariableWidthVector v2 = new LargeVarCharVector("var2", allocator)) {
@@ -171,7 +173,7 @@ public void testVariableWidthTypeSetNullValues() {
      for (int i = 0; i < numNullValues2; i++) {
        v2.setNull(i);
      }
-      Assert.assertTrue(v2.getBufferSizeFor(numNullValues2) > 0);
+      assertTrue(v2.getBufferSizeFor(numNullValues2)
> 0); } } @@ -194,7 +196,7 @@ public void testFixedAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -218,8 +220,8 @@ public void testVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -243,8 +245,8 @@ public void testLargeVariableAllocateAfterReAlloc() throws Exception { /* * Verify that the buffer sizes haven't changed. */ - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); - Assert.assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); + assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.valueBuffer.capacity(), savedValueBufferSize); } } @@ -256,8 +258,8 @@ public void testVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -269,8 +271,8 @@ public void testLargeVarCharAllocateNew() throws Exception { vector.allocateNew(count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -282,8 +284,8 @@ public void testVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. - Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseVariableWidthVector.OFFSET_WIDTH); } } @@ -295,8 +297,8 @@ public void testLargeVarCharAllocateNewUsingHelper() throws Exception { AllocationHelper.allocateNew(vector, count); // verify that the validity buffer and value buffer have capacity for at least 'count' elements. 
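A note on the recurring pattern in these hunks: JUnit 4's org.junit.Assert methods take the failure message as the first parameter, while JUnit 5's org.junit.jupiter.api.Assertions take it as the last, optionally as a Supplier<String>. The following minimal sketch is illustrative only — a hypothetical test class, not part of this patch:

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

// Hypothetical illustration of the JUnit 5 argument order used throughout
// this patch; this class is not part of the change set.
public class AssertionMessageOrderExample {
  @Test
  public void messageMovesToTheLastParameter() {
    int expected = 2;
    int actual = 1 + 1;
    // JUnit 4 (old): Assert.assertEquals("1 + 1 should be 2", expected, actual);
    // JUnit 5 (new): the message becomes the trailing argument.
    assertEquals(expected, actual, "1 + 1 should be 2");
    // The Supplier<String> overload defers building the message until a failure.
    assertEquals(expected, actual, () -> "1 + 1 should be 2");
  }
}

Putting the message last lets the common no-message overloads keep the JUnit 4 (expected, actual) shape, which is why most hunks in this patch only need to change imports and unqualify Assert.* calls.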
- Assert.assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); - Assert.assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); + assertTrue(vector.getValidityBuffer().capacity() >= DataSizeRoundingUtil.divideBy8Ceil(count)); + assertTrue(vector.getOffsetBuffer().capacity() >= (count + 1) * BaseLargeVariableWidthVector.OFFSET_WIDTH); } } @@ -314,7 +316,7 @@ public void testFixedRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -333,7 +335,7 @@ public void testVariableRepeatedClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -359,7 +361,7 @@ public void testRepeatedValueVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -386,7 +388,7 @@ public void testStructVectorClearAndSet() throws Exception { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -415,7 +417,7 @@ public void testFixedSizeListVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -440,7 +442,7 @@ public void testUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. - Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } @@ -468,7 +470,7 @@ public void testDenseUnionVectorClearAndSet() { } // should be deterministic, and not cause a run-away increase in capacity. 
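JUnit 5 also drops the expected attribute of @Test, so tests such as testSliceWithInvalidParam below are rewritten around assertThrows. A self-contained sketch of that pattern — the class and message here are hypothetical, not taken from this patch:

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.junit.jupiter.api.Test;

// Hypothetical illustration of migrating @Test(expected = ...) to
// assertThrows; this class is not part of the change set.
public class ExpectedExceptionExample {
  @Test
  public void invalidArgumentIsRejected() {
    // JUnit 4 (old): @Test(expected = IllegalArgumentException.class)
    // JUnit 5 (new): assert the exception explicitly; assertThrows also
    // returns it so the test can inspect the details.
    IllegalArgumentException e =
        assertThrows(IllegalArgumentException.class, () -> {
          throw new IllegalArgumentException("invalid slice range");
        });
    assertEquals("invalid slice range", e.getMessage());
  }
}

Unlike the annotation form, assertThrows pins the expectation to the exact statements wrapped in the lambda, so an unrelated setup failure of the same exception type no longer passes the test silently.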
- Assert.assertEquals(vector.getValueCapacity(), savedValueCapacity); + assertEquals(vector.getValueCapacity(), savedValueCapacity); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 19700e02161c7..2a6f86426ae8a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -34,20 +34,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType.FixedSizeList; import org.apache.arrow.vector.types.pojo.ArrowType.Int; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorReset { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java index 207962eb45b85..76500052fa632 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorSchemaRoot.java @@ -17,10 +17,11 @@ package org.apache.arrow.vector; -import static junit.framework.TestCase.assertTrue; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; import java.util.Arrays; @@ -35,20 +36,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorSchemaRoot { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() { allocator.close(); } @@ -226,20 +227,22 @@ public void testSlice() { } } - @Test(expected = IllegalArgumentException.class) + @Test public void testSliceWithInvalidParam() { - try (final IntVector intVector = new IntVector("intVector", allocator); - final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { - intVector.setValueCount(10); - float4Vector.setValueCount(10); - for (int i = 0; i < 10; i++) { - intVector.setSafe(i, i); - float4Vector.setSafe(i, i + 0.1f); - } - final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); + 
assertThrows(IllegalArgumentException.class, () -> { + try (final IntVector intVector = new IntVector("intVector", allocator); + final Float4Vector float4Vector = new Float4Vector("float4Vector", allocator)) { + intVector.setValueCount(10); + float4Vector.setValueCount(10); + for (int i = 0; i < 10; i++) { + intVector.setSafe(i, i); + float4Vector.setSafe(i, i + 0.1f); + } + final VectorSchemaRoot original = new VectorSchemaRoot(Arrays.asList(intVector, float4Vector)); - original.slice(0, 20); - } + original.slice(0, 20); + } + }); } @Test diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java index eac72f4b2c893..82ae5c038cbc2 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorUnloadLoad.java @@ -18,9 +18,9 @@ package org.apache.arrow.vector; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -44,21 +44,20 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestVectorUnloadLoad { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -116,9 +115,9 @@ public void testUnloadLoad() throws IOException { FieldReader bigIntReader = newRoot.getVector("bigInt").getReader(); for (int i = 0; i < count; i++) { intReader.setPosition(i); - Assert.assertEquals(i, intReader.readInteger().intValue()); + assertEquals(i, intReader.readInteger().intValue()); bigIntReader.setPosition(i); - Assert.assertEquals(i, bigIntReader.readLong().longValue()); + assertEquals(i, bigIntReader.readLong().longValue()); } } } @@ -188,7 +187,7 @@ public void testUnloadLoadAddPadding() throws IOException { for (int j = 0; j < i % 4 + 1; j++) { expected.add(i); } - Assert.assertEquals(expected, reader.readObject()); + assertEquals(expected, reader.readObject()); } } @@ -256,9 +255,9 @@ public void testLoadValidityBuffer() throws IOException { IntVector intDefinedVector = (IntVector) newRoot.getVector("intDefined"); IntVector intNullVector = (IntVector) newRoot.getVector("intNull"); for (int i = 0; i < count; i++) { - assertFalse("#" + i, intDefinedVector.isNull(i)); - assertEquals("#" + i, i, intDefinedVector.get(i)); - assertTrue("#" + i, intNullVector.isNull(i)); + assertFalse(intDefinedVector.isNull(i), "#" + i); + assertEquals(i, intDefinedVector.get(i), "#" + i); + assertTrue(intNullVector.isNull(i), "#" + i); } intDefinedVector.setSafe(count + 10, 1234); assertTrue(intDefinedVector.isNull(count + 1)); @@ -319,13 +318,13 @@ public void testUnloadLoadDuplicates() throws IOException { vectorLoader.load(recordBatch); List targets = newRoot.getFieldVectors(); - 
Assert.assertEquals(sources.size(), targets.size()); + assertEquals(sources.size(), targets.size()); for (int k = 0; k < sources.size(); k++) { IntVector src = (IntVector) sources.get(k); IntVector tgt = (IntVector) targets.get(k); - Assert.assertEquals(src.getValueCount(), tgt.getValueCount()); + assertEquals(src.getValueCount(), tgt.getValueCount()); for (int i = 0; i < count; i++) { - Assert.assertEquals(src.get(i), tgt.get(i)); + assertEquals(src.get(i), tgt.get(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java index 736b0f1b1aeac..6ff81faba73e8 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestTypeEqualsVisitor.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.compare; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.HashMap; @@ -41,20 +41,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java index 82ef7a479d05c..0e24fd0af6806 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/TestDenseUnionBufferSize.java @@ -17,7 +17,8 @@ package org.apache.arrow.vector.complex; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java index 29f25170332a2..67bdb9945fc94 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestComplexCopier.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.complex.impl; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; @@ -39,9 +39,9 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.DecimalUtility; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import 
org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestComplexCopier { @@ -49,12 +49,12 @@ public class TestComplexCopier { private static final int COUNT = 100; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -90,7 +90,6 @@ public void testCopyFixedSizeListVector() { // validate equals assertTrue(VectorEqualsVisitor.vectorEquals(from, to)); - } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java index b7fc681c16118..3a54d539c290a 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/impl/TestPromotableWriter.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.complex.impl; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.ByteBuffer; @@ -50,21 +50,21 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestPromotableWriter { private static final String EMPTY_SCHEMA_PATH = ""; private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -128,33 +128,33 @@ public void testPromoteToUnion() throws Exception { final UnionVector uv = v.getChild("A", UnionVector.class); - assertFalse("0 shouldn't be null", uv.isNull(0)); + assertFalse(uv.isNull(0), "0 shouldn't be null"); assertEquals(false, uv.getObject(0)); - assertFalse("1 shouldn't be null", uv.isNull(1)); + assertFalse(uv.isNull(1), "1 shouldn't be null"); assertEquals(true, uv.getObject(1)); - assertFalse("2 shouldn't be null", uv.isNull(2)); + assertFalse(uv.isNull(2), "2 shouldn't be null"); assertEquals(10, uv.getObject(2)); - assertNull("3 should be null", uv.getObject(3)); + assertNull(uv.getObject(3), "3 should be null"); - assertFalse("4 shouldn't be null", uv.isNull(4)); + assertFalse(uv.isNull(4), "4 shouldn't be null"); assertEquals(100, uv.getObject(4)); - assertFalse("5 shouldn't be null", uv.isNull(5)); + assertFalse(uv.isNull(5), "5 shouldn't be null"); assertEquals(123123L, uv.getObject(5)); - assertFalse("6 shouldn't be null", uv.isNull(6)); + assertFalse(uv.isNull(6), "6 shouldn't be null"); NullableTimeStampMilliTZHolder readBackHolder = new NullableTimeStampMilliTZHolder(); uv.getTimeStampMilliTZVector().get(6, readBackHolder); assertEquals(12345L, readBackHolder.value); assertEquals("UTC", readBackHolder.timezone); - assertFalse("7 shouldn't be null", uv.isNull(7)); + assertFalse(uv.isNull(7), "7 shouldn't be null"); assertEquals(444413L, 
((java.time.Duration) uv.getObject(7)).getSeconds()); - assertFalse("8 shouldn't be null", uv.isNull(8)); + assertFalse(uv.isNull(8), "8 shouldn't be null"); assertEquals(18978, ByteBuffer.wrap(uv.getFixedSizeBinaryVector().get(8)).order(ByteOrder.nativeOrder()).getInt()); @@ -172,10 +172,10 @@ public void testPromoteToUnion() throws Exception { Field childField1 = container.getField().getChildren().get(0).getChildren().get(0); Field childField2 = container.getField().getChildren().get(0).getChildren().get(1); - assertEquals("Child field should be union type: " + - childField1.getName(), ArrowTypeID.Union, childField1.getType().getTypeID()); - assertEquals("Child field should be decimal type: " + - childField2.getName(), ArrowTypeID.Decimal, childField2.getType().getTypeID()); + assertEquals(ArrowTypeID.Union, childField1.getType().getTypeID(), + "Child field should be union type: " + childField1.getName()); + assertEquals(ArrowTypeID.Decimal, childField2.getType().getTypeID(), + "Child field should be decimal type: " + childField2.getName()); buf.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java index 19f0ea9d4e392..c7ed893d4c340 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestComplexWriter.java @@ -17,7 +17,12 @@ package org.apache.arrow.vector.complex.writer; -import static org.junit.Assert.*; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.math.BigDecimal; import java.nio.ByteBuffer; @@ -92,10 +97,9 @@ import org.apache.arrow.vector.util.JsonStringHashMap; import org.apache.arrow.vector.util.Text; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestComplexWriter { @@ -103,12 +107,12 @@ public class TestComplexWriter { private static final int COUNT = 100; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -119,8 +123,8 @@ public void simpleNestedTypes() { StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); for (int i = 0; i < COUNT; i++) { rootReader.setPosition(i); - Assert.assertEquals(i, rootReader.reader("int").readInteger().intValue()); - Assert.assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); + assertEquals(i, rootReader.reader("int").readInteger().intValue()); + assertEquals(i, rootReader.reader("bigInt").readLong().longValue()); } parent.close(); @@ -210,15 +214,15 @@ private void checkNullableStruct(NonNullableStructVector structVector) { StructReader rootReader = new SingleStructReaderImpl(structVector).reader("root"); for (int i = 0; i < COUNT; i++) { rootReader.setPosition(i); - assertTrue("index is set: " + i, rootReader.isSet()); + 
assertTrue(rootReader.isSet(), "index is set: " + i); FieldReader struct = rootReader.reader("struct"); if (i % 2 == 0) { - assertTrue("index is set: " + i, struct.isSet()); - assertNotNull("index is set: " + i, struct.readObject()); + assertTrue(struct.isSet(), "index is set: " + i); + assertNotNull(struct.readObject(), "index is set: " + i); assertEquals(i, struct.reader("nested").readLong().longValue()); } else { - assertFalse("index is not set: " + i, struct.isSet()); - assertNull("index is not set: " + i, struct.readObject()); + assertFalse(struct.isSet(), "index is not set: " + i); + assertNull(struct.readObject(), "index is not set: " + i); } } } @@ -245,11 +249,11 @@ public void testList() { StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); rootReader.setPosition(0); - assertTrue("row 0 list is not set", rootReader.reader("list").isSet()); + assertTrue(rootReader.reader("list").isSet(), "row 0 list is not set"); assertEquals(Long.valueOf(0), rootReader.reader("list").reader().readLong()); rootReader.setPosition(1); - assertFalse("row 1 list is set", rootReader.reader("list").isSet()); + assertFalse(rootReader.reader("list").isSet(), "row 1 list is set"); } } @@ -312,9 +316,9 @@ public void testListScalarNull() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { - assertTrue("index is not set: " + j, listReader.reader().isSet()); + assertTrue(listReader.reader().isSet(), "index is not set: " + j); assertEquals(j, listReader.reader().readInteger().intValue()); } } @@ -392,7 +396,7 @@ public void listTimeStampMilliTZType() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableTimeStampMilliTZHolder actual = new NullableTimeStampMilliTZHolder(); listReader.reader().read(actual); @@ -430,7 +434,7 @@ public void listDurationType() { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableDurationHolder actual = new NullableDurationHolder(); listReader.reader().read(actual); @@ -472,7 +476,7 @@ public void listFixedSizeBinaryType() throws Exception { for (int j = 0; j < i % 7; j++) { listReader.next(); if (j % 2 == 0) { - assertFalse("index is set: " + j, listReader.reader().isSet()); + assertFalse(listReader.reader().isSet(), "index is set: " + j); } else { NullableFixedSizeBinaryHolder actual = new NullableFixedSizeBinaryHolder(); listReader.reader().read(actual); @@ -505,11 +509,11 @@ public void listScalarTypeNullable() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 2 == 0) { - assertTrue("index is set: " + i, listReader.isSet()); - assertEquals("correct length at: " + i, i % 7, ((List) listReader.readObject()).size()); + assertTrue(listReader.isSet(), "index is set: " + i); + assertEquals(i % 7, ((List) listReader.readObject()).size(), "correct length at: " + i); } else { - assertFalse("index is not set: " + i, listReader.isSet()); - assertNull("index is not set: " + i, listReader.readObject()); + assertFalse(listReader.isSet(), "index is not set: " + i); + assertNull(listReader.readObject(), "index is not set: " + i); } } } @@ -537,8 +541,8 @@ 
public void listStructType() { listReader.setPosition(i); for (int j = 0; j < i % 7; j++) { listReader.next(); - Assert.assertEquals("record: " + i, j, listReader.reader().reader("int").readInteger().intValue()); - Assert.assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue()); + assertEquals(j, listReader.reader().reader("int").readInteger().intValue(), "record: " + i); + assertEquals(j, listReader.reader().reader("bigInt").readLong().longValue()); } } } @@ -601,7 +605,7 @@ private void checkListOfLists(final ListVector listVector) { FieldReader innerListReader = listReader.reader(); for (int k = 0; k < i % 13; k++) { innerListReader.next(); - Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue()); + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); } } } @@ -673,9 +677,9 @@ private void checkUnionList(ListVector listVector) { for (int k = 0; k < i % 13; k++) { innerListReader.next(); if (k % 2 == 0) { - Assert.assertEquals("record: " + i, k, innerListReader.reader().readInteger().intValue()); + assertEquals(k, innerListReader.reader().readInteger().intValue(), "record: " + i); } else { - Assert.assertEquals("record: " + i, k, innerListReader.reader().readLong().longValue()); + assertEquals(k, innerListReader.reader().readLong().longValue(), "record: " + i); } } } @@ -724,11 +728,11 @@ private void checkListMap(ListVector listVector) { UnionMapReader mapReader = (UnionMapReader) listReader.reader(); for (int k = 0; k < i % 13; k++) { mapReader.next(); - Assert.assertEquals("record key: " + i, k, mapReader.key().readInteger().intValue()); + assertEquals(k, mapReader.key().readInteger().intValue(), "record key: " + i); if (k % 2 == 0) { - Assert.assertEquals("record value: " + i, k, mapReader.value().readLong().longValue()); + assertEquals(k, mapReader.value().readLong().longValue(), "record value: " + i); } else { - Assert.assertNull("record value: " + i, mapReader.value().readLong()); + assertNull(mapReader.value().readLong(), "record value: " + i); } } } @@ -772,24 +776,24 @@ public void simpleUnion() throws Exception { for (int i = 0; i < COUNT; i++) { unionReader.setPosition(i); if (i % 5 == 0) { - Assert.assertEquals(i, unionReader.readInteger().intValue()); + assertEquals(i, unionReader.readInteger().intValue()); } else if (i % 5 == 1) { NullableTimeStampMilliTZHolder holder = new NullableTimeStampMilliTZHolder(); unionReader.read(holder); - Assert.assertEquals(i, holder.value); - Assert.assertEquals("AsdfTimeZone", holder.timezone); + assertEquals(i, holder.value); + assertEquals("AsdfTimeZone", holder.timezone); } else if (i % 5 == 2) { NullableDurationHolder holder = new NullableDurationHolder(); unionReader.read(holder); - Assert.assertEquals(i, holder.value); - Assert.assertEquals(TimeUnit.NANOSECOND, holder.unit); + assertEquals(i, holder.value); + assertEquals(TimeUnit.NANOSECOND, holder.unit); } else if (i % 5 == 3) { NullableFixedSizeBinaryHolder holder = new NullableFixedSizeBinaryHolder(); unionReader.read(holder); assertEquals(i, holder.buffer.getInt(0)); assertEquals(4, holder.byteWidth); } else { - Assert.assertEquals((float) i, unionReader.readFloat(), 1e-12); + assertEquals((float) i, unionReader.readFloat(), 1e-12); } } vector.close(); @@ -808,12 +812,12 @@ public void promotableWriter() { bigIntWriter.writeBigInt(i); } Field field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(Int.TYPE_TYPE, 
field.getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(Int.TYPE_TYPE, field.getType().getTypeID()); Int intType = (Int) field.getType(); - Assert.assertEquals(64, intType.getBitWidth()); - Assert.assertTrue(intType.getIsSigned()); + assertEquals(64, intType.getBitWidth()); + assertTrue(intType.getIsSigned()); for (int i = 100; i < 200; i++) { VarCharWriter varCharWriter = rootWriter.varChar("a"); varCharWriter.setPosition(i); @@ -824,23 +828,23 @@ public void promotableWriter() { tempBuf.close(); } field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(Union.TYPE_TYPE, field.getType().getTypeID()); - Assert.assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID()); - Assert.assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(Union.TYPE_TYPE, field.getType().getTypeID()); + assertEquals(Int.TYPE_TYPE, field.getChildren().get(0).getType().getTypeID()); + assertEquals(Utf8.TYPE_TYPE, field.getChildren().get(1).getType().getTypeID()); StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); for (int i = 0; i < 100; i++) { rootReader.setPosition(i); FieldReader reader = rootReader.reader("a"); Long value = reader.readLong(); - Assert.assertNotNull("index: " + i, value); - Assert.assertEquals(i, value.intValue()); + assertNotNull(value, "index: " + i); + assertEquals(i, value.intValue()); } for (int i = 100; i < 200; i++) { rootReader.setPosition(i); FieldReader reader = rootReader.reader("a"); Text value = reader.readText(); - Assert.assertEquals(Integer.toString(i), value.toString()); + assertEquals(Integer.toString(i), value.toString()); } } } @@ -857,14 +861,14 @@ public void promotableWriterSchema() { rootWriter.varChar("a"); Field field = parent.getField().getChildren().get(0).getChildren().get(0); - Assert.assertEquals("a", field.getName()); - Assert.assertEquals(ArrowTypeID.Union, field.getType().getTypeID()); + assertEquals("a", field.getName()); + assertEquals(ArrowTypeID.Union, field.getType().getTypeID()); - Assert.assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID()); + assertEquals(ArrowTypeID.Int, field.getChildren().get(0).getType().getTypeID()); Int intType = (Int) field.getChildren().get(0).getType(); - Assert.assertEquals(64, intType.getBitWidth()); - Assert.assertTrue(intType.getIsSigned()); - Assert.assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID()); + assertEquals(64, intType.getBitWidth()); + assertTrue(intType.getIsSigned()); + assertEquals(ArrowTypeID.Utf8, field.getChildren().get(1).getType().getTypeID()); } } @@ -901,18 +905,18 @@ public void structWriterMixedCaseFieldNames() { List fieldsCaseSensitive = parent.getField().getChildren().get(0).getChildren(); Set fieldNamesCaseSensitive = getFieldNames(fieldsCaseSensitive); - Assert.assertEquals(11, fieldNamesCaseSensitive.size()); - Assert.assertTrue(fieldNamesCaseSensitive.contains("int_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("Int_Field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("float_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("Float_Field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field")); - 
Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field")); + assertEquals(11, fieldNamesCaseSensitive.size()); + assertTrue(fieldNamesCaseSensitive.contains("int_field")); + assertTrue(fieldNamesCaseSensitive.contains("Int_Field")); + assertTrue(fieldNamesCaseSensitive.contains("float_field")); + assertTrue(fieldNamesCaseSensitive.contains("Float_Field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field::char_field")); + assertTrue(fieldNamesCaseSensitive.contains("struct_field::Char_Field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::Bit_Field")); // test case-insensitive StructWriter ComplexWriter writerCaseInsensitive = new ComplexWriterImpl("rootCaseInsensitive", parent, false, false); @@ -932,14 +936,14 @@ public void structWriterMixedCaseFieldNames() { List fieldsCaseInsensitive = parent.getField().getChildren().get(1).getChildren(); Set fieldNamesCaseInsensitive = getFieldNames(fieldsCaseInsensitive); - Assert.assertEquals(7, fieldNamesCaseInsensitive.size()); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("int_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("float_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field")); - Assert.assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); - Assert.assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); + assertEquals(7, fieldNamesCaseInsensitive.size()); + assertTrue(fieldNamesCaseInsensitive.contains("int_field")); + assertTrue(fieldNamesCaseInsensitive.contains("float_field")); + assertTrue(fieldNamesCaseInsensitive.contains("struct_field")); + assertTrue(fieldNamesCaseInsensitive.contains("struct_field::char_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$")); + assertTrue(fieldNamesCaseSensitive.contains("list_field::$data$::bit_field")); } } @@ -976,15 +980,15 @@ public void timeStampSecWriter() throws Exception { FieldReader secReader = rootReader.reader("sec"); secReader.setPosition(0); LocalDateTime secDateTime = secReader.readLocalDateTime(); - Assert.assertEquals(expectedSecDateTime, secDateTime); + assertEquals(expectedSecDateTime, secDateTime); long secLong = secReader.readLong(); - Assert.assertEquals(expectedSecs, secLong); + assertEquals(expectedSecs, secLong); } { FieldReader secTZReader = rootReader.reader("secTZ"); secTZReader.setPosition(1); long secTZLong = secTZReader.readLong(); - Assert.assertEquals(expectedSecs, secTZLong); + assertEquals(expectedSecs, secTZLong); } } } @@ -1022,27 +1026,27 @@ public void timeStampMilliWriters() throws Exception { FieldReader milliReader = rootReader.reader("milli"); milliReader.setPosition(0); LocalDateTime milliDateTime = milliReader.readLocalDateTime(); - 
Assert.assertEquals(expectedMilliDateTime, milliDateTime); + assertEquals(expectedMilliDateTime, milliDateTime); long milliLong = milliReader.readLong(); - Assert.assertEquals(expectedMillis, milliLong); + assertEquals(expectedMillis, milliLong); } { FieldReader milliTZReader = rootReader.reader("milliTZ"); milliTZReader.setPosition(0); long milliTZLong = milliTZReader.readLong(); - Assert.assertEquals(expectedMillis, milliTZLong); + assertEquals(expectedMillis, milliTZLong); } } } private void checkTimestampField(Field field, String name) { - Assert.assertEquals(name, field.getName()); - Assert.assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID()); + assertEquals(name, field.getName()); + assertEquals(ArrowType.Timestamp.TYPE_TYPE, field.getType().getTypeID()); } private void checkTimestampTZField(Field field, String name, String tz) { checkTimestampField(field, name); - Assert.assertEquals(tz, ((Timestamp) field.getType()).getTimezone()); + assertEquals(tz, ((Timestamp) field.getType()).getTimezone()); } @Test @@ -1079,15 +1083,15 @@ public void timeStampMicroWriters() throws Exception { FieldReader microReader = rootReader.reader("micro"); microReader.setPosition(0); LocalDateTime microDateTime = microReader.readLocalDateTime(); - Assert.assertEquals(expectedMicroDateTime, microDateTime); + assertEquals(expectedMicroDateTime, microDateTime); long microLong = microReader.readLong(); - Assert.assertEquals(expectedMicros, microLong); + assertEquals(expectedMicros, microLong); } { FieldReader microReader = rootReader.reader("microTZ"); microReader.setPosition(1); long microLong = microReader.readLong(); - Assert.assertEquals(expectedMicros, microLong); + assertEquals(expectedMicros, microLong); } } } @@ -1125,18 +1129,18 @@ public void timeStampNanoWriters() throws Exception { FieldReader nanoReader = rootReader.reader("nano"); nanoReader.setPosition(0); LocalDateTime nanoDateTime = nanoReader.readLocalDateTime(); - Assert.assertEquals(expectedNanoDateTime, nanoDateTime); + assertEquals(expectedNanoDateTime, nanoDateTime); long nanoLong = nanoReader.readLong(); - Assert.assertEquals(expectedNanos, nanoLong); + assertEquals(expectedNanos, nanoLong); } { FieldReader nanoReader = rootReader.reader("nanoTZ"); nanoReader.setPosition(0); long nanoLong = nanoReader.readLong(); - Assert.assertEquals(expectedNanos, nanoLong); + assertEquals(expectedNanos, nanoLong); NullableTimeStampNanoTZHolder h = new NullableTimeStampNanoTZHolder(); nanoReader.read(h); - Assert.assertEquals(expectedNanos, h.value); + assertEquals(expectedNanos, h.value); } } @@ -1173,8 +1177,8 @@ public void fixedSizeBinaryWriters() throws Exception { // schema List children = parent.getField().getChildren().get(0).getChildren(); - Assert.assertEquals(fieldName, children.get(0).getName()); - Assert.assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID()); + assertEquals(fieldName, children.get(0).getName()); + assertEquals(ArrowType.FixedSizeBinary.TYPE_TYPE, children.get(0).getType().getTypeID()); // read StructReader rootReader = new SingleStructReaderImpl(parent).reader("root"); @@ -1183,7 +1187,7 @@ public void fixedSizeBinaryWriters() throws Exception { for (int i = 0; i < numValues; i++) { fixedSizeBinaryReader.setPosition(i); byte[] readValues = fixedSizeBinaryReader.readByteArray(); - Assert.assertArrayEquals(values[i], readValues); + assertArrayEquals(values[i], readValues); } } @@ -1369,17 +1373,17 @@ public void testListWriterWithNulls() { for (int i = 0; i < COUNT; 
i++) { listReader.setPosition(i); if (i % 2 == 0) { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 0) { - Assert.assertNull(listReader.reader().readInteger()); + assertNull(listReader.reader().readInteger()); } else { - Assert.assertEquals(i, listReader.reader().readInteger().intValue()); + assertEquals(i, listReader.reader().readInteger().intValue()); listReader.next(); - Assert.assertEquals(i * 2, listReader.reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().readInteger().intValue()); } } else { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } } } @@ -1419,20 +1423,20 @@ public void testListOfListWriterWithNulls() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 2 == 0) { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 0) { - Assert.assertFalse(listReader.reader().isSet()); + assertFalse(listReader.reader().isSet()); } else { listReader.reader().next(); - Assert.assertFalse(listReader.reader().reader().isSet()); + assertFalse(listReader.reader().reader().isSet()); listReader.reader().next(); - Assert.assertEquals(i, listReader.reader().reader().readInteger().intValue()); + assertEquals(i, listReader.reader().reader().readInteger().intValue()); listReader.reader().next(); - Assert.assertEquals(i * 2, listReader.reader().reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().reader().readInteger().intValue()); } } else { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } } } @@ -1478,23 +1482,23 @@ public void testListOfListOfListWriterWithNulls() { for (int i = 0; i < COUNT; i++) { listReader.setPosition(i); if (i % 4 == 0) { - Assert.assertFalse(listReader.isSet()); + assertFalse(listReader.isSet()); } else { - Assert.assertTrue(listReader.isSet()); + assertTrue(listReader.isSet()); listReader.next(); if (i % 4 == 1) { - Assert.assertFalse(listReader.reader().isSet()); + assertFalse(listReader.reader().isSet()); } else if (i % 4 == 2) { listReader.reader().next(); - Assert.assertFalse(listReader.reader().reader().isSet()); + assertFalse(listReader.reader().reader().isSet()); } else { listReader.reader().next(); listReader.reader().reader().next(); - Assert.assertFalse(listReader.reader().reader().reader().isSet()); + assertFalse(listReader.reader().reader().reader().isSet()); listReader.reader().reader().next(); - Assert.assertEquals(i, listReader.reader().reader().reader().readInteger().intValue()); + assertEquals(i, listReader.reader().reader().reader().readInteger().intValue()); listReader.reader().reader().next(); - Assert.assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue()); + assertEquals(i * 2, listReader.reader().reader().reader().readInteger().intValue()); } } } @@ -1507,7 +1511,7 @@ public void testStructOfList() { structVector.addOrGetList("childList1"); NullableStructReaderImpl structReader = structVector.getReader(); FieldReader childListReader = structReader.reader("childList1"); - Assert.assertNotNull(childListReader); + assertNotNull(childListReader); } try (StructVector structVector = StructVector.empty("struct2", allocator)) { @@ -1523,9 +1527,9 @@ public void testStructOfList() { NullableStructReaderImpl structReader = structVector.getReader(); FieldReader childListReader = structReader.reader("childList2"); int size = childListReader.size(); - Assert.assertEquals(1, size); + assertEquals(1, 
size); int data = childListReader.reader().readInteger(); - Assert.assertEquals(10, data); + assertEquals(10, data); } try (StructVector structVector = StructVector.empty("struct3", allocator)) { @@ -1545,9 +1549,9 @@ public void testStructOfList() { structReader.setPosition(3); FieldReader childListReader = structReader.reader("childList3"); int size = childListReader.size(); - Assert.assertEquals(1, size); + assertEquals(1, size); int data = ((List) childListReader.readObject()).get(0); - Assert.assertEquals(3, data); + assertEquals(3, data); } try (StructVector structVector = StructVector.empty("struct4", allocator)) { @@ -1564,7 +1568,7 @@ public void testStructOfList() { structReader.setPosition(3); FieldReader childListReader = structReader.reader("childList4"); int size = childListReader.size(); - Assert.assertEquals(0, size); + assertEquals(0, size); } } @@ -1618,7 +1622,7 @@ public void testMapWithNulls() { mapWriter.endMap(); writer.setValueCount(1); UnionMapReader mapReader = (UnionMapReader) new SingleStructReaderImpl(parent).reader("root"); - Assert.assertNull(mapReader.key().readInteger()); + assertNull(mapReader.key().readInteger()); assertEquals(1, mapReader.value().readInteger().intValue()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java index 27b8f1796ee31..f17c370c89522 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/complex/writer/TestSimpleWriter.java @@ -17,6 +17,9 @@ package org.apache.arrow.vector.complex.writer; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.nio.ByteBuffer; import org.apache.arrow.memory.BufferAllocator; @@ -30,21 +33,20 @@ import org.apache.arrow.vector.complex.impl.VarBinaryWriterImpl; import org.apache.arrow.vector.complex.impl.VarCharWriterImpl; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestSimpleWriter { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -56,7 +58,7 @@ public void testWriteByteArrayToVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeVarBinary(input); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -67,7 +69,7 @@ public void testWriteByteArrayWithOffsetToVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeVarBinary(input, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -79,7 +81,7 @@ public void testWriteByteBufferToVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeVarBinary(buffer); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -91,7 +93,7 @@ public void testWriteByteBufferWithOffsetToVarBinary() throws Exception { ByteBuffer buffer = 
ByteBuffer.wrap(input); writer.writeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -102,7 +104,7 @@ public void testWriteByteArrayToLargeVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeLargeVarBinary(input); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -113,7 +115,7 @@ public void testWriteByteArrayWithOffsetToLargeVarBinary() throws Exception { byte[] input = new byte[] { 0x01, 0x02 }; writer.writeLargeVarBinary(input, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -125,7 +127,7 @@ public void testWriteByteBufferToLargeVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeLargeVarBinary(buffer); byte[] result = vector.get(0); - Assert.assertArrayEquals(input, result); + assertArrayEquals(input, result); } } @@ -137,7 +139,7 @@ public void testWriteByteBufferWithOffsetToLargeVarBinary() throws Exception { ByteBuffer buffer = ByteBuffer.wrap(input); writer.writeLargeVarBinary(buffer, 1, 1); byte[] result = vector.get(0); - Assert.assertArrayEquals(new byte[] { 0x02 }, result); + assertArrayEquals(new byte[] { 0x02 }, result); } } @@ -148,7 +150,7 @@ public void testWriteStringToVarChar() throws Exception { String input = "testInput"; writer.writeVarChar(input); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -159,7 +161,7 @@ public void testWriteTextToVarChar() throws Exception { String input = "testInput"; writer.writeVarChar(new Text(input)); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -170,7 +172,7 @@ public void testWriteStringToLargeVarChar() throws Exception { String input = "testInput"; writer.writeLargeVarChar(input); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } @@ -181,7 +183,7 @@ public void testWriteTextToLargeVarChar() throws Exception { String input = "testInput"; writer.writeLargeVarChar(new Text(input)); String result = vector.getObject(0).toString(); - Assert.assertEquals(input, result); + assertEquals(input, result); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java index de9187edb667e..77eeb3589058d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/BaseFileTest.java @@ -18,9 +18,12 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.math.BigDecimal; @@ -84,9 +87,8 @@ import 
org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.Text; -import org.junit.After; -import org.junit.Assert; -import org.junit.Before; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -98,12 +100,12 @@ public class BaseFileTest { protected static final int COUNT = 10; protected BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @After + @AfterEach public void tearDown() { allocator.close(); } @@ -150,17 +152,20 @@ protected void writeData(int count, StructVector parent) { protected void validateContent(int count, VectorSchemaRoot root) { for (int i = 0; i < count; i++) { - Assert.assertEquals(i, root.getVector("int").getObject(i)); - Assert.assertEquals((Short) uint1Values[i % uint1Values.length], + assertEquals(i, root.getVector("int").getObject(i)); + assertEquals((Short) uint1Values[i % uint1Values.length], ((UInt1Vector) root.getVector("uint1")).getObjectNoOverflow(i)); - Assert.assertEquals("Failed for index: " + i, (Character) uint2Values[i % uint2Values.length], - (Character) ((UInt2Vector) root.getVector("uint2")).get(i)); - Assert.assertEquals("Failed for index: " + i, (Long) uint4Values[i % uint4Values.length], - ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i)); - Assert.assertEquals("Failed for index: " + i, uint8Values[i % uint8Values.length], - ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i)); - Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - Assert.assertEquals(i == 0 ? Float.NaN : i, root.getVector("float").getObject(i)); + assertEquals((Character) uint2Values[i % uint2Values.length], + (Character) ((UInt2Vector) root.getVector("uint2")).get(i), + "Failed for index: " + i); + assertEquals((Long) uint4Values[i % uint4Values.length], + ((UInt4Vector) root.getVector("uint4")).getObjectNoOverflow(i), + "Failed for index: " + i); + assertEquals(uint8Values[i % uint8Values.length], + ((UInt8Vector) root.getVector("uint8")).getObjectNoOverflow(i), + "Failed for index: " + i); + assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); + assertEquals(i == 0 ? 
Float.NaN : i, root.getVector("float").getObject(i)); } } @@ -210,23 +215,23 @@ public void printVectors(List vectors) { } protected void validateComplexContent(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); printVectors(root.getFieldVectors()); for (int i = 0; i < count; i++) { Object intVal = root.getVector("int").getObject(i); if (i % 5 != 3) { - Assert.assertEquals(i, intVal); + assertEquals(i, intVal); } else { - Assert.assertNull(intVal); + assertNull(intVal); } - Assert.assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); - Assert.assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size()); + assertEquals(Long.valueOf(i), root.getVector("bigInt").getObject(i)); + assertEquals(i % 3, ((List) root.getVector("list").getObject(i)).size()); NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); FieldReader structReader = root.getVector("struct").getReader(); structReader.setPosition(i); structReader.reader("timestamp").read(h); - Assert.assertEquals(i, h.value); + assertEquals(i, h.value); } } @@ -235,7 +240,7 @@ private LocalDateTime makeDateTimeFromCount(int i) { } protected void writeDateTimeData(int count, StructVector parent) { - Assert.assertTrue(count < 100); + assertTrue(count < 100); ComplexWriter writer = new ComplexWriterImpl("root", parent); StructWriter rootWriter = writer.rootAsStruct(); DateMilliWriter dateWriter = rootWriter.dateMilli("date"); @@ -268,22 +273,22 @@ protected void writeDateTimeData(int count, StructVector parent) { } protected void validateDateTimeContent(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); printVectors(root.getFieldVectors()); for (int i = 0; i < count; i++) { LocalDateTime dt = makeDateTimeFromCount(i); LocalDateTime dtMilli = dt.minusNanos(i); LocalDateTime dateVal = ((DateMilliVector) root.getVector("date")).getObject(i); LocalDateTime dateExpected = dt.toLocalDate().atStartOfDay(); - Assert.assertEquals(dateExpected, dateVal); + assertEquals(dateExpected, dateVal); LocalTime timeVal = ((TimeMilliVector) root.getVector("time")).getObject(i).toLocalTime(); - Assert.assertEquals(dtMilli.toLocalTime(), timeVal); + assertEquals(dtMilli.toLocalTime(), timeVal); Object timestampMilliVal = root.getVector("timestamp-milli").getObject(i); - Assert.assertEquals(dtMilli, timestampMilliVal); + assertEquals(dtMilli, timestampMilliVal); Object timestampMilliTZVal = root.getVector("timestamp-milliTZ").getObject(i); - Assert.assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal); + assertEquals(dt.atZone(ZoneId.of("Europe/Paris")).toInstant().toEpochMilli(), timestampMilliTZVal); Object timestampNanoVal = root.getVector("timestamp-nano").getObject(i); - Assert.assertEquals(dt, timestampNanoVal); + assertEquals(dt, timestampNanoVal); } } @@ -355,66 +360,66 @@ protected VectorSchemaRoot writeFlatDictionaryData( protected void validateFlatDictionary(VectorSchemaRoot root, DictionaryProvider provider) { FieldVector vector1A = root.getVector("varcharA"); - Assert.assertNotNull(vector1A); + assertNotNull(vector1A); DictionaryEncoding encoding1A = vector1A.getField().getDictionary(); - Assert.assertNotNull(encoding1A); - Assert.assertEquals(1L, encoding1A.getId()); + assertNotNull(encoding1A); + assertEquals(1L, encoding1A.getId()); - Assert.assertEquals(6, vector1A.getValueCount()); - Assert.assertEquals(0, 
vector1A.getObject(0)); - Assert.assertEquals(1, vector1A.getObject(1)); - Assert.assertEquals(null, vector1A.getObject(2)); - Assert.assertEquals(2, vector1A.getObject(3)); - Assert.assertEquals(1, vector1A.getObject(4)); - Assert.assertEquals(2, vector1A.getObject(5)); + assertEquals(6, vector1A.getValueCount()); + assertEquals(0, vector1A.getObject(0)); + assertEquals(1, vector1A.getObject(1)); + assertEquals(null, vector1A.getObject(2)); + assertEquals(2, vector1A.getObject(3)); + assertEquals(1, vector1A.getObject(4)); + assertEquals(2, vector1A.getObject(5)); FieldVector vector1B = root.getVector("varcharB"); - Assert.assertNotNull(vector1B); + assertNotNull(vector1B); DictionaryEncoding encoding1B = vector1A.getField().getDictionary(); - Assert.assertNotNull(encoding1B); - Assert.assertTrue(encoding1A.equals(encoding1B)); - Assert.assertEquals(1L, encoding1B.getId()); - - Assert.assertEquals(6, vector1B.getValueCount()); - Assert.assertEquals(2, vector1B.getObject(0)); - Assert.assertEquals(1, vector1B.getObject(1)); - Assert.assertEquals(2, vector1B.getObject(2)); - Assert.assertEquals(null, vector1B.getObject(3)); - Assert.assertEquals(1, vector1B.getObject(4)); - Assert.assertEquals(0, vector1B.getObject(5)); + assertNotNull(encoding1B); + assertTrue(encoding1A.equals(encoding1B)); + assertEquals(1L, encoding1B.getId()); + + assertEquals(6, vector1B.getValueCount()); + assertEquals(2, vector1B.getObject(0)); + assertEquals(1, vector1B.getObject(1)); + assertEquals(2, vector1B.getObject(2)); + assertEquals(null, vector1B.getObject(3)); + assertEquals(1, vector1B.getObject(4)); + assertEquals(0, vector1B.getObject(5)); FieldVector vector2 = root.getVector("sizes"); - Assert.assertNotNull(vector2); + assertNotNull(vector2); DictionaryEncoding encoding2 = vector2.getField().getDictionary(); - Assert.assertNotNull(encoding2); - Assert.assertEquals(2L, encoding2.getId()); + assertNotNull(encoding2); + assertEquals(2L, encoding2.getId()); - Assert.assertEquals(6, vector2.getValueCount()); - Assert.assertEquals(null, vector2.getObject(0)); - Assert.assertEquals(2, vector2.getObject(1)); - Assert.assertEquals(1, vector2.getObject(2)); - Assert.assertEquals(1, vector2.getObject(3)); - Assert.assertEquals(2, vector2.getObject(4)); - Assert.assertEquals(null, vector2.getObject(5)); + assertEquals(6, vector2.getValueCount()); + assertEquals(null, vector2.getObject(0)); + assertEquals(2, vector2.getObject(1)); + assertEquals(1, vector2.getObject(2)); + assertEquals(1, vector2.getObject(3)); + assertEquals(2, vector2.getObject(4)); + assertEquals(null, vector2.getObject(5)); Dictionary dictionary1 = provider.lookup(1L); - Assert.assertNotNull(dictionary1); + assertNotNull(dictionary1); VarCharVector dictionaryVector = ((VarCharVector) dictionary1.getVector()); - Assert.assertEquals(3, dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1)); - Assert.assertEquals(new Text("baz"), dictionaryVector.getObject(2)); + assertEquals(3, dictionaryVector.getValueCount()); + assertEquals(new Text("foo"), dictionaryVector.getObject(0)); + assertEquals(new Text("bar"), dictionaryVector.getObject(1)); + assertEquals(new Text("baz"), dictionaryVector.getObject(2)); Dictionary dictionary2 = provider.lookup(2L); - Assert.assertNotNull(dictionary2); + assertNotNull(dictionary2); dictionaryVector = ((VarCharVector) dictionary2.getVector()); - Assert.assertEquals(3, 
dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("micro"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("small"), dictionaryVector.getObject(1)); - Assert.assertEquals(new Text("large"), dictionaryVector.getObject(2)); + assertEquals(3, dictionaryVector.getValueCount()); + assertEquals(new Text("micro"), dictionaryVector.getObject(0)); + assertEquals(new Text("small"), dictionaryVector.getObject(1)); + assertEquals(new Text("large"), dictionaryVector.getObject(2)); } protected VectorSchemaRoot writeNestedDictionaryData( @@ -456,26 +461,26 @@ protected VectorSchemaRoot writeNestedDictionaryData( protected void validateNestedDictionary(VectorSchemaRoot root, DictionaryProvider provider) { FieldVector vector = root.getFieldVectors().get(0); - Assert.assertNotNull(vector); - Assert.assertNull(vector.getField().getDictionary()); + assertNotNull(vector); + assertNull(vector.getField().getDictionary()); Field nestedField = vector.getField().getChildren().get(0); DictionaryEncoding encoding = nestedField.getDictionary(); - Assert.assertNotNull(encoding); - Assert.assertEquals(2L, encoding.getId()); - Assert.assertEquals(new ArrowType.Int(32, true), encoding.getIndexType()); + assertNotNull(encoding); + assertEquals(2L, encoding.getId()); + assertEquals(new ArrowType.Int(32, true), encoding.getIndexType()); - Assert.assertEquals(3, vector.getValueCount()); - Assert.assertEquals(Arrays.asList(0, 1), vector.getObject(0)); - Assert.assertEquals(Arrays.asList(0), vector.getObject(1)); - Assert.assertEquals(Arrays.asList(1), vector.getObject(2)); + assertEquals(3, vector.getValueCount()); + assertEquals(Arrays.asList(0, 1), vector.getObject(0)); + assertEquals(Arrays.asList(0), vector.getObject(1)); + assertEquals(Arrays.asList(1), vector.getObject(2)); Dictionary dictionary = provider.lookup(2L); - Assert.assertNotNull(dictionary); + assertNotNull(dictionary); VarCharVector dictionaryVector = ((VarCharVector) dictionary.getVector()); - Assert.assertEquals(2, dictionaryVector.getValueCount()); - Assert.assertEquals(new Text("foo"), dictionaryVector.getObject(0)); - Assert.assertEquals(new Text("bar"), dictionaryVector.getObject(1)); + assertEquals(2, dictionaryVector.getValueCount()); + assertEquals(new Text("foo"), dictionaryVector.getObject(0)); + assertEquals(new Text("bar"), dictionaryVector.getObject(1)); } protected VectorSchemaRoot writeDecimalData(BufferAllocator bufferAllocator) { @@ -509,26 +514,26 @@ protected void validateDecimalData(VectorSchemaRoot root) { DecimalVector decimalVector2 = (DecimalVector) root.getVector("decimal2"); DecimalVector decimalVector3 = (DecimalVector) root.getVector("decimal3"); int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); for (int i = 0; i < count; i++) { // Verify decimal 1 vector BigDecimal readValue = decimalVector1.getObject(i); ArrowType.Decimal type = (ArrowType.Decimal) decimalVector1.getField().getType(); BigDecimal genValue = new BigDecimal(BigInteger.valueOf(i), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); // Verify decimal 2 vector readValue = decimalVector2.getObject(i); type = (ArrowType.Decimal) decimalVector2.getField().getType(); genValue = new BigDecimal(BigInteger.valueOf(i * (1 << 10)), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); // Verify decimal 3 vector readValue = decimalVector3.getObject(i); type = (ArrowType.Decimal) 
decimalVector3.getField().getType(); genValue = new BigDecimal(BigInteger.valueOf(i * 1111111111111111L), type.getScale()); - Assert.assertEquals(genValue, readValue); + assertEquals(genValue, readValue); } } @@ -558,18 +563,18 @@ public void validateUnionData(int count, VectorSchemaRoot root) { unionReader.setPosition(i); switch (i % 4) { case 0: - Assert.assertEquals(i, unionReader.readInteger().intValue()); + assertEquals(i, unionReader.readInteger().intValue()); break; case 1: - Assert.assertEquals(i, unionReader.readLong().longValue()); + assertEquals(i, unionReader.readLong().longValue()); break; case 2: - Assert.assertEquals(i % 3, unionReader.size()); + assertEquals(i % 3, unionReader.size()); break; case 3: NullableTimeStampMilliHolder h = new NullableTimeStampMilliHolder(); unionReader.reader("timestamp").read(h); - Assert.assertEquals(i, h.value); + assertEquals(i, h.value); break; default: assert false : "Unexpected value in switch statement: " + i; @@ -623,7 +628,7 @@ public void writeUnionData(int count, StructVector parent) { } protected void writeVarBinaryData(int count, StructVector parent) { - Assert.assertTrue(count < 100); + assertTrue(count < 100); ComplexWriter writer = new ComplexWriterImpl("root", parent); StructWriter rootWriter = writer.rootAsStruct(); ListWriter listWriter = rootWriter.list("list"); @@ -642,7 +647,7 @@ protected void writeVarBinaryData(int count, StructVector parent) { } protected void validateVarBinary(int count, VectorSchemaRoot root) { - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); ListVector listVector = (ListVector) root.getVector("list"); byte[] expectedArray = new byte[count]; int numVarBinaryValues = 0; @@ -650,23 +655,23 @@ protected void validateVarBinary(int count, VectorSchemaRoot root) { expectedArray[i] = (byte) i; List objList = listVector.getObject(i); if (i % 3 == 0) { - Assert.assertTrue(objList.isEmpty()); + assertTrue(objList.isEmpty()); } else { byte[] expected = Arrays.copyOfRange(expectedArray, 0, i + 1); for (int j = 0; j < i % 3; j++) { byte[] result = (byte[]) objList.get(j); - Assert.assertArrayEquals(result, expected); + assertArrayEquals(result, expected); numVarBinaryValues++; } } } // ListVector lastSet should be the index of last value + 1 - Assert.assertEquals(listVector.getLastSet(), count - 1); + assertEquals(listVector.getLastSet(), count - 1); // VarBinaryVector lastSet should be the index of last value VarBinaryVector binaryVector = (VarBinaryVector) listVector.getChildrenFromFields().get(0); - Assert.assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1); + assertEquals(binaryVector.getLastSet(), numVarBinaryValues - 1); } protected void writeBatchData(ArrowWriter writer, IntVector vector, VectorSchemaRoot root) throws IOException { @@ -762,7 +767,7 @@ protected void validateMapData(VectorSchemaRoot root) { MapVector sortedMapVector = (MapVector) root.getVector("mapSorted"); final int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); UnionMapReader mapReader = new UnionMapReader(mapVector); UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); @@ -833,7 +838,7 @@ protected void validateListAsMapData(VectorSchemaRoot root) { MapVector sortedMapVector = (MapVector) root.getVector("map"); final int count = 10; - Assert.assertEquals(count, root.getRowCount()); + assertEquals(count, root.getRowCount()); UnionMapReader sortedMapReader = new UnionMapReader(sortedMapVector); 
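// Illustrative sketch (hypothetical class, not from this patch): every hunk in
// this file applies the same JUnit 4 -> JUnit 5 (Jupiter) mechanics. The static
// imports move from org.junit.Assert to org.junit.jupiter.api.Assertions, and
// the optional failure message moves from the first argument to the last.
import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class MessageArgumentOrderTest {
  @Test
  void messageMovesLast() {
    int expected = 7;
    int actual = 7;
    // JUnit 4 form: Assert.assertEquals("Failed for index: 0", expected, actual);
    // Jupiter form: the message trails the expected/actual pair.
    assertEquals(expected, actual, "Failed for index: 0");
  }
}
// This is why hunks above read assertEquals(..., ..., "Failed for index: " + i):
// the mechanical rewrite only reorders arguments; the assertion semantics are unchanged.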
sortedMapReader.setKeyValueNames("myKey", "myValue"); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java index d3c91fd144356..52d093ae29ebf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/ITTestIPCWithLargeArrowBuffers.java @@ -17,10 +17,10 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; import java.io.FileInputStream; @@ -40,7 +40,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java index 79a4b249a8a89..d5120b70d01e9 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/MessageSerializerTest.java @@ -19,10 +19,10 @@ import static java.util.Arrays.asList; import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -48,9 +48,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.ExpectedException; +import org.junit.jupiter.api.Test; public class MessageSerializerTest { @@ -154,9 +152,6 @@ public void testSchemaDictionaryMessageSerialization() throws IOException { assertEquals(schema, deserialized); } - @Rule - public ExpectedException expectedEx = ExpectedException.none(); - @Test public void testSerializeRecordBatchV4() throws IOException { byte[] validity = new byte[]{(byte) 255, 0}; @@ -243,5 +238,4 @@ public static void verifyBatch(ArrowRecordBatch batch, byte[] validity, byte[] v assertArrayEquals(validity, MessageSerializerTest.array(buffers.get(0))); assertArrayEquals(values, MessageSerializerTest.array(buffers.get(1))); } - } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java index 4fb5822786083..d76e5263122fe 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java +++ 
b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFile.java @@ -19,8 +19,8 @@ import static java.nio.channels.Channels.newChannel; import static org.apache.arrow.vector.TestUtils.newVarCharVector; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -39,7 +39,7 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.types.pojo.Field; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java index 38c65bddeddea..beb6500ac2ca0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowFooter.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector.ipc; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -32,7 +32,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java index 07875b25029ea..ad9ca50a14979 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java @@ -23,11 +23,12 @@ import static org.apache.arrow.vector.TestUtils.newVarCharVector; import static org.apache.arrow.vector.TestUtils.newVector; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -87,10 +88,9 @@ import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; import org.apache.arrow.vector.util.DictionaryUtility; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestArrowReaderWriter { @@ -109,7 +109,7 @@ public class TestArrowReaderWriter { private Schema schema; private 
Schema encodedSchema; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); @@ -150,7 +150,7 @@ public void init() { new DictionaryEncoding(/*id=*/3L, /*ordered=*/false, /*indexType=*/null)); } - @After + @AfterEach public void terminate() throws Exception { dictionaryVector1.close(); dictionaryVector2.close(); @@ -386,18 +386,17 @@ public void testWriteReadWithStructDictionaries() throws IOException { assertEquals(dictionaryVector4.getValueCount(), readDictionaryVector.getValueCount()); final BiFunction typeComparatorIgnoreName = (v1, v2) -> new TypeEqualsVisitor(v1, false, true).equals(v2); - assertTrue("Dictionary vectors are not equal", - new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector, - typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount()))); + assertTrue(new RangeEqualsVisitor(dictionaryVector4, readDictionaryVector, typeComparatorIgnoreName) + .rangeEquals(new Range(0, 0, dictionaryVector4.getValueCount())), + "Dictionary vectors are not equal"); // Assert the decoded vector is correct try (final ValueVector readVector = DictionaryEncoder.decode(readEncoded, readDictionary)) { assertEquals(vector.getValueCount(), readVector.getValueCount()); - assertTrue("Decoded vectors are not equal", - new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName) - .rangeEquals(new Range(0, 0, vector.getValueCount()))); + assertTrue(new RangeEqualsVisitor(vector, readVector, typeComparatorIgnoreName) + .rangeEquals(new Range(0, 0, vector.getValueCount())), + "Decoded vectors are not equal"); } } } @@ -986,7 +985,7 @@ public void testFileFooterSizeOverflow() { System.arraycopy(magicBytes, 0, data, footerOffset + 4, ArrowMagic.MAGIC_LENGTH); // test file reader - InvalidArrowFileException e = Assertions.assertThrows(InvalidArrowFileException.class, () -> { + InvalidArrowFileException e = assertThrows(InvalidArrowFileException.class, () -> { try (SeekableReadChannel channel = new SeekableReadChannel(new ByteArrayReadableSeekableByteChannel(data)); ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.getVectorSchemaRoot().getSchema(); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java index 145bdd588e945..7f3541252772f 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStream.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -31,8 +31,7 @@ import org.apache.arrow.vector.TinyIntVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestArrowStream extends BaseFileTest { @Test @@ -44,15 +43,15 @@ public void testEmptyStream() throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); ArrowStreamWriter writer = new ArrowStreamWriter(root, null, out); writer.close(); - Assert.assertTrue(out.size() > 0); + 
assertTrue(out.size() > 0); ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); try (ArrowStreamReader reader = new ArrowStreamReader(in, allocator)) { assertEquals(schema, reader.getVectorSchemaRoot().getSchema()); // Empty should return false - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); - Assert.assertFalse(reader.loadNextBatch()); + assertFalse(reader.loadNextBatch()); assertEquals(0, reader.getVectorSchemaRoot().getRowCount()); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java index 422a63f57f7d8..4ba11fb05ff5d 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowStreamPipe.java @@ -17,8 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.io.IOException; import java.nio.channels.Pipe; @@ -33,8 +34,7 @@ import org.apache.arrow.vector.ipc.ArrowStreamWriter; import org.apache.arrow.vector.ipc.MessageSerializerTest; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestArrowStreamPipe { Schema schema = MessageSerializerTest.testSchema(); @@ -75,7 +75,7 @@ public void run() { root.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } @@ -103,14 +103,14 @@ public boolean loadNextBatch() throws IOException { return false; } VectorSchemaRoot root = getVectorSchemaRoot(); - Assert.assertEquals(16, root.getRowCount()); + assertEquals(16, root.getRowCount()); TinyIntVector vector = (TinyIntVector) root.getFieldVectors().get(0); - Assert.assertEquals((byte) (batchesRead - 1), vector.get(0)); + assertEquals((byte) (batchesRead - 1), vector.get(0)); for (int i = 1; i < 16; i++) { if (i < 8) { - Assert.assertEquals((byte) (i + 1), vector.get(i)); + assertEquals((byte) (i + 1), vector.get(i)); } else { - Assert.assertTrue(vector.isNull(i)); + assertTrue(vector.isNull(i)); } } @@ -129,7 +129,7 @@ public void run() { reader.close(); } catch (IOException e) { e.printStackTrace(); - Assert.fail(e.toString()); // have to explicitly fail since we're in a separate thread + fail(e.toString()); // have to explicitly fail since we're in a separate thread } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java index bd5bd4feabbd4..a90b97310a1cf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestJSONFile.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.ipc; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; 
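// Illustrative sketch (hypothetical class, not from this patch): the
// TestRoundTrip and TestUIntDictionaryRoundTrip hunks below replace JUnit 4's
// @RunWith(Parameterized.class) -- where parameters arrive through the test
// class constructor -- with Jupiter's @ParameterizedTest, where each parameter
// is an ordinary method argument supplied by a static @MethodSource factory
// returning a Stream.
import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.util.stream.Stream;

import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.MethodSource;

class StreamModeMigrationTest {
  // JUnit 4 equivalent: @Parameterized.Parameters(name = "stream mode = {0}")
  static Stream<Arguments> getRepeat() {
    return Stream.of(Arguments.of(true), Arguments.of(false));
  }

  @ParameterizedTest(name = "stream mode = {0}")
  @MethodSource("getRepeat")
  void roundTrip(boolean streamMode) {
    // Runs once per Arguments entry; the former constructor-injected field
    // is now just a method parameter.
    assertNotNull(Boolean.valueOf(streamMode));
  }
}
// Note how this removes per-class state: helper methods that previously read the
// streamMode field (e.g. writeData/readData in the hunks below) now take it as
// an explicit argument instead.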
+import static org.junit.jupiter.api.Assertions.assertNull; import java.io.File; import java.io.IOException; @@ -43,8 +43,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.Validator; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -342,7 +341,7 @@ public void testSetStructLength() throws IOException { // initialize vectors try (VectorSchemaRoot root = reader.read();) { FieldVector vector = root.getVector("struct_nullable"); - Assert.assertEquals(7, vector.getValueCount()); + assertEquals(7, vector.getValueCount()); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java index 5f57e90f6ba19..d1a3a6db0da44 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestRoundTrip.java @@ -18,12 +18,13 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.dictionary.DictionaryProvider.MapDictionaryProvider; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -33,14 +34,13 @@ import java.io.IOException; import java.nio.channels.Channels; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiConsumer; +import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -68,55 +68,47 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.AfterClass; -import org.junit.Assume; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.MethodSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -@RunWith(Parameterized.class) public class TestRoundTrip extends BaseFileTest { private static final Logger LOGGER = LoggerFactory.getLogger(TestRoundTrip.class); private static BufferAllocator allocator; - private final String name; - private final IpcOption writeOption; - public TestRoundTrip(String name, IpcOption writeOption) { - this.name = name; - this.writeOption = writeOption; - } - - @Parameterized.Parameters(name = "options = {0}") - public static Collection getWriteOption() { + 
static Stream getWriteOption() { final IpcOption legacy = new IpcOption(true, MetadataVersion.V4); final IpcOption version4 = new IpcOption(false, MetadataVersion.V4); - return Arrays.asList( + return Stream.of( new Object[] {"V4Legacy", legacy}, new Object[] {"V4", version4}, new Object[] {"V5", IpcOption.DEFAULT} ); } - @BeforeClass + @BeforeAll public static void setUpClass() { allocator = new RootAllocator(Integer.MAX_VALUE); } - @AfterClass + @AfterAll public static void tearDownClass() { allocator.close(); } - @Test - public void testStruct() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testStruct(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeData(COUNT, parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -125,13 +117,16 @@ public void testStruct() throws Exception { } } - @Test - public void testComplex() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testComplex(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeComplexData(COUNT, parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -140,14 +135,17 @@ public void testComplex() throws Exception { } } - @Test - public void testMultipleRecordBatches() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMultipleRecordBatches(String name, IpcOption writeOption) throws Exception { int[] counts = {10, 5}; try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { writeData(counts[0], parent); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent.getChild("root")), /* dictionaryProvider */null, (root, writer) -> { @@ -170,9 +168,10 @@ public void testMultipleRecordBatches() throws Exception { } } - @Test - public void testUnionV4() throws Exception { - Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V4); + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testUnionV4(String name, IpcOption writeOption) throws Exception { + assumeTrue(writeOption.metadataVersion == MetadataVersion.V4); final File temp = File.createTempFile("arrow-test-" + name + "-", ".arrow"); temp.deleteOnExit(); final ByteArrayOutputStream memoryStream = new ByteArrayOutputStream(); @@ -188,17 +187,18 @@ public void testUnionV4() throws Exception { new ArrowStreamWriter(root, null, Channels.newChannel(memoryStream), writeOption); } }); - assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata")); + assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); e = assertThrows(IllegalArgumentException.class, () -> { new ArrowStreamWriter(root, null, 
Channels.newChannel(memoryStream), writeOption); }); - assertTrue(e.getMessage(), e.getMessage().contains("Cannot write union with V4 metadata")); + assertTrue(e.getMessage().contains("Cannot write union with V4 metadata"), e.getMessage()); } } - @Test - public void testUnionV5() throws Exception { - Assume.assumeTrue(writeOption.metadataVersion == MetadataVersion.V5); + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testUnionV5(String name, IpcOption writeOption) throws Exception { + assumeTrue(writeOption.metadataVersion == MetadataVersion.V5); try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { @@ -206,6 +206,8 @@ public void testUnionV5() throws Exception { VectorSchemaRoot root = new VectorSchemaRoot(parent.getChild("root")); validateUnionData(COUNT, root); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -214,8 +216,9 @@ public void testUnionV5() throws Exception { } } - @Test - public void testTiny() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testTiny(String name, IpcOption writeOption) throws Exception { try (final VectorSchemaRoot root = VectorSchemaRoot.create(MessageSerializerTest.testSchema(), allocator)) { root.getFieldVectors().get(0).allocateNew(); int count = 16; @@ -227,6 +230,8 @@ public void testTiny() throws Exception { root.setRowCount(count); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -247,8 +252,9 @@ private void validateTinyData(int count, VectorSchemaRoot root) { } } - @Test - public void testMetadata() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMetadata(String name, IpcOption writeOption) throws Exception { List childFields = new ArrayList<>(); childFields.add(new Field("varchar-child", new FieldType(true, ArrowType.Utf8.INSTANCE, null, metadata(1)), null)); childFields.add(new Field("float-child", @@ -283,6 +289,8 @@ public void testMetadata() throws Exception { } }; roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -298,14 +306,17 @@ private Map metadata(int i) { return Collections.unmodifiableMap(map); } - @Test - public void testFlatDictionary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFlatDictionary(String name, IpcOption writeOption) throws Exception { AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); MapDictionaryProvider provider = new MapDictionaryProvider(); try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeFlatDictionaryData(originalVectorAllocator, provider)) { roundTrip( + name, + writeOption, root, provider, (ignored, writer) -> { @@ -339,8 +350,9 @@ public void testFlatDictionary() throws Exception { } } - @Test - public void testNestedDictionary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testNestedDictionary(String name, IpcOption writeOption) throws Exception { AtomicInteger numDictionaryBlocksWritten = new AtomicInteger(); MapDictionaryProvider provider = new 
MapDictionaryProvider(); // data being written: @@ -356,6 +368,8 @@ public void testNestedDictionary() throws Exception { validateNestedDictionary(readRoot, streamReader); }; roundTrip( + name, + writeOption, root, provider, (ignored, writer) -> { @@ -376,8 +390,9 @@ public void testNestedDictionary() throws Exception { } } - @Test - public void testFixedSizeBinary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFixedSizeBinary(String name, IpcOption writeOption) throws Exception { final int count = 10; final int typeWidth = 11; byte[][] byteValues = new byte[count][typeWidth]; @@ -405,6 +420,8 @@ public void testFixedSizeBinary() throws Exception { parent.setValueCount(count); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -413,8 +430,9 @@ public void testFixedSizeBinary() throws Exception { } } - @Test - public void testFixedSizeList() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testFixedSizeList(String name, IpcOption writeOption) throws Exception { BiConsumer validator = (expectedCount, root) -> { for (int i = 0; i < expectedCount; i++) { assertEquals(Collections2.asImmutableList(i + 0.1f, i + 10.1f), root.getVector("float-pairs") @@ -441,6 +459,8 @@ public void testFixedSizeList() throws Exception { parent.setValueCount(COUNT); roundTrip( + name, + writeOption, new VectorSchemaRoot(parent), /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -449,8 +469,9 @@ public void testFixedSizeList() throws Exception { } } - @Test - public void testVarBinary() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testVarBinary(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final StructVector parent = StructVector.empty("parent", originalVectorAllocator)) { @@ -459,6 +480,8 @@ public void testVarBinary() throws Exception { validateVarBinary(COUNT, root); roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -467,8 +490,9 @@ public void testVarBinary() throws Exception { } } - @Test - public void testReadWriteMultipleBatches() throws IOException { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testReadWriteMultipleBatches(String name, IpcOption writeOption) throws IOException { File file = new File("target/mytest_nulls_multibatch.arrow"); int numBlocksWritten = 0; @@ -491,12 +515,15 @@ public void testReadWriteMultipleBatches() throws IOException { } } - @Test - public void testMap() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void testMap(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeMapData(originalVectorAllocator)) { roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -505,12 +532,15 @@ public void testMap() throws Exception { } } - @Test - public void testListAsMap() throws Exception { + @ParameterizedTest(name = "options = {0}") + @MethodSource("getWriteOption") + public void 
testListAsMap(String name, IpcOption writeOption) throws Exception { try (final BufferAllocator originalVectorAllocator = allocator.newChildAllocator("original vectors", 0, allocator.getLimit()); final VectorSchemaRoot root = writeListAsMapData(originalVectorAllocator)) { roundTrip( + name, + writeOption, root, /* dictionaryProvider */null, TestRoundTrip::writeSingleBatch, @@ -539,10 +569,10 @@ private CheckedConsumer validateFileBatches( assertEquals(counts.length, recordBatches.size()); long previousOffset = 0; for (ArrowBlock rbBlock : recordBatches) { - assertTrue(rbBlock.getOffset() + " > " + previousOffset, rbBlock.getOffset() > previousOffset); + assertTrue(rbBlock.getOffset() > previousOffset, rbBlock.getOffset() + " > " + previousOffset); previousOffset = rbBlock.getOffset(); arrowReader.loadRecordBatch(rbBlock); - assertEquals("RB #" + i, counts[i], root.getRowCount()); + assertEquals(counts[i], root.getRowCount(), "RB #" + i); validator.accept(counts[i], root); try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { List buffersLayout = batch.getBuffersLayout(); @@ -566,7 +596,7 @@ private CheckedConsumer validateStreamBatches( for (int n = 0; n < counts.length; n++) { assertTrue(arrowReader.loadNextBatch()); - assertEquals("RB #" + i, counts[i], root.getRowCount()); + assertEquals(counts[i], root.getRowCount(), "RB #" + i); validator.accept(counts[i], root); try (final ArrowRecordBatch batch = unloader.getRecordBatch()) { final List buffersLayout = batch.getBuffersLayout(); @@ -590,7 +620,7 @@ interface CheckedBiConsumer { void accept(T t, U u) throws Exception; } - private void roundTrip(VectorSchemaRoot root, DictionaryProvider provider, + private void roundTrip(String name, IpcOption writeOption, VectorSchemaRoot root, DictionaryProvider provider, CheckedBiConsumer writer, CheckedConsumer fileValidator, CheckedConsumer streamValidator) throws Exception { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java index ac95121eb73f2..db1e787d04d27 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestUIntDictionaryRoundTrip.java @@ -18,10 +18,10 @@ package org.apache.arrow.vector.ipc; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; @@ -29,9 +29,9 @@ import java.nio.channels.Channels; import java.nio.charset.StandardCharsets; import java.util.Arrays; -import java.util.Collection; import java.util.Map; import java.util.function.ToIntBiFunction; +import java.util.stream.Stream; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -51,41 +51,34 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.After; -import 
org.junit.Before; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.junit.runners.Parameterized; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; /** * Test the round-trip of dictionary encoding, * with unsigned integer as indices. */ -@RunWith(Parameterized.class) public class TestUIntDictionaryRoundTrip { - private final boolean streamMode; - - public TestUIntDictionaryRoundTrip(boolean streamMode) { - this.streamMode = streamMode; - } - private BufferAllocator allocator; private DictionaryProvider.MapDictionaryProvider dictionaryProvider; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); dictionaryProvider = new DictionaryProvider.MapDictionaryProvider(); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } - private byte[] writeData(FieldVector encodedVector) throws IOException { + private byte[] writeData(boolean streamMode, FieldVector encodedVector) throws IOException { ByteArrayOutputStream out = new ByteArrayOutputStream(); VectorSchemaRoot root = new VectorSchemaRoot( @@ -102,6 +95,7 @@ private byte[] writeData(FieldVector encodedVector) throws IOException { } private void readData( + boolean streamMode, byte[] data, Field expectedField, ToIntBiFunction valGetter, @@ -156,8 +150,9 @@ private ValueVector createEncodedVector(int bitWidth, VarCharVector dictionaryVe return field.createVector(allocator); } - @Test - public void testUInt1RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt1RoundTrip(boolean streamMode) throws IOException { final int vectorLength = UInt1Vector.MAX_UINT1 & UInt1Vector.PROMOTION_MASK; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt1Vector encodedVector1 = (UInt1Vector) createEncodedVector(8, dictionaryVector)) { @@ -170,15 +165,15 @@ public void testUInt1RoundTrip() throws IOException { } encodedVector1.setValueCount(vectorLength); setVector(dictionaryVector, dictionaryItems); - byte[] data = writeData(encodedVector1); - readData( - data, encodedVector1.getField(), (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), - 8L, indices, dictionaryItems); + byte[] data = writeData(streamMode, encodedVector1); + readData(streamMode, data, encodedVector1.getField(), + (vector, index) -> (int) ((UInt1Vector) vector).getValueAsLong(index), 8L, indices, dictionaryItems); } } - @Test - public void testUInt2RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt2RoundTrip(boolean streamMode) throws IOException { try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt2Vector encodedVector2 = (UInt2Vector) createEncodedVector(16, dictionaryVector)) { int[] indices = new int[]{1, 3, 5, 7, 9, UInt2Vector.MAX_UINT2}; @@ -190,14 +185,15 @@ public void testUInt2RoundTrip() throws IOException { setVector(encodedVector2, (char) 1, (char) 3, (char) 5, (char) 7, (char) 9, UInt2Vector.MAX_UINT2); setVector(dictionaryVector, dictItems); - byte[] data = writeData(encodedVector2); - readData(data, encodedVector2.getField(), (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), - 16L, indices, dictItems); + byte[] data = 
writeData(streamMode, encodedVector2); + readData(streamMode, data, encodedVector2.getField(), + (vector, index) -> (int) ((UInt2Vector) vector).getValueAsLong(index), 16L, indices, dictItems); } } - @Test - public void testUInt4RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt4RoundTrip(boolean streamMode) throws IOException { final int dictLength = 10; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt4Vector encodedVector4 = (UInt4Vector) createEncodedVector(32, dictionaryVector)) { @@ -211,14 +207,15 @@ public void testUInt4RoundTrip() throws IOException { setVector(dictionaryVector, dictItems); setVector(encodedVector4, 1, 3, 5, 7, 9); - byte[] data = writeData(encodedVector4); - readData(data, encodedVector4.getField(), (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), - 32L, indices, dictItems); + byte[] data = writeData(streamMode, encodedVector4); + readData(streamMode, data, encodedVector4.getField(), + (vector, index) -> (int) ((UInt4Vector) vector).getValueAsLong(index), 32L, indices, dictItems); } } - @Test - public void testUInt8RoundTrip() throws IOException { + @ParameterizedTest(name = "stream mode = {0}") + @MethodSource("getRepeat") + public void testUInt8RoundTrip(boolean streamMode) throws IOException { final int dictLength = 10; try (VarCharVector dictionaryVector = new VarCharVector("dictionary", allocator); UInt8Vector encodedVector8 = (UInt8Vector) createEncodedVector(64, dictionaryVector)) { @@ -231,17 +228,16 @@ public void testUInt8RoundTrip() throws IOException { setVector(encodedVector8, 1L, 3L, 5L, 7L, 9L); setVector(dictionaryVector, dictItems); - byte[] data = writeData(encodedVector8); - readData(data, encodedVector8.getField(), (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), - 64L, indices, dictItems); + byte[] data = writeData(streamMode, encodedVector8); + readData(streamMode, data, encodedVector8.getField(), + (vector, index) -> (int) ((UInt8Vector) vector).getValueAsLong(index), 64L, indices, dictItems); } } - @Parameterized.Parameters(name = "stream mode = {0}") - public static Collection getRepeat() { - return Arrays.asList( - new Object[]{true}, - new Object[]{false} + static Stream getRepeat() { + return Stream.of( + Arguments.of(true), + Arguments.of(false) ); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java index 0505a18484b54..89cbb9f3f1b89 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/ipc/message/TestMessageMetadataResult.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector.ipc.message; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.nio.ByteBuffer; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestMessageMetadataResult { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java index 5cc0d080053af..925f6ca254544 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/pojo/TestConvert.java @@ -19,8 +19,8 @@ import static 
org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; import static org.apache.arrow.vector.types.FloatingPointPrecision.SINGLE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import java.nio.ByteBuffer; import java.util.ArrayList; @@ -46,7 +46,7 @@ import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; -import org.junit.Test; +import org.junit.jupiter.api.Test; import com.google.flatbuffers.FlatBufferBuilder; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java index 3c075c9293079..369fcc140a1b1 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/TestValueVectorPopulator.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.testing; -import static junit.framework.TestCase.assertTrue; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; @@ -56,20 +56,20 @@ import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValueVectorPopulator { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java index 45e6e630792a9..66dc13d6ef545 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/testing/ValueVectorDataPopulator.java @@ -17,7 +17,7 @@ package org.apache.arrow.vector.testing; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java index 872b2f3934b07..5ebfb62038919 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestExtensionType.java @@ -17,10 +17,12 @@ package org.apache.arrow.vector.types.pojo; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; import static 
org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.File; import java.io.IOException; @@ -51,8 +53,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.ExtensionType; import org.apache.arrow.vector.util.VectorBatchAppender; import org.apache.arrow.vector.validate.ValidateVectorVisitor; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestExtensionType { /** @@ -85,21 +86,19 @@ public void roundtripUuid() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(root.getSchema(), readerRoot.getSchema()); + assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); final UuidType expectedType = new UuidType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { - Assert.assertEquals(vector.getObject(i), deserialized.getObject(i)); + assertEquals(vector.getObject(i), deserialized.getObject(i)); } } } @@ -138,29 +137,27 @@ public void readUnderlyingType() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(1, readerRoot.getSchema().getFields().size()); - Assert.assertEquals("a", readerRoot.getSchema().getFields().get(0).getName()); - Assert.assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary); - Assert.assertEquals(16, + assertEquals(1, readerRoot.getSchema().getFields().size()); + assertEquals("a", readerRoot.getSchema().getFields().get(0).getName()); + assertTrue(readerRoot.getSchema().getFields().get(0).getType() instanceof ArrowType.FixedSizeBinary); + assertEquals(16, ((ArrowType.FixedSizeBinary) readerRoot.getSchema().getFields().get(0).getType()).getByteWidth()); final Field field = readerRoot.getSchema().getFields().get(0); final UuidType expectedType = new UuidType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final FixedSizeBinaryVector deserialized = (FixedSizeBinaryVector) 
readerRoot.getFieldVectors().get(0); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { final UUID uuid = vector.getObject(i); final ByteBuffer bb = ByteBuffer.allocate(16); bb.putLong(uuid.getMostSignificantBits()); bb.putLong(uuid.getLeastSignificantBits()); - Assert.assertArrayEquals(bb.array(), deserialized.get(i)); + assertArrayEquals(bb.array(), deserialized.get(i)); } } } @@ -210,26 +207,24 @@ public void roundtripLocation() throws IOException { final ArrowFileReader reader = new ArrowFileReader(channel, allocator)) { reader.loadNextBatch(); final VectorSchemaRoot readerRoot = reader.getVectorSchemaRoot(); - Assert.assertEquals(root.getSchema(), readerRoot.getSchema()); + assertEquals(root.getSchema(), readerRoot.getSchema()); final Field field = readerRoot.getSchema().getFields().get(0); final LocationType expectedType = new LocationType(); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), - expectedType.extensionName()); - Assert.assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), - expectedType.serialize()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_NAME), expectedType.extensionName()); + assertEquals(field.getMetadata().get(ExtensionType.EXTENSION_METADATA_KEY_METADATA), expectedType.serialize()); final ExtensionTypeVector deserialized = (ExtensionTypeVector) readerRoot.getFieldVectors().get(0); - Assert.assertTrue(deserialized instanceof LocationVector); - Assert.assertEquals("location", deserialized.getName()); + assertTrue(deserialized instanceof LocationVector); + assertEquals("location", deserialized.getName()); StructVector deserStruct = (StructVector) deserialized.getUnderlyingVector(); - Assert.assertNotNull(deserStruct.getChild("Latitude")); - Assert.assertNotNull(deserStruct.getChild("Longitude")); - Assert.assertEquals(vector.getValueCount(), deserialized.getValueCount()); + assertNotNull(deserStruct.getChild("Latitude")); + assertNotNull(deserStruct.getChild("Longitude")); + assertEquals(vector.getValueCount(), deserialized.getValueCount()); for (int i = 0; i < vector.getValueCount(); i++) { - Assert.assertEquals(vector.isNull(i), deserialized.isNull(i)); + assertEquals(vector.isNull(i), deserialized.isNull(i)); if (!vector.isNull(i)) { - Assert.assertEquals(vector.getObject(i), deserialized.getObject(i)); + assertEquals(vector.getObject(i), deserialized.getObject(i)); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java index bc984fa642d52..8f98a9e9f8b53 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestField.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.Collections; @@ -28,7 
+28,7 @@ import java.util.Map; import org.apache.arrow.vector.types.pojo.ArrowType.Int; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestField { @@ -57,7 +57,7 @@ public void testMetadata() throws IOException { private void jsonContains(String json, String... strings) { for (String string : strings) { - assertTrue(json + " contains " + string, json.contains(string)); + assertTrue(json.contains(string), json + " contains " + string); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java index 7b62247c6e12d..e51e76737dfb7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/types/pojo/TestSchema.java @@ -20,8 +20,8 @@ import static java.util.Arrays.asList; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_KEY; import static org.apache.arrow.vector.types.pojo.Schema.METADATA_VALUE; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.nio.ByteBuffer; @@ -49,7 +49,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType.Timestamp; import org.apache.arrow.vector.types.pojo.ArrowType.Union; import org.apache.arrow.vector.types.pojo.ArrowType.Utf8; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchema { @@ -280,13 +280,13 @@ private void validateFieldsHashcode(java.util.List schemaFields, java.uti private void validateHashCode(Object o1, Object o2) { assertEquals(o1, o2); - assertEquals(o1 + " == " + o2, o1.hashCode(), o2.hashCode()); + assertEquals(o1.hashCode(), o2.hashCode(), o1 + " == " + o2); } private void contains(Schema schema, String... 
s) { String json = schema.toJson(); for (String string : s) { - assertTrue(json + " contains " + string, json.contains(string)); + assertTrue(json.contains(string), json + " contains " + string); } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java index 804092ed94ac7..21906cb89af24 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/DecimalUtilityTest.java @@ -17,14 +17,15 @@ package org.apache.arrow.vector.util; +import static org.junit.jupiter.api.Assertions.assertEquals; + import java.math.BigDecimal; import java.math.BigInteger; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; -import org.junit.Assert; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class DecimalUtilityTest { private static final BigInteger[] MAX_BIG_INT = new BigInteger[]{BigInteger.valueOf(10).pow(38) @@ -45,7 +46,7 @@ public void testSetLongInDecimalArrowBuf() { DecimalUtility.writeLongToArrowBuf((long) val, buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } @@ -64,7 +65,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; @@ -73,7 +74,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(BigInteger.valueOf(val).toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; @@ -82,7 +83,7 @@ public void testSetByteArrayInDecimalArrowBuf() { DecimalUtility.writeByteArrayToArrowBuf(val.toByteArray(), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = new BigDecimal(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } @@ -101,7 +102,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } long [] longValues = new long[] {Long.MIN_VALUE, 0 , Long.MAX_VALUE}; @@ -110,7 +111,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(BigDecimal.valueOf(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = BigDecimal.valueOf(val); - Assert.assertEquals(expected, actual); 
+ assertEquals(expected, actual); } BigInteger [] decimals = new BigInteger[] {MAX_BIG_INT[x], new BigInteger("0"), MIN_BIG_INT[x]}; @@ -119,7 +120,7 @@ public void testSetBigDecimalInDecimalArrowBuf() { DecimalUtility.writeBigDecimalToArrowBuf(new BigDecimal(val), buf, 0, byteLengths[x]); BigDecimal actual = DecimalUtility.getBigDecimalFromArrowBuf(buf, 0, 0, byteLengths[x]); BigDecimal expected = new BigDecimal(val); - Assert.assertEquals(expected, actual); + assertEquals(expected, actual); } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java index 4138ea9d7a181..636de9aab1f2b 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestDataSizeRoundingUtil.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.util; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; -import org.junit.Test; +import org.junit.jupiter.api.Test; /** * Test cases for {@link DataSizeRoundingUtil}. diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java index 1c8281c85981b..fb954413e9f29 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestElementAddressableVectorIterator.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertNull; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import java.nio.charset.StandardCharsets; @@ -27,9 +27,9 @@ import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.VarCharVector; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link ElementAddressableVectorIterator}. 
@@ -40,12 +40,12 @@ public class TestElementAddressableVectorIterator { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java index edd5221faf268..e0c9031c49b94 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMapWithOrdinal.java @@ -18,20 +18,20 @@ package org.apache.arrow.vector.util; import static junit.framework.TestCase.assertNull; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.Collection; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestMapWithOrdinal { private MapWithOrdinal map; - @Before + @BeforeEach public void setUp() { map = new MapWithOrdinalImpl<>(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java index ea829060d1c04..0c03f3dfeac46 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestMultiMapWithOrdinal.java @@ -17,8 +17,11 @@ package org.apache.arrow.vector.util; -import org.junit.Assert; -import org.junit.Test; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import org.junit.jupiter.api.Test; public class TestMultiMapWithOrdinal { @@ -27,33 +30,33 @@ public void test() { MultiMapWithOrdinal map = new MultiMapWithOrdinal<>(); map.put("x", "1", false); - Assert.assertEquals(1, map.size()); + assertEquals(1, map.size()); map.remove("x", "1"); - Assert.assertTrue(map.isEmpty()); + assertTrue(map.isEmpty()); map.put("x", "1", false); map.put("x", "2", false); map.put("y", "0", false); - Assert.assertEquals(3, map.size()); - Assert.assertEquals(2, map.getAll("x").size()); - Assert.assertEquals("1", map.getAll("x").stream().findFirst().get()); - Assert.assertEquals("1", map.getByOrdinal(0)); - Assert.assertEquals("2", map.getByOrdinal(1)); - Assert.assertEquals("0", map.getByOrdinal(2)); - Assert.assertTrue(map.remove("x", "1")); - Assert.assertFalse(map.remove("x", "1")); - Assert.assertEquals("0", map.getByOrdinal(0)); - Assert.assertEquals(2, map.size()); + assertEquals(3, map.size()); + assertEquals(2, map.getAll("x").size()); + assertEquals("1", map.getAll("x").stream().findFirst().get()); + assertEquals("1", map.getByOrdinal(0)); + assertEquals("2", map.getByOrdinal(1)); + assertEquals("0", map.getByOrdinal(2)); + assertTrue(map.remove("x", "1")); + assertFalse(map.remove("x", "1")); + assertEquals("0", map.getByOrdinal(0)); + assertEquals(2, map.size()); map.put("x", "3", true); - Assert.assertEquals(1, map.getAll("x").size()); - Assert.assertEquals("3", map.getAll("x").stream().findFirst().get()); + 
assertEquals(1, map.getAll("x").size()); + assertEquals("3", map.getAll("x").stream().findFirst().get()); map.put("z", "4", false); - Assert.assertEquals(3, map.size()); + assertEquals(3, map.size()); map.put("z", "5", false); map.put("z", "6", false); - Assert.assertEquals(5, map.size()); + assertEquals(5, map.size()); map.removeAll("z"); - Assert.assertEquals(2, map.size()); - Assert.assertFalse(map.containsKey("z")); + assertEquals(2, map.size()); + assertFalse(map.containsKey("z")); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java index f562e63b4bf8d..80420608c3912 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestReusableByteArray.java @@ -17,11 +17,11 @@ package org.apache.arrow.vector.util; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotSame; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotSame; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -31,21 +31,21 @@ import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseValueVector; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestReusableByteArray { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { // Permit allocating 4 vectors of max size. 
allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java index 52b6584086832..4375ca6e690b7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestSchemaUtil.java @@ -18,7 +18,7 @@ package org.apache.arrow.vector.util; import static java.util.Arrays.asList; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import java.io.IOException; @@ -28,7 +28,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.SchemaUtility; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestSchemaUtil { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java index 2db70ca5d5b8d..0f72ada76f933 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestValidator.java @@ -18,10 +18,10 @@ package org.apache.arrow.vector.util; import static org.apache.arrow.vector.util.Validator.equalEnough; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; -import org.junit.Test; +import org.junit.jupiter.api.Test; public class TestValidator { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java index 93e7535947536..45563a69ba9e6 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorAppender.java @@ -17,9 +17,9 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; -import static junit.framework.TestCase.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -53,9 +53,9 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorAppender}. @@ -64,13 +64,13 @@ public class TestVectorAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { // Permit allocating 4 vectors of max size. 
allocator = new RootAllocator(4 * BaseValueVector.MAX_ALLOCATION_SIZE); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java index 799c25c0ad71c..193736e70cadf 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorBatchAppender.java @@ -17,15 +17,15 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorBatchAppender}. @@ -34,12 +34,12 @@ public class TestVectorBatchAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java index 6309d385870c9..82a4589c3ba64 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/util/TestVectorSchemaRootAppender.java @@ -17,8 +17,8 @@ package org.apache.arrow.vector.util; -import static junit.framework.TestCase.assertEquals; import static org.apache.arrow.vector.util.TestVectorAppender.assertVectorsEqual; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import org.apache.arrow.memory.BufferAllocator; @@ -28,9 +28,9 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link VectorSchemaRootAppender}. 
@@ -39,12 +39,12 @@ public class TestVectorSchemaRootAppender { private BufferAllocator allocator; - @Before + @BeforeEach public void prepare() { allocator = new RootAllocator(1024 * 1024); } - @After + @AfterEach public void shutdown() { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java index 20492036dab99..837b865c30b26 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVector.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.apache.arrow.vector.util.ValueVectorUtility.validate; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.Charset; import java.util.Arrays; @@ -44,15 +44,15 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -62,7 +62,7 @@ public void init() { private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java index ca71a622bb8ea..fcf031fc33824 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorFull.java @@ -19,9 +19,9 @@ import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.nio.charset.StandardCharsets; import java.util.Arrays; @@ -48,20 +48,20 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVectorFull { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java 
b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java index 1885fb21f17b6..bdb9ad3e8e530 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorSchemaRoot.java @@ -19,8 +19,8 @@ import static org.apache.arrow.vector.util.ValueVectorUtility.validate; import static org.apache.arrow.vector.util.ValueVectorUtility.validateFull; -import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -29,20 +29,20 @@ import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestValidateVectorSchemaRoot { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java index 0ddd790d6ffab..42297e1d37fe0 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/validate/TestValidateVectorTypeVisitor.java @@ -70,9 +70,9 @@ import org.apache.arrow.vector.types.Types; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; /** * Test cases for {@link ValidateVectorTypeVisitor}. @@ -83,12 +83,12 @@ public class TestValidateVectorTypeVisitor { private ValidateVectorTypeVisitor visitor = new ValidateVectorTypeVisitor(); - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } From 3999384c3e05ef8ef804ab651e1bebee8bf7670c Mon Sep 17 00:00:00 2001 From: Thomas A Caswell Date: Sat, 8 Jun 2024 16:08:01 -0400 Subject: [PATCH 253/261] MINOR: [Python] spell "language" correctly in trove classifier (#42031) ### Rationale for this change Newer (possibly unreleased) versions of the Python build tools check that the classifiers are valid, and the build failed due to this typo. ### What changes are included in this PR? Fix the spelling of a word. ### Are these changes tested? The build will fail without these changes; the fix has no run-time effect. ### Are there any user-facing changes?
No. Authored-by: Thomas A Caswell Signed-off-by: Sutou Kouhei --- python/pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index f72c3a91eb436..86a90906d02f9 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -47,7 +47,7 @@ classifiers = [ 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', - 'Programming Langauge :: Python :: 3.12', + 'Programming Language :: Python :: 3.12', ] maintainers = [ {name = "Apache Arrow Developers", email = "dev@arrow.apache.org"} From 601be7687ba89f711b876397746b5f49503c0871 Mon Sep 17 00:00:00 2001 From: abandy Date: Sat, 8 Jun 2024 17:25:28 -0400 Subject: [PATCH 254/261] GH-42020: [Swift] Add Arrow decoding implementation for Swift Codable (#42023) ### Rationale for this change This change implements decode for the Arrow Swift Codable implementation. This allows the data in a RecordBatch to be copied to properties in a struct/class. The PR is a bit longer than desired but all three container types are required in order to implement the Decoder protocol. ### What changes are included in this PR? The ArrowDecoder class is included in this PR along with a class for each container type (keyed, unkeyed, and single). Most of the logic is encapsulated in the ArrowDecoder with minimal logic in each container class (most of the methods in the container classes are one-line calls to the ArrowDecoder's doDecode methods). ### Are these changes tested? Yes, a test has been added to test the three types of containers provided by the decoder. * GitHub Issue: #42020 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 347 ++++++++++++++++++ .../Arrow/Tests/ArrowTests/CodableTests.swift | 170 +++++++++ 2 files changed, 517 insertions(+) create mode 100644 swift/Arrow/Sources/Arrow/ArrowDecoder.swift create mode 100644 swift/Arrow/Tests/ArrowTests/CodableTests.swift diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift new file mode 100644 index 0000000000000..7e0c69b1e79e8 --- /dev/null +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -0,0 +1,347 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License.
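+//
+// A minimal usage sketch (the `Row` type and `rb` value below are
+// illustrative, not part of this patch): given a populated RecordBatch
+// whose column names match a Codable type's property names, the decoder
+// materializes one instance per row.
+//
+//     struct Row: Codable {
+//         var propInt8: Int8
+//         var propString: String
+//     }
+//
+//     let decoder = ArrowDecoder(rb)                 // rb: RecordBatch
+//     let rows: [Row] = try decoder.decode(Row.self) // one Row per row in rb
+//
+// Note: `Int`/`UInt` are intentionally unsupported; use the fixed-width
+// types (Int8/Int16/Int32/Int64 and their UInt counterparts) instead.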
+ +import Foundation + +public class ArrowDecoder: Decoder { + var rbIndex: UInt = 0 + public var codingPath: [CodingKey] = [] + public var userInfo: [CodingUserInfoKey: Any] = [:] + public let rb: RecordBatch + public let nameToCol: [String: ArrowArrayHolder] + public let columns: [ArrowArrayHolder] + public init(_ decoder: ArrowDecoder) { + self.userInfo = decoder.userInfo + self.codingPath = decoder.codingPath + self.rb = decoder.rb + self.columns = decoder.columns + self.nameToCol = decoder.nameToCol + self.rbIndex = decoder.rbIndex + } + + public init(_ rb: RecordBatch) { + self.rb = rb + var colMapping = [String: ArrowArrayHolder]() + var columns = [ArrowArrayHolder]() + for index in 0..(_ type: T.Type) throws -> [T] { + var output = [T]() + for index in 0..(keyedBy type: Key.Type + ) -> KeyedDecodingContainer where Key: CodingKey { + let container = ArrowKeyedDecoding(self, codingPath: codingPath) + return KeyedDecodingContainer(container) + } + + public func unkeyedContainer() -> UnkeyedDecodingContainer { + return ArrowUnkeyedDecoding(self, codingPath: codingPath) + } + + public func singleValueContainer() -> SingleValueDecodingContainer { + return ArrowSingleValueDecoding(self, codingPath: codingPath) + } + + func getCol(_ name: String) throws -> AnyArray { + guard let col = self.nameToCol[name] else { + throw ArrowError.invalid("Column for key \"\(name)\" not found") + } + + guard let anyArray = col.array as? AnyArray else { + throw ArrowError.invalid("Unable to convert array to AnyArray") + } + + return anyArray + } + + func getCol(_ index: Int) throws -> AnyArray { + if index >= self.columns.count { + throw ArrowError.outOfBounds(index: Int64(index)) + } + + guard let anyArray = self.columns[index].array as? AnyArray else { + throw ArrowError.invalid("Unable to convert array to AnyArray") + } + + return anyArray + } + + func doDecode(_ key: CodingKey) throws -> T? { + let array: AnyArray = try self.getCol(key.stringValue) + return array.asAny(self.rbIndex) as? T + } + + func doDecode(_ col: Int) throws -> T? { + let array: AnyArray = try self.getCol(col) + return array.asAny(self.rbIndex) as? T + } +} + +private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { + var codingPath: [CodingKey] + var count: Int? = 0 + var isAtEnd: Bool = false + var currentIndex: Int = 0 + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + self.count = self.decoder.columns.count + } + + mutating func increment() { + self.currentIndex += 1 + self.isAtEnd = self.currentIndex >= self.count! + } + + mutating func decodeNil() throws -> Bool { + defer {increment()} + return try self.decoder.doDecode(self.currentIndex) == nil + } + + mutating func decode(_ type: T.Type) throws -> T where T: Decodable { + if type == Int8.self || type == Int16.self || + type == Int32.self || type == Int64.self || + type == UInt8.self || type == UInt16.self || + type == UInt32.self || type == UInt64.self || + type == String.self || type == Double.self || + type == Float.self || type == Date.self { + defer {increment()} + return try self.decoder.doDecode(self.currentIndex)! 
+ } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } + + func nestedContainer( + keyedBy type: NestedKey.Type + ) throws -> KeyedDecodingContainer where NestedKey: CodingKey { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func nestedUnkeyedContainer() throws -> UnkeyedDecodingContainer { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func superDecoder() throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } +} + +private struct ArrowKeyedDecoding: KeyedDecodingContainerProtocol { + var codingPath = [CodingKey]() + var allKeys = [Key]() + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + } + + func contains(_ key: Key) -> Bool { + return self.decoder.nameToCol.keys.contains(key.stringValue) + } + + func decodeNil(forKey key: Key) throws -> Bool { + return try self.decoder.doDecode(key) == nil + } + + func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: String.Type, forKey key: Key) throws -> String { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Double.Type, forKey key: Key) throws -> Double { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Float.Type, forKey key: Key) throws -> Float { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int.Type, forKey key: Key) throws -> Int { + throw ArrowError.invalid( + "Int type is not supported (please use Int8, Int16, Int32 or Int64)") + } + + func decode(_ type: Int8.Type, forKey key: Key) throws -> Int8 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int16.Type, forKey key: Key) throws -> Int16 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int32.Type, forKey key: Key) throws -> Int32 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: Int64.Type, forKey key: Key) throws -> Int64 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt.Type, forKey key: Key) throws -> UInt { + throw ArrowError.invalid( + "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)") + } + + func decode(_ type: UInt8.Type, forKey key: Key) throws -> UInt8 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt16.Type, forKey key: Key) throws -> UInt16 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt32.Type, forKey key: Key) throws -> UInt32 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: UInt64.Type, forKey key: Key) throws -> UInt64 { + return try self.decoder.doDecode(key)! + } + + func decode(_ type: T.Type, forKey key: Key) throws -> T where T: Decodable { + if type == Date.self { + return try self.decoder.doDecode(key)! 
+ } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } + + func nestedContainer( + keyedBy type: NestedKey.Type, + forKey key: Key + ) throws -> KeyedDecodingContainer where NestedKey: CodingKey { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func nestedUnkeyedContainer(forKey key: Key) throws -> UnkeyedDecodingContainer { + throw ArrowError.invalid("Nested decoding is currently not supported.") + } + + func superDecoder() throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } + + func superDecoder(forKey key: Key) throws -> Decoder { + throw ArrowError.invalid("super decoding is currently not supported.") + } +} + +private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { + var codingPath = [CodingKey]() + let decoder: ArrowDecoder + + init(_ decoder: ArrowDecoder, codingPath: [CodingKey]) { + self.decoder = decoder + self.codingPath = codingPath + } + + func decodeNil() -> Bool { + do { + return try self.decoder.doDecode(0) == nil + } catch { + return false + } + } + + func decode(_ type: Bool.Type) throws -> Bool { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: String.Type) throws -> String { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Double.Type) throws -> Double { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Float.Type) throws -> Float { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int.Type) throws -> Int { + throw ArrowError.invalid( + "Int type is not supported (please use Int8, Int16, Int32 or Int64)") + } + + func decode(_ type: Int8.Type) throws -> Int8 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int16.Type) throws -> Int16 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int32.Type) throws -> Int32 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: Int64.Type) throws -> Int64 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt.Type) throws -> UInt { + throw ArrowError.invalid( + "UInt type is not supported (please use UInt8, UInt16, UInt32 or UInt64)") + } + + func decode(_ type: UInt8.Type) throws -> UInt8 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt16.Type) throws -> UInt16 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt32.Type) throws -> UInt32 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: UInt64.Type) throws -> UInt64 { + return try self.decoder.doDecode(0)! + } + + func decode(_ type: T.Type) throws -> T where T: Decodable { + if type == Date.self { + return try self.decoder.doDecode(0)! + } else { + throw ArrowError.invalid("Type \(type) is currently not supported") + } + } +} diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift new file mode 100644 index 0000000000000..e7359467ae1c5 --- /dev/null +++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift @@ -0,0 +1,170 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import XCTest +@testable import Arrow + +final class CodableTests: XCTestCase { + public class TestClass: Codable { + public var propBool: Bool + public var propInt8: Int8 + public var propInt16: Int16 + public var propInt32: Int32 + public var propInt64: Int64 + public var propUInt8: UInt8 + public var propUInt16: UInt16 + public var propUInt32: UInt32 + public var propUInt64: UInt64 + public var propFloat: Float + public var propDouble: Double + public var propString: String + public var propDate: Date + + public required init() { + self.propBool = false + self.propInt8 = 1 + self.propInt16 = 2 + self.propInt32 = 3 + self.propInt64 = 4 + self.propUInt8 = 5 + self.propUInt16 = 6 + self.propUInt32 = 7 + self.propUInt64 = 8 + self.propFloat = 9 + self.propDouble = 10 + self.propString = "11" + self.propDate = Date.now + } + } + + func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length + let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352) + + let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() + let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let int64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint32Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let uint64Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let floatBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let doubleBuilder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + let dateBuilder = try ArrowArrayBuilders.loadDate64ArrayBuilder() + + boolBuilder.append(false, true, false) + int8Builder.append(10, 11, 12) + int16Builder.append(20, 21, 22) + int32Builder.append(30, 31, 32) + int64Builder.append(40, 41, 42) + uint8Builder.append(50, 51, 52) + uint16Builder.append(60, 61, 62) + uint32Builder.append(70, 71, 72) + uint64Builder.append(80, 81, 82) + floatBuilder.append(90.1, 91.1, 92.1) + doubleBuilder.append(100.1, 101.1, 102.1) + stringBuilder.append("test0", "test1", "test2") + dateBuilder.append(date1, date1, date1) + let result = RecordBatch.Builder() + .addColumn("propBool", arrowArray: try boolBuilder.toHolder()) + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propInt16", arrowArray: try int16Builder.toHolder()) + .addColumn("propInt32", arrowArray: try int32Builder.toHolder()) + .addColumn("propInt64", arrowArray: try int64Builder.toHolder()) + .addColumn("propUInt8", arrowArray: try uint8Builder.toHolder()) + .addColumn("propUInt16", arrowArray: try uint16Builder.toHolder()) + .addColumn("propUInt32", 
arrowArray: try uint32Builder.toHolder()) + .addColumn("propUInt64", arrowArray: try uint64Builder.toHolder()) + .addColumn("propFloat", arrowArray: try floatBuilder.toHolder()) + .addColumn("propDouble", arrowArray: try doubleBuilder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.toHolder()) + .addColumn("propDate", arrowArray: try dateBuilder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + var testClasses = try decoder.decode(TestClass.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + int8Builder.append(10, 11, 12, nil) + let result = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode(Int8?.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + int8Builder.append(10, 11, 12) + stringBuilder.append("test0", "test1", "test2") + let result = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringBuilder.toHolder()) + .finish() + switch result { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode([Int8: String].self) + var index: Int8 = 0 + for data in testData { + let str = data[10 + index] + XCTAssertEqual(str, "test\(index)") + index += 1 + } + case .failure(let err): + throw err + } + } + +} From 399408cb273c47f490f65cdad95bc184a652826c Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Sun, 9 Jun 2024 14:50:25 +0900 Subject: [PATCH 255/261] GH-42039: [Docs][Go] Fix broken link (#42040) ### Rationale for this change Fix the broken link; the target page's path has changed. ### What changes are included in this PR? Updating the link from the old, incorrect `go/` path to the current path. - old link: https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options - new link: https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options ### Are these changes tested? Yes. I have checked the link. ### Are there any user-facing changes? Yes, the updated link will be visible to users. * GitHub Issue: #42039 Authored-by: Hyunseok Seo Signed-off-by: Sutou Kouhei --- go/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/README.md b/go/README.md index 4f97c49e1c7e8..220b0a230a615 100644 --- a/go/README.md +++ b/go/README.md @@ -48,7 +48,7 @@ func main() { DSN option keys are expressed as `k=v`, delimited with `;`. Some options keys are defined in ADBC, others are defined in the FlightSQL ADBC driver.
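For example, a DSN might look like `uri=grpc+tcp://localhost:12345;username=user;password=pass` (an illustrative value; the exact option keys are listed in the references below).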
-- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/go/flight_sql.html#client-options) +- Arrow ADBC [developer doc](https://arrow.apache.org/adbc/main/driver/flight_sql.html#client-options) - ADBC [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/adbc.go#L149-L158) - FlightSQL driver option keys [source code](https://github.com/apache/arrow-adbc/blob/3d12fad1bae21029a8ff25604d6e65760c3f65bd/go/adbc/driver/flightsql/flightsql_adbc.go#L70-L81) From 7aaea3d9bb65ad37a17a9d3a52341f0fe2478903 Mon Sep 17 00:00:00 2001 From: abandy Date: Sun, 9 Jun 2024 19:55:16 -0400 Subject: [PATCH 256/261] GH-42041: [Swift] Fix nullable type decoder issue (#42043) ### Rationale for this change There is an issue when decoding nullable types. The previous method of checking for nil values always returned false for nullable types due to the ArrowArray types being non-nullable. ### What changes are included in this PR? This PR adds an isNull method to the ArrowDecoder to be used for null checks. Also, a check for nullable types has been added to the unkeyed decode method. ### Are these changes tested? Yes, tests have been added/modified to test this fix. * GitHub Issue: #42041 Authored-by: Alva Bandy Signed-off-by: Sutou Kouhei --- swift/Arrow/Sources/Arrow/ArrowDecoder.swift | 31 ++++++-- .../Arrow/Tests/ArrowTests/CodableTests.swift | 73 ++++++++++++++++--- 2 files changed, 88 insertions(+), 16 deletions(-) diff --git a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift index 7e0c69b1e79e8..9aa8a65137d28 100644 --- a/swift/Arrow/Sources/Arrow/ArrowDecoder.swift +++ b/swift/Arrow/Sources/Arrow/ArrowDecoder.swift @@ -104,6 +104,16 @@ public class ArrowDecoder: Decoder { let array: AnyArray = try self.getCol(col) return array.asAny(self.rbIndex) as?
T } + + func isNull(_ key: CodingKey) throws -> Bool { + let array: AnyArray = try self.getCol(key.stringValue) + return array.asAny(self.rbIndex) == nil + } + + func isNull(_ col: Int) throws -> Bool { + let array: AnyArray = try self.getCol(col) + return array.asAny(self.rbIndex) == nil + } } private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { @@ -126,11 +136,17 @@ private struct ArrowUnkeyedDecoding: UnkeyedDecodingContainer { mutating func decodeNil() throws -> Bool { defer {increment()} - return try self.decoder.doDecode(self.currentIndex) == nil + return try self.decoder.isNull(self.currentIndex) } mutating func decode(_ type: T.Type) throws -> T where T: Decodable { - if type == Int8.self || type == Int16.self || + if type == Int8?.self || type == Int16?.self || + type == Int32?.self || type == Int64?.self || + type == UInt8?.self || type == UInt16?.self || + type == UInt32?.self || type == UInt64?.self || + type == String?.self || type == Double?.self || + type == Float?.self || type == Date?.self || + type == Int8.self || type == Int16.self || type == Int32.self || type == Int64.self || type == UInt8.self || type == UInt16.self || type == UInt32.self || type == UInt64.self || @@ -173,7 +189,7 @@ private struct ArrowKeyedDecoding: KeyedDecodingContainerProtoco } func decodeNil(forKey key: Key) throws -> Bool { - return try self.decoder.doDecode(key) == nil + try self.decoder.isNull(key) } func decode(_ type: Bool.Type, forKey key: Key) throws -> Bool { @@ -273,7 +289,7 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { func decodeNil() -> Bool { do { - return try self.decoder.doDecode(0) == nil + return try self.decoder.isNull(0) } catch { return false } @@ -338,7 +354,12 @@ private struct ArrowSingleValueDecoding: SingleValueDecodingContainer { } func decode(_ type: T.Type) throws -> T where T: Decodable { - if type == Date.self { + if type == Int8.self || type == Int16.self || + type == Int32.self || type == Int64.self || + type == UInt8.self || type == UInt16.self || + type == UInt32.self || type == UInt64.self || + type == String.self || type == Double.self || + type == Float.self || type == Date.self { return try self.decoder.doDecode(0)! } else { throw ArrowError.invalid("Type \(type) is currently not supported") diff --git a/swift/Arrow/Tests/ArrowTests/CodableTests.swift b/swift/Arrow/Tests/ArrowTests/CodableTests.swift index e7359467ae1c5..d7d3414cf6250 100644 --- a/swift/Arrow/Tests/ArrowTests/CodableTests.swift +++ b/swift/Arrow/Tests/ArrowTests/CodableTests.swift @@ -30,7 +30,7 @@ final class CodableTests: XCTestCase { public var propUInt32: UInt32 public var propUInt64: UInt64 public var propFloat: Float - public var propDouble: Double + public var propDouble: Double? 
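+ // `propDouble` is made optional so the nullable decoding path is
+ // exercised (the updated test below appends nil values for this column).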
public var propString: String public var propDate: Date @@ -53,7 +53,6 @@ final class CodableTests: XCTestCase { func testArrowKeyedDecoder() throws { // swiftlint:disable:this function_body_length let date1 = Date(timeIntervalSinceReferenceDate: 86400 * 5000 + 352) - let boolBuilder = try ArrowArrayBuilders.loadBoolArrayBuilder() let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() let int16Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() @@ -78,7 +77,7 @@ final class CodableTests: XCTestCase { uint32Builder.append(70, 71, 72) uint64Builder.append(80, 81, 82) floatBuilder.append(90.1, 91.1, 92.1) - doubleBuilder.append(100.1, 101.1, 102.1) + doubleBuilder.append(101.1, nil, nil) stringBuilder.append("test0", "test1", "test2") dateBuilder.append(date1, date1, date1) let result = RecordBatch.Builder() @@ -102,7 +101,6 @@ final class CodableTests: XCTestCase { var testClasses = try decoder.decode(TestClass.self) for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() - int8Builder.append(10, 11, 12, nil) + int8Builder.append(10, 11, 12) let result = RecordBatch.Builder() .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) .finish() @@ -134,7 +136,28 @@ final class CodableTests: XCTestCase { let testData = try decoder.decode(Int8?.self) for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() + int8WNilBuilder.append(10, nil, 12, nil) + let resultWNil = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8WNilBuilder.toHolder()) + .finish() + switch resultWNil { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode(Int8?.self) + for index in 0.. = try ArrowArrayBuilders.loadNumberArrayBuilder() let stringBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() - int8Builder.append(10, 11, 12) - stringBuilder.append("test0", "test1", "test2") + int8Builder.append(10, 11, 12, 13) + stringBuilder.append("test0", "test1", "test2", "test3") let result = RecordBatch.Builder() .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) .addColumn("propString", arrowArray: try stringBuilder.toHolder()) .finish() @@ -167,4 +190,32 @@ final class CodableTests: XCTestCase { } } + func testArrowUnkeyedDecoderWithNull() throws { + let int8Builder: NumberArrayBuilder = try ArrowArrayBuilders.loadNumberArrayBuilder() + let stringWNilBuilder = try ArrowArrayBuilders.loadStringArrayBuilder() + int8Builder.append(10, 11, 12, 13) + stringWNilBuilder.append(nil, "test1", nil, "test3") + let resultWNil = RecordBatch.Builder() + .addColumn("propInt8", arrowArray: try int8Builder.toHolder()) + .addColumn("propString", arrowArray: try stringWNilBuilder.toHolder()) + .finish() + switch resultWNil { + case .success(let rb): + let decoder = ArrowDecoder(rb) + let testData = try decoder.decode([Int8: String?].self) + var index: Int8 = 0 + for data in testData { + let str = data[10 + index] + if index % 2 == 0 { + XCTAssertNil(str!) + } else { + XCTAssertEqual(str, "test\(index)") + } + index += 1 + } + case .failure(let err): + throw err + } + + } } From 7c15568aa71c1366af5eadb6140fa445f6ce4cd0 Mon Sep 17 00:00:00 2001 From: Hyunseok Seo Date: Mon, 10 Jun 2024 09:48:05 +0900 Subject: [PATCH 257/261] GH-42042: [Java] Update Unit Tests for Compressions Module (#42044) ### Rationale for this change Update the test packages from JUnit 4 (`org.junit`) to JUnit 5 (`org.junit.jupiter`). ### What changes are included in this PR? - [x] Replacing `org.junit` with `org.junit.jupiter.api`.
- [x] Updating `Assertions.assertXXX` to `assertXXX` using static imports - [x] Updating annotations such as `@ After`. - `@ After` -> `@ AfterEach` - [x] Doing self review ### Are these changes tested? Yes, existing tests have passed. ### Are there any user-facing changes? No. * GitHub Issue: #42042 Authored-by: Hyunseok Seo Signed-off-by: David Li --- .../TestArrowReaderWriterWithCompression.java | 59 ++++++++++--------- 1 file changed, 31 insertions(+), 28 deletions(-) diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java index af28333746290..24d6abf3cb7c3 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestArrowReaderWriterWithCompression.java @@ -17,6 +17,11 @@ package org.apache.arrow.compression; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.channels.Channels; @@ -46,9 +51,7 @@ import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -import org.junit.After; -import org.junit.Assert; -import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -67,7 +70,7 @@ public void setup() { root = null; } - @After + @AfterEach public void tearDown() { if (root != null) { root.close(); @@ -134,19 +137,19 @@ public void testArrowFileZstdRoundTrip() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } } @@ -158,17 +161,17 @@ public void testArrowStreamZstdRoundTrip() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - 
Assert.assertTrue(reader.loadNextBatch()); - Assert.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assert.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assert.assertEquals( + assertEquals( "Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage() ); @@ -189,19 +192,19 @@ public void testArrowFileZstdRoundTripWithDictionary() throws Exception { try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertEquals(1, reader.getRecordBlocks().size()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowFileReader reader = new ArrowFileReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Assertions.assertEquals(1, reader.getRecordBlocks().size()); - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + assertEquals(1, reader.getRecordBlocks().size()); + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); @@ -221,17 +224,17 @@ public void testArrowStreamZstdRoundTripWithDictionary() throws Exception { try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, CommonsCompressionFactory.INSTANCE)) { - Assertions.assertTrue(reader.loadNextBatch()); - Assertions.assertTrue(root.equals(reader.getVectorSchemaRoot())); - Assertions.assertFalse(reader.loadNextBatch()); + assertTrue(reader.loadNextBatch()); + assertTrue(root.equals(reader.getVectorSchemaRoot())); + assertFalse(reader.loadNextBatch()); } // without compression try (ArrowStreamReader reader = new ArrowStreamReader(new ByteArrayReadableSeekableByteChannel(out.toByteArray()), allocator, NoCompressionCodec.Factory.INSTANCE)) { - Exception exception = Assert.assertThrows(IllegalArgumentException.class, + Exception exception = assertThrows(IllegalArgumentException.class, reader::loadNextBatch); - Assertions.assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", + assertEquals("Please add arrow-compression module to use CommonsCompressionFactory for ZSTD", exception.getMessage()); } dictionaryVector.close(); From f086b76fdd6bd3693bf3b5c9ac89081772d61e26 Mon Sep 17 00:00:00 2001 From: Vibhatha Lakmal Abeykoon Date: Mon, 10 Jun 2024 06:26:39 +0530 Subject: [PATCH 258/261] GH-40819: [Java] 
Adding Spotless to Algorithm module (#41825) ### Rationale for this change Adding code style and formatting options for Algorithm module. ### What changes are included in this PR? Code formatting spotless plugin has been added. ### Are these changes tested? Yes, but doesn't involve test cases, the plugin itself corrects. ### Are there any user-facing changes? No * GitHub Issue: #40819 Lead-authored-by: Vibhatha Abeykoon Co-authored-by: Vibhatha Lakmal Abeykoon Co-authored-by: David Li Signed-off-by: David Li --- .gitignore | 4 +- docs/source/developers/java/development.rst | 46 ++- {.mvn => java/.mvn}/develocity.xml | 0 {.mvn => java/.mvn}/extensions.xml | 0 java/algorithm/pom.xml | 7 +- .../deduplicate/DeduplicationUtils.java | 16 +- .../deduplicate/VectorRunDeduplicator.java | 27 +- .../dictionary/DictionaryBuilder.java | 30 +- .../dictionary/DictionaryEncoder.java | 11 +- .../HashTableBasedDictionaryBuilder.java | 34 +-- .../HashTableDictionaryEncoder.java | 69 ++--- .../dictionary/LinearDictionaryEncoder.java | 44 +-- .../dictionary/SearchDictionaryEncoder.java | 41 ++- .../SearchTreeBasedDictionaryBuilder.java | 46 ++- .../arrow/algorithm/misc/PartialSumUtils.java | 41 ++- .../arrow/algorithm/rank/VectorRank.java | 15 +- .../algorithm/search/ParallelSearcher.java | 187 ++++++------ .../algorithm/search/VectorRangeSearcher.java | 213 +++++++------ .../algorithm/search/VectorSearcher.java | 26 +- .../sort/CompositeVectorComparator.java | 17 +- .../sort/DefaultVectorComparators.java | 126 ++++---- .../sort/FixedWidthInPlaceVectorSorter.java | 25 +- .../FixedWidthOutOfPlaceVectorSorter.java | 35 ++- .../sort/GeneralOutOfPlaceVectorSorter.java | 20 +- .../algorithm/sort/InPlaceVectorSorter.java | 7 +- .../arrow/algorithm/sort/IndexSorter.java | 33 +- .../arrow/algorithm/sort/InsertionSorter.java | 23 +- .../arrow/algorithm/sort/OffHeapIntStack.java | 5 +- .../sort/OutOfPlaceVectorSorter.java | 8 +- .../sort/StableVectorComparator.java | 13 +- .../VariableWidthOutOfPlaceVectorSorter.java | 56 ++-- .../algorithm/sort/VectorValueComparator.java | 56 ++-- .../deduplicate/TestDeduplicationUtils.java | 46 +-- .../TestVectorRunDeduplicator.java | 19 +- .../TestHashTableBasedDictionaryBuilder.java | 62 ++-- .../TestHashTableDictionaryEncoder.java | 72 +++-- .../TestLinearDictionaryEncoder.java | 72 +++-- .../TestSearchDictionaryEncoder.java | 84 ++--- .../TestSearchTreeBasedDictionaryBuilder.java | 90 ++++-- .../algorithm/misc/TestPartialSumUtils.java | 18 +- .../arrow/algorithm/rank/TestVectorRank.java | 20 +- .../search/TestParallelSearcher.java | 36 ++- .../search/TestVectorRangeSearcher.java | 30 +- .../algorithm/search/TestVectorSearcher.java | 30 +- .../sort/TestCompositeVectorComparator.java | 18 +- .../sort/TestDefaultVectorComparator.java | 167 ++++++---- .../TestFixedWidthInPlaceVectorSorter.java | 48 ++- .../TestFixedWidthOutOfPlaceVectorSorter.java | 69 +++-- .../algorithm/sort/TestFixedWidthSorting.java | 126 +++++--- .../TestGeneralOutOfPlaceVectorSorter.java | 79 ++--- .../arrow/algorithm/sort/TestIndexSorter.java | 31 +- .../algorithm/sort/TestInsertionSorter.java | 9 +- .../algorithm/sort/TestOffHeapIntStack.java | 5 +- .../sort/TestOutOfPlaceVectorSorter.java | 6 +- .../arrow/algorithm/sort/TestSortingUtil.java | 136 +++++---- .../sort/TestStableVectorComparator.java | 50 +-- ...stVariableWidthOutOfPlaceVectorSorter.java | 40 +-- .../sort/TestVariableWidthSorting.java | 44 +-- java/dev/checkstyle/checkstyle-spotless.xml | 286 ++++++++++++++++++ .../asf-java.license} | 0 
java/dev/license/asf-xml.license | 11 + java/maven/pom.xml | 2 +- java/pom.xml | 22 +- 63 files changed, 1716 insertions(+), 1293 deletions(-) rename {.mvn => java/.mvn}/develocity.xml (100%) rename {.mvn => java/.mvn}/extensions.xml (100%) create mode 100644 java/dev/checkstyle/checkstyle-spotless.xml rename java/dev/{checkstyle/checkstyle.license => license/asf-java.license} (100%) create mode 100644 java/dev/license/asf-xml.license diff --git a/.gitignore b/.gitignore index 3192069d1ac7a..52ffa6c6124c2 100644 --- a/.gitignore +++ b/.gitignore @@ -102,8 +102,8 @@ __debug_bin .envrc # Develocity -.mvn/.gradle-enterprise/ -.mvn/.develocity/ +java/.mvn/.gradle-enterprise/ +java/.mvn/.develocity/ # rat filtered_rat.txt diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index 9f78eccf6c525..dd1839257a30e 100644 --- a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -110,7 +110,46 @@ integration tests, you would do: Code Style ========== -Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_. +The current Java code follows the `Google Java Style`_ with Apache license headers. + +Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify +that changes adhere to the style guide. + +Automatically fixing code style issues +-------------------------------------- + +- You can check the style without building the project with ``mvn spotless:check``. +- You can autoformat the source with ``mvn spotless:apply``. + +Example: + +.. code-block:: bash + + The following files had format violations: + src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java + @@ -15,7 +15,6 @@ + ·*·limitations·under·the·License. + ·*/ + + - + package·org.apache.arrow.algorithm.rank; + + import·java.util.stream.IntStream; + Run 'mvn spotless:apply' to fix these violations. + +Code Formatter for Intellij IDEA and Eclipse +-------------------------------------------- + +Follow the instructions to set up google-java-format for: + +- `Eclipse`_ +- `IntelliJ`_ + + +Checkstyle +---------- + +Checkstyle is also used for general linting. The configuration is located at `checkstyle`_. You can also just check the style without building the project. This checks the code style of all source code under the current directory or from within an individual module. @@ -137,7 +176,10 @@ This applies the style to all pom.xml files under the current directory or from .. _conbench: https://github.com/conbench/conbench .. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml .. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention - +.. _Spotless: https://github.com/diffplug/spotless +.. _Google Java Style: https://google.github.io/styleguide/javaguide.html +.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse +.. 
_IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides Build Caching ============= diff --git a/.mvn/develocity.xml b/java/.mvn/develocity.xml similarity index 100% rename from .mvn/develocity.xml rename to java/.mvn/develocity.xml diff --git a/.mvn/extensions.xml b/java/.mvn/extensions.xml similarity index 100% rename from .mvn/extensions.xml rename to java/.mvn/extensions.xml diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 0854da48b718a..5984cce766d9e 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -20,6 +20,11 @@ Arrow Algorithms (Experimental/Contrib) A collection of algorithms for working with ValueVectors. + + dev/checkstyle/checkstyle-spotless.xml + none + + org.apache.arrow @@ -47,6 +52,4 @@ value-annotations - - diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java index 8811e43d3d08d..e9364b2a85b7b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,18 @@ import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -/** - * Utilities for vector deduplication. - */ +/** Utilities for vector deduplication. */ class DeduplicationUtils { /** * Gets the start positions of the first distinct values in a vector. + * * @param vector the target vector. * @param runStarts the bit set to hold the start positions. * @param vector type. */ - public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) { + public static void populateRunStartIndicators( + V vector, ArrowBuf runStarts) { int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); Preconditions.checkArgument(runStarts.capacity() >= bufSize); runStarts.setZero(0, bufSize); @@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector, /** * Gets the run lengths, given the start positions. + * * @param runStarts the bit set for start positions. * @param runLengths the run length vector to populate. * @param valueCount the number of values in the bit set. @@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, } /** - * Gets distinct values from the input vector by removing adjacent - * duplicated values. + * Gets distinct values from the input vector by removing adjacent duplicated values. + * * @param indicators the bit set containing the start positions of distinct values. * @param inputVector the input vector. * @param outputVector the output vector. * @param vector type. 
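 * <p>A minimal illustration (hypothetical values, not part of this patch):
 *
 * <pre>{@code
 * // input vector:          [1, 1, 2, 3, 3]
 * // run-start indicators:   1  0  1  1  0
 * // populated output:      [1, 2, 3]
 * }</pre>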
*/ public static void populateDeduplicatedValues( - ArrowBuf indicators, V inputVector, V outputVector) { + ArrowBuf indicators, V inputVector, V outputVector) { int dstIdx = 0; for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) { if (BitVectorHelper.get(indicators, srcIdx) != 0) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java index 5ef03cbe4a734..4e49de14f5956 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,29 +25,28 @@ import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** - * Remove adjacent equal elements from a vector. - * If the vector is sorted, it removes all duplicated values in the vector. + * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated + * values in the vector. + * * @param vector type. */ public class VectorRunDeduplicator implements AutoCloseable { /** - * Bit set for distinct values. - * If the value at some index is not equal to the previous value, - * its bit is set to 1, otherwise its bit is set to 0. + * Bit set for distinct values. If the value at some index is not equal to the previous value, its + * bit is set to 1, otherwise its bit is set to 0. */ private ArrowBuf distinctValueBuffer; - /** - * The vector to deduplicate. - */ + /** The vector to deduplicate. */ private final V vector; private final BufferAllocator allocator; /** * Constructs a vector run deduplicator for a given vector. - * @param vector the vector to deduplicate. Ownership is NOT taken. + * + * @param vector the vector to deduplicate. Ownership is NOT taken. * @param allocator the allocator used for allocating buffers for start indices. */ public VectorRunDeduplicator(V vector, BufferAllocator allocator) { @@ -65,17 +63,20 @@ private void createDistinctValueBuffer() { /** * Gets the number of values which are different from their predecessor. + * * @return the run count. */ public int getRunCount() { if (distinctValueBuffer == null) { createDistinctValueBuffer(); } - return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); + return vector.getValueCount() + - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); } /** * Gets the vector with deduplicated adjacent values removed. + * * @param outVector the output vector. */ public void populateDeduplicatedValues(V outVector) { @@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) { /** * Gets the length of each distinct value. + * * @param lengthVector the vector for holding length values. 
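 * <p>A minimal illustration (hypothetical values, not part of this patch):
 *
 * <pre>{@code
 * // run-start indicators for [5, 5, 8, 9, 9, 9]:  1 0 1 1 0 0
 * // populated run lengths:                        [2, 1, 3]
 * }</pre>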
*/ public void populateRunLengths(IntVector lengthVector) { @@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) { createDistinctValueBuffer(); } - DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount()); + DeduplicationUtils.populateRunLengths( + distinctValueBuffer, lengthVector, vector.getValueCount()); } @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java index 398368d1fc612..88c4e4dc65450 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java @@ -14,33 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.ValueVector; /** - * A dictionary builder is intended for the scenario frequently encountered in practice: - * the dictionary is not known a priori, so it is generated dynamically. - * In particular, when a new value arrives, it is tested to check if it is already - * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary. - *

- * The dictionary builder is intended to build a single dictionary.
- * So it cannot be used for different dictionaries.
- * </p>
+ * A dictionary builder is intended for the scenario frequently encountered in practice: the
+ * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value
+ * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected,
+ * otherwise, it is added to the dictionary.
+ *
+ * <p>The dictionary builder is intended to build a single dictionary. So it cannot be used for
+ * different dictionaries.
+ *
 * <p>Below gives the sample code for using the dictionary builder
+ *
 * <pre>{@code
 * DictionaryBuilder dictionaryBuilder = ...
 * ...
 * dictionaryBuild.addValue(newValue);
 * ...
 * }</pre>
- * </p>
- * <p>
- * With the above code, the dictionary vector will be populated,
- * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method.
- * After that, dictionary encoding can proceed with the populated dictionary..
- * </p>
+ *
+ * <p>
With the above code, the dictionary vector will be populated, and it can be retrieved by the + * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed + * with the populated dictionary.. * * @param the dictionary vector type. */ @@ -58,7 +56,7 @@ public interface DictionaryBuilder { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ int addValue(V targetVector, int targetIndex); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java index cda7b3bf9540e..16e27c3a23e72 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -22,8 +21,9 @@ /** * A dictionary encoder translates one vector into another one based on a dictionary vector. - * According to Arrow specification, the encoded vector must be an integer based vector, which - * is the index of the original vector element in the dictionary. + * According to Arrow specification, the encoded vector must be an integer based vector, which is + * the index of the original vector element in the dictionary. + * * @param type of the encoded vector. * @param type of the vector to encode. It is also the type of the dictionary vector. */ @@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type. */ -public class HashTableBasedDictionaryBuilder implements DictionaryBuilder { +public class HashTableBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The hasher used for calculating the hash code. - */ + /** The hasher used for calculating the hash code. */ private final ArrowBufHasher hasher; - /** - * Next pointer to try to add to the hash table. - */ + /** Next pointer to try to add to the hash table. */ private ArrowBufPointer nextPointer; /** @@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) { * * @param dictionary the dictionary to populate. * @param encodeNull if null values should be added to the dictionary. - * @param hasher the hasher used to compute the hash code. + * @param hasher the hasher used to compute the hash code. 
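 * <p>A usage sketch (hypothetical vectors, not part of this patch):
 *
 * <pre>{@code
 * HashTableBasedDictionaryBuilder<VarCharVector> builder =
 *     new HashTableBasedDictionaryBuilder<>(dictionaryVector, false);
 * int added = builder.addValues(dataVector);
 * VarCharVector populated = builder.getDictionary();
 * }</pre>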
*/ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) { this.dictionary = dictionary; @@ -125,7 +115,7 @@ public int addValues(V targetVector) { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java index bea1a784c3d6a..ac7a7d32bf597 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.HashMap; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.memory.util.hash.SimpleHasher; @@ -27,43 +25,35 @@ /** * Dictionary encoder based on hash table. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class HashTableDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. - */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The hasher used to compute the hash code. - */ + /** The hasher used to compute the hash code. */ private final ArrowBufHasher hasher; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The pointer used to probe each element to encode. - */ + /** The pointer used to probe each element to encode. */ private ArrowBufPointer reusablePointer; /** * Constructs a dictionary encoder. - * @param dictionary the dictionary. * + * @param dictionary the dictionary. */ public HashTableDictionaryEncoder(D dictionary) { this(dictionary, false); @@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *

- * <li>
- * For encoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the encoder searches for the value in the dictionary,
- * and outputs the index in the dictionary.
- * 2) If the flag is set to false, the encoder simply produces a null in the output.
- * </li>
- * <li>
- * For decoding, when a null is encountered in the input,
- * 1) If the flag is set to true, the decoder should never expect a null in the input.
- * 2) If set to false, the decoder simply produces a null in the output.
- * </li>
- * </ul>
+ * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for
+ *     processing null values in the input during encoding/decoding.
+ *     <ul>
+ *       <li>For encoding, when a null is encountered in the input, 1) If the flag is set to true,
+ *           the encoder searches for the value in the dictionary, and outputs the index in the
+ *           dictionary. 2) If the flag is set to false, the encoder simply produces a null in the
+ *           output.
+ *       <li>
  • For decoding, when a null is encountered in the input, 1) If the flag is set to true, + * the decoder should never expect a null in the input. 2) If set to false, the decoder + * simply produces a null in the output. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { this(dictionary, encodeNull, SimpleHasher.INSTANCE); @@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. * @param hasher the hasher used to calculate the hash code. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) { @@ -120,12 +107,12 @@ private void buildHashMap() { } /** - * Encodes an input vector by a hash table. - * So the algorithm takes O(n) time, where n is the length of the input vector. + * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the + * length of the input vector. * - * @param input the input vector. + * @param input the input vector. * @param output the output vector. - **/ + */ @Override public void encode(D input, E output) { for (int i = 0; i < input.getValueCount(); i++) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java index 84a3a96af8ef1..9aeff22005751 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -24,20 +23,17 @@ /** * Dictionary encoder based on linear search. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class LinearDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding. - */ + /** The dictionary for encoding. */ private final D dictionary; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; private RangeEqualsVisitor equalizer; @@ -46,8 +42,10 @@ public class LinearDictionaryEncoder encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class SearchDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. 
- */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The criteria by which the dictionary is sorted. - */ + /** The criteria by which the dictionary is sorted. */ private final VectorValueComparator comparator; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. */ @@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. */ - public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchDictionaryEncoder( + D dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; } /** - * Encodes an input vector by binary search. - * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector, - * and m is the length of the dictionary. + * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is + * the length of the input vector, and m is the length of the dictionary. + * * @param input the input vector. - * @param output the output vector. Note that it must be in a fresh state. At least, - * all its validity bits should be clear. + * @param output the output vector. Note that it must be in a fresh state. At least, all its + * validity bits should be clear. */ @Override public void encode(D input, E output) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java index f9cd77daa2e76..fca7df067dcff 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java @@ -14,45 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.TreeSet; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; /** - * This class builds the dictionary based on a binary search tree. 
- * Each add operation can be finished in O(log(n)) time, - * where n is the current dictionary size. + * This class builds the dictionary based on a binary search tree. Each add operation can be + * finished in O(log(n)) time, where n is the current dictionary size. * * @param the dictionary vector type. */ -public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder { +public class SearchTreeBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * The criteria for sorting in the search tree. - */ + /** The criteria for sorting in the search tree. */ protected final VectorValueComparator comparator; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; - /** - * The search tree for storing the value index. - */ + /** The search tree for storing the value index. */ private TreeSet searchTree; /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. */ @@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. * @param encodeNull if null values should be added to the dictionary. */ - public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchTreeBasedDictionaryBuilder( + V dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; @@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c } /** - * Gets the dictionary built. - * Please note that the dictionary is not in sorted order. - * Instead, its order is determined by the order of element insertion. - * To get the dictionary in sorted order, please use - * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its + * order is determined by the order of element insertion. To get the dictionary in sorted order, + * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * * @return the dictionary. */ @Override @@ -90,6 +82,7 @@ public V getDictionary() { /** * Try to add all values from the target vector to the dictionary. + * * @param targetVector the target vector containing values to probe. * @return the number of values actually added to the dictionary. */ @@ -107,6 +100,7 @@ public int addValues(V targetVector) { /** * Try to add an element from the target vector to the dictionary. + * * @param targetVector the target vector containing new element. * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. @@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) { } /** - * Gets the sorted dictionary. - * Note that given the binary search tree, the sort can finish in O(n). + * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in + * O(n). 
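 * <p>A usage sketch (hypothetical vectors and comparator, not part of this patch):
 *
 * <pre>{@code
 * SearchTreeBasedDictionaryBuilder<IntVector> builder =
 *     new SearchTreeBasedDictionaryBuilder<>(dictionaryVector, comparator);
 * builder.addValues(dataVector);
 * builder.populateSortedDictionary(sortedDictionaryVector);
 * }</pre>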
*/ public void populateSortedDictionary(V sortedDictionary) { int idx = 0; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java index f5e95cf1033f5..5492676af1a2e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java @@ -14,26 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import org.apache.arrow.vector.BaseIntVector; -/** - * Partial sum related utilities. - */ +/** Partial sum related utilities. */ public class PartialSumUtils { /** - * Converts an input vector to a partial sum vector. - * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. - * Suppose we have input vector a and output vector b. - * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * Converts an input vector to a partial sum vector. This is an inverse operation of {@link + * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a + * and output vector b. Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * * @param deltaVector the input vector. * @param partialSumVector the output vector. * @param sumBase the base of the partial sums. */ - public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { + public static void toPartialSumVector( + BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { long sum = sumBase; partialSumVector.setWithPossibleTruncate(0, sumBase); @@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p } /** - * Converts an input vector to the delta vector. - * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. - * Suppose we have input vector a and output vector b. - * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * Converts an input vector to the delta vector. This is an inverse operation of {@link + * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input + * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * * @param partialSumVector the input vector. * @param deltaVector the output vector. */ @@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d } /** - * Given a value and a partial sum vector, finds its position in the partial sum vector. - * In particular, given an integer value a and partial sum vector v, we try to find a - * position i, so that v(i) <= a < v(i + 1). - * The algorithm is based on binary search, so it takes O(log(n)) time, where n is - * the length of the partial sum vector. + * Given a value and a partial sum vector, finds its position in the partial sum vector. In + * particular, given an integer value a and partial sum vector v, we try to find a position i, so + * that v(i) <= a < v(i + 1). The algorithm is based on binary search, so it takes O(log(n)) time, + * where n is the length of the partial sum vector. + * * @param partialSumVector the input partial sum vector. * @param value the value to search. 
* @return the position in the partial sum vector, if any, or -1, if none is found. */ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) { - if (value < partialSumVector.getValueAsLong(0) || - value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { + if (value < partialSumVector.getValueAsLong(0) + || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { return -1; } @@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, throw new IllegalStateException("Should never get here"); } - private PartialSumUtils() { - } + private PartialSumUtils() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java index 43c9a5b010e8c..baa2058ffc51f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import java.util.stream.IntStream; - import org.apache.arrow.algorithm.sort.IndexSorter; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -28,21 +26,21 @@ /** * Utility for calculating ranks of vector elements. + * * @param the vector type */ public class VectorRank { private VectorValueComparator comparator; - /** - * Vector indices. - */ + /** Vector indices. */ private IntVector indices; private final BufferAllocator allocator; /** * Constructs a vector rank utility. + * * @param allocator the allocator to use. */ public VectorRank(BufferAllocator allocator) { @@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) { } /** - * Given a rank r, gets the index of the element that is the rth smallest in the vector. - * The operation is performed without changing the vector, and takes O(n) time, - * where n is the length of the vector. + * Given a rank r, gets the index of the element that is the rth smallest in the vector. The + * operation is performed without changing the vector, and takes O(n) time, where n is the length + * of the vector. + * * @param vector the vector from which to get the element index. * @param comparator the criteria for vector element comparison. * @param rank the rank to determine. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java index 6226921b22ed6..6a48019edc3eb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java @@ -14,49 +14,40 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; /** - * Search for a value in the vector by multiple threads. - * This is often used in scenarios where the vector is large or - * low response time is required. + * Search for a value in the vector by multiple threads. This is often used in scenarios where the + * vector is large or low response time is required. + * * @param the vector type. */ public class ParallelSearcher { - /** - * The target vector to search. - */ + /** The target vector to search. */ private final V vector; - /** - * The thread pool. - */ + /** The thread pool. */ private final ExecutorService threadPool; - /** - * The number of threads to use. - */ + /** The number of threads to use. */ private final int numThreads; - /** - * The position of the key in the target vector, if any. - */ + /** The position of the key in the target vector, if any. */ private volatile int keyPosition = -1; /** * Constructs a parallel searcher. + * * @param vector the vector to search. * @param threadPool the thread pool to use. * @param numThreads the number of threads to use. @@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() { } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise - * comparison: equal and un-equal. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal + * and un-equal. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. * @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. 
*/ @@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); - Range range = new Range(0, 0, 1); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - range.setLeftStart(pos).setRightStart(keyIndex); - if (visitor.rangeEquals(range)) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); + Range range = new Range(0, 0, 1); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + range.setLeftStart(pos).setRightStart(keyIndex); + if (visitor.rangeEquals(range)) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); @@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link VectorValueComparator}, so there are three possible results for each element-wise - * comparison: less than, equal to and greater than. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * VectorValueComparator}, so there are three possible results for each element-wise comparison: + * less than, equal to and greater than. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. * @param comparator the comparator for comparing the key against vector elements. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. 
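 * <p>A usage sketch (hypothetical setup, not part of this patch):
 *
 * <pre>{@code
 * ParallelSearcher<IntVector> searcher =
 *     new ParallelSearcher<>(targetVector, threadPool, numThreads);
 * int pos = searcher.search(keyVector, 0, comparator);
 * }</pre>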
* @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. */ - public int search( - V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException { + public int search(V keyVector, int keyIndex, VectorValueComparator comparator) + throws ExecutionException, InterruptedException { final CompletableFuture[] futures = initSearch(); final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - VectorValueComparator localComparator = comparator.createNew(); - localComparator.attachVectors(vector, keyVector); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - if (localComparator.compare(pos, keyIndex) == 0) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. + futures[tid].complete(false); + return; + } + + VectorValueComparator localComparator = comparator.createNew(); + localComparator.attachVectors(vector, keyVector); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + if (localComparator.compare(pos, keyIndex) == 0) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java index 249194843f101..c7905dd8956c8 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java @@ -1,108 +1,105 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** - * Search for the range of a particular element in the target vector. - */ -public class VectorRangeSearcher { - - /** - * Result returned when a search fails. - */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for the first occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the first matched element if any, and -1 otherwise. - */ - public static int getFirstMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found - // continue to go left-ward - ret = mid; - high = mid - 1; - } - } - return ret; - } - - /** - * Search for the last occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the last matched element if any, and -1 otherwise. - */ - public static int getLastMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found, - // continue to go right-ward - ret = mid; - low = mid + 1; - } - } - return ret; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.arrow.algorithm.search; + +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.vector.ValueVector; + +/** Search for the range of a particular element in the target vector. */ +public class VectorRangeSearcher { + + /** Result returned when a search fails. */ + public static final int SEARCH_FAIL_RESULT = -1; + + /** + * Search for the first occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the first matched element if any, and -1 otherwise. + */ + public static int getFirstMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found + // continue to go left-ward + ret = mid; + high = mid - 1; + } + } + return ret; + } + + /** + * Search for the last occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the last matched element if any, and -1 otherwise. + */ + public static int getLastMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found, + // continue to go right-ward + ret = mid; + low = mid + 1; + } + } + return ret; + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java index 646bca01bb81d..dd0b4de5d8677 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; -/** - * Search for a particular element in the vector. - */ +/** Search for a particular element in the vector. 
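 * <p>A usage sketch (hypothetical vectors, not part of this patch); the target vector must be
 * sorted for the binary search variant:
 *
 * <pre>{@code
 * int pos = VectorSearcher.binarySearch(targetVector, comparator, keyVector, 0);
 * if (pos == VectorSearcher.SEARCH_FAIL_RESULT) {
 *   // the key is not present in the target vector
 * }
 * }</pre>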
*/ public final class VectorSearcher { - /** - * Result returned when a search fails. - */ + /** Result returned when a search fails. */ public static final int SEARCH_FAIL_RESULT = -1; /** - * Search for a particular element from the key vector in the target vector by binary search. - * The target vector must be sorted. + * Search for a particular element from the key vector in the target vector by binary search. The + * target vector must be sorted. + * * @param targetVector the vector from which to perform the sort. * @param comparator the criterion for the sort. * @param keyVector the vector containing the element to search. @@ -41,7 +37,7 @@ public final class VectorSearcher { * @return the index of a matched element if any, and -1 otherwise. */ public static int binarySearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); // perform binary search @@ -63,7 +59,9 @@ public static int binarySearch( } /** - * Search for a particular element from the key vector in the target vector by traversing the vector in sequence. + * Search for a particular element from the key vector in the target vector by traversing the + * vector in sequence. + * * @param targetVector the vector from which to perform the search. * @param comparator the criterion for element equality. * @param keyVector the vector containing the element to search. @@ -72,7 +70,7 @@ public static int binarySearch( * @return the index of a matched element if any, and -1 otherwise. */ public static int linearSearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); for (int i = 0; i < targetVector.getValueCount(); i++) { if (comparator.compare(keyIndex, i) == 0) { @@ -82,7 +80,5 @@ public static int linearSearch( return SEARCH_FAIL_RESULT; } - private VectorSearcher() { - - } + private VectorSearcher() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java index ec74598e0eebf..77093d87bc489 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * A composite vector comparator compares a number of vectors - * by a number of inner comparators. - *
<p>
    - * It works by first using the first comparator, if a non-zero value - * is returned, it simply returns it. Otherwise, it uses the second comparator, - * and so on, until a non-zero value is produced, or all inner comparators have - * been used. - *
</p>
    + * A composite vector comparator compares a number of vectors by a number of inner comparators. + * + *
<p>
    It works by first using the first comparator, if a non-zero value is returned, it simply + * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is + * produced, or all inner comparators have been used. */ public class CompositeVectorComparator extends VectorValueComparator { @@ -62,7 +58,8 @@ public int compare(int index1, int index2) { @Override public VectorValueComparator createNew() { - VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length]; + VectorValueComparator[] newInnerComparators = + new VectorValueComparator[innerComparators.length]; for (int i = 0; i < innerComparators.length; i++) { newInnerComparators[i] = innerComparators[i].createNew(); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 588876aa99059..ec650cd9dc88b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import java.math.BigDecimal; import java.time.Duration; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -56,13 +54,12 @@ import org.apache.arrow.vector.complex.RepeatedValueVector; import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -/** - * Default comparator implementations for different types of vectors. - */ +/** Default comparator implementations for different types of vectors. */ public class DefaultVectorComparators { /** * Create the default comparator for the vector. + * * @param vector the vector. * @param the vector type. * @return the default comparator. 
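
To make the chaining described in that Javadoc concrete, a hedged sketch (names hypothetical, not from this patch): each inner comparator is attached to its own column beforehand, and a tie on the first column falls through to the second.

    import org.apache.arrow.algorithm.sort.CompositeVectorComparator;
    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class CompositeCompareSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector major = new IntVector("major", allocator);
            IntVector minor = new IntVector("minor", allocator)) {
          major.allocateNew(2);
          minor.allocateNew(2);
          // Row 0: (5, 9); row 1: (5, 2) -- equal on the major key.
          major.set(0, 5);
          minor.set(0, 9);
          major.set(1, 5);
          minor.set(1, 2);
          major.setValueCount(2);
          minor.setValueCount(2);

          VectorValueComparator<IntVector> byMajor =
              DefaultVectorComparators.createDefaultComparator(major);
          VectorValueComparator<IntVector> byMinor =
              DefaultVectorComparators.createDefaultComparator(minor);
          byMajor.attachVector(major);
          byMinor.attachVector(minor);

          CompositeVectorComparator rowComparator =
              new CompositeVectorComparator(
                  new VectorValueComparator[] {byMajor, byMinor});

          // The major keys tie, so the minor keys decide: row 0 (9) > row 1 (2).
          System.out.println(rowComparator.compare(0, 1) > 0); // true
        }
      }
    }
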
@@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp } else if (vector instanceof IntervalDayVector) { return (VectorValueComparator) new IntervalDayComparator(); } else if (vector instanceof IntervalMonthDayNanoVector) { - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } else if (vector instanceof TimeMicroVector) { return (VectorValueComparator) new TimeMicroComparator(); } else if (vector instanceof TimeMilliVector) { @@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new VariableWidthComparator(); } else if (vector instanceof RepeatedValueVector) { VectorValueComparator innerComparator = - createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); + createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); return new RepeatedValueComparator(innerComparator); } else if (vector instanceof FixedSizeListVector) { VectorValueComparator innerComparator = @@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new NullComparator(); } - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } - /** - * Default comparator for bytes. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bytes. The comparison is based on values, with null comes first. */ public static class ByteComparator extends VectorValueComparator { public ByteComparator() { @@ -159,8 +155,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for short integers. - * The comparison is based on values, with null comes first. + * Default comparator for short integers. The comparison is based on values, with null comes + * first. */ public static class ShortComparator extends VectorValueComparator { @@ -182,8 +178,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for 32-bit integers. - * The comparison is based on int values, with null comes first. + * Default comparator for 32-bit integers. The comparison is based on int values, with null comes + * first. */ public static class IntComparator extends VectorValueComparator { @@ -205,8 +201,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for long integers. - * The comparison is based on values, with null comes first. + * Default comparator for long integers. The comparison is based on values, with null comes first. */ public static class LongComparator extends VectorValueComparator { @@ -229,8 +224,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned bytes. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned bytes. The comparison is based on values, with null comes + * first. */ public static class UInt1Comparator extends VectorValueComparator { @@ -253,8 +248,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned short integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned short integer. The comparison is based on values, with null + * comes first. 
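
The "null comes first" contract shared by all of these comparators, as a small assumed-usage sketch (not part of the patch):

    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class NullsFirstSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector vec = new IntVector("vec", allocator)) {
          vec.allocateNew(3);
          vec.set(0, 42);
          vec.setNull(1);
          vec.set(2, 7);
          vec.setValueCount(3);

          VectorValueComparator<IntVector> comparator =
              DefaultVectorComparators.createDefaultComparator(vec);
          comparator.attachVector(vec);

          // A null sorts before any value; otherwise values decide.
          System.out.println(comparator.compare(1, 0) < 0); // true: null < 42
          System.out.println(comparator.compare(2, 0) < 0); // true: 7 < 42
        }
      }
    }
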
*/ public static class UInt2Comparator extends VectorValueComparator { @@ -280,8 +275,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned integer. The comparison is based on values, with null comes + * first. */ public static class UInt4Comparator extends VectorValueComparator { @@ -303,8 +298,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned long integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned long integer. The comparison is based on values, with null + * comes first. */ public static class UInt8Comparator extends VectorValueComparator { @@ -326,8 +321,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for float type. - * The comparison is based on values, with null comes first. + * Default comparator for float type. The comparison is based on values, with null comes first. */ public static class Float4Comparator extends VectorValueComparator { @@ -363,8 +357,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for double type. - * The comparison is based on values, with null comes first. + * Default comparator for double type. The comparison is based on values, with null comes first. */ public static class Float8Comparator extends VectorValueComparator { @@ -399,10 +392,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for bit type. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bit type. The comparison is based on values, with null comes first. */ public static class BitComparator extends VectorValueComparator { public BitComparator() { @@ -424,8 +414,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateDay type. - * The comparison is based on values, with null comes first. + * Default comparator for DateDay type. The comparison is based on values, with null comes first. */ public static class DateDayComparator extends VectorValueComparator { @@ -447,8 +436,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for DateMilli type. The comparison is based on values, with null comes + * first. */ public static class DateMilliComparator extends VectorValueComparator { @@ -471,8 +460,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal256 type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal256 type. The comparison is based on values, with null comes + * first. */ public static class Decimal256Comparator extends VectorValueComparator { @@ -495,8 +484,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal type. The comparison is based on values, with null comes first. */ public static class DecimalComparator extends VectorValueComparator { @@ -519,8 +507,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Duration type. - * The comparison is based on values, with null comes first. + * Default comparator for Duration type. The comparison is based on values, with null comes first. 
*/ public static class DurationComparator extends VectorValueComparator { @@ -543,8 +530,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for IntervalDay type. - * The comparison is based on values, with null comes first. + * Default comparator for IntervalDay type. The comparison is based on values, with null comes + * first. */ public static class IntervalDayComparator extends VectorValueComparator { @@ -567,8 +554,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMicro type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMicro type. The comparison is based on values, with null comes + * first. */ public static class TimeMicroComparator extends VectorValueComparator { @@ -591,8 +578,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMilli type. The comparison is based on values, with null comes + * first. */ public static class TimeMilliComparator extends VectorValueComparator { @@ -615,8 +602,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeNano type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeNano type. The comparison is based on values, with null comes first. */ public static class TimeNanoComparator extends VectorValueComparator { @@ -639,8 +625,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeSecComparator extends VectorValueComparator { @@ -663,8 +648,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeStampComparator extends VectorValueComparator { @@ -687,10 +671,11 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. The comparison is + * in lexicographic order, with null comes first. */ - public static class FixedSizeBinaryComparator extends VectorValueComparator { + public static class FixedSizeBinaryComparator + extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -720,9 +705,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for {@link org.apache.arrow.vector.NullVector}. - */ + /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */ public static class NullComparator extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -742,8 +725,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is + * in lexicographic order, with null comes first. 
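
Likewise for the variable-width comparator just described, a sketch (assumed usage, names illustrative) of its byte-wise lexicographic ordering:

    import java.nio.charset.StandardCharsets;
    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VarCharVector;

    public class LexicographicSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            VarCharVector vec = new VarCharVector("vec", allocator)) {
          vec.allocateNew(64, 3);
          vec.set(0, "apple".getBytes(StandardCharsets.UTF_8));
          vec.set(1, "banana".getBytes(StandardCharsets.UTF_8));
          vec.set(2, "app".getBytes(StandardCharsets.UTF_8));
          vec.setValueCount(3);

          VectorValueComparator<VarCharVector> comparator =
              DefaultVectorComparators.createDefaultComparator(vec);
          comparator.attachVector(vec);

          // Lexicographic order over the raw bytes: "app" < "apple" < "banana".
          System.out.println(comparator.compare(2, 0) < 0); // true
          System.out.println(comparator.compare(0, 1) < 0); // true
        }
      }
    }
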
*/ public static class VariableWidthComparator extends VectorValueComparator { @@ -772,12 +755,13 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class RepeatedValueComparator - extends VectorValueComparator { + extends VectorValueComparator { private final VectorValueComparator innerComparator; @@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class FixedSizeListComparator @@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto } } - private DefaultVectorComparators() { - } + private DefaultVectorComparators() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java index aaa7ba117c3ba..ea2b344a1eabb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -14,20 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; /** - * Default in-place sorter for fixed-width vectors. - * It is based on quick-sort, with average time complexity O(n*log(n)). + * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter { +public class FixedWidthInPlaceVectorSorter + implements InPlaceVectorSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; @@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple VectorValueComparator comparator; - /** - * The vector to sort. - */ + /** The vector to sort. */ V vec; - /** - * The buffer to hold the pivot. - * It always has length 1. - */ + /** The buffer to hold the pivot. It always has length 1. */ V pivotBuffer; @Override @@ -99,9 +96,7 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
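
A minimal usage sketch for the in-place fixed-width sorter described above (assumed usage, not part of this patch); it quick-sorts the vector's own buffers instead of producing a copy:

    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.FixedWidthInPlaceVectorSorter;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class InPlaceSortSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector vec = new IntVector("vec", allocator)) {
          vec.allocateNew(4);
          vec.set(0, 9);
          vec.set(1, 2);
          vec.set(2, 7);
          vec.set(3, 2);
          vec.setValueCount(4);

          FixedWidthInPlaceVectorSorter<IntVector> sorter =
              new FixedWidthInPlaceVectorSorter<>();
          VectorValueComparator<IntVector> comparator =
              DefaultVectorComparators.createDefaultComparator(vec);

          // Quick-sorts the vector's own buffers: 2, 2, 7, 9.
          sorter.sortInPlace(vec, comparator);
          for (int i = 0; i < vec.getValueCount(); i++) {
            System.out.println(vec.get(i));
          }
        }
      }
    }
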
*/ void choosePivot(int low, int high) { // we need at least 3 items if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java index 05a4585792dc2..817e890a5abe1 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,21 @@ import org.apache.arrow.vector.IntVector; /** - * Default out-of-place sorter for fixed-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class FixedWidthOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { if (srcVector instanceof BitVector) { - throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); + throw new IllegalArgumentException( + "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); } comparator.attachVector(srcVector); @@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); Preconditions.checkArgument( dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()), - "Not enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity()); + "Not enough capacity for the data buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount() * srcVector.getTypeWidth(), + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), - dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), - valueWidth); + srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), + dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), + valueWidth); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java index 9ea39f638aebe..18f5e94314f83 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; @@ -22,23 +21,26 @@ import org.apache.arrow.vector.ValueVector; /** - * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). - * Since it does not make any assumptions about the memory layout of the vector, its performance - * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), - * it should be used in preference. + * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). Since it + * does not make any assumptions about the memory layout of the vector, its performance can be + * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it + * should be used in preference. * * @param vector type. */ -public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class GeneralOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { comparator.attachVector(srcVector); // check vector capacity - Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(), - "Not enough capacity for the target vector. " + - "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity()); + Preconditions.checkArgument( + dstVector.getValueCapacity() >= srcVector.getValueCount(), + "Not enough capacity for the target vector. 
" + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount(), + dstVector.getValueCapacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java index 19817fe76b8ec..ba41bb9e4eac7 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector in-place. - * That is, the sorting is performed by modifying the input vector, - * without creating a new sorted vector. + * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the + * input vector, without creating a new sorted vector. * * @param the vector type. */ @@ -30,6 +28,7 @@ public interface InPlaceVectorSorter { /** * Sort a vector in-place. + * * @param vec the vector to sort. * @param comparator the criteria for sort. */ diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java index 3072717f43123..b8ce3289d2889 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java @@ -14,39 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.stream.IntStream; - import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; /** * Sorter for the indices of a vector. + * * @param vector type. */ public class IndexSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - /** - * Comparator for vector indices. - */ + /** Comparator for vector indices. */ private VectorValueComparator comparator; - /** - * Vector indices to sort. - */ + /** Vector indices to sort. */ private IntVector indices; /** - * Sorts indices, by quick-sort. Suppose the vector is denoted by v. - * After calling this method, the following relations hold: - * v(indices[0]) <= v(indices[1]) <= ... + * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method, + * the following relations hold: v(indices[0]) <= v(indices[1]) <= ... + * * @param vector the vector whose indices need to be sorted. * @param indices the vector for storing the sorted indices. * @param comparator the comparator to sort indices. @@ -100,11 +96,9 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. 
*/ static int choosePivot( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { // we need at least 3 items if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) { return indices.get(low); @@ -149,8 +143,9 @@ static int choosePivot( /** * Partition a range of values in a vector into two parts, with elements in one part smaller than - * elements from the other part. The partition is based on the element indices, so it does - * not modify the underlying vector. + * elements from the other part. The partition is based on the element indices, so it does not + * modify the underlying vector. + * * @param low the lower bound of the range. * @param high the upper bound of the range. * @param indices vector element indices. @@ -159,7 +154,7 @@ static int choosePivot( * @return the index of the split point. */ public static int partition( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { int pivotIndex = choosePivot(low, high, indices, comparator); while (low < high) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java index dc12a5fefdb65..c058636d66d1e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java @@ -14,27 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; -/** - * Insertion sorter. - */ +/** Insertion sorter. */ class InsertionSorter { /** * Sorts the range of a vector by insertion sort. * - * @param vector the vector to be sorted. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param buffer an extra buffer with capacity 1 to hold the current key. + * @param vector the vector to be sorted. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). + * @param buffer an extra buffer with capacity 1 to hold the current key. * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. */ static void insertionSort( V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) { @@ -53,11 +50,11 @@ static void insertionSort( /** * Sorts the range of vector indices by insertion sort. * - * @param indices the vector indices. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). + * @param indices the vector indices. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. 
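
The index sorter above sorts a permutation rather than the data itself, which is how the out-of-place sorters in this patch use it internally. An assumed-usage sketch:

    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.IndexSorter;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class IndexSortSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector data = new IntVector("data", allocator);
            IntVector indices = new IntVector("indices", allocator)) {
          data.allocateNew(3);
          data.set(0, 30);
          data.set(1, 10);
          data.set(2, 20);
          data.setValueCount(3);

          VectorValueComparator<IntVector> comparator =
              DefaultVectorComparators.createDefaultComparator(data);

          // After sorting: data.get(indices.get(0)) <= data.get(indices.get(1)) <= ...
          new IndexSorter<IntVector>().sort(data, indices, comparator);
          for (int i = 0; i < indices.getValueCount(); i++) {
            System.out.println(data.get(indices.get(i))); // 10, 20, 30
          }
        }
      }
    }
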
*/ static void insertionSort( IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java index df96121f1f8f7..ccb7bea4e2bd3 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.IntVector; -/** - * An off heap implementation of stack with int elements. - */ +/** An off heap implementation of stack with int elements. */ class OffHeapIntStack implements AutoCloseable { private static final int INIT_SIZE = 128; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java index 41d6dadc49147..b18e9b35d0895 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java @@ -14,21 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector out-of-place. - * That is, the sorting is performed on a newly-created vector, - * and the original vector is not modified. + * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a + * newly-created vector, and the original vector is not modified. + * * @param the vector type. */ public interface OutOfPlaceVectorSorter { /** * Sort a vector out-of-place. + * * @param inVec the input vector. * @param outVec the output vector, which has the same size as the input vector. * @param comparator the criteria for sort. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java index 0b0c3bd55b271..3fcfa5f8f215c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java @@ -14,17 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.ValueVector; /** - * Stable sorter. It compares values like ordinary comparators. - * However, when values are equal, it breaks ties by the value indices. - * Therefore, sort algorithms using this comparator always produce + * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it + * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce * stable sort results. + * * @param type of the vector. 
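
An assumed-usage sketch of the stable comparator just described: wrapping the default comparator makes an index sort preserve the original order of equal values.

    import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
    import org.apache.arrow.algorithm.sort.IndexSorter;
    import org.apache.arrow.algorithm.sort.StableVectorComparator;
    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    public class StableSortSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector data = new IntVector("data", allocator);
            IntVector indices = new IntVector("indices", allocator)) {
          data.allocateNew(4);
          data.set(0, 5);
          data.set(1, 5);
          data.set(2, 1);
          data.set(3, 5);
          data.setValueCount(4);

          // Ties on value fall back to index order, so the 5s keep order 0, 1, 3.
          VectorValueComparator<IntVector> stable =
              new StableVectorComparator<>(
                  DefaultVectorComparators.createDefaultComparator(data));
          new IndexSorter<IntVector>().sort(data, indices, stable);
          for (int i = 0; i < indices.getValueCount(); i++) {
            System.out.println(indices.get(i)); // 2, 0, 1, 3
          }
        }
      }
    }
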
*/ public class StableVectorComparator extends VectorValueComparator { @@ -33,6 +32,7 @@ public class StableVectorComparator extends VectorValueCo /** * Constructs a stable comparator from a given comparator. + * * @param innerComparator the comparator to convert to stable comparator.. */ public StableVectorComparator(VectorValueComparator innerComparator) { @@ -47,8 +47,9 @@ public void attachVector(V vector) { @Override public void attachVectors(V vector1, V vector2) { - Preconditions.checkArgument(vector1 == vector2, - "Stable comparator only supports comparing values from the same vector"); + Preconditions.checkArgument( + vector1 == vector2, + "Stable comparator only supports comparing values from the same vector"); super.attachVectors(vector1, vector2); innerComparator.attachVectors(vector1, vector2); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java index 863b07c348ef2..8f58dc0dcee0f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -25,12 +24,13 @@ import org.apache.arrow.vector.IntVector; /** - * Default sorter for variable-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity + * O(n*log(n)). + * * @param vector type. */ public class VariableWidthOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); Preconditions.checkArgument( - dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), - "Not enough capacity for the offset buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity()); - long dataSize = srcVector.getOffsetBuffer().getInt( - srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstOffsetBuffer.capacity() + >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), + "Not enough capacity for the offset buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, + dstOffsetBuffer.capacity()); + long dataSize = + srcVector + .getOffsetBuffer() + .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); Preconditions.checkArgument( - dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity()); + dstValueBuffer.capacity() >= dataSize, + "No enough capacity for the data buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + dataSize, + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + int srcOffset = + srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); int valueLength = - srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset; + srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) + - srcOffset; MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcOffset, - dstValueBuffer.memoryAddress() + dstOffset, - valueLength); + srcValueBuffer.memoryAddress() + srcOffset, + dstValueBuffer.memoryAddress() + dstOffset, + valueLength); dstOffset += valueLength; } - dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); + dstOffsetBuffer.setInt( + (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index d2c772ca8a819..0472f04109b1c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -14,54 +14,44 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Compare two values at the given indices in the vectors. - * This is used for vector sorting. + * Compare two values at the given indices in the vectors. This is used for vector sorting. + * * @param type of the vector. */ public abstract class VectorValueComparator { - /** - * The first vector to compare. - */ + /** The first vector to compare. */ protected V vector1; - /** - * The second vector to compare. - */ + /** The second vector to compare. */ protected V vector2; - /** - * Width of the vector value. For variable-length vectors, this value makes no sense. - */ + /** Width of the vector value. For variable-length vectors, this value makes no sense. */ protected int valueWidth; - private boolean checkNullsOnCompare = true; /** - * This value is true by default and re-computed when vectors are attached to the comparator. 
If both vectors cannot - * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited - * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc. + * This value is true by default and re-computed when vectors are attached to the comparator. If + * both vectors cannot contain nulls then this value is {@code false} and calls to {@code + * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up + * comparisons resulting in faster sorts etc. */ public boolean checkNullsOnCompare() { return this.checkNullsOnCompare; } - /** - * Constructor for variable-width vectors. - */ - protected VectorValueComparator() { - - } + /** Constructor for variable-width vectors. */ + protected VectorValueComparator() {} /** * Constructor for fixed-width vectors. + * * @param valueWidth the record width (in bytes). */ protected VectorValueComparator(int valueWidth) { @@ -74,6 +64,7 @@ public int getValueWidth() { /** * Attach both vectors to compare to the same input vector. + * * @param vector the vector to attach. */ public void attachVector(V vector) { @@ -82,6 +73,7 @@ public void attachVector(V vector) { /** * Attach vectors to compare. + * * @param vector1 the first vector to compare. * @param vector2 the second vector to compare. */ @@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) { if (v.getValueCount() == 0) { return true; } - if (! v.getField().isNullable()) { + if (!v.getField().isNullable()) { return false; } return v.getNullCount() > 0; @@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) { /** * Compare two values, given their indices. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public int compare(int index1, int index2) { if (checkNullsOnCompare) { @@ -133,19 +125,19 @@ public int compare(int index1, int index2) { } /** - * Compare two values, given their indices. - * This is a fast path for comparing non-null values, so the caller - * must make sure that values at both indices are not null. + * Compare two values, given their indices. This is a fast path for comparing non-null values, so + * the caller must make sure that values at both indices are not null. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public abstract int compareNotNull(int index1, int index2); /** * Creates a comparator of the same type. + * * @return the newly created comparator. 
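
The contract spelled out above (subclasses supply compareNotNull and createNew, while the base class handles the null checks and short-circuiting) as a hypothetical subclass, not from this patch:

    import org.apache.arrow.algorithm.sort.VectorValueComparator;
    import org.apache.arrow.vector.IntVector;

    /** Hypothetical example: orders non-null ints descending; nulls are handled by the base class. */
    public class DescendingIntComparator extends VectorValueComparator<IntVector> {

      public DescendingIntComparator() {
        super(Integer.BYTES); // fixed-width vector: 4 bytes per value
      }

      @Override
      public int compareNotNull(int index1, int index2) {
        // Both values are known to be non-null here; reverse the natural order.
        return Integer.compare(vector2.get(index2), vector1.get(index1));
      }

      @Override
      public VectorValueComparator<IntVector> createNew() {
        return new DescendingIntComparator();
      }
    }

Once attached to a vector, such a comparator plugs into the sorters and searchers above exactly like the built-in ones.
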
*/ public abstract VectorValueComparator createNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java index ac083b84f1611..537189013a731 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link DeduplicationUtils}. - */ +/** Test cases for {@link DeduplicationUtils}. */ public class TestDeduplicationUtils { private static final int VECTOR_LENGTH = 100; @@ -57,10 +53,11 @@ public void shutdown() { @Test public void testDeduplicateFixedWidth() { try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + IntVector dedupVec = new IntVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() { assertEquals(i, dedupVec.get(i)); } - DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + DeduplicationUtils.populateRunLengths( + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { @@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector 
dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals(VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java index 788213b162870..820cadccae537 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -30,9 +28,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VectorRunDeduplicator}. - */ +/** Test cases for {@link VectorRunDeduplicator}. 
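
The tests above walk DeduplicationUtils through its stages one assertion at a time; condensed into an assumed-usage sketch (names illustrative), run deduplication is a three-step pipeline over a bitmap of run starts:

    import org.apache.arrow.algorithm.deduplicate.DeduplicationUtils;
    import org.apache.arrow.memory.ArrowBuf;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;
    import org.apache.arrow.vector.util.DataSizeRoundingUtil;

    public class RunDedupSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
            IntVector orig = new IntVector("orig", allocator);
            IntVector dedup = new IntVector("dedup", allocator);
            IntVector lengths = new IntVector("lengths", allocator);
            // One indicator bit per element, rounded up to whole bytes.
            ArrowBuf runStarts =
                allocator.buffer(DataSizeRoundingUtil.divideBy8Ceil(6))) {
          orig.allocateNew(6);
          int[] values = {7, 7, 7, 9, 9, 4}; // runs: 7 x3, 9 x2, 4 x1
          for (int i = 0; i < values.length; i++) {
            orig.set(i, values[i]);
          }
          orig.setValueCount(6);
          dedup.allocateNew(6);
          lengths.allocateNew(6);

          // 1. Mark where each new run begins.
          DeduplicationUtils.populateRunStartIndicators(orig, runStarts);
          // 2. Copy one representative value per run: 7, 9, 4.
          DeduplicationUtils.populateDeduplicatedValues(runStarts, orig, dedup);
          // 3. Record each run's length: 3, 2, 1.
          DeduplicationUtils.populateRunLengths(runStarts, lengths, 6);
        }
      }
    }
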
*/ public class TestVectorRunDeduplicator { private static final int VECTOR_LENGTH = 100; @@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() { IntVector dedupVec = new IntVector("deduplicated vec", allocator); IntVector lengthVec = new IntVector("length vec", allocator); VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { + new VectorRunDeduplicator<>(origVec, allocator)) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + VectorRunDeduplicator deduplicator = + new VectorRunDeduplicator<>(origVec, allocator)) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java index 45c47626b720e..bfda86f26883d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -23,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableBasedDictionaryBuilder}. - */ +/** Test cases for {@link HashTableBasedDictionaryBuilder}. 
*/ public class TestHashTableBasedDictionaryBuilder { private BufferAllocator allocator; @@ -52,7 +48,7 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); assertEquals(7, result); assertEquals(7, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); assertNull(dictionary.get(2)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); assertEquals(6, result); assertEquals(6, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - + assertEquals( + "hello", new 
String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); @@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); @@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() { assertEquals(8, dictionary.get(1)); assertEquals(32, dictionary.get(2)); assertEquals(16, dictionary.get(3)); - } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java index 60efbf58bebda..b9646284a015b 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -38,9 +36,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableDictionaryEncoder}. - */ +/** Test cases for {@link HashTableDictionaryEncoder}. 
*/ public class TestHashTableDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -69,8 +65,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -89,7 +85,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, false); + new HashTableDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -98,17 +94,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -119,8 +119,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new 
Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -235,7 +241,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -262,8 +268,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -281,7 +287,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -305,8 +311,8 @@ public void 
testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -327,7 +333,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java index a76aedffa308d..a4641704198cb 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -39,9 +37,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link LinearDictionaryEncoder}. - */ +/** Test cases for {@link LinearDictionaryEncoder}. 
*/ public class TestLinearDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -70,8 +66,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -90,7 +86,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, false); + new LinearDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -99,17 +95,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -120,8 +120,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, 
new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. - assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -237,7 +243,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -263,8 +269,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -282,7 +288,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -306,8 +312,8 @@ public void testEncodeLargeVector() { public void 
testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -328,7 +334,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java index e01c2e7905b46..e783e1f76818c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -40,9 +38,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchDictionaryEncoder}. - */ +/** Test cases for {@link SearchDictionaryEncoder}. 
*/ public class TestSearchDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -71,8 +67,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -91,8 +87,8 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); // perform encoding encodedVector.allocateNew(); @@ -101,17 +97,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -122,8 +122,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // perform encoding encodedVector.allocateNew(); @@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), 
String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
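The contract exercised by this hunk is easy to miss inside the formatting churn: an encoder built with encodeNull = true can only encode a null value if the dictionary itself contains a null entry to map it to; otherwise there is no index to emit. A minimal standalone sketch of that failure mode, using the same Arrow and JUnit APIs the test exercises (the class name, allocator setup, and sample data below are illustrative, not part of this patch):

import static org.junit.jupiter.api.Assertions.assertThrows;

import java.nio.charset.StandardCharsets;
import org.apache.arrow.algorithm.dictionary.HashTableDictionaryEncoder;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;
import org.apache.arrow.vector.VarCharVector;

public class EncodeNullSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
        VarCharVector dictionary = new VarCharVector("dictionary", allocator);
        VarCharVector raw = new VarCharVector("raw", allocator);
        IntVector encoded = new IntVector("encoded", allocator)) {
      // Dictionary with one non-null entry and, crucially, no null slot.
      dictionary.allocateNew();
      dictionary.setSafe(0, "only".getBytes(StandardCharsets.UTF_8));
      dictionary.setValueCount(1);

      // Raw vector whose single value is null.
      raw.allocateNew();
      raw.setNull(0);
      raw.setValueCount(1);
      encoded.allocateNew();

      HashTableDictionaryEncoder<IntVector, VarCharVector> encoder =
          new HashTableDictionaryEncoder<>(dictionary, /*encodeNull=*/ true);
      // No dictionary index exists for null, so encoding fails fast.
      assertThrows(IllegalArgumentException.class, () -> encoder.encode(raw, encoded));
    }
  }
}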
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -241,8 +247,8 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); // verify indices @@ -268,8 +274,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -287,8 +293,8 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -312,8 +318,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -334,8 +340,8 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java index 340b7e67e861f..6c8a57c1a4648 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java +++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchTreeBasedDictionaryBuilder}. - */ +/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */ public class TestSearchTreeBasedDictionaryBuilder { private BufferAllocator allocator; @@ -53,8 +49,8 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); assertTrue(sortedDictionary.isNull(0)); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void 
testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); @@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -184,8 +207,8 @@ public
void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java index 630dd80b44084..e3ab981670e9e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import static org.junit.Assert.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link PartialSumUtils}. - */ +/** Test cases for {@link PartialSumUtils}. */ public class TestPartialSumUtils { private static final int PARTIAL_SUM_VECTOR_LENGTH = 101; @@ -50,7 +47,7 @@ public void shutdown() { @Test public void testToPartialSumVector() { try (IntVector delta = new IntVector("delta", allocator); - IntVector partialSum = new IntVector("partial sum", allocator)) { + IntVector partialSum = new IntVector("partial sum", allocator)) { delta.allocateNew(DELTA_VECTOR_LENGTH); delta.setValueCount(DELTA_VECTOR_LENGTH); @@ -75,7 +72,7 @@ public void testToPartialSumVector() { @Test public void testToDeltaVector() { try (IntVector partialSum = new IntVector("partial sum", allocator); - IntVector delta = new IntVector("delta", allocator)) { + IntVector delta = new IntVector("delta", allocator)) { partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); @@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() { // search and verify results for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) { - assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); + assertEquals( + i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); } } } @@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() { // search and verify results assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase)); assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1)); - assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, - sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); + assertEquals( + -1, + PartialSumUtils.findPositionInPartialSumVector( + partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); } } } diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java index 0e6627eb4822a..4b7c6a9756780 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. */ public class TestVectorRank { private BufferAllocator allocator; @@ -70,7 +66,7 @@ public void testFixedWidthRank() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); assertEquals(6, rank.indexAtRank(vector, comparator, 2)); @@ -103,7 +99,7 @@ public void testVariableWidthRank() { vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); @@ -137,11 +133,13 @@ public void testRankNegative() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); - assertThrows(IllegalArgumentException.class, () -> { - rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java index 9ccecfa84a73a..7ff86a743effd 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -26,7 +25,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -39,9 +37,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link ParallelSearcher}. - */ +/** Test cases for {@link ParallelSearcher}. */ @RunWith(Parameterized.class) public class TestParallelSearcher { @@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.set(i, i); @@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { @@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept @Test public void testParallelStringSearch() throws ExecutionException, InterruptedException { try (VarCharVector targetVector = new VarCharVector("targetVector", allocator); - VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { + VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { targetVector.allocateNew(VECTOR_LENGTH); keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); @@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? 
searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java index 18f4fa0355f4f..39f2f609f7df4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link VectorRangeSearcher}. - */ +/** Test cases for {@link VectorRangeSearcher}. */ @RunWith(Parameterized.class) public class TestVectorRangeSearcher { @@ -78,9 +74,11 @@ public void testGetLowerBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); + int result = + VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); assertEquals(i * ((long) repeat), result); } } @@ -112,7 +110,8 @@ public void testGetLowerBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -141,7 +140,8 @@ public void testGetUpperBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat); assertEquals((i + 1) * repeat - 1, result); @@ -153,7 +153,7 @@ public void testGetUpperBounds() { public void testGetUpperBoundsNegative() { final int maxValue = 100; try (IntVector intVector = new IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", allocator)) { + IntVector negVector = new IntVector("neg vec", allocator)) { // allocate vector intVector.allocateNew(maxValue * repeat); intVector.setValueCount(maxValue * repeat); @@ -175,7 +175,8 @@ public void testGetUpperBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int 
result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -185,11 +186,6 @@ public void testGetUpperBoundsNegative() { @Parameterized.Parameters(name = "repeat = {0}") public static Collection getRepeat() { - return Arrays.asList( - new Object[]{1}, - new Object[]{2}, - new Object[]{5}, - new Object[]{10} - ); + return Arrays.asList(new Object[] {1}, new Object[] {2}, new Object[] {5}, new Object[] {10}); } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index 32fa10bbd98d0..629d900b479b6 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.Assert.assertEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. */ public class TestVectorSearcher { private final int VECTOR_LENGTH = 100; @@ -59,7 +55,7 @@ public void shutdown() { @Test public void testBinarySearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -77,7 +73,7 @@ public void testBinarySearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -91,7 +87,7 @@ public void testBinarySearchInt() { @Test public void testLinearSearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -109,7 +105,7 @@ public void testLinearSearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -123,7 +119,7 @@ public void testLinearSearchInt() { @Test public void testBinarySearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); 
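For readers skimming past the indentation changes, the searcher tests all follow one shape: build a sorted target vector, derive a comparator with DefaultVectorComparators.createDefaultComparator, then ask VectorSearcher for the position of each key, expecting -1 when the key is absent. A compact sketch of that shape on an IntVector (the class name and sample values below are illustrative only):

import org.apache.arrow.algorithm.search.VectorSearcher;
import org.apache.arrow.algorithm.sort.DefaultVectorComparators;
import org.apache.arrow.algorithm.sort.VectorValueComparator;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.IntVector;

public class BinarySearchSketch {
  public static void main(String[] args) {
    try (BufferAllocator allocator = new RootAllocator(Long.MAX_VALUE);
        IntVector values = new IntVector("values", allocator);
        IntVector keys = new IntVector("keys", allocator)) {
      // binarySearch assumes the target vector is sorted ascending.
      values.allocateNew(4);
      values.set(0, 10);
      values.set(1, 20);
      values.set(2, 30);
      values.set(3, 40);
      values.setValueCount(4);

      keys.allocateNew(2);
      keys.set(0, 30); // present in the target
      keys.set(1, 25); // absent from the target
      keys.setValueCount(2);

      VectorValueComparator<IntVector> comparator =
          DefaultVectorComparators.createDefaultComparator(values);
      System.out.println(VectorSearcher.binarySearch(values, comparator, keys, 0)); // 2
      System.out.println(VectorSearcher.binarySearch(values, comparator, keys, 1)); // -1
    }
  }
}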
rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -148,7 +144,7 @@ public void testBinarySearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -162,7 +158,7 @@ public void testBinarySearchVarChar() { @Test public void testLinearSearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -187,7 +183,7 @@ public void testLinearSearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -260,11 +256,11 @@ private ListVector createNegativeListVector() { @Test public void testBinarySearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -281,11 +277,11 @@ public void testBinarySearchList() { @Test public void testLinearSearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java index 9624432924b5a..21f6c0217c376 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link CompositeVectorComparator}. 
- */ +/** Test cases for {@link CompositeVectorComparator}. */ public class TestCompositeVectorComparator { private BufferAllocator allocator; @@ -60,7 +56,7 @@ public void testCompareVectorSchemaRoot() { VarCharVector strVec2 = new VarCharVector("str2", allocator); try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1)); - VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { + VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { intVec1.allocateNew(vectorLength); strVec1.allocateNew(vectorLength * 10, vectorLength); @@ -75,15 +71,15 @@ public void testCompareVectorSchemaRoot() { } VectorValueComparator innerComparator1 = - DefaultVectorComparators.createDefaultComparator(intVec1); + DefaultVectorComparators.createDefaultComparator(intVec1); innerComparator1.attachVectors(intVec1, intVec2); VectorValueComparator innerComparator2 = - DefaultVectorComparators.createDefaultComparator(strVec1); + DefaultVectorComparators.createDefaultComparator(strVec1); innerComparator2.attachVectors(strVec1, strVec2); - VectorValueComparator comparator = new CompositeVectorComparator( - new VectorValueComparator[]{innerComparator1, innerComparator2} - ); + VectorValueComparator comparator = + new CompositeVectorComparator( + new VectorValueComparator[] {innerComparator1, innerComparator2}); // verify results diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index c40854fb17410..f1b3d6fb5aa1d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; @@ -67,9 +66,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link DefaultVectorComparators}. - */ +/** Test cases for {@link DefaultVectorComparators}. 
*/ public class TestDefaultVectorComparator { private BufferAllocator allocator; @@ -111,9 +108,9 @@ private ListVector createListVector(int count) { @Test public void testCompareLists() { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // prefix is smaller @@ -121,11 +118,11 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(11); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // breaking tie by the last element @@ -133,10 +130,10 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(10)) { + ListVector listVector2 = createListVector(10)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // list vector elements equal @@ -149,9 +146,9 @@ public void testCopiedComparatorForLists() { for (int i = 1; i < 10; i++) { for (int j = 1; j < 10; j++) { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); VectorValueComparator copyComparator = comparator.createNew(); @@ -185,7 +182,7 @@ private FixedSizeListVector createFixedSizeListVector(int count) { @Test public void testCompareFixedSizeLists() { try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); @@ -195,7 +192,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(11); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = @@ -207,7 +204,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); @@ -236,7 +233,7 @@ public void testCompareUInt1() { vec.set(9, Byte.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + 
DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -259,14 +256,21 @@ public void testCompareUInt2() { vec.allocateNew(10); ValueVectorDataPopulator.setVector( - vec, null, (char) (Character.MAX_VALUE - 1), Character.MAX_VALUE, (char) 0, (char) 1, - (char) 2, (char) (Character.MAX_VALUE - 1), null, + vec, + null, + (char) (Character.MAX_VALUE - 1), + Character.MAX_VALUE, + (char) 0, + (char) 1, + (char) 2, + (char) (Character.MAX_VALUE - 1), + null, '\u7FFF', // value for the max 16-byte signed integer '\u8000' // value for the min 16-byte signed integer - ); + ); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -301,7 +305,7 @@ public void testCompareUInt4() { vec.set(9, Integer.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -336,7 +340,7 @@ public void testCompareUInt8() { vec.set(9, Long.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -358,7 +362,16 @@ public void testCompareFloat4() { try (Float4Vector vec = new Float4Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1f, 0.0f, 1.0f, null, 1.0f, 2.0f, Float.NaN, Float.NaN, Float.POSITIVE_INFINITY, + vec, + -1.1f, + 0.0f, + 1.0f, + null, + 1.0f, + 2.0f, + Float.NaN, + Float.NaN, + Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -393,7 +406,16 @@ public void testCompareFloat8() { try (Float8Vector vec = new Float8Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1, 0.0, 1.0, null, 1.0, 2.0, Double.NaN, Double.NaN, Double.POSITIVE_INFINITY, + vec, + -1.1, + 0.0, + 1.0, + null, + 1.0, + 2.0, + Double.NaN, + Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -488,8 +510,15 @@ public void testCompareShort() { try (SmallIntVector vec = new SmallIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (short) -1, (short) 0, (short) 1, null, (short) 1, (short) 5, - (short) (Short.MIN_VALUE + 1), Short.MAX_VALUE); + vec, + (short) -1, + (short) 0, + (short) 1, + null, + (short) 1, + (short) 5, + (short) (Short.MIN_VALUE + 1), + Short.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -519,8 +548,15 @@ public void testCompareByte() { try (TinyIntVector vec = new TinyIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (byte) -1, (byte) 0, (byte) 1, null, (byte) 1, (byte) 5, - (byte) (Byte.MIN_VALUE + 1), Byte.MAX_VALUE); + vec, + (byte) -1, + (byte) 0, + (byte) 1, + null, + (byte) 1, + (byte) 5, + (byte) (Byte.MIN_VALUE + 1), + Byte.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -549,8 +585,7 @@ public void testCompareByte() { public void testCompareBit() { try (BitVector vec = new BitVector("", allocator)) { vec.allocateNew(6); - ValueVectorDataPopulator.setVector( - vec, 
1, 2, 0, 0, -1, null); + ValueVectorDataPopulator.setVector(vec, 1, 2, 0, 0, -1, null); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -691,7 +726,8 @@ public void testCompareDecimal256() { @Test public void testCompareDuration() { try (DurationVector vec = - new DurationVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new DurationVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -722,7 +758,8 @@ public void testCompareDuration() { @Test public void testCompareIntervalDay() { try (IntervalDayVector vec = - new IntervalDayVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new IntervalDayVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); vec.set(0, -1, 0); vec.set(1, 0, 0); @@ -755,8 +792,7 @@ public void testCompareIntervalDay() { @Test public void testCompareTimeMicro() { - try (TimeMicroVector vec = - new TimeMicroVector("", allocator)) { + try (TimeMicroVector vec = new TimeMicroVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -816,8 +852,7 @@ public void testCompareTimeMilli() { @Test public void testCompareTimeNano() { - try (TimeNanoVector vec = - new TimeNanoVector("", allocator)) { + try (TimeNanoVector vec = new TimeNanoVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -877,8 +912,7 @@ public void testCompareTimeSec() { @Test public void testCompareTimeStamp() { - try (TimeStampMilliVector vec = - new TimeStampMilliVector("", allocator)) { + try (TimeStampMilliVector vec = new TimeStampMilliVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -909,7 +943,7 @@ public void testCompareTimeStamp() { @Test public void testCompareFixedSizeBinary() { try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1}); @@ -923,7 +957,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 0}); @@ -937,7 +971,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 1}); @@ -953,8 +987,8 @@ public void testCompareFixedSizeBinary() { @Test public void testCompareNull() { - try (NullVector vec = new 
NullVector("test", - FieldType.notNullable(new ArrowType.Int(32, false)))) { + try (NullVector vec = + new NullVector("test", FieldType.notNullable(new ArrowType.Int(32, false)))) { vec.setValueCount(2); VectorValueComparator comparator = @@ -967,12 +1001,14 @@ public void testCompareNull() { @Test public void testCheckNullsOnCompareIsFalseForNonNullableVector() { - try (IntVector vec = new IntVector("not nullable", - FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + try (IntVector vec = + new IntVector( + "not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -981,16 +1017,17 @@ public void testCheckNullsOnCompareIsFalseForNonNullableVector() { @Test public void testCheckNullsOnCompareIsTrueForNullableVector() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "not-nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.checkNullsOnCompare()); @@ -1001,17 +1038,18 @@ public void testCheckNullsOnCompareIsTrueForNullableVector() { @Test public void testCheckNullsOnCompareIsFalseWithNoNulls() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { // no null values ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -1022,13 +1060,14 @@ public void testCheckNullsOnCompareIsFalseWithNoNulls() { @Test public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new 
ArrowType.Int(32, false)), allocator)) { - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec2); assertTrue(comparator.checkNullsOnCompare()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java index 91ef52017df4d..ed5aadfcda04c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthInPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthInPlaceVectorSorter}. */ public class TestFixedWidthInPlaceVectorSorter { private BufferAllocator allocator; @@ -69,7 +65,8 @@ public void testSortInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -90,8 +87,8 @@ public void testSortInt() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. */ @Test public void testSortLargeIncreasingInt() { @@ -107,7 +104,8 @@ public void testSortLargeIncreasingInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -133,7 +131,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -164,16 +163,15 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. 
*/ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("", allocator)) { vec.allocateNew(3); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -216,25 +214,25 @@ public void testChoosePivotAllPermutes() { @Test public void testSortInt2() { try (IntVector vector = new IntVector("vector", allocator)) { - ValueVectorDataPopulator.setVector(vector, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vector, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, + 11, 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); int[] actual = new int[vector.getValueCount()]; - IntStream.range(0, vector.getValueCount()).forEach( - i -> actual[i] = vector.get(i)); + IntStream.range(0, vector.getValueCount()).forEach(i -> actual[i] = vector.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index cc13e7f8ceaee..4096897c20a05 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. */ public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,7 +45,9 @@ public TestFixedWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new FixedWidthOutOfPlaceVectorSorter<>(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new FixedWidthOutOfPlaceVectorSorter<>(); } @Before @@ -82,10 +80,11 @@ public void testSortByte() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); TinyIntVector sortedVec = - (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -129,10 +128,11 @@ public void testSortShort() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SmallIntVector sortedVec = - (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -176,9 +176,11 @@ public void testSortInt() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -222,9 +224,11 @@ public void testSortLong() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + BigIntVector sortedVec = + (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -268,9 +272,11 @@ public void testSortFloat() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float4Vector sortedVec = + (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -314,9 +320,11 @@ public void testSortDouble() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float8Vector sortedVec = (Float8Vector) 
vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float8Vector sortedVec = + (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -343,17 +351,17 @@ public void testSortDouble() { @Test public void testSortInt2() { try (IntVector vec = new IntVector("", allocator)) { - ValueVectorDataPopulator.setVector(vec, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vec, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, 11, + 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -361,13 +369,14 @@ public void testSortInt2() { // verify results int[] actual = new int[sortedVec.getValueCount()]; - IntStream.range(0, sortedVec.getValueCount()).forEach( - i -> actual[i] = sortedVec.get(i)); + IntStream.range(0, sortedVec.getValueCount()).forEach(i -> actual[i] = sortedVec.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java index 80c72b4e21a27..a92cc77818f4a 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting fixed width vectors with random data. - */ +/** Test sorting fixed width vectors with random data. 
*/ @RunWith(Parameterized.class) public class TestFixedWidthSorting> { @@ -70,8 +66,12 @@ public void shutdown() { } public TestFixedWidthSorting( - int length, double nullFraction, boolean inPlace, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + boolean inPlace, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.inPlace = inPlace; @@ -94,7 +94,8 @@ void sortInPlace() { TestSortingUtil.sortArray(array); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); @@ -109,9 +110,11 @@ void sortOutOfPlace() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -123,47 +126,78 @@ void sortOutOfPlace() { } } - @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") + @Parameterized.Parameters( + name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { for (boolean inPlace : new boolean[] {true, false}) { - params.add(new Object[] { - length, nullFrac, inPlace, "TinyIntVector", - (Function) allocator -> new TinyIntVector("vector", allocator), - TestSortingUtil.TINY_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "SmallIntVector", - (Function) allocator -> new SmallIntVector("vector", allocator), - TestSortingUtil.SMALL_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "IntVector", - (Function) allocator -> new IntVector("vector", allocator), - TestSortingUtil.INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "BigIntVector", - (Function) allocator -> new BigIntVector("vector", allocator), - TestSortingUtil.LONG_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float4Vector", - (Function) allocator -> new Float4Vector("vector", allocator), - TestSortingUtil.FLOAT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float8Vector", - (Function) allocator -> new Float8Vector("vector", allocator), - TestSortingUtil.DOUBLE_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "TinyIntVector", + (Function) + allocator -> new TinyIntVector("vector", allocator), + TestSortingUtil.TINY_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "SmallIntVector", + (Function) + allocator -> new SmallIntVector("vector", allocator), + TestSortingUtil.SMALL_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + 
"IntVector", + (Function) + allocator -> new IntVector("vector", allocator), + TestSortingUtil.INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "BigIntVector", + (Function) + allocator -> new BigIntVector("vector", allocator), + TestSortingUtil.LONG_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float4Vector", + (Function) + allocator -> new Float4Vector("vector", allocator), + TestSortingUtil.FLOAT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float8Vector", + (Function) + allocator -> new Float8Vector("vector", allocator), + TestSortingUtil.DOUBLE_GENERATOR + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java index 07a6b545ddaa2..9e796a98ab790 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -30,9 +29,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link GeneralOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link GeneralOutOfPlaceVectorSorter}. */ public class TestGeneralOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,30 +46,33 @@ public void shutdown() { VectorValueComparator getComparator(StructVector structVector) { IntVector child0 = structVector.getChild("column0", IntVector.class); - VectorValueComparator childComp0 = DefaultVectorComparators.createDefaultComparator(child0); + VectorValueComparator childComp0 = + DefaultVectorComparators.createDefaultComparator(child0); childComp0.attachVector(child0); IntVector child1 = structVector.getChild("column1", IntVector.class); - VectorValueComparator childComp1 = DefaultVectorComparators.createDefaultComparator(child1); + VectorValueComparator childComp1 = + DefaultVectorComparators.createDefaultComparator(child1); childComp1.attachVector(child1); - VectorValueComparator comp = new VectorValueComparator() { - - @Override - public int compareNotNull(int index1, int index2) { - // compare values by lexicographic order - int result0 = childComp0.compare(index1, index2); - if (result0 != 0) { - return result0; - } - return childComp1.compare(index1, index2); - } - - @Override - public VectorValueComparator createNew() { - return this; - } - }; + VectorValueComparator comp = + new VectorValueComparator() { + + @Override + public int compareNotNull(int index1, int index2) { + // compare values by lexicographic order + int result0 = childComp0.compare(index1, index2); + if (result0 != 0) { + return result0; + } + return childComp1.compare(index1, index2); + } + + @Override + public VectorValueComparator createNew() { + return this; + } + }; return comp; } @@ -81,17 +81,21 @@ public VectorValueComparator createNew() { public void testSortStructVector() { final int vectorLength = 7; try (StructVector srcVector = StructVector.empty("src struct", allocator); - StructVector dstVector = StructVector.empty("dst struct", allocator)) { + StructVector dstVector = StructVector.empty("dst struct", allocator)) { IntVector srcChild0 = - 
srcVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector srcChild1 = - srcVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild0 = - dstVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild1 = - dstVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); // src struct vector values: // [ @@ -128,15 +132,16 @@ public void testSortStructVector() { // validate results assertEquals(vectorLength, dstVector.getValueCount()); assertEquals( - "[" + - "null, " + - "{\"column1\":3}, " + - "{\"column0\":2,\"column1\":1}, " + - "{\"column0\":3,\"column1\":4}, " + - "{\"column0\":5,\"column1\":4}, " + - "{\"column0\":6,\"column1\":6}, " + - "{\"column0\":7}" + - "]", dstVector.toString()); + "[" + + "null, " + + "{\"column1\":3}, " + + "{\"column0\":2,\"column1\":1}, " + + "{\"column0\":3,\"column1\":4}, " + + "{\"column0\":5,\"column1\":4}, " + + "{\"column0\":6,\"column1\":6}, " + + "{\"column0\":7}" + + "]", + dstVector.toString()); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java index 99e22f8bdcd5c..bc8aac08b61e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link IndexSorter}. - */ +/** Test cases for {@link IndexSorter}. */ public class TestIndexSorter { private BufferAllocator allocator; @@ -56,14 +53,15 @@ public void testIndexSort() { // sort the index IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); IntVector indices = new IntVector("", allocator); indices.setValueCount(10); indexSorter.sort(vec, indices, intComparator); - int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; + int[] expected = new int[] {6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; for (int i = 0; i < expected.length; i++) { assertTrue(!indices.isNull(i)); @@ -74,8 +72,8 @@ public void testIndexSort() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. 
*/ @Test public void testSortLargeIncreasingInt() { @@ -91,7 +89,8 @@ public void testSortLargeIncreasingInt() { // sort the vector IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); try (IntVector indices = new IntVector("", allocator)) { @@ -110,7 +109,7 @@ public void testSortLargeIncreasingInt() { public void testChoosePivot() { final int vectorLength = 100; try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(vectorLength); indices.allocateNew(vectorLength); @@ -122,7 +121,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); indices.setValueCount(vectorLength); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); @@ -147,17 +147,16 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. */ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(); indices.allocateNew(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java index ba9c42913c0d9..3b16ac30d4ff4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertFalse; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link InsertionSorter}. - */ +/** Test cases for {@link InsertionSorter}. 
*/ public class TestInsertionSorter { private BufferAllocator allocator; @@ -49,7 +46,7 @@ public void shutdown() { private void testSortIntVectorRange(int start, int end, int[] expected) { try (IntVector vector = new IntVector("vector", allocator); - IntVector buffer = new IntVector("buffer", allocator)) { + IntVector buffer = new IntVector("buffer", allocator)) { buffer.allocateNew(1); @@ -81,7 +78,7 @@ public void testSortIntVector() { private void testSortIndicesRange(int start, int end, int[] expectedIndices) { try (IntVector vector = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java index 321ca226d7e1d..025576f08e248 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static junit.framework.TestCase.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link OffHeapIntStack}. - */ +/** Test cases for {@link OffHeapIntStack}. */ public class TestOffHeapIntStack { private BufferAllocator allocator; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java index 66b75cbccac3e..4f6a8489c43ea 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java @@ -14,19 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; - import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for out-of-place sorters. - */ +/** Test cases for out-of-place sorters. */ @RunWith(Parameterized.class) public abstract class TestOutOfPlaceVectorSorter { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java index e22b22d4e6757..24b2c752d0863 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -25,7 +24,6 @@ import java.util.Random; import java.util.function.BiConsumer; import java.util.function.Supplier; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; @@ -37,50 +35,59 @@ import org.apache.arrow.vector.testing.RandomDataGenerator; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -/** - * Utilities for sorting related utilities. - */ +/** Utilities for sorting related utilities. */ public class TestSortingUtil { static final Random random = new Random(0); - static final DataGenerator TINY_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.TINY_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class); - - static final DataGenerator SMALL_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.SMALL_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class); - - static final DataGenerator INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class); - - static final DataGenerator LONG_GENERATOR = new DataGenerator<>( - RandomDataGenerator.LONG_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class); - - static final DataGenerator FLOAT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.FLOAT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class); - - static final DataGenerator DOUBLE_GENERATOR = new DataGenerator<>( - RandomDataGenerator.DOUBLE_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class); - - static final DataGenerator STRING_GENERATOR = new DataGenerator<>( - () -> { - int strLength = random.nextInt(20) + 1; - return generateRandomString(strLength); - }, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class); - - private TestSortingUtil() { - } - - /** - * Verify that a vector is equal to an array. 
- */ + static final DataGenerator TINY_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.TINY_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Byte.class); + + static final DataGenerator SMALL_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.SMALL_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Short.class); + + static final DataGenerator INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Integer.class); + + static final DataGenerator LONG_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.LONG_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Long.class); + + static final DataGenerator FLOAT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.FLOAT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Float.class); + + static final DataGenerator DOUBLE_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.DOUBLE_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Double.class); + + static final DataGenerator STRING_GENERATOR = + new DataGenerator<>( + () -> { + int strLength = random.nextInt(20) + 1; + return generateRandomString(strLength); + }, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + String.class); + + private TestSortingUtil() {} + + /** Verify that a vector is equal to an array. */ public static void verifyResults(V vector, U[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { @@ -88,30 +95,28 @@ public static void verifyResults(V vector, U[] expect } } - /** - * Sort an array with null values come first. - */ + /** Sort an array with null values come first. */ public static > void sortArray(U[] array) { - Arrays.sort(array, (a, b) -> { - if (a == null || b == null) { - if (a == null && b == null) { - return 0; - } - - // exactly one is null - if (a == null) { - return -1; - } else { - return 1; - } - } - return a.compareTo(b); - }); + Arrays.sort( + array, + (a, b) -> { + if (a == null || b == null) { + if (a == null && b == null) { + return 0; + } + + // exactly one is null + if (a == null) { + return -1; + } else { + return 1; + } + } + return a.compareTo(b); + }); } - /** - * Generate a string with alphabetic characters only. - */ + /** Generate a string with alphabetic characters only. */ static String generateRandomString(int length) { byte[] str = new byte[length]; final int lower = 'a'; @@ -128,6 +133,7 @@ static String generateRandomString(int length) { /** * Utility to generate data for testing. + * * @param vector type. * @param data element type. */ @@ -139,8 +145,7 @@ static class DataGenerator> { final Class clazz; - DataGenerator( - Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { + DataGenerator(Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { this.dataGenerator = dataGenerator; this.vectorPopulator = vectorPopulator; this.clazz = clazz; @@ -148,6 +153,7 @@ static class DataGenerator> { /** * Populate the vector according to the specified parameters. + * * @param vector the vector to populate. * @param length vector length. * @param nullFraction the fraction of null values. 
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java index f2de5d23fce89..ce15940c1df3d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; @@ -31,9 +29,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link StableVectorComparator}. - */ +/** Test cases for {@link StableVectorComparator}. */ public class TestStableVectorComparator { private BufferAllocator allocator; @@ -62,7 +58,8 @@ public void testCompare() { vec.set(4, "a".getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); stableComparator.attachVector(vec); assertTrue(stableComparator.compare(0, 1) > 0); @@ -95,10 +92,12 @@ public void testStableSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); try (VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + (VarCharVector) + vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -107,23 +106,32 @@ public void testStableSortString() { // verify results // the results are stable - assertEquals("0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); - assertEquals("01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); - assertEquals("0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("abcdefg", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "0", new String(Objects.requireNonNull(sortedVec.get(0)), 
StandardCharsets.UTF_8)); + assertEquals( + "01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "abcdefg", + new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); } } } - /** - * Utility comparator that compares varchars by the first character. - */ + /** Utility comparator that compares varchars by the first character. */ private static class TestVarCharSorter extends VectorValueComparator { @Override diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 2486034f1fa32..b3f2539fa53c2 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. */ public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -44,10 +40,11 @@ public TestVariableWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new VariableWidthOutOfPlaceVectorSorter(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new VariableWidthOutOfPlaceVectorSorter(); } - @Before public void prepare() { allocator = new RootAllocator(1024 * 1024); @@ -79,10 +76,10 @@ public void testSortString() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -96,14 +93,23 @@ public void testSortString() { assertTrue(sortedVec.isNull(0)); assertTrue(sortedVec.isNull(1)); - assertEquals("12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); sortedVec.close(); } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java index 7951c39d550d2..5c37ddf9284e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -28,7 +27,6 @@ import java.util.Comparator; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -41,9 +39,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting variable width vectors with random data. - */ +/** Test sorting variable width vectors with random data. */ @RunWith(Parameterized.class) public class TestVariableWidthSorting> { @@ -72,8 +68,11 @@ public void shutdown() { } public TestVariableWidthSorting( - int length, double nullFraction, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.vectorGenerator = vectorGenerator; @@ -92,9 +91,11 @@ void sortOutOfPlace() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4L); sortedVec.allocateNew(dataSize, vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -112,33 +113,36 @@ public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { - params.add(new Object[]{ - length, nullFrac, "VarCharVector", - (Function) allocator -> new VarCharVector("vector", allocator), - TestSortingUtil.STRING_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + "VarCharVector", + (Function) + allocator -> new VarCharVector("vector", allocator), + TestSortingUtil.STRING_GENERATOR + }); } } return params; } - /** - * Verify results as byte arrays. - */ + /** Verify results as byte arrays. */ public static void verifyResults(V vector, String[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { if (expected[i] == null) { assertTrue(vector.isNull(i)); } else { - assertArrayEquals(((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + ((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); } } } /** - * String comparator with the same behavior as that of - * {@link DefaultVectorComparators.VariableWidthComparator}. + * String comparator with the same behavior as that of {@link + * DefaultVectorComparators.VariableWidthComparator}. 
*/ static class StringComparator implements Comparator { diff --git a/java/dev/checkstyle/checkstyle-spotless.xml b/java/dev/checkstyle/checkstyle-spotless.xml new file mode 100644 index 0000000000000..cbaec1a39bf2c --- /dev/null +++ b/java/dev/checkstyle/checkstyle-spotless.xml @@ -0,0 +1,286 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/java/dev/checkstyle/checkstyle.license b/java/dev/license/asf-java.license similarity index 100% rename from java/dev/checkstyle/checkstyle.license rename to java/dev/license/asf-java.license diff --git a/java/dev/license/asf-xml.license b/java/dev/license/asf-xml.license new file mode 100644 index 0000000000000..a43b97bca8f0f --- /dev/null +++ b/java/dev/license/asf-xml.license @@ -0,0 +1,11 @@ + + \ No newline at end of file diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 72140dd6570d0..8a4043016e770 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -237,7 +237,7 @@ maven-checkstyle-plugin ../dev/checkstyle/checkstyle.xml - ../dev/checkstyle/checkstyle.license + ../dev/license/asf-java.license ../dev/checkstyle/suppressions.xml true UTF-8 diff --git a/java/pom.xml b/java/pom.xml index 9624444cf422d..bcb8b46843f2d 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -92,12 +92,15 @@ 1.11.3 2 + dev/checkstyle/checkstyle.xml true 9+181-r4173-1 2.28.0 5.11.0 5.2.0 3.43.0 + + **/*.java none -Xdoclint:none @@ -701,8 +704,8 @@ maven-checkstyle-plugin **/module-info.java - dev/checkstyle/checkstyle.xml - dev/checkstyle/checkstyle.license + ${checkstyle.config.location} + dev/license/asf-java.license dev/checkstyle/suppressions.xml true UTF-8 @@ -803,6 +806,19 @@ + + + ${spotless.java.excludes} + + + 1.7 + + + + ${maven.multiModuleProjectDirectory}/dev/license/asf-java.license + package + + @@ -929,7 +945,7 @@ Error Prone 2.10.0 is the latest version to support running on JDK 8. With right flags it could be upgraded, - but we choose to keep this unchanged for now. + but we choose to keep this unchanged for now. --> 2.10.0 From 95db23e9e02c7d724269b0a0f241e53ecfbe76be Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 13:41:04 +0900 Subject: [PATCH 259/261] MINOR: [Java] Bump org.jacoco:jacoco-maven-plugin from 0.8.11 to 0.8.12 in /java (#41516) Bumps [org.jacoco:jacoco-maven-plugin](https://github.com/jacoco/jacoco) from 0.8.11 to 0.8.12.
Release notes

Sourced from org.jacoco:jacoco-maven-plugin's releases.

0.8.12

New Features

- JaCoCo now officially supports Java 22 (GitHub #1596).
- Experimental support for Java 23 class files (GitHub #1553).

Fixed bugs

- Branches added by the Kotlin compiler for functions with default arguments and having more than 32 parameters are filtered out during generation of report (GitHub #1556).
- Branch added by the Kotlin compiler version 1.5.0 and above for reading from lateinit property is filtered out during generation of report (GitHub #1568).

Non-functional Changes

- JaCoCo now depends on ASM 9.7 (GitHub #1600).

[Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=org.jacoco:jacoco-maven-plugin&package-manager=maven&previous-version=0.8.11&new-version=0.8.12)

You can trigger a rebase of this PR by commenting `@ dependabot rebase`.
> **Note**
> Automatic rebases have been disabled on this pull request as it has been open for over 30 days.

Authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Signed-off-by: David Li
---
 java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/java/pom.xml b/java/pom.xml
index bcb8b46843f2d..085546573596a 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -342,7 +342,7 @@
         <groupId>org.jacoco</groupId>
         <artifactId>jacoco-maven-plugin</artifactId>
-        <version>0.8.11</version>
+        <version>0.8.12</version>
- *   <li>STRING --> ArrowType.Utf8</li>
- *   <li>INT --> ArrowType.Int(32, signed)</li>
- *   <li>LONG --> ArrowType.Int(64, signed)</li>
- *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
- *   <li>BOOLEAN --> ArrowType.Bool</li>
- *   <li>BYTES --> ArrowType.Binary</li>
- *   <li>ARRAY --> ArrowType.List</li>
- *   <li>MAP --> ArrowType.Map</li>
- *   <li>FIXED --> ArrowType.FixedSizeBinary</li>
- *   <li>RECORD --> ArrowType.Struct</li>
- *   <li>UNION --> ArrowType.Union</li>
- *   <li>ENUM--> ArrowType.Int</li>
- *   <li>DECIMAL --> ArrowType.Decimal</li>
- *   <li>Date --> ArrowType.Date(DateUnit.DAY)</li>
- *   <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
- *   <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)</li>
- *   <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)</li>
- *   <li>TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null)</li>
+ *   <li>STRING --> ArrowType.Utf8
+ *   <li>INT --> ArrowType.Int(32, signed)
+ *   <li>LONG --> ArrowType.Int(64, signed)
+ *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ *   <li>BOOLEAN --> ArrowType.Bool
+ *   <li>BYTES --> ArrowType.Binary
+ *   <li>ARRAY --> ArrowType.List
+ *   <li>MAP --> ArrowType.Map
+ *   <li>FIXED --> ArrowType.FixedSizeBinary
+ *   <li>RECORD --> ArrowType.Struct
+ *   <li>UNION --> ArrowType.Union
+ *   <li>ENUM--> ArrowType.Int
+ *   <li>DECIMAL --> ArrowType.Decimal
+ *   <li>Date --> ArrowType.Date(DateUnit.DAY)
+ *   <li>TimeMillis --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ *   <li>TimeMicros --> ArrowType.Time(TimeUnit.MICROSECOND, 64)
+ *   <li>TimestampMillis --> ArrowType.Timestamp(TimeUnit.MILLISECOND, null)
  • TimestampMicros --> ArrowType.Timestamp(TimeUnit.MICROSECOND, null) * */ - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config) { return createConsumer(schema, name, false, config, null); } - private static Consumer createConsumer(Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { + private static Consumer createConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector vector) { return createConsumer(schema, name, false, config, vector); } @@ -144,7 +143,8 @@ private static Consumer createConsumer(Schema schema, String name, AvroToArrowCo * * @param schema avro schema * @param name arrow field name - * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via field. + * @param consumerVector vector to keep in consumer, if v == null, will create a new vector via + * field. * @return consumer */ private static Consumer createConsumer( @@ -185,7 +185,7 @@ private static Consumer createConsumer( break; case STRING: arrowType = new ArrowType.Utf8(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroStringConsumer((VarCharVector) vector); break; @@ -193,12 +193,18 @@ private static Consumer createConsumer( Map extProps = createExternalProps(schema); if (logicalType instanceof LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); - consumer = new AvroDecimalConsumer.FixedDecimalConsumer((DecimalVector) vector, schema.getFixedSize()); + consumer = + new AvroDecimalConsumer.FixedDecimalConsumer( + (DecimalVector) vector, schema.getFixedSize()); } else { arrowType = new ArrowType.FixedSizeBinary(schema.getFixedSize()); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema, extProps)); + fieldType = + new FieldType( + nullable, arrowType, /*dictionary=*/ null, getMetaData(schema, extProps)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFixedConsumer((FixedSizeBinaryVector) vector, schema.getFixedSize()); } @@ -206,84 +212,85 @@ private static Consumer createConsumer( case INT: if (logicalType instanceof LogicalTypes.Date) { arrowType = new ArrowType.Date(DateUnit.DAY); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDateConsumer((DateDayVector) vector); } else if (logicalType instanceof LogicalTypes.TimeMillis) { arrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMillisConsumer((TimeMilliVector) vector); } else { - arrowType = new ArrowType.Int(32, /*isSigned=*/true); - fieldType 
= new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(32, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroIntConsumer((IntVector) vector); } break; case BOOLEAN: arrowType = new ArrowType.Bool(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBooleanConsumer((BitVector) vector); break; case LONG: if (logicalType instanceof LogicalTypes.TimeMicros) { arrowType = new ArrowType.Time(TimeUnit.MICROSECOND, 64); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimeMicroConsumer((TimeMicroVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMillis) { arrowType = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMillisConsumer((TimeStampMilliVector) vector); } else if (logicalType instanceof LogicalTypes.TimestampMicros) { arrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroTimestampMicrosConsumer((TimeStampMicroVector) vector); } else { - arrowType = new ArrowType.Int(64, /*isSigned=*/true); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + arrowType = new ArrowType.Int(64, /*isSigned=*/ true); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroLongConsumer((BigIntVector) vector); } break; case FLOAT: arrowType = new ArrowType.FloatingPoint(SINGLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroFloatConsumer((Float4Vector) vector); break; case DOUBLE: arrowType = new ArrowType.FloatingPoint(DOUBLE); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDoubleConsumer((Float8Vector) vector); break; case BYTES: if (logicalType instanceof LogicalTypes.Decimal) { arrowType = createDecimalArrowType((LogicalTypes.Decimal) logicalType); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, 
/*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroDecimalConsumer.BytesDecimalConsumer((DecimalVector) vector); } else { arrowType = new ArrowType.Binary(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); vector = createVector(consumerVector, fieldType, name, allocator); consumer = new AvroBytesConsumer((VarBinaryVector) vector); } break; case NULL: arrowType = new ArrowType.Null(); - fieldType = new FieldType(nullable, arrowType, /*dictionary=*/null, getMetaData(schema)); - vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/null); + fieldType = new FieldType(nullable, arrowType, /*dictionary=*/ null, getMetaData(schema)); + vector = fieldType.createNewSingleVector(name, allocator, /*schemaCallBack=*/ null); consumer = new AvroNullConsumer((NullVector) vector); break; default: // no-op, shouldn't get here - throw new UnsupportedOperationException("Can't convert avro type %s to arrow type." + type.getName()); + throw new UnsupportedOperationException( + "Can't convert avro type %s to arrow type." + type.getName()); } return consumer; } @@ -291,15 +298,16 @@ private static Consumer createConsumer( private static ArrowType createDecimalArrowType(LogicalTypes.Decimal logicalType) { final int scale = logicalType.getScale(); final int precision = logicalType.getPrecision(); - Preconditions.checkArgument(precision > 0 && precision <= 38, - "Precision must be in range of 1 to 38"); - Preconditions.checkArgument(scale >= 0 && scale <= 38, - "Scale must be in range of 0 to 38."); - Preconditions.checkArgument(scale <= precision, - "Invalid decimal scale: %s (greater than precision: %s)", scale, precision); + Preconditions.checkArgument( + precision > 0 && precision <= 38, "Precision must be in range of 1 to 38"); + Preconditions.checkArgument(scale >= 0 && scale <= 38, "Scale must be in range of 0 to 38."); + Preconditions.checkArgument( + scale <= precision, + "Invalid decimal scale: %s (greater than precision: %s)", + scale, + precision); return new ArrowType.Decimal(precision, scale, 128); - } private static Consumer createSkipConsumer(Schema schema) { @@ -309,41 +317,46 @@ private static Consumer createSkipConsumer(Schema schema) { switch (type) { case UNION: - List unionDelegates = schema.getTypes().stream().map(s -> - createSkipConsumer(s)).collect(Collectors.toList()); + List unionDelegates = + schema.getTypes().stream().map(s -> createSkipConsumer(s)).collect(Collectors.toList()); skipFunction = decoder -> unionDelegates.get(decoder.readInt()).consume(decoder); break; case ARRAY: Consumer elementDelegate = createSkipConsumer(schema.getElementType()); - skipFunction = decoder -> { - for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { - for (long j = 0; j < i; j++) { - elementDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long i = decoder.skipArray(); i != 0; i = decoder.skipArray()) { + for (long j = 0; j < i; j++) { + elementDelegate.consume(decoder); + } + } + }; break; case MAP: Consumer valueDelegate = createSkipConsumer(schema.getValueType()); - skipFunction = decoder -> { - for (long i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { - for (long j = 0; j < i; j++) { - decoder.skipString(); // Discard key - valueDelegate.consume(decoder); - } - } - }; + skipFunction = + decoder -> { + for (long 
i = decoder.skipMap(); i != 0; i = decoder.skipMap()) { + for (long j = 0; j < i; j++) { + decoder.skipString(); // Discard key + valueDelegate.consume(decoder); + } + } + }; break; case RECORD: - List delegates = schema.getFields().stream().map(field -> - createSkipConsumer(field.schema())).collect(Collectors.toList()); + List delegates = + schema.getFields().stream() + .map(field -> createSkipConsumer(field.schema())) + .collect(Collectors.toList()); - skipFunction = decoder -> { - for (Consumer consumer : delegates) { - consumer.consume(decoder); - } - }; + skipFunction = + decoder -> { + for (Consumer consumer : delegates) { + consumer.consume(decoder); + } + }; break; case ENUM: @@ -374,7 +387,7 @@ private static Consumer createSkipConsumer(Schema schema) { skipFunction = decoder -> decoder.skipBytes(); break; case NULL: - skipFunction = decoder -> { }; + skipFunction = decoder -> {}; break; default: // no-op, shouldn't get here @@ -384,8 +397,7 @@ private static Consumer createSkipConsumer(Schema schema) { return new SkipConsumer(skipFunction); } - static CompositeAvroConsumer createCompositeConsumer( - Schema schema, AvroToArrowConfig config) { + static CompositeAvroConsumer createCompositeConsumer(Schema schema, AvroToArrowConfig config) { List consumers = new ArrayList<>(); final Set skipFieldNames = config.getSkipFieldNames(); @@ -399,7 +411,6 @@ static CompositeAvroConsumer createCompositeConsumer( Consumer consumer = createConsumer(field.schema(), field.name(), config); consumers.add(consumer); } - } } else { Consumer consumer = createConsumer(schema, "", config); @@ -409,9 +420,11 @@ static CompositeAvroConsumer createCompositeConsumer( return new CompositeAvroConsumer(consumers); } - private static FieldVector createVector(FieldVector consumerVector, FieldType fieldType, - String name, BufferAllocator allocator) { - return consumerVector != null ? consumerVector : fieldType.createNewSingleVector(name, allocator, null); + private static FieldVector createVector( + FieldVector consumerVector, FieldType fieldType, String name, BufferAllocator allocator) { + return consumerVector != null + ? 
consumerVector + : fieldType.createNewSingleVector(name, allocator, null); } private static String getDefaultFieldName(ArrowType type) { @@ -424,10 +437,7 @@ private static Field avroSchemaToField(Schema schema, String name, AvroToArrowCo } private static Field avroSchemaToField( - Schema schema, - String name, - AvroToArrowConfig config, - Map externalProps) { + Schema schema, String name, AvroToArrowConfig config, Map externalProps) { final Schema.Type type = schema.getType(); final LogicalType logicalType = schema.getLogicalType(); @@ -441,7 +451,8 @@ private static Field avroSchemaToField( // Union child vector should use default name children.add(avroSchemaToField(childSchema, null, config)); } - fieldType = createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Union(UnionMode.Sparse, null), schema, externalProps); break; case ARRAY: Schema elementSchema = schema.getElementType(); @@ -450,14 +461,18 @@ private static Field avroSchemaToField( break; case MAP: // MapVector internal struct field and key field should be non-nullable - FieldType keyFieldType = new FieldType(/*nullable=*/false, new ArrowType.Utf8(), /*dictionary=*/null); - Field keyField = new Field("key", keyFieldType, /*children=*/null); + FieldType keyFieldType = + new FieldType(/*nullable=*/ false, new ArrowType.Utf8(), /*dictionary=*/ null); + Field keyField = new Field("key", keyFieldType, /*children=*/ null); Field valueField = avroSchemaToField(schema.getValueType(), "value", config); - FieldType structFieldType = new FieldType(false, new ArrowType.Struct(), /*dictionary=*/null); - Field structField = new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); + FieldType structFieldType = + new FieldType(false, new ArrowType.Struct(), /*dictionary=*/ null); + Field structField = + new Field("internal", structFieldType, Arrays.asList(keyField, valueField)); children.add(structField); - fieldType = createFieldType(new ArrowType.Map(/*keysSorted=*/false), schema, externalProps); + fieldType = + createFieldType(new ArrowType.Map(/*keysSorted=*/ false), schema, externalProps); break; case RECORD: final Set skipFieldNames = config.getSkipFieldNames(); @@ -486,8 +501,12 @@ private static Field avroSchemaToField( int enumCount = schema.getEnumSymbols().size(); ArrowType.Int indexType = DictionaryEncoder.getIndexType(enumCount); - fieldType = createFieldType(indexType, schema, externalProps, - new DictionaryEncoding(current, /*ordered=*/false, /*indexType=*/indexType)); + fieldType = + createFieldType( + indexType, + schema, + externalProps, + new DictionaryEncoding(current, /*ordered=*/ false, /*indexType=*/ indexType)); break; case STRING: @@ -509,7 +528,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimeMillis) { intArrowType = new ArrowType.Time(TimeUnit.MILLISECOND, 32); } else { - intArrowType = new ArrowType.Int(32, /*isSigned=*/true); + intArrowType = new ArrowType.Int(32, /*isSigned=*/ true); } fieldType = createFieldType(intArrowType, schema, externalProps); break; @@ -525,7 +544,7 @@ private static Field avroSchemaToField( } else if (logicalType instanceof LogicalTypes.TimestampMicros) { longArrowType = new ArrowType.Timestamp(TimeUnit.MICROSECOND, null); } else { - longArrowType = new ArrowType.Int(64, /*isSigned=*/true); + longArrowType = new ArrowType.Int(64, /*isSigned=*/ true); } fieldType = createFieldType(longArrowType, schema, externalProps); break; @@ -558,8 +577,8 @@ 
private static Field avroSchemaToField( return new Field(name, fieldType, children.size() == 0 ? null : children); } - private static Consumer createArrayConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createArrayConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { ListVector listVector; if (consumerVector == null) { @@ -578,8 +597,8 @@ private static Consumer createArrayConsumer(Schema schema, String name, AvroToAr return new AvroArraysConsumer(listVector, delegate); } - private static Consumer createStructConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createStructConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final Set skipFieldNames = config.getSkipFieldNames(); @@ -601,19 +620,22 @@ private static Consumer createStructConsumer(Schema schema, String name, AvroToA if (skipFieldNames.contains(fullChildName)) { delegate = createSkipConsumer(childField.schema()); } else { - delegate = createConsumer(childField.schema(), fullChildName, config, - structVector.getChildrenFromFields().get(vectorIndex++)); + delegate = + createConsumer( + childField.schema(), + fullChildName, + config, + structVector.getChildrenFromFields().get(vectorIndex++)); } delegates[i] = delegate; } return new AvroStructConsumer(structVector, delegates); - } - private static Consumer createEnumConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createEnumConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { BaseIntVector indexVector; if (consumerVector == null) { @@ -630,16 +652,14 @@ private static Consumer createEnumConsumer(Schema schema, String name, AvroToArr for (int i = 0; i < valueCount; i++) { dictVector.set(i, schema.getEnumSymbols().get(i).getBytes(StandardCharsets.UTF_8)); } - Dictionary dictionary = - new Dictionary(dictVector, indexVector.getField().getDictionary()); + Dictionary dictionary = new Dictionary(dictVector, indexVector.getField().getDictionary()); config.getProvider().put(dictionary); return new AvroEnumConsumer(indexVector); - } - private static Consumer createMapConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createMapConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { MapVector mapVector; if (consumerVector == null) { @@ -653,10 +673,14 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro StructVector structVector = (StructVector) mapVector.getDataVector(); // keys in avro map are always assumed to be strings. 
- Consumer keyConsumer = new AvroStringConsumer( - (VarCharVector) structVector.getChildrenFromFields().get(0)); - Consumer valueConsumer = createConsumer(schema.getValueType(), schema.getValueType().getName(), - config, structVector.getChildrenFromFields().get(1)); + Consumer keyConsumer = + new AvroStringConsumer((VarCharVector) structVector.getChildrenFromFields().get(0)); + Consumer valueConsumer = + createConsumer( + schema.getValueType(), + schema.getValueType().getName(), + config, + structVector.getChildrenFromFields().get(1)); AvroStructConsumer internalConsumer = new AvroStructConsumer(structVector, new Consumer[] {keyConsumer, valueConsumer}); @@ -664,11 +688,12 @@ private static Consumer createMapConsumer(Schema schema, String name, AvroToArro return new AvroMapConsumer(mapVector, internalConsumer); } - private static Consumer createUnionConsumer(Schema schema, String name, AvroToArrowConfig config, - FieldVector consumerVector) { + private static Consumer createUnionConsumer( + Schema schema, String name, AvroToArrowConfig config, FieldVector consumerVector) { final int size = schema.getTypes().size(); - final boolean nullable = schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); + final boolean nullable = + schema.getTypes().stream().anyMatch(t -> t.getType() == Schema.Type.NULL); UnionVector unionVector; if (consumerVector == null) { @@ -695,14 +720,12 @@ private static Consumer createUnionConsumer(Schema schema, String name, AvroToAr /** * Read data from {@link Decoder} and generate a {@link VectorSchemaRoot}. + * * @param schema avro schema * @param decoder avro decoder to read data from */ static VectorSchemaRoot avroToArrowVectors( - Schema schema, - Decoder decoder, - AvroToArrowConfig config) - throws IOException { + Schema schema, Decoder decoder, AvroToArrowConfig config) throws IOException { List vectors = new ArrayList<>(); List consumers = new ArrayList<>(); @@ -726,8 +749,8 @@ static VectorSchemaRoot avroToArrowVectors( } long validConsumerCount = consumers.stream().filter(c -> !c.skippable()).count(); - Preconditions.checkArgument(vectors.size() == validConsumerCount, - "vectors size not equals consumers size."); + Preconditions.checkArgument( + vectors.size() == validConsumerCount, "vectors size not equals consumers size."); List fields = vectors.stream().map(t -> t.getField()).collect(Collectors.toList()); @@ -767,9 +790,7 @@ private static Map getMetaData(Schema schema, Map createExternalProps(Schema schema) { final Map extProps = new HashMap<>(); String doc = schema.getDoc(); @@ -783,8 +804,9 @@ private static Map createExternalProps(Schema schema) { return extProps; } - private static FieldType createFieldType(ArrowType arrowType, Schema schema, Map externalProps) { - return createFieldType(arrowType, schema, externalProps, /*dictionary=*/null); + private static FieldType createFieldType( + ArrowType arrowType, Schema schema, Map externalProps) { + return createFieldType(arrowType, schema, externalProps, /*dictionary=*/ null); } private static FieldType createFieldType( @@ -793,8 +815,8 @@ private static FieldType createFieldType( Map externalProps, DictionaryEncoding dictionary) { - return new FieldType(/*nullable=*/false, arrowType, dictionary, - getMetaData(schema, externalProps)); + return new FieldType( + /*nullable=*/ false, arrowType, dictionary, getMetaData(schema, externalProps)); } private static String convertAliases(Set aliases) { diff --git 
a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java index 9a0cfd97a49a1..4123370061794 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/AvroToArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import java.io.EOFException; @@ -22,7 +21,6 @@ import java.util.Iterator; import java.util.List; import java.util.stream.Collectors; - import org.apache.arrow.adapter.avro.consumers.CompositeAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; @@ -32,9 +30,7 @@ import org.apache.avro.Schema; import org.apache.avro.io.Decoder; -/** - * VectorSchemaRoot iterator for partially converting avro data. - */ +/** VectorSchemaRoot iterator for partially converting avro data. */ public class AvroToArrowVectorIterator implements Iterator, AutoCloseable { public static final int NO_LIMIT_BATCH_SIZE = -1; @@ -53,28 +49,18 @@ public class AvroToArrowVectorIterator implements Iterator, Au private final int targetBatchSize; - /** - * Construct an instance. - */ - private AvroToArrowVectorIterator( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + /** Construct an instance. */ + private AvroToArrowVectorIterator(Decoder decoder, Schema schema, AvroToArrowConfig config) { this.decoder = decoder; this.schema = schema; this.config = config; this.targetBatchSize = config.getTargetBatchSize(); - } - /** - * Create a ArrowVectorIterator to partially convert data. - */ + /** Create a ArrowVectorIterator to partially convert data. */ public static AvroToArrowVectorIterator create( - Decoder decoder, - Schema schema, - AvroToArrowConfig config) { + Decoder decoder, Schema schema, AvroToArrowConfig config) { AvroToArrowVectorIterator iterator = new AvroToArrowVectorIterator(decoder, schema, config); try { @@ -136,9 +122,10 @@ private void load(VectorSchemaRoot root) { ValueVectorUtility.preAllocate(root, targetBatchSize); } - long validConsumerCount = compositeConsumer.getConsumers().stream().filter(c -> - !c.skippable()).count(); - Preconditions.checkArgument(root.getFieldVectors().size() == validConsumerCount, + long validConsumerCount = + compositeConsumer.getConsumers().stream().filter(c -> !c.skippable()).count(); + Preconditions.checkArgument( + root.getFieldVectors().size() == validConsumerCount, "Schema root vectors size not equals to consumers size."); compositeConsumer.resetConsumerVectors(root); @@ -159,9 +146,7 @@ public boolean hasNext() { return nextBatch != null; } - /** - * Gets the next vector. The user is responsible for freeing its resources. - */ + /** Gets the next vector. The user is responsible for freeing its resources. */ @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); @@ -175,9 +160,7 @@ public VectorSchemaRoot next() { return returned; } - /** - * Clean up resources. - */ + /** Clean up resources. 
*/ @Override public void close() { if (nextBatch != null) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java index fd25986c32b95..4555ce7a295f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroArraysConsumer.java @@ -14,25 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.complex.ListVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume array type values from avro decoder. - * Write the data to {@link ListVector}. + * Consumer which consume array type values from avro decoder. Write the data to {@link ListVector}. */ public class AvroArraysConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate a ArrayConsumer. */ public AvroArraysConsumer(ListVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java index bf41828d19f7a..09eb5f3b255d5 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBooleanConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BitVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume boolean type values from avro decoder. - * Write the data to {@link BitVector}. + * Consumer which consume boolean type values from avro decoder. Write the data to {@link + * BitVector}. */ public class AvroBooleanConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroBooleanConsumer. - */ + /** Instantiate a AvroBooleanConsumer. */ public AvroBooleanConsumer(BitVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java index c8370e480608d..86b6cbb13d881 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroBytesConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume bytes type values from avro decoder. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume bytes type values from avro decoder. Write the data to {@link + * VarBinaryVector}. 
*/ public class AvroBytesConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroBytesConsumer. - */ + /** Instantiate a AvroBytesConsumer. */ public AvroBytesConsumer(VarBinaryVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java index 7cc7dd33b15a9..011cbccc09c5b 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroDoubleConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float8Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume double type values from avro decoder. - * Write the data to {@link Float8Vector}. + * Consumer which consume double type values from avro decoder. Write the data to {@link + * Float8Vector}. */ public class AvroDoubleConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDoubleConsumer. - */ + /** Instantiate a AvroDoubleConsumer. */ public AvroDoubleConsumer(Float8Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java index 32a2c85f6fc50..f47988fb962a1 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroEnumConsumer.java @@ -14,24 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BaseIntVector; import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume enum type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume enum type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroEnumConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroEnumConsumer. - */ + /** Instantiate a AvroEnumConsumer. */ public AvroEnumConsumer(BaseIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java index 16b70898fd36a..6b78afd3c95d4 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFixedConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FixedSizeBinaryVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume fixed type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.FixedSizeBinaryVector}. 
+ * Consumer which consume fixed type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.FixedSizeBinaryVector}. */ public class AvroFixedConsumer extends BaseAvroConsumer { private final byte[] reuseBytes; - /** - * Instantiate a AvroFixedConsumer. - */ + /** Instantiate a AvroFixedConsumer. */ public AvroFixedConsumer(FixedSizeBinaryVector vector, int size) { super(vector); reuseBytes = new byte[size]; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java index b09d2881875b6..2c6d4aa5a05f6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroFloatConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.Float4Vector; import org.apache.avro.io.Decoder; /** - * Consumer which consume float type values from avro decoder. - * Write the data to {@link Float4Vector}. + * Consumer which consume float type values from avro decoder. Write the data to {@link + * Float4Vector}. */ public class AvroFloatConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroFloatConsumer. - */ + /** Instantiate a AvroFloatConsumer. */ public AvroFloatConsumer(Float4Vector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java index ae5a2719c5642..22c7b10aa65f7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroIntConsumer.java @@ -14,23 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.IntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume int type values from avro decoder. - * Write the data to {@link IntVector}. + * Consumer which consume int type values from avro decoder. Write the data to {@link IntVector}. */ public class AvroIntConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroIntConsumer. - */ + /** Instantiate a AvroIntConsumer. */ public AvroIntConsumer(IntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java index 4db836acc4586..90c5313417d7c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroLongConsumer.java @@ -14,23 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.BigIntVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume long type values from avro decoder. 
- * Write the data to {@link BigIntVector}. + * Consumer which consume long type values from avro decoder. Write the data to {@link + * BigIntVector}. */ public class AvroLongConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroLongConsumer. - */ + /** Instantiate a AvroLongConsumer. */ public AvroLongConsumer(BigIntVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java index 1ea97e63b61e5..543471533ec01 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroMapConsumer.java @@ -14,27 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume map type values from avro decoder. - * Write the data to {@link MapVector}. + * Consumer which consume map type values from avro decoder. Write the data to {@link MapVector}. */ public class AvroMapConsumer extends BaseAvroConsumer { private final Consumer delegate; - /** - * Instantiate a AvroMapConsumer. - */ + /** Instantiate a AvroMapConsumer. */ public AvroMapConsumer(MapVector vector, Consumer delegate) { super(vector); this.delegate = delegate; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java index 4c7bb8c03bad3..0f80c2b7b2db3 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroNullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.NullVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume null type values from avro decoder. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from avro decoder. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class AvroNullConsumer extends BaseAvroConsumer { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java index 072270aa6c081..164d595e9c6ac 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStringConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.vector.VarCharVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume string type values from avro decoder. - * Write the data to {@link VarCharVector}. + * Consumer which consume string type values from avro decoder. Write the data to {@link + * VarCharVector}. */ public class AvroStringConsumer extends BaseAvroConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a AvroStringConsumer. - */ + /** Instantiate a AvroStringConsumer. */ public AvroStringConsumer(VarCharVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java index a02b1577f9fa8..94c2f611e84b7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroStructConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume nested record type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.StructVector}. + * Consumer which consume nested record type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.StructVector}. */ public class AvroStructConsumer extends BaseAvroConsumer { private final Consumer[] delegates; - /** - * Instantiate a AvroStructConsumer. - */ + /** Instantiate a AvroStructConsumer. */ public AvroStructConsumer(StructVector vector, Consumer[] delegates) { super(vector); this.delegates = delegates; @@ -49,7 +45,6 @@ public void consume(Decoder decoder) throws IOException { } vector.setIndexDefined(currentIndex); currentIndex++; - } @Override diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java index 76287543b0646..5a8e23e62892c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/AvroUnionsConsumer.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.UnionVector; @@ -26,17 +24,15 @@ import org.apache.avro.io.Decoder; /** - * Consumer which consume unions type values from avro decoder. - * Write the data to {@link org.apache.arrow.vector.complex.UnionVector}. + * Consumer which consume unions type values from avro decoder. Write the data to {@link + * org.apache.arrow.vector.complex.UnionVector}. 
*/ public class AvroUnionsConsumer extends BaseAvroConsumer { private Consumer[] delegates; private Types.MinorType[] types; - /** - * Instantiate an AvroUnionConsumer. - */ + /** Instantiate an AvroUnionConsumer. */ public AvroUnionsConsumer(UnionVector vector, Consumer[] delegates, Types.MinorType[] types) { super(vector); @@ -53,7 +49,8 @@ public void consume(Decoder decoder) throws IOException { vector.setType(currentIndex, types[fieldIndex]); // In UnionVector we need to set sub vector writer position before consume a value - // because in the previous iterations we might not have written to the specific union sub vector. + // because in the previous iterations we might not have written to the specific union sub + // vector. delegate.setPosition(currentIndex); delegate.consume(decoder); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java index 66a6cda68401e..9430d83cb4372 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/BaseAvroConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import org.apache.arrow.vector.FieldVector; /** * Base class for non-skippable avro consumers. + * * @param vector type. */ public abstract class BaseAvroConsumer implements Consumer { @@ -30,6 +30,7 @@ public abstract class BaseAvroConsumer implements Consume /** * Constructs a base avro consumer. + * * @param vector the vector to consume. */ public BaseAvroConsumer(T vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java index 97812226180ac..11c1f7712ef19 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/CompositeAvroConsumer.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; import java.util.List; - import org.apache.arrow.util.AutoCloseables; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.avro.io.Decoder; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeAvroConsumer implements AutoCloseable { private final List consumers; @@ -40,18 +35,14 @@ public CompositeAvroConsumer(List consumers) { this.consumers = consumers; } - /** - * Consume decoder data. - */ + /** Consume decoder data. */ public void consume(Decoder decoder) throws IOException { for (Consumer consumer : consumers) { consumer.consume(decoder); } } - /** - * Reset vector of consumers with the given {@link VectorSchemaRoot}. - */ + /** Reset vector of consumers with the given {@link VectorSchemaRoot}. 
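 *
 * <p>Illustrative sketch of the assumed caller-side pattern ({@code consumers},
 * {@code decoder} and {@code root} are created by the surrounding conversion code):
 *
 * <pre>{@code
 * CompositeAvroConsumer composite = new CompositeAvroConsumer(consumers);
 * composite.consume(decoder);           // one Avro record -> one row in every vector
 * composite.resetConsumerVectors(root); // rebind each consumer to the given root
 * }</pre>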
*/ public void resetConsumerVectors(VectorSchemaRoot root) { int index = 0; for (Consumer consumer : consumers) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java index 8eaaf74cff68a..0c07f90bf5f39 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/Consumer.java @@ -14,59 +14,49 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; /** * Interface that is used to consume values from avro decoder. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface Consumer extends AutoCloseable { /** * Consume a specific type value from avro decoder and write it to vector. + * * @param decoder avro decoder to read data * @throws IOException on error */ void consume(Decoder decoder) throws IOException; - /** - * Add null value to vector by making writer position + 1. - */ + /** Add null value to vector by making writer position + 1. */ void addNull(); - /** - * Set the position to write value into vector. - */ + /** Set the position to write value into vector. */ void setPosition(int index); - /** - * Get the vector within the consumer. - */ + /** Get the vector within the consumer. */ FieldVector getVector(); - /** - * Close this consumer when occurs exception to avoid potential leak. - */ + /** Close this consumer when occurs exception to avoid potential leak. */ @Override void close() throws Exception; /** * Reset the vector within consumer for partial read purpose. + * * @return true if reset is successful, false if reset is not needed. */ boolean resetValueVector(T vector); - /** - * Indicates whether the consumer is type of {@link SkipConsumer}. - */ + /** Indicates whether the consumer is type of {@link SkipConsumer}. */ default boolean skippable() { return false; } - } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java index 1ac0a6d71557b..2c104728ce620 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipConsumer.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.arrow.vector.FieldVector; import org.apache.avro.io.Decoder; -/** - * Consumer which skip (throw away) data from the decoder. - */ +/** Consumer which skip (throw away) data from the decoder. 
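 *
 * <p>Sketch, assuming the constructor takes the {@code skipFunction} shown below:
 *
 * <pre>{@code
 * SkipConsumer skipString = new SkipConsumer(decoder -> decoder.skipString());
 * }</pre>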
*/ public class SkipConsumer implements Consumer { private final SkipFunction skipFunction; @@ -39,12 +35,10 @@ public void consume(Decoder decoder) throws IOException { } @Override - public void addNull() { - } + public void addNull() {} @Override - public void setPosition(int index) { - } + public void setPosition(int index) {} @Override public FieldVector getVector() { @@ -52,8 +46,7 @@ public FieldVector getVector() { } @Override - public void close() throws Exception { - } + public void close() throws Exception {} @Override public boolean resetValueVector(FieldVector vector) { diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java index 93fc4a7fede3f..3d72d03104f3c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/SkipFunction.java @@ -14,16 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers; import java.io.IOException; - import org.apache.avro.io.Decoder; -/** - * Adapter function to skip (throw away) data from the decoder. - */ +/** Adapter function to skip (throw away) data from the decoder. */ @FunctionalInterface public interface SkipFunction { void apply(Decoder decoder) throws IOException; diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java index a5c36d88fb76a..0f557297a3cb7 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDateConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.DateDayVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date type values from avro decoder. - * Write the data to {@link DateDayVector}. + * Consumer which consume date type values from avro decoder. Write the data to {@link + * DateDayVector}. */ public class AvroDateConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDateConsumer. - */ + /** Instantiate a AvroDateConsumer. */ public AvroDateConsumer(DateDayVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java index ebe5ca3884e5e..fa1a12ac8a6ed 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroDecimalConsumer.java @@ -14,40 +14,32 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; import java.nio.ByteBuffer; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.DecimalVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume decimal type values from avro decoder. - * Write the data to {@link DecimalVector}. + * Consumer which consume decimal type values from avro decoder. Write the data to {@link + * DecimalVector}. */ public abstract class AvroDecimalConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroDecimalConsumer. - */ + /** Instantiate a AvroDecimalConsumer. */ public AvroDecimalConsumer(DecimalVector vector) { super(vector); } - /** - * Consumer for decimal logical type with original bytes type. - */ + /** Consumer for decimal logical type with original bytes type. */ public static class BytesDecimalConsumer extends AvroDecimalConsumer { private ByteBuffer cacheBuffer; - /** - * Instantiate a BytesDecimalConsumer. - */ + /** Instantiate a BytesDecimalConsumer. */ public BytesDecimalConsumer(DecimalVector vector) { super(vector); } @@ -60,19 +52,14 @@ public void consume(Decoder decoder) throws IOException { cacheBuffer.get(bytes); vector.setBigEndian(currentIndex++, bytes); } - } - /** - * Consumer for decimal logical type with original fixed type. - */ + /** Consumer for decimal logical type with original fixed type. */ public static class FixedDecimalConsumer extends AvroDecimalConsumer { private byte[] reuseBytes; - /** - * Instantiate a FixedDecimalConsumer. - */ + /** Instantiate a FixedDecimalConsumer. */ public FixedDecimalConsumer(DecimalVector vector, int size) { super(vector); Preconditions.checkArgument(size <= 16, "Decimal bytes length should <= 16."); diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java index 89216d4ad1436..60e7d15bf16d6 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMicroConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-micro values from avro decoder. - * Write the data to {@link TimeMicroVector}. + * Consumer which consume date time-micro values from avro decoder. Write the data to {@link + * TimeMicroVector}. */ public class AvroTimeMicroConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMicroConsumer. - */ + /** Instantiate a AvroTimeMicroConsumer. 
*/ public AvroTimeMicroConsumer(TimeMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java index ab5df8d4bc8ac..e0b232e9abd5e 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimeMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date time-millis values from avro decoder. - * Write the data to {@link TimeMilliVector}. + * Consumer which consume date time-millis values from avro decoder. Write the data to {@link + * TimeMilliVector}. */ public class AvroTimeMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimeMilliConsumer. - */ + /** Instantiate a AvroTimeMilliConsumer. */ public AvroTimeMillisConsumer(TimeMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java index 93b39d479ff0e..88acf7b329569 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMicrosConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMicroVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-micro values from avro decoder. - * Write the data to {@link TimeStampMicroVector}. + * Consumer which consume date timestamp-micro values from avro decoder. Write the data to {@link + * TimeStampMicroVector}. */ public class AvroTimestampMicrosConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMicroConsumer. - */ + /** Instantiate a AvroTimestampMicroConsumer. */ public AvroTimestampMicrosConsumer(TimeStampMicroVector vector) { super(vector); } diff --git a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java index 9e651c3959f81..ec50d7902319c 100644 --- a/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java +++ b/java/adapter/avro/src/main/java/org/apache/arrow/adapter/avro/consumers/logical/AvroTimestampMillisConsumer.java @@ -14,24 +14,20 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro.consumers.logical; import java.io.IOException; - import org.apache.arrow.adapter.avro.consumers.BaseAvroConsumer; import org.apache.arrow.vector.TimeStampMilliVector; import org.apache.avro.io.Decoder; /** - * Consumer which consume date timestamp-millis values from avro decoder. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume date timestamp-millis values from avro decoder. Write the data to {@link + * TimeStampMilliVector}. */ public class AvroTimestampMillisConsumer extends BaseAvroConsumer { - /** - * Instantiate a AvroTimestampMillisConsumer. - */ + /** Instantiate a AvroTimestampMillisConsumer. */ public AvroTimestampMillisConsumer(TimeStampMilliVector vector) { super(vector); } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java index 6ee04e33a5ce1..d8eefc715f611 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroLogicalTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static junit.framework.TestCase.assertNull; @@ -27,7 +26,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.util.DateUtility; @@ -43,13 +41,13 @@ public void testTimestampMicros() throws Exception { Schema schema = getSchema("logical/test_timestamp_micros.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMicro(10000), - DateUtility.getLocalDateTimeFromEpochMicro(20000), - DateUtility.getLocalDateTimeFromEpochMicro(30000), - DateUtility.getLocalDateTimeFromEpochMicro(40000), - DateUtility.getLocalDateTimeFromEpochMicro(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMicro(10000), + DateUtility.getLocalDateTimeFromEpochMicro(20000), + DateUtility.getLocalDateTimeFromEpochMicro(30000), + DateUtility.getLocalDateTimeFromEpochMicro(40000), + DateUtility.getLocalDateTimeFromEpochMicro(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -62,13 +60,13 @@ public void testTimestampMillis() throws Exception { Schema schema = getSchema("logical/test_timestamp_millis.avsc"); List data = Arrays.asList(10000L, 20000L, 30000L, 40000L, 50000L); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(10000), - DateUtility.getLocalDateTimeFromEpochMilli(20000), - DateUtility.getLocalDateTimeFromEpochMilli(30000), - DateUtility.getLocalDateTimeFromEpochMilli(40000), - DateUtility.getLocalDateTimeFromEpochMilli(50000) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(10000), + DateUtility.getLocalDateTimeFromEpochMilli(20000), + DateUtility.getLocalDateTimeFromEpochMilli(30000), + DateUtility.getLocalDateTimeFromEpochMilli(40000), + DateUtility.getLocalDateTimeFromEpochMilli(50000)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -93,13 +91,13 @@ public void testTimeMillis() throws Exception { Schema schema = 
getSchema("logical/test_time_millis.avsc"); List data = Arrays.asList(100, 200, 300, 400, 500); - List expected = Arrays.asList( - DateUtility.getLocalDateTimeFromEpochMilli(100), - DateUtility.getLocalDateTimeFromEpochMilli(200), - DateUtility.getLocalDateTimeFromEpochMilli(300), - DateUtility.getLocalDateTimeFromEpochMilli(400), - DateUtility.getLocalDateTimeFromEpochMilli(500) - ); + List expected = + Arrays.asList( + DateUtility.getLocalDateTimeFromEpochMilli(100), + DateUtility.getLocalDateTimeFromEpochMilli(200), + DateUtility.getLocalDateTimeFromEpochMilli(300), + DateUtility.getLocalDateTimeFromEpochMilli(400), + DateUtility.getLocalDateTimeFromEpochMilli(500)); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -137,7 +135,6 @@ public void testDecimalWithOriginalBytes() throws Exception { VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); checkPrimitiveResult(expected, vector); - } @Test @@ -174,10 +171,9 @@ public void testInvalidDecimalPrecision() throws Exception { data.add(buffer); } - IllegalArgumentException e = assertThrows(IllegalArgumentException.class, - () -> writeAndRead(schema, data)); + IllegalArgumentException e = + assertThrows(IllegalArgumentException.class, () -> writeAndRead(schema, data)); assertTrue(e.getMessage().contains("Precision must be in range of 1 to 38")); - } @Test @@ -197,5 +193,4 @@ public void testFailedToCreateDecimalLogicalType() throws Exception { Schema schema3 = getSchema("logical/test_decimal_invalid4.avsc"); assertNull(schema3.getLogicalType()); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java index 54fa26afe3fa8..3335ee5a8f6dc 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroSkipFieldTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.util.HashMap; import java.util.HashSet; import java.util.Set; - import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.types.Types; @@ -41,7 +39,10 @@ public class AvroSkipFieldTest extends AvroTestBase { public void testSkipUnionWithOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_one_field_expected.avsc"); @@ -70,7 +71,10 @@ public void testSkipUnionWithOneField() throws Exception { public void testSkipUnionWithNullableOneField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_nullable_field_expected.avsc"); @@ -99,7 +103,10 @@ public void testSkipUnionWithNullableOneField() throws Exception { public void testSkipUnionWithMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_union_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_union_multi_fields_expected.avsc"); @@ -128,7 +135,10 @@ public void testSkipUnionWithMultiFields() throws Exception { public void testSkipMapField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_map_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_map_expected.avsc"); @@ -160,7 +170,10 @@ public void testSkipMapField() throws Exception { public void testSkipArrayField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_array_before.avsc"); Schema expectedSchema = getSchema("skip/test_skip_array_expected.avsc"); @@ -189,7 +202,10 @@ public void testSkipMultiFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = 
getSchema("test_record.avsc"); Schema expectedSchema = getSchema("skip/test_skip_multi_fields_expected.avsc"); @@ -216,7 +232,10 @@ public void testSkipMultiFields() throws Exception { public void testSkipStringField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_string_expected.avsc"); @@ -229,7 +248,8 @@ public void testSkipStringField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -250,7 +270,10 @@ public void testSkipStringField() throws Exception { public void testSkipBytesField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_bytes_expected.avsc"); @@ -263,7 +286,8 @@ public void testSkipBytesField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -284,7 +308,10 @@ public void testSkipBytesField() throws Exception { public void testSkipFixedField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -297,7 +324,8 @@ public void testSkipFixedField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -318,7 +346,10 @@ public void testSkipFixedField() throws Exception { public void testSkipEnumField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new 
AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base1.avsc"); Schema expectedSchema = getSchema("skip/test_skip_fixed_expected.avsc"); @@ -331,7 +362,8 @@ public void testSkipEnumField() throws Exception { GenericData.Fixed fixed = new GenericData.Fixed(schema.getField("f0").schema()); fixed.bytes(testBytes); record.put(0, fixed); - GenericData.EnumSymbol symbol = new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); + GenericData.EnumSymbol symbol = + new GenericData.EnumSymbol(schema.getField("f1").schema(), "TEST" + i % 2); record.put(1, symbol); record.put(2, "testtest" + i); record.put(3, ByteBuffer.wrap(testBytes)); @@ -352,7 +384,10 @@ public void testSkipEnumField() throws Exception { public void testSkipBooleanField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_boolean_expected.avsc"); @@ -385,7 +420,10 @@ public void testSkipBooleanField() throws Exception { public void testSkipIntField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f1"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_int_expected.avsc"); @@ -418,7 +456,10 @@ public void testSkipIntField() throws Exception { public void testSkipLongField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f2"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_long_expected.avsc"); @@ -451,7 +492,10 @@ public void testSkipLongField() throws Exception { public void testSkipFloatField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f3"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_float_expected.avsc"); @@ -484,7 +528,10 @@ public void testSkipFloatField() throws Exception { public void testSkipDoubleField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f4"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_base2.avsc"); Schema expectedSchema = getSchema("skip/test_skip_double_expected.avsc"); @@ -517,7 
+564,10 @@ public void testSkipDoubleField() throws Exception { public void testSkipRecordField() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("skip/test_skip_record_before.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -547,7 +597,10 @@ public void testSkipRecordField() throws Exception { public void testSkipNestedFields() throws Exception { Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); Schema schema = getSchema("test_nested_record.avsc"); Schema nestedSchema = schema.getFields().get(0).schema(); ArrayList data = new ArrayList<>(); @@ -603,21 +656,26 @@ public void testSkipThirdLevelField() throws Exception { assertEquals(Types.MinorType.STRUCT, root1.getFieldVectors().get(0).getMinorType()); StructVector secondLevelVector = (StructVector) root1.getFieldVectors().get(0); assertEquals(1, secondLevelVector.getChildrenFromFields().size()); - assertEquals(Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondLevelVector.getChildrenFromFields().get(0).getMinorType()); StructVector thirdLevelVector = (StructVector) secondLevelVector.getChildrenFromFields().get(0); assertEquals(3, thirdLevelVector.getChildrenFromFields().size()); // skip third level field and validate Set skipFieldNames = new HashSet<>(); skipFieldNames.add("f0.f0.f0"); - config = new AvroToArrowConfigBuilder(config.getAllocator()).setSkipFieldNames(skipFieldNames).build(); + config = + new AvroToArrowConfigBuilder(config.getAllocator()) + .setSkipFieldNames(skipFieldNames) + .build(); VectorSchemaRoot root2 = writeAndRead(firstLevelSchema, data); assertEquals(1, root2.getFieldVectors().size()); assertEquals(Types.MinorType.STRUCT, root2.getFieldVectors().get(0).getMinorType()); StructVector secondStruct = (StructVector) root2.getFieldVectors().get(0); assertEquals(1, secondStruct.getChildrenFromFields().size()); - assertEquals(Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); + assertEquals( + Types.MinorType.STRUCT, secondStruct.getChildrenFromFields().get(0).getMinorType()); StructVector thirdStruct = (StructVector) secondStruct.getChildrenFromFields().get(0); assertEquals(2, thirdStruct.getChildrenFromFields().size()); diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java index a91bba7b84fb4..534c2cc18c572 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroTestBase.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
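 *
 * A sketch of the configuration pattern the tests above repeat; nested fields
 * are addressed by dot-separated paths (builder calls as in the tests, values
 * illustrative):
 *
 *   Set<String> skipFieldNames = new HashSet<>();
 *   skipFieldNames.add("f0.f0.f0"); // third-level field, as in testSkipThirdLevelField
 *   config =
 *       new AvroToArrowConfigBuilder(config.getAllocator())
 *           .setSkipFieldNames(skipFieldNames)
 *           .build();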
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -29,7 +28,6 @@ import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; @@ -51,8 +49,7 @@ public class AvroTestBase { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); protected AvroToArrowConfig config; @@ -64,18 +61,21 @@ public void init() { public static Schema getSchema(String schemaName) throws Exception { try { - // Attempt to use JDK 9 behavior of getting the module then the resource stream from the module. + // Attempt to use JDK 9 behavior of getting the module then the resource stream from the + // module. // Note that this code is caller-sensitive. Method getModuleMethod = Class.class.getMethod("getModule"); Object module = getModuleMethod.invoke(TestWriteReadAvroRecord.class); - Method getResourceAsStreamFromModule = module.getClass().getMethod("getResourceAsStream", String.class); - try (InputStream is = (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { - return new Schema.Parser() - .parse(is); + Method getResourceAsStreamFromModule = + module.getClass().getMethod("getResourceAsStream", String.class); + try (InputStream is = + (InputStream) getResourceAsStreamFromModule.invoke(module, "/schema/" + schemaName)) { + return new Schema.Parser().parse(is); } } catch (NoSuchMethodException ex) { // Use JDK8 behavior. - try (InputStream is = TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { + try (InputStream is = + TestWriteReadAvroRecord.class.getResourceAsStream("/schema/" + schemaName)) { return new Schema.Parser().parse(is); } } @@ -84,11 +84,11 @@ public static Schema getSchema(String schemaName) throws Exception { protected VectorSchemaRoot writeAndRead(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -157,10 +157,10 @@ protected void checkRecordResult(Schema schema, List data, Vector checkPrimitiveResult(fieldData, root.getFieldVectors().get(i)); } - } - protected void checkNestedRecordResult(Schema schema, List data, VectorSchemaRoot root) { + protected void checkNestedRecordResult( + Schema schema, List data, VectorSchemaRoot root) { assertEquals(data.size(), root.getRowCount()); assertTrue(schema.getFields().size() == 1); @@ -176,10 +176,8 @@ protected void checkNestedRecordResult(Schema schema, List data, checkPrimitiveResult(fieldData, structVector.getChildrenFromFields().get(i)); } - } - // belows are for iterator api protected void checkArrayResult(List> expected, List vectors) { @@ -194,10 +192,12 @@ protected void checkArrayResult(List> expected, List vectors } } - protected void checkRecordResult(Schema schema, List data, List roots) { - roots.forEach(root -> { - 
assertEquals(schema.getFields().size(), root.getFieldVectors().size()); - }); + protected void checkRecordResult( + Schema schema, List data, List roots) { + roots.forEach( + root -> { + assertEquals(schema.getFields().size(), root.getFieldVectors().size()); + }); for (int i = 0; i < schema.getFields().size(); i++) { List fieldData = new ArrayList(); @@ -210,7 +210,6 @@ protected void checkRecordResult(Schema schema, List data, List vectors) { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java index 7f2edb08fdabc..7e73b2d6c7038 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -28,7 +27,6 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.AutoCloseables; @@ -59,11 +57,11 @@ public void init() { private AvroToArrowVectorIterator convert(Schema schema, List data) throws Exception { File dataFile = TMP.newFile(); - BinaryEncoder - encoder = new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); + BinaryEncoder encoder = + new EncoderFactory().directBinaryEncoder(new FileOutputStream(dataFile), null); DatumWriter writer = new GenericDatumWriter(schema); - BinaryDecoder - decoder = new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); + BinaryDecoder decoder = + new DecoderFactory().directBinaryDecoder(new FileInputStream(dataFile), null); for (Object value : data) { writer.write(value, encoder); @@ -107,7 +105,7 @@ public void testNullableStringType() throws Exception { List roots = new ArrayList<>(); List vectors = new ArrayList<>(); - try (AvroToArrowVectorIterator iterator = convert(schema, data);) { + try (AvroToArrowVectorIterator iterator = convert(schema, data); ) { while (iterator.hasNext()) { VectorSchemaRoot root = iterator.next(); FieldVector vector = root.getFieldVectors().get(0); @@ -117,7 +115,6 @@ public void testNullableStringType() throws Exception { } checkPrimitiveResult(expected, vectors); AutoCloseables.close(roots); - } @Test @@ -140,18 +137,18 @@ public void testRecordType() throws Exception { } checkRecordResult(schema, data, roots); AutoCloseables.close(roots); - } @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); List roots = new ArrayList<>(); List vectors = new ArrayList<>(); @@ -172,8 +169,9 @@ public void runLargeNumberOfRows() throws Exception { int x = 0; final int targetRows = 600000; Decoder fakeDecoder = new FakeDecoder(targetRows); - try (AvroToArrowVectorIterator iter = 
AvroToArrow.avroToArrowIterator(schema, fakeDecoder, - new AvroToArrowConfigBuilder(config.getAllocator()).build())) { + try (AvroToArrowVectorIterator iter = + AvroToArrow.avroToArrowIterator( + schema, fakeDecoder, new AvroToArrowConfigBuilder(config.getAllocator()).build())) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); x += root.getRowCount(); @@ -184,9 +182,7 @@ public void runLargeNumberOfRows() throws Exception { assertEquals(targetRows, x); } - /** - * Fake avro decoder to test large data. - */ + /** Fake avro decoder to test large data. */ private static class FakeDecoder extends Decoder { private int numRows; @@ -204,8 +200,7 @@ private void validate() throws EOFException { } @Override - public void readNull() throws IOException { - } + public void readNull() throws IOException {} @Override public boolean readBoolean() throws IOException { @@ -243,9 +238,7 @@ public String readString() throws IOException { } @Override - public void skipString() throws IOException { - - } + public void skipString() throws IOException {} @Override public ByteBuffer readBytes(ByteBuffer old) throws IOException { @@ -253,9 +246,7 @@ public ByteBuffer readBytes(ByteBuffer old) throws IOException { } @Override - public void skipBytes() throws IOException { - - } + public void skipBytes() throws IOException {} @Override public void readFixed(byte[] bytes, int start, int length) throws IOException { @@ -264,9 +255,7 @@ public void readFixed(byte[] bytes, int start, int length) throws IOException { } @Override - public void skipFixed(int length) throws IOException { - - } + public void skipFixed(int length) throws IOException {} @Override public int readEnum() throws IOException { diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java index 26f72173b6b7e..59317c3be033f 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/AvroToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
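 *
 * A sketch of the iterator pattern exercised above ("schema" and "decoder" are
 * assumed inputs); when the roots are not reused, the caller closes each one:
 *
 *   try (AvroToArrowVectorIterator iterator =
 *       AvroToArrow.avroToArrowIterator(
 *           schema, decoder, new AvroToArrowConfigBuilder(config.getAllocator()).build())) {
 *     while (iterator.hasNext()) {
 *       try (VectorSchemaRoot root = iterator.next()) {
 *         // consume one batch of rows
 *       }
 *     }
 *   }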
*/ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VarCharVector; import org.apache.arrow.vector.VectorSchemaRoot; @@ -105,12 +103,13 @@ public void testFixedAttributes() throws Exception { @Test public void testEnumAttributes() throws Exception { Schema schema = getSchema("attrs/test_enum_attrs.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -172,12 +171,13 @@ public void testNestedRecordType() throws Exception { @Test public void testEnumType() throws Exception { Schema schema = getSchema("test_primitive_enum.avsc"); - List data = Arrays.asList( - new GenericData.EnumSymbol(schema, "SPADES"), - new GenericData.EnumSymbol(schema, "HEARTS"), - new GenericData.EnumSymbol(schema, "DIAMONDS"), - new GenericData.EnumSymbol(schema, "CLUBS"), - new GenericData.EnumSymbol(schema, "SPADES")); + List data = + Arrays.asList( + new GenericData.EnumSymbol(schema, "SPADES"), + new GenericData.EnumSymbol(schema, "HEARTS"), + new GenericData.EnumSymbol(schema, "DIAMONDS"), + new GenericData.EnumSymbol(schema, "CLUBS"), + new GenericData.EnumSymbol(schema, "SPADES")); List expectedIndices = Arrays.asList(0, 1, 2, 3, 0); @@ -302,12 +302,13 @@ public void testNullableDoubleType() throws Exception { @Test public void testBytesType() throws Exception { Schema schema = getSchema("test_primitive_bytes.avsc"); - List data = Arrays.asList( - ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), - ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); + List data = + Arrays.asList( + ByteBuffer.wrap("value1".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value2".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value3".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value4".getBytes(StandardCharsets.UTF_8)), + ByteBuffer.wrap("value5".getBytes(StandardCharsets.UTF_8))); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -322,7 +323,8 @@ public void testNullableBytesType() throws Exception { ArrayList data = new ArrayList<>(); for (int i = 0; i < 5; i++) { GenericRecord record = new GenericData.Record(schema); - record.put(0, i % 2 == 0 ? ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); + record.put( + 0, i % 2 == 0 ? 
ByteBuffer.wrap(("test" + i).getBytes(StandardCharsets.UTF_8)) : null); data.add(record); } @@ -359,12 +361,13 @@ public void testNullableBooleanType() throws Exception { @Test public void testArrayType() throws Exception { Schema schema = getSchema("test_array.avsc"); - List> data = Arrays.asList( - Arrays.asList("11", "222", "999"), - Arrays.asList("12222", "2333", "1000"), - Arrays.asList("1rrr", "2ggg"), - Arrays.asList("1vvv", "2bbb"), - Arrays.asList("1fff", "2")); + List> data = + Arrays.asList( + Arrays.asList("11", "222", "999"), + Arrays.asList("12222", "2333", "1000"), + Arrays.asList("1rrr", "2ggg"), + Arrays.asList("1vvv", "2bbb"), + Arrays.asList("1fff", "2")); VectorSchemaRoot root = writeAndRead(schema, data); FieldVector vector = root.getFieldVectors().get(0); @@ -471,5 +474,4 @@ public void testNullableUnionType() throws Exception { checkPrimitiveResult(expected, vector); } - } diff --git a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java index afbddaa6ed87a..a721a1e4cc6a8 100644 --- a/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java +++ b/java/adapter/avro/src/test/java/org/apache/arrow/adapter/avro/TestWriteReadAvroRecord.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.avro; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.io.File; import java.util.ArrayList; import java.util.List; - import org.apache.avro.Schema; import org.apache.avro.file.DataFileReader; import org.apache.avro.file.DataFileWriter; @@ -36,11 +34,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class TestWriteReadAvroRecord { - @ClassRule - public static final TemporaryFolder TMP = new TemporaryFolder(); + @ClassRule public static final TemporaryFolder TMP = new TemporaryFolder(); @Test public void testWriteAndRead() throws Exception { @@ -48,7 +44,7 @@ public void testWriteAndRead() throws Exception { File dataFile = TMP.newFile(); Schema schema = AvroTestBase.getSchema("test.avsc"); - //write data to disk + // write data to disk GenericRecord user1 = new GenericData.Record(schema); user1.put("name", "Alyssa"); user1.put("favorite_number", 256); @@ -65,10 +61,10 @@ public void testWriteAndRead() throws Exception { dataFileWriter.append(user2); dataFileWriter.close(); - //read data from disk + // read data from disk DatumReader datumReader = new GenericDatumReader(schema); - DataFileReader - dataFileReader = new DataFileReader(dataFile, datumReader); + DataFileReader dataFileReader = + new DataFileReader(dataFile, datumReader); List result = new ArrayList<>(); while (dataFileReader.hasNext()) { GenericRecord user = dataFileReader.next(); @@ -86,5 +82,4 @@ public void testWriteAndRead() throws Exception { assertEquals(7, deUser2.get("favorite_number")); assertEquals("red", deUser2.get("favorite_color").toString()); } - } diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 2f2911dd9da95..b444eff56277d 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -24,6 +24,11 @@ (Contrib/Experimental)A library for converting JDBC data to Arrow data. 
http://maven.apache.org + + dev/checkstyle/checkstyle-spotless.xml + none + + diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java index 427c766982f30..d30cf32a04996 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/ArrowVectorIterator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowUtils.isColumnNullable; @@ -23,7 +22,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Iterator; - import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; @@ -35,9 +33,7 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ValueVectorUtility; -/** - * VectorSchemaRoot iterator for partially converting JDBC data. - */ +/** VectorSchemaRoot iterator for partially converting JDBC data. */ public class ArrowVectorIterator implements Iterator, AutoCloseable { private final ResultSet resultSet; @@ -54,13 +50,12 @@ public class ArrowVectorIterator implements Iterator, AutoClos private final int targetBatchSize; - // This is used to track whether the ResultSet has been fully read, and is needed specifically for cases where there + // This is used to track whether the ResultSet has been fully read, and is needed specifically for + // cases where there // is a ResultSet having zero rows (empty): private boolean readComplete = false; - /** - * Construct an instance. - */ + /** Construct an instance. */ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { this.resultSet = resultSet; this.config = config; @@ -73,12 +68,8 @@ private ArrowVectorIterator(ResultSet resultSet, JdbcToArrowConfig config) throw this.nextBatch = config.isReuseVectorSchemaRoot() ? createVectorSchemaRoot() : null; } - /** - * Create a ArrowVectorIterator to partially convert data. - */ - public static ArrowVectorIterator create( - ResultSet resultSet, - JdbcToArrowConfig config) + /** Create a ArrowVectorIterator to partially convert data. 
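 *
 * <p>Sketch of the intended call pattern (assumed usage; {@code resultSet} and
 * {@code config} are supplied by the caller, and each returned root is closed
 * here because it is not reused):
 *
 * <pre>{@code
 * try (ArrowVectorIterator it = ArrowVectorIterator.create(resultSet, config)) {
 *   while (it.hasNext()) {
 *     try (VectorSchemaRoot root = it.next()) {
 *       // consume one batch
 *     }
 *   }
 * }
 * }</pre>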
*/ + public static ArrowVectorIterator create(ResultSet resultSet, JdbcToArrowConfig config) throws SQLException { ArrowVectorIterator iterator = null; try { @@ -142,10 +133,18 @@ private VectorSchemaRoot createVectorSchemaRoot() throws SQLException { private void initialize(VectorSchemaRoot root) throws SQLException { for (int i = 1; i <= consumers.length; i++) { - final JdbcFieldInfo columnFieldInfo = JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); + final JdbcFieldInfo columnFieldInfo = + JdbcToArrowUtils.getJdbcFieldInfoForColumn(rsmd, i, config); ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); - consumers[i - 1] = config.getJdbcConsumerGetter().apply( - arrowType, i, isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), root.getVector(i - 1), config); + consumers[i - 1] = + config + .getJdbcConsumerGetter() + .apply( + arrowType, + i, + isColumnNullable(resultSet.getMetaData(), i, columnFieldInfo), + root.getVector(i - 1), + config); } } @@ -170,16 +169,17 @@ public boolean hasNext() { } /** - * Gets the next vector. - * If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, - * the client is responsible for freeing its resources. + * Gets the next vector. If {@link JdbcToArrowConfig#isReuseVectorSchemaRoot()} is false, the + * client is responsible for freeing its resources. + * * @throws JdbcConsumerException on error from VectorConsumer */ @Override public VectorSchemaRoot next() { Preconditions.checkArgument(hasNext()); try { - VectorSchemaRoot ret = config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); + VectorSchemaRoot ret = + config.isReuseVectorSchemaRoot() ? nextBatch : createVectorSchemaRoot(); load(ret); return ret; } catch (Exception e) { @@ -193,8 +193,9 @@ public VectorSchemaRoot next() { } /** - * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a new VectorSchemaRoot - * is created for each batch, each root must be closed manually by the client code. + * Clean up resources ONLY WHEN THE {@link VectorSchemaRoot} HOLDING EACH BATCH IS REUSED. If a + * new VectorSchemaRoot is created for each batch, each root must be closed manually by the client + * code. */ @Override public void close() { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java index f95133fc7e44c..30e734a68d511 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/Constants.java @@ -14,20 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; -/** - * String constants used for metadata returned on Vectors. - */ +/** String constants used for metadata returned on Vectors. 
*/ public class Constants { - private Constants() { - } + private Constants() {} public static final String SQL_CATALOG_NAME_KEY = "SQL_CATALOG_NAME"; public static final String SQL_SCHEMA_NAME_KEY = "SQL_SCHEMA_NAME"; public static final String SQL_TABLE_NAME_KEY = "SQL_TABLE_NAME"; public static final String SQL_COLUMN_NAME_KEY = "SQL_COLUMN_NAME"; public static final String SQL_TYPE_KEY = "SQL_TYPE"; - } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java index d16964ea14417..6becac0bbc10c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfo.java @@ -14,25 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.util.Preconditions; /** - * This class represents the information about a JDBC ResultSet Field that is - * needed to construct an {@link org.apache.arrow.vector.types.pojo.ArrowType}. - * Currently, this is: + * This class represents the information about a JDBC ResultSet Field that is needed to construct an + * {@link org.apache.arrow.vector.types.pojo.ArrowType}. Currently, this is: + * *
- *   <li>The JDBC {@link java.sql.Types} type.</li>
- *   <li>The nullability.</li>
- *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
- *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types).</li>
+ *   <li>The JDBC {@link java.sql.Types} type.
+ *   <li>The nullability.
+ *   <li>The field's precision (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
+ *   <li>The field's scale (used for {@link java.sql.Types#DECIMAL} and {@link
+ *       java.sql.Types#NUMERIC} types).
 * </ul>
    */ public class JdbcFieldInfo { @@ -45,12 +45,13 @@ public class JdbcFieldInfo { private final int displaySize; /** - * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this constructor - * if the field type is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}; the precision and - * scale will be set to 0. + * Builds a JdbcFieldInfo using only the {@link java.sql.Types} type. Do not use this + * constructor if the field type is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}; the precision and scale will be set to 0. * * @param jdbcType The {@link java.sql.Types} type. - * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link java.sql.Types#NUMERIC}. + * @throws IllegalArgumentException if jdbcType is {@link java.sql.Types#DECIMAL} or {@link + * java.sql.Types#NUMERIC}. */ public JdbcFieldInfo(int jdbcType) { Preconditions.checkArgument( @@ -67,7 +68,8 @@ public JdbcFieldInfo(int jdbcType) { /** * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, precision, and scale. - * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} types. + * Use this constructor for {@link java.sql.Types#DECIMAL} and {@link java.sql.Types#NUMERIC} + * types. * * @param jdbcType The {@link java.sql.Types} type. * @param precision The field's numeric precision. @@ -84,11 +86,13 @@ public JdbcFieldInfo(int jdbcType, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, precision, and scale. + * Builds a JdbcFieldInfo from the {@link java.sql.Types} type, nullability, + * precision, and scale. * * @param jdbcType The {@link java.sql.Types} type. * @param nullability The nullability. Must be one of {@link ResultSetMetaData#columnNoNulls}, - * {@link ResultSetMetaData#columnNullable}, or {@link ResultSetMetaData#columnNullableUnknown}. + * {@link ResultSetMetaData#columnNullable}, or {@link + * ResultSetMetaData#columnNullableUnknown}. * @param precision The field's numeric precision. * @param scale The field's numeric scale. */ @@ -103,7 +107,8 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { } /** - * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} column. + * Builds a JdbcFieldInfo from the corresponding {@link java.sql.ResultSetMetaData} + * column. * * @param rsmd The {@link java.sql.ResultSetMetaData} to get the field information from. * @param column The column to get the field information for (on a 1-based index). 
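To illustrate why the precision/scale constructors above exist, a short sketch (the precision, scale, and nullability values are invented):

```java
import java.sql.ResultSetMetaData;
import java.sql.Types;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;

public class JdbcFieldInfoSketch {
  public static void main(String[] args) {
    // Non-decimal types are fully described by their java.sql.Types constant.
    JdbcFieldInfo varcharInfo = new JdbcFieldInfo(Types.VARCHAR);
    // DECIMAL/NUMERIC also need precision and scale, e.g. an invented DECIMAL(18, 2);
    // the single-argument constructor would reject Types.DECIMAL.
    JdbcFieldInfo decimalInfo = new JdbcFieldInfo(Types.DECIMAL, 18, 2);
    // Nullability can be stated explicitly when the driver cannot report it.
    JdbcFieldInfo nullableDecimal =
        new JdbcFieldInfo(Types.DECIMAL, ResultSetMetaData.columnNullable, 18, 2);
    System.out.println(
        varcharInfo.getJdbcType() + " " + decimalInfo.getPrecision() + " "
            + nullableDecimal.isNullable());
  }
}
```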
@@ -113,10 +118,12 @@ public JdbcFieldInfo(int jdbcType, int nullability, int precision, int scale) { */ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSetMetaData cannot be null."); - Preconditions.checkArgument(column > 0, "ResultSetMetaData columns have indices starting at 1."); + Preconditions.checkArgument( + column > 0, "ResultSetMetaData columns have indices starting at 1."); Preconditions.checkArgument( column <= rsmd.getColumnCount(), - "The index must be within the number of columns (1 to %s, inclusive)", rsmd.getColumnCount()); + "The index must be within the number of columns (1 to %s, inclusive)", + rsmd.getColumnCount()); this.column = column; this.jdbcType = rsmd.getColumnType(column); @@ -128,8 +135,8 @@ public JdbcFieldInfo(ResultSetMetaData rsmd, int column) throws SQLException { } /** - * Builds a JdbcFieldInfo from the corresponding row from a {@link java.sql.DatabaseMetaData#getColumns} - * ResultSet. + * Builds a JdbcFieldInfo from the corresponding row from a {@link + * java.sql.DatabaseMetaData#getColumns} ResultSet. * * @param rs The {@link java.sql.ResultSet} to get the field information from. * @throws SQLException If the column information cannot be retrieved. @@ -144,51 +151,42 @@ public JdbcFieldInfo(ResultSet rs) throws SQLException { this.displaySize = rs.getInt("CHAR_OCTET_LENGTH"); } - /** - * The {@link java.sql.Types} type. - */ + /** The {@link java.sql.Types} type. */ public int getJdbcType() { return jdbcType; } - /** - * The nullability. - */ + /** The nullability. */ public int isNullable() { return nullability; } /** - * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric precision, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} + * types. */ public int getPrecision() { return precision; } /** - * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. + * The numeric scale, for {@link java.sql.Types#NUMERIC} and {@link java.sql.Types#DECIMAL} types. */ public int getScale() { return scale; } - /** - * The column index for query column. - */ + /** The column index for query column. */ public int getColumn() { return column; } - /** - * The type name as reported by the database. - */ + /** The type name as reported by the database. */ public String getTypeName() { return typeName; } - /** - * The max number of characters for the column. - */ + /** The max number of characters for the column. */ public int getDisplaySize() { return displaySize; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java index 2dfc0658cb8d1..fd4721bcd9c4e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinder.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.sql.PreparedStatement; import java.sql.SQLException; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.VectorSchemaRoot; @@ -29,8 +27,8 @@ /** * A binder binds JDBC prepared statement parameters to rows of Arrow data from a VectorSchemaRoot. * - * Each row of the VectorSchemaRoot will be bound to the configured parameters of the PreparedStatement. - * One row of data is bound at a time. + *

    Each row of the VectorSchemaRoot will be bound to the configured parameters of the + * PreparedStatement. One row of data is bound at a time. */ public class JdbcParameterBinder { private final PreparedStatement statement; @@ -44,8 +42,10 @@ public class JdbcParameterBinder { * * @param statement The statement to bind parameters to. * @param root The VectorSchemaRoot to pull data from. - * @param binders Column binders to translate from Arrow data to JDBC parameters, one per parameter. - * @param parameterIndices For each binder in binders, the index of the parameter to bind to. + * @param binders Column binders to translate from Arrow data to JDBC parameters, one per + * parameter. + * @param parameterIndices For each binder in binders, the index of the parameter to bind + * to. */ private JdbcParameterBinder( final PreparedStatement statement, @@ -55,7 +55,8 @@ private JdbcParameterBinder( Preconditions.checkArgument( binders.length == parameterIndices.length, "Number of column binders (%s) must equal number of parameter indices (%s)", - binders.length, parameterIndices.length); + binders.length, + parameterIndices.length); this.statement = statement; this.root = root; this.binders = binders; @@ -66,9 +67,10 @@ private JdbcParameterBinder( /** * Initialize a binder with a builder. * - * @param statement The statement to bind to. The binder does not maintain ownership of the statement. - * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain ownership - * of the vector schema root. + * @param statement The statement to bind to. The binder does not maintain ownership of the + * statement. + * @param root The {@link VectorSchemaRoot} to pull data from. The binder does not maintain + * ownership of the vector schema root. */ public static Builder builder(final PreparedStatement statement, final VectorSchemaRoot root) { return new Builder(statement, root); @@ -82,8 +84,8 @@ public void reset() { /** * Bind the next row of data to the parameters of the statement. * - * After this, the application should call the desired method on the prepared statement, - * such as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. + *

    After this, the application should call the desired method on the prepared statement, such + * as {@link PreparedStatement#executeUpdate()}, or {@link PreparedStatement#addBatch()}. * * @return true if a row was bound, false if rows were exhausted */ @@ -99,9 +101,7 @@ public boolean next() throws SQLException { return true; } - /** - * A builder for a {@link JdbcParameterBinder}. - */ + /** A builder for a {@link JdbcParameterBinder}. */ public static class Builder { private final PreparedStatement statement; private final VectorSchemaRoot root; @@ -123,9 +123,7 @@ public Builder bindAll() { /** Bind the given parameter to the given column using the default binder. */ public Builder bind(int parameterIndex, int columnIndex) { - return bind( - parameterIndex, - ColumnBinder.forVector(root.getVector(columnIndex))); + return bind(parameterIndex, ColumnBinder.forVector(root.getVector(columnIndex))); } /** Bind the given parameter using the given binder. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java index 246451b5b22f9..493e53056f945 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrow.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -29,44 +27,32 @@ * *

This utility uses the following data mapping to map JDBC/SQL data types to Arrow data types. - *

    CHAR --> ArrowType.Utf8 - * NCHAR --> ArrowType.Utf8 - * VARCHAR --> ArrowType.Utf8 - * NVARCHAR --> ArrowType.Utf8 - * LONGVARCHAR --> ArrowType.Utf8 - * LONGNVARCHAR --> ArrowType.Utf8 - * NUMERIC --> ArrowType.Decimal(precision, scale) - * DECIMAL --> ArrowType.Decimal(precision, scale) - * BIT --> ArrowType.Bool - * TINYINT --> ArrowType.Int(8, signed) - * SMALLINT --> ArrowType.Int(16, signed) - * INTEGER --> ArrowType.Int(32, signed) - * BIGINT --> ArrowType.Int(64, signed) - * REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) - * DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) - * BINARY --> ArrowType.Binary - * VARBINARY --> ArrowType.Binary - * LONGVARBINARY --> ArrowType.Binary - * DATE --> ArrowType.Date(DateUnit.MILLISECOND) - * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) - * TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) - * CLOB --> ArrowType.Utf8 - * BLOB --> ArrowType.Binary + *

    CHAR --> ArrowType.Utf8 NCHAR --> ArrowType.Utf8 VARCHAR --> ArrowType.Utf8 NVARCHAR --> + * ArrowType.Utf8 LONGVARCHAR --> ArrowType.Utf8 LONGNVARCHAR --> ArrowType.Utf8 NUMERIC --> + * ArrowType.Decimal(precision, scale) DECIMAL --> ArrowType.Decimal(precision, scale) BIT --> + * ArrowType.Bool TINYINT --> ArrowType.Int(8, signed) SMALLINT --> ArrowType.Int(16, signed) + * INTEGER --> ArrowType.Int(32, signed) BIGINT --> ArrowType.Int(64, signed) REAL --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) FLOAT --> + * ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE) DOUBLE --> + * ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE) BINARY --> ArrowType.Binary VARBINARY --> + * ArrowType.Binary LONGVARBINARY --> ArrowType.Binary DATE --> ArrowType.Date(DateUnit.MILLISECOND) + * TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32) TIMESTAMP --> + * ArrowType.Timestamp(TimeUnit.MILLISECOND, timezone=null) CLOB --> ArrowType.Utf8 BLOB --> + * ArrowType.Binary * * @since 0.10.0 */ public class JdbcToArrow { /*----------------------------------------------------------------* - | | - | Partial Convert API | - | | - *----------------------------------------------------------------*/ + | | + | Partial Convert API | + | | + *----------------------------------------------------------------*/ /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note here uses the default targetBatchSize = 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note here uses the default targetBatchSize = 1024. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator @@ -74,28 +60,25 @@ public class JdbcToArrow { * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - BufferAllocator allocator) - throws SQLException, IOException { + ResultSet resultSet, BufferAllocator allocator) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = - new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); + JdbcToArrowConfig config = new JdbcToArrowConfig(allocator, JdbcToArrowUtils.getUtcCalendar()); return sqlToArrowVectorIterator(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. - * Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value 1024. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. Note if not specify {@link JdbcToArrowConfig#targetBatchSize}, will use default value + * 1024. + * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. + * @param config Configuration of the conversion from JDBC to Arrow. 
* @return Arrow Data Objects {@link ArrowVectorIterator} * @throws SQLException on error */ public static ArrowVectorIterator sqlToArrowVectorIterator( - ResultSet resultSet, - JdbcToArrowConfig config) - throws SQLException, IOException { + ResultSet resultSet, JdbcToArrowConfig config) throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); return ArrowVectorIterator.create(resultSet, config); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java index 68851f4a98bc9..1bfcfc8fe00aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import java.math.RoundingMode; import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; @@ -30,25 +28,23 @@ /** * This class configures the JDBC-to-Arrow conversion process. - *
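A sketch of the configured conversion path just described; the batch size and root reuse are arbitrary choices to show the available knobs, and `rs`/`allocator` are assumed to be open already:

```java
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class ConfiguredConversionSketch {
  static void consume(ResultSet rs, BufferAllocator allocator) throws Exception {
    JdbcToArrowConfig config =
        new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
            .setTargetBatchSize(4096) // arbitrary; the default is 1024
            .setReuseVectorSchemaRoot(true) // one root, refilled on every next()
            .build();
    try (ArrowVectorIterator it = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
      while (it.hasNext()) {
        VectorSchemaRoot root = it.next(); // reused: do not close it per batch
        System.out.println(root.getRowCount());
      }
    }
  }
}
```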

    - * The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, - * and the calendar is used to define the time zone of any - * {@link org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} - * fields that are created during the conversion. Neither field may be null. - *

- *

    - * If the includeMetadata flag is set, the Arrow field metadata will contain information - * from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the - * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding - * {@link org.apache.arrow.vector.FieldVector}. - *

- *

    - * If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the corresponding - * {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, the sub-type - * information cannot be retrieved from all JDBC implementations (H2 for example, returns - * {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The column index - * or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the conversion. - *

+ * + *

    The allocator is used to construct the {@link org.apache.arrow.vector.VectorSchemaRoot}, and + * the calendar is used to define the time zone of any {@link + * org.apache.arrow.vector.types.pojo.ArrowType.Timestamp} fields that are created during the + * conversion. Neither field may be null. + * + *

    If the includeMetadata flag is set, the Arrow field metadata will contain + * information from the corresponding {@link java.sql.ResultSetMetaData} that was used to create the + * {@link org.apache.arrow.vector.types.pojo.FieldType} of the corresponding {@link + * org.apache.arrow.vector.FieldVector}. + * + *

    If there are any {@link java.sql.Types#ARRAY} fields in the {@link java.sql.ResultSet}, the + * corresponding {@link JdbcFieldInfo} for the array's contents must be defined here. Unfortunately, + * the sub-type information cannot be retrieved from all JDBC implementations (H2 for example, + * returns {@link java.sql.Types#NULL} for the array sub-type), so it must be configured here. The + * column index or name can be used to map to a {@link JdbcFieldInfo}, and that will be used for the + * conversion. */ public final class JdbcToArrowConfig { @@ -66,14 +62,12 @@ public final class JdbcToArrowConfig { private final Map> columnMetadataByColumnIndex; private final RoundingMode bigDecimalRoundingMode; /** - * The maximum rowCount to read each time when partially convert data. - * Default value is 1024 and -1 means disable partial read. - * default is -1 which means disable partial read. - * Note that this flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} - * 1) if targetBatchSize != -1, it will convert full data into multiple vectors - * with valueCount no more than targetBatchSize. - * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link ArrowVectorIterator} - *

    + * The maximum rowCount to read each time when partially convert data. Default value is 1024 and + * -1 means disable partial read. default is -1 which means disable partial read. Note that this + * flag only useful for {@link JdbcToArrow#sqlToArrowVectorIterator} 1) if targetBatchSize != -1, + * it will convert full data into multiple vectors with valueCount no more than targetBatchSize. + * 2) if targetBatchSize == -1, it will convert full data into a single vector in {@link + * ArrowVectorIterator} */ private final int targetBatchSize; @@ -81,81 +75,100 @@ public final class JdbcToArrowConfig { private final JdbcConsumerFactory jdbcConsumerGetter; /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. - * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. */ JdbcToArrowConfig(BufferAllocator allocator, Calendar calendar) { - this(allocator, calendar, + this( + allocator, + calendar, /* include metadata */ false, /* reuse vector schema root */ false, /* array sub-types by column index */ null, /* array sub-types by column name */ null, - DEFAULT_TARGET_BATCH_SIZE, null, null); + DEFAULT_TARGET_BATCH_SIZE, + null, + null); } JdbcToArrowConfig( - BufferAllocator allocator, - Calendar calendar, - boolean includeMetadata, - boolean reuseVectorSchemaRoot, - Map arraySubTypesByColumnIndex, - Map arraySubTypesByColumnName, - int targetBatchSize, - Function jdbcToArrowTypeConverter) { - this(allocator, calendar, includeMetadata, reuseVectorSchemaRoot, arraySubTypesByColumnIndex, - arraySubTypesByColumnName, targetBatchSize, jdbcToArrowTypeConverter, null); + BufferAllocator allocator, + Calendar calendar, + boolean includeMetadata, + boolean reuseVectorSchemaRoot, + Map arraySubTypesByColumnIndex, + Map arraySubTypesByColumnName, + int targetBatchSize, + Function jdbcToArrowTypeConverter) { + this( + allocator, + calendar, + includeMetadata, + reuseVectorSchemaRoot, + arraySubTypesByColumnIndex, + arraySubTypesByColumnName, + targetBatchSize, + jdbcToArrowTypeConverter, + null); } /** - * Constructs a new configuration from the provided allocator and calendar. The allocator - * is used when constructing the Arrow vectors from the ResultSet, and the calendar is used to define - * Arrow Timestamp fields, and to read time-based fields from the JDBC ResultSet. + * Constructs a new configuration from the provided allocator and calendar. The allocator + * is used when constructing the Arrow vectors from the ResultSet, and the calendar is + * used to define Arrow Timestamp fields, and to read time-based fields from the JDBC + * ResultSet. * - * @param allocator The memory allocator to construct the Arrow vectors with. 
- * @param calendar The calendar to use when constructing Timestamp fields and reading time-based results. - * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field metadata. + * @param allocator The memory allocator to construct the Arrow vectors with. + * @param calendar The calendar to use when constructing Timestamp fields and reading time-based + * results. + * @param includeMetadata Whether to include JDBC field metadata in the Arrow Schema Field + * metadata. * @param reuseVectorSchemaRoot Whether to reuse the vector schema root for each data load. * @param arraySubTypesByColumnIndex The type of the JDBC array at the column index (1-based). - * @param arraySubTypesByColumnName The type of the JDBC array at the column name. - * @param targetBatchSize The target batch size to be used in preallocation of the resulting vectors. - * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow type. If set to null, - * the default mapping will be used, which is defined as: - *
- * <ul>
- *   <li>CHAR --> ArrowType.Utf8</li>
- *   <li>NCHAR --> ArrowType.Utf8</li>
- *   <li>VARCHAR --> ArrowType.Utf8</li>
- *   <li>NVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGVARCHAR --> ArrowType.Utf8</li>
- *   <li>LONGNVARCHAR --> ArrowType.Utf8</li>
- *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)</li>
- *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)</li>
- *   <li>BIT --> ArrowType.Bool</li>
- *   <li>TINYINT --> ArrowType.Int(8, signed)</li>
- *   <li>SMALLINT --> ArrowType.Int(16, signed)</li>
- *   <li>INTEGER --> ArrowType.Int(32, signed)</li>
- *   <li>BIGINT --> ArrowType.Int(64, signed)</li>
- *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)</li>
- *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)</li>
- *   <li>BINARY --> ArrowType.Binary</li>
- *   <li>VARBINARY --> ArrowType.Binary</li>
- *   <li>LONGVARBINARY --> ArrowType.Binary</li>
- *   <li>DATE --> ArrowType.Date(DateUnit.DAY)</li>
- *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)</li>
- *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)</li>
- *   <li>CLOB --> ArrowType.Utf8</li>
- *   <li>BLOB --> ArrowType.Binary</li>
- *   <li>ARRAY --> ArrowType.List</li>
- *   <li>STRUCT --> ArrowType.Struct</li>
- *   <li>NULL --> ArrowType.Null</li>
- * </ul>
    - * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal from a - * ResultSet having a scale which does not match that of the target vector. Use null - * (default value) to require strict scale matching. + * @param arraySubTypesByColumnName The type of the JDBC array at the column name. + * @param targetBatchSize The target batch size to be used in preallocation of the resulting + * vectors. + * @param jdbcToArrowTypeConverter The function that maps JDBC field type information to arrow + * type. If set to null, the default mapping will be used, which is defined as: + *
+ * <ul>
+ *   <li>CHAR --> ArrowType.Utf8
+ *   <li>NCHAR --> ArrowType.Utf8
+ *   <li>VARCHAR --> ArrowType.Utf8
+ *   <li>NVARCHAR --> ArrowType.Utf8
+ *   <li>LONGVARCHAR --> ArrowType.Utf8
+ *   <li>LONGNVARCHAR --> ArrowType.Utf8
+ *   <li>NUMERIC --> ArrowType.Decimal(precision, scale)
+ *   <li>DECIMAL --> ArrowType.Decimal(precision, scale)
+ *   <li>BIT --> ArrowType.Bool
+ *   <li>TINYINT --> ArrowType.Int(8, signed)
+ *   <li>SMALLINT --> ArrowType.Int(16, signed)
+ *   <li>INTEGER --> ArrowType.Int(32, signed)
+ *   <li>BIGINT --> ArrowType.Int(64, signed)
+ *   <li>REAL --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>FLOAT --> ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)
+ *   <li>DOUBLE --> ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)
+ *   <li>BINARY --> ArrowType.Binary
+ *   <li>VARBINARY --> ArrowType.Binary
+ *   <li>LONGVARBINARY --> ArrowType.Binary
+ *   <li>DATE --> ArrowType.Date(DateUnit.DAY)
+ *   <li>TIME --> ArrowType.Time(TimeUnit.MILLISECOND, 32)
+ *   <li>TIMESTAMP --> ArrowType.Timestamp(TimeUnit.MILLISECOND, calendar timezone)
+ *   <li>CLOB --> ArrowType.Utf8
+ *   <li>BLOB --> ArrowType.Binary
+ *   <li>ARRAY --> ArrowType.List
+ *   <li>STRUCT --> ArrowType.Struct
+ *   <li>NULL --> ArrowType.Null
+ * </ul>
    + * + * @param bigDecimalRoundingMode The java.math.RoundingMode to be used in coercion of a BigDecimal + * from a ResultSet having a scale which does not match that of the target vector. Use null + * (default value) to require strict scale matching. */ JdbcToArrowConfig( BufferAllocator allocator, @@ -245,16 +258,19 @@ public final class JdbcToArrowConfig { this.bigDecimalRoundingMode = bigDecimalRoundingMode; // set up type converter - this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter : - (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); + this.jdbcToArrowTypeConverter = + jdbcToArrowTypeConverter != null + ? jdbcToArrowTypeConverter + : (jdbcFieldInfo) -> JdbcToArrowUtils.getArrowTypeFromJdbcType(jdbcFieldInfo, calendar); - this.jdbcConsumerGetter = jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; + this.jdbcConsumerGetter = + jdbcConsumerGetter != null ? jdbcConsumerGetter : JdbcToArrowUtils::getConsumer; } /** - * The calendar to use when defining Arrow Timestamp fields - * and retrieving {@link java.sql.Date}, {@link java.sql.Time}, or {@link java.sql.Timestamp} - * data types from the {@link java.sql.ResultSet}, or null if not converting. + * The calendar to use when defining Arrow Timestamp fields and retrieving {@link java.sql.Date}, + * {@link java.sql.Time}, or {@link java.sql.Timestamp} data types from the {@link + * java.sql.ResultSet}, or null if not converting. * * @return the calendar. */ @@ -280,30 +296,22 @@ public boolean shouldIncludeMetadata() { return includeMetadata; } - /** - * Get the target batch size for partial read. - */ + /** Get the target batch size for partial read. */ public int getTargetBatchSize() { return targetBatchSize; } - /** - * Get whether it is allowed to reuse the vector schema root. - */ + /** Get whether it is allowed to reuse the vector schema root. */ public boolean isReuseVectorSchemaRoot() { return reuseVectorSchemaRoot; } - /** - * Gets the mapping between JDBC type information to Arrow type. - */ + /** Gets the mapping between JDBC type information to Arrow type. */ public Function getJdbcToArrowTypeConverter() { return jdbcToArrowTypeConverter; } - /** - * Gets the JDBC consumer getter. - */ + /** Gets the JDBC consumer getter. */ public JdbcConsumerFactory getJdbcConsumerGetter() { return jdbcConsumerGetter; } @@ -311,8 +319,10 @@ public JdbcConsumerFactory getJdbcConsumerGetter() { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. + * @param index The {@link java.sql.ResultSetMetaData} column index of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { if (arraySubTypesByColumnIndex == null) { @@ -325,8 +335,10 @@ public JdbcFieldInfo getArraySubTypeByColumnIndex(int index) { /** * Returns the array sub-type {@link JdbcFieldInfo} defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link java.sql.Types#ARRAY} type. - * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not defined. 
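A sketch of seeding the array sub-type lookup described above (the 1-based column index and INTEGER element type are invented for illustration; H2 is the driver called out above as reporting Types.NULL for array elements):

```java
import java.sql.Types;
import java.util.HashMap;
import java.util.Map;
import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;

public class ArraySubTypeSketch {
  static JdbcToArrowConfig configFor(BufferAllocator allocator) {
    // Suppose column 3 (1-based) is an INTEGER[] whose element type the driver
    // reports as Types.NULL; supply the element type by column index instead.
    Map<Integer, JdbcFieldInfo> arraySubTypes = new HashMap<>();
    arraySubTypes.put(3, new JdbcFieldInfo(Types.INTEGER));
    return new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar())
        .setArraySubTypeByColumnIndexMap(arraySubTypes)
        .build();
  }
}
```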
+ * @param name The {@link java.sql.ResultSetMetaData} column name of an {@link + * java.sql.Types#ARRAY} type. + * @return The {@link JdbcFieldInfo} for that array's sub-type, or null if not + * defined. */ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { if (arraySubTypesByColumnName == null) { @@ -339,7 +351,8 @@ public JdbcFieldInfo getArraySubTypeByColumnName(String name) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index. * - * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type mapping. + * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { @@ -353,7 +366,8 @@ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) { /** * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name. * - * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type mapping. + * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type + * mapping. * @return The {@link JdbcFieldInfo} defined for the column, or null if not defined. */ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { @@ -364,17 +378,12 @@ public JdbcFieldInfo getExplicitTypeByColumnName(String name) { } } - /** - * Return schema level metadata or null if not provided. - */ + /** Return schema level metadata or null if not provided. */ public Map getSchemaMetadata() { return schemaMetadata; } - /** - * Return metadata from columnIndex->meta map on per field basis - * or null if not provided. - */ + /** Return metadata from columnIndex->meta map on per field basis or null if not provided. */ public Map> getColumnMetadataByColumnIndex() { return columnMetadataByColumnIndex; } @@ -383,12 +392,14 @@ public RoundingMode getBigDecimalRoundingMode() { return bigDecimalRoundingMode; } - /** - * Interface for a function that gets a JDBC consumer for the given values. - */ + /** Interface for a function that gets a JDBC consumer for the given values. */ @FunctionalInterface public interface JdbcConsumerFactory { - JdbcConsumer apply(ArrowType arrowType, int columnIndex, boolean nullable, FieldVector vector, - JdbcToArrowConfig config); + JdbcConsumer apply( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java index 7d88c23832067..783a373c6d0a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.adapter.jdbc.JdbcToArrowConfig.DEFAULT_TARGET_BATCH_SIZE; @@ -23,15 +22,12 @@ import java.util.Calendar; import java.util.Map; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * This class builds {@link JdbcToArrowConfig}s. - */ +/** This class builds {@link JdbcToArrowConfig}s. */ public class JdbcToArrowConfigBuilder { private Calendar calendar; private BufferAllocator allocator; @@ -49,9 +45,9 @@ public class JdbcToArrowConfigBuilder { private RoundingMode bigDecimalRoundingMode; /** - * Default constructor for the JdbcToArrowConfigBuilder}. - * Use the setter methods for the allocator and calendar; the allocator must be - * set. Otherwise, {@link #build()} will throw a {@link NullPointerException}. + * Default constructor for the JdbcToArrowConfigBuilder}. Use the setter methods for + * the allocator and calendar; the allocator must be set. Otherwise, {@link #build()} will throw a + * {@link NullPointerException}. */ public JdbcToArrowConfigBuilder() { this.allocator = null; @@ -68,16 +64,13 @@ public JdbcToArrowConfigBuilder() { } /** - * Constructor for the JdbcToArrowConfigBuilder. The - * allocator is required, and a {@link NullPointerException} - * will be thrown if it is null. - *

    - * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *

    + * Constructor for the JdbcToArrowConfigBuilder. The allocator is required, and a + * {@link NullPointerException} will be thrown if it is null. + * + *

    The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. @@ -95,26 +88,23 @@ public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar) { } /** - * Constructor for the JdbcToArrowConfigBuilder. Both the - * allocator and calendar are required. A {@link NullPointerException} - * will be thrown if either of those arguments is null. - *

    - * The allocator is used to construct Arrow vectors from the JDBC ResultSet. - * The calendar is used to determine the time zone of {@link java.sql.Timestamp} - * fields and convert {@link java.sql.Date}, {@link java.sql.Time}, and - * {@link java.sql.Timestamp} fields to a single, common time zone when reading - * from the result set. - *

- *

    - * The includeMetadata argument, if true will cause - * various information about each database field to be added to the Vector - * Schema's field metadata. - *

    + * Constructor for the JdbcToArrowConfigBuilder. Both the allocator and calendar are + * required. A {@link NullPointerException} will be thrown if either of those arguments is + * null. + * + *

    The allocator is used to construct Arrow vectors from the JDBC ResultSet. The calendar is + * used to determine the time zone of {@link java.sql.Timestamp} fields and convert {@link + * java.sql.Date}, {@link java.sql.Time}, and {@link java.sql.Timestamp} fields to a single, + * common time zone when reading from the result set. + * + *

    The includeMetadata argument, if true will cause various + * information about each database field to be added to the Vector Schema's field metadata. * * @param allocator The Arrow Vector memory allocator. * @param calendar The calendar to use when constructing timestamp fields. */ - public JdbcToArrowConfigBuilder(BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { + public JdbcToArrowConfigBuilder( + BufferAllocator allocator, Calendar calendar, boolean includeMetadata) { this(allocator, calendar); this.includeMetadata = includeMetadata; } @@ -132,8 +122,8 @@ public JdbcToArrowConfigBuilder setAllocator(BufferAllocator allocator) { } /** - * Sets the {@link Calendar} to use when constructing timestamp fields in the - * Arrow schema, and reading time-based fields from the JDBC ResultSet. + * Sets the {@link Calendar} to use when constructing timestamp fields in the Arrow schema, and + * reading time-based fields from the JDBC ResultSet. * * @param calendar the calendar to set. */ @@ -145,7 +135,8 @@ public JdbcToArrowConfigBuilder setCalendar(Calendar calendar) { /** * Sets whether to include JDBC ResultSet field metadata in the Arrow Schema field metadata. * - * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field metadata. + * @param includeMetadata Whether to include or exclude JDBC metadata in the Arrow Schema field + * metadata. * @return This instance of the JdbcToArrowConfig, for chaining. */ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { @@ -154,8 +145,8 @@ public JdbcToArrowConfigBuilder setIncludeMetadata(boolean includeMetadata) { } /** - * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link java.sql.Types#ARRAY}. - * The column index is 1-based, to match the JDBC column index. + * Sets the mapping of column-index-to-{@link JdbcFieldInfo} used for columns of type {@link + * java.sql.Types#ARRAY}. The column index is 1-based, to match the JDBC column index. * * @param map The mapping. * @return This instance of the JdbcToArrowConfig, for chaining. @@ -166,7 +157,8 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnIndexMap(MapJdbcToArrowConfig, for chaining. @@ -178,11 +170,12 @@ public JdbcToArrowConfigBuilder setArraySubTypeByColumnNameMap(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. - *

    - * The column index is 1-based, to match the JDBC column index. + * + *

    This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * + *

    The column index is 1-based, to match the JDBC column index. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map map) { @@ -192,9 +185,10 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map - * This can be useful to override type information from JDBC drivers that provide incomplete type info, - * e.g. DECIMAL with precision = scale = 0. + * + *

    This can be useful to override type information from JDBC drivers that provide incomplete + * type info, e.g. DECIMAL with precision = scale = 0. + * * @param map The mapping. */ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map map) { @@ -204,8 +198,8 @@ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map - * Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. + * + *

    Use {@link JdbcToArrowConfig#NO_LIMIT_BATCH_SIZE} to read all rows at once. */ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { this.targetBatchSize = targetBatchSize; @@ -214,8 +208,9 @@ public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) { /** * Set the function used to convert JDBC types to Arrow types. - *

    - * Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, Calendar)}. + * + *

    Defaults to wrapping {@link JdbcToArrowUtils#getArrowTypeFromJdbcType(JdbcFieldInfo, + * Calendar)}. */ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( Function jdbcToArrowTypeConverter) { @@ -225,9 +220,9 @@ public JdbcToArrowConfigBuilder setJdbcToArrowTypeConverter( /** * Set the function used to get a JDBC consumer for a given type. - *

    - * Defaults to wrapping {@link - * JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, FieldVector, JdbcToArrowConfig)}. + * + *

    Defaults to wrapping {@link JdbcToArrowUtils#getConsumer(ArrowType, Integer, Boolean, + * FieldVector, JdbcToArrowConfig)}. */ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( JdbcToArrowConfig.JdbcConsumerFactory jdbcConsumerGetter) { @@ -236,35 +231,32 @@ public JdbcToArrowConfigBuilder setJdbcConsumerGetter( } /** - * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each iteration, - * or to allocate a new one. + * Set whether to use the same {@link org.apache.arrow.vector.VectorSchemaRoot} instance on each + * iteration, or to allocate a new one. */ public JdbcToArrowConfigBuilder setReuseVectorSchemaRoot(boolean reuseVectorSchemaRoot) { this.reuseVectorSchemaRoot = reuseVectorSchemaRoot; return this; } - /** - * Set metadata for schema. - */ + /** Set metadata for schema. */ public JdbcToArrowConfigBuilder setSchemaMetadata(Map schemaMetadata) { this.schemaMetadata = schemaMetadata; return this; } - /** - * Set metadata from columnIndex->meta map on per field basis. - */ + /** Set metadata from columnIndex->meta map on per field basis. */ public JdbcToArrowConfigBuilder setColumnMetadataByColumnIndex( - Map> columnMetadataByColumnIndex) { + Map> columnMetadataByColumnIndex) { this.columnMetadataByColumnIndex = columnMetadataByColumnIndex; return this; } /** - * Set the rounding mode used when the scale of the actual value does not match the declared scale. - *

    - * By default, an error is raised in such cases. + * Set the rounding mode used when the scale of the actual value does not match the declared + * scale. + * + *

    By default, an error is raised in such cases. */ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecimalRoundingMode) { this.bigDecimalRoundingMode = bigDecimalRoundingMode; @@ -272,8 +264,8 @@ public JdbcToArrowConfigBuilder setBigDecimalRoundingMode(RoundingMode bigDecima } /** - * This builds the {@link JdbcToArrowConfig} from the provided - * {@link BufferAllocator} and {@link Calendar}. + * This builds the {@link JdbcToArrowConfig} from the provided {@link BufferAllocator} and {@link + * Calendar}. * * @return The built {@link JdbcToArrowConfig} * @throws NullPointerException if either the allocator or calendar was not set. diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java index eaee49936079f..8397d4c9e0dc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.apache.arrow.vector.types.FloatingPointPrecision.DOUBLE; @@ -38,7 +37,6 @@ import java.util.Locale; import java.util.Map; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.consumer.ArrayConsumer; import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer; import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer; @@ -91,7 +89,8 @@ import org.apache.arrow.vector.util.ValueVectorUtility; /** - * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector objects. + * Class that does most of the work to convert JDBC ResultSet data into Arrow columnar format Vector + * objects. * * @since 0.10.0 */ @@ -99,9 +98,7 @@ public class JdbcToArrowUtils { private static final int JDBC_ARRAY_VALUE_COLUMN = 2; - /** - * Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. - */ + /** Returns the instance of a {java.util.Calendar} with the UTC time zone and root locale. */ public static Calendar getUtcCalendar() { return Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT); } @@ -114,7 +111,8 @@ public static Calendar getUtcCalendar() { * @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar) + throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); return jdbcToArrowSchema(rsmd, new JdbcToArrowConfig(new RootAllocator(0), calendar)); @@ -123,25 +121,28 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, Calendar calendar /** * Create Arrow {@link Schema} object for the given JDBC {@link ResultSetMetaData}. * - * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC metadata from. - * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. + * @param parameterMetaData The ResultSetMetaData containing the results, to read the JDBC + * metadata from. + * @param calendar The calendar to use the time zone field of, to construct Timestamp fields from. 
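A sketch of the schema-only path just introduced, which inspects metadata without converting any rows (`rs` is assumed to come from an ordinary JDBC query):

```java
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.vector.types.pojo.Schema;

public class SchemaOnlySketch {
  static Schema describe(ResultSet rs) throws SQLException {
    // The UTC calendar helper only matters for TIMESTAMP columns.
    return JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), JdbcToArrowUtils.getUtcCalendar());
  }
}
```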
* @return {@link Schema} * @throws SQLException on error */ - public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData, final Calendar calendar) - throws SQLException { + public static Schema jdbcToArrowSchema( + final ParameterMetaData parameterMetaData, final Calendar calendar) throws SQLException { Preconditions.checkNotNull(calendar, "Calendar object can't be null"); Preconditions.checkNotNull(parameterMetaData); final List parameterFields = new ArrayList<>(parameterMetaData.getParameterCount()); - for (int parameterCounter = 1; parameterCounter <= parameterMetaData.getParameterCount(); - parameterCounter++) { + for (int parameterCounter = 1; + parameterCounter <= parameterMetaData.getParameterCount(); + parameterCounter++) { final int jdbcDataType = parameterMetaData.getParameterType(parameterCounter); final int jdbcIsNullable = parameterMetaData.isNullable(parameterCounter); final boolean arrowIsNullable = jdbcIsNullable != ParameterMetaData.parameterNoNulls; final int precision = parameterMetaData.getPrecision(parameterCounter); final int scale = parameterMetaData.getScale(parameterCounter); - final ArrowType arrowType = getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); - final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/null); + final ArrowType arrowType = + getArrowTypeFromJdbcType(new JdbcFieldInfo(jdbcDataType, precision, scale), calendar); + final FieldType fieldType = new FieldType(arrowIsNullable, arrowType, /*dictionary=*/ null); parameterFields.add(new Field(null, fieldType, null)); } @@ -152,10 +153,11 @@ public static Schema jdbcToArrowSchema(final ParameterMetaData parameterMetaData * Converts the provided JDBC type to its respective {@link ArrowType} counterpart. * * @param fieldInfo the {@link JdbcFieldInfo} with information about the original JDBC type. - * @param calendar the {@link Calendar} to use for datetime data types. + * @param calendar the {@link Calendar} to use for datetime data types. * @return a new {@link ArrowType}. */ - public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, final Calendar calendar) { + public static ArrowType getArrowTypeFromJdbcType( + final JdbcFieldInfo fieldInfo, final Calendar calendar) { switch (fieldInfo.getJdbcType()) { case Types.BOOLEAN: case Types.BIT: @@ -222,30 +224,34 @@ public static ArrowType getArrowTypeFromJdbcType(final JdbcFieldInfo fieldInfo, /** * Create Arrow {@link Schema} object for the given JDBC {@link java.sql.ResultSetMetaData}. * - *

    - * If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the following fields - * will be added to the {@link FieldType#getMetadata()}: + *

    If {@link JdbcToArrowConfig#shouldIncludeMetadata()} returns true, the + * following fields will be added to the {@link FieldType#getMetadata()}: + * *

      - *
    • {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link ResultSetMetaData#getCatalogName(int)}
    • - *
    • {@link Constants#SQL_TABLE_NAME_KEY} representing {@link ResultSetMetaData#getTableName(int)}
    • - *
    • {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link ResultSetMetaData#getColumnLabel(int)}
    • - *
    • {@link Constants#SQL_TYPE_KEY} representing {@link ResultSetMetaData#getColumnTypeName(int)}
    • + *
    • {@link Constants#SQL_CATALOG_NAME_KEY} representing {@link + * ResultSetMetaData#getCatalogName(int)} + *
    • {@link Constants#SQL_TABLE_NAME_KEY} representing {@link + * ResultSetMetaData#getTableName(int)} + *
    • {@link Constants#SQL_COLUMN_NAME_KEY} representing {@link + * ResultSetMetaData#getColumnLabel(int)} + *
    • {@link Constants#SQL_TYPE_KEY} representing {@link + * ResultSetMetaData#getColumnTypeName(int)} *
    - *

    - *

    - * If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be used to look up - * the array sub-type field. The {@link JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be - * checked first, followed by the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. - *

+ *

    If any columns are of type {@link java.sql.Types#ARRAY}, the configuration object will be + * used to look up the array sub-type field. The {@link + * JdbcToArrowConfig#getArraySubTypeByColumnIndex(int)} method will be checked first, followed by + * the {@link JdbcToArrowConfig#getArraySubTypeByColumnName(String)} method. * * @param rsmd The ResultSetMetaData containing the results, to read the JDBC metadata from. * @param config The configuration to use when constructing the schema. * @return {@link Schema} * @throws SQLException on error - * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} but the - * config does not have a sub-type definition for it. + * @throws IllegalArgumentException if rsmd contains an {@link java.sql.Types#ARRAY} + * but the config does not have a sub-type definition for it. */ - public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) throws SQLException { + public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig config) + throws SQLException { Preconditions.checkNotNull(rsmd, "JDBC ResultSetMetaData object can't be null"); Preconditions.checkNotNull(config, "The configuration object must not be null"); @@ -254,8 +260,10 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig for (int i = 1; i <= columnCount; i++) { final String columnName = rsmd.getColumnLabel(i); - final Map columnMetadata = config.getColumnMetadataByColumnIndex() != null ? - config.getColumnMetadataByColumnIndex().get(i) : null; + final Map columnMetadata = + config.getColumnMetadataByColumnIndex() != null + ? config.getColumnMetadataByColumnIndex().get(i) + : null; final Map metadata; if (config.shouldIncludeMetadata()) { metadata = new HashMap<>(); @@ -278,14 +286,19 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo); if (arrowType != null) { - final FieldType fieldType = new FieldType( - isColumnNullable(rsmd, i, columnFieldInfo), arrowType, /* dictionary encoding */ null, metadata); + final FieldType fieldType = + new FieldType( + isColumnNullable(rsmd, i, columnFieldInfo), + arrowType, /* dictionary encoding */ + null, + metadata); List children = null; if (arrowType.getTypeID() == ArrowType.List.TYPE_TYPE) { final JdbcFieldInfo arrayFieldInfo = getJdbcFieldInfoForArraySubType(rsmd, i, config); if (arrayFieldInfo == null) { - throw new IllegalArgumentException("Configuration does not provide a mapping for array column " + i); + throw new IllegalArgumentException( + "Configuration does not provide a mapping for array column " + i); } children = new ArrayList(); final ArrowType childType = config.getJdbcToArrowTypeConverter().apply(arrayFieldInfo); @@ -295,9 +308,13 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig FieldType keyType = new FieldType(false, new ArrowType.Utf8(), null, null); FieldType valueType = new FieldType(false, new ArrowType.Utf8(), null, null); children = new ArrayList<>(); - children.add(new Field("child", mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, valueType, null)))); + children.add( + new Field( + "child", + mapType, + Arrays.asList( + new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, valueType, null)))); } fields.add(new 
Field(columnName, fieldType, children)); @@ -307,18 +324,14 @@ public static Schema jdbcToArrowSchema(ResultSetMetaData rsmd, JdbcToArrowConfig } static JdbcFieldInfo getJdbcFieldInfoForColumn( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( - arrayColumn <= rsmd.getColumnCount(), - "Column number cannot be more than the number of columns"); + arrayColumn <= rsmd.getColumnCount(), + "Column number cannot be more than the number of columns"); JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); if (fieldInfo == null) { @@ -334,16 +347,12 @@ static JdbcFieldInfo getJdbcFieldInfoForColumn( * If no sub-type can be found, returns null. */ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( - ResultSetMetaData rsmd, - int arrayColumn, - JdbcToArrowConfig config) - throws SQLException { + ResultSetMetaData rsmd, int arrayColumn, JdbcToArrowConfig config) throws SQLException { Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null"); Preconditions.checkNotNull(config, "Configuration must not be null"); Preconditions.checkArgument( - arrayColumn > 0, - "ResultSetMetaData columns start with 1; column cannot be less than 1"); + arrayColumn > 0, "ResultSetMetaData columns start with 1; column cannot be less than 1"); Preconditions.checkArgument( arrayColumn <= rsmd.getColumnCount(), "Column number cannot be more than the number of columns"); @@ -359,10 +368,10 @@ private static JdbcFieldInfo getJdbcFieldInfoForArraySubType( * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate - * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link Timestamp} - * data types from the {@link ResultSet}, or null if not converting. + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param calendar The calendar to use when reading {@link Date}, {@link Time}, or {@link + * Timestamp} data types from the {@link ResultSet}, or null if not converting. 
* @throws SQLException on error */ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calendar calendar) @@ -373,29 +382,30 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, Calen jdbcToArrowVectors(rs, root, new JdbcToArrowConfig(new RootAllocator(0), calendar)); } - static boolean isColumnNullable(ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) - throws SQLException { + static boolean isColumnNullable( + ResultSetMetaData resultSetMetadata, int index, JdbcFieldInfo info) throws SQLException { int nullableValue; if (info != null && info.isNullable() != ResultSetMetaData.columnNullableUnknown) { nullableValue = info.isNullable(); } else { nullableValue = resultSetMetadata.isNullable(index); } - return nullableValue == ResultSetMetaData.columnNullable || - nullableValue == ResultSetMetaData.columnNullableUnknown; + return nullableValue == ResultSetMetaData.columnNullable + || nullableValue == ResultSetMetaData.columnNullableUnknown; } /** * Iterate the given JDBC {@link ResultSet} object to fetch the data and transpose it to populate * the given Arrow Vector objects. * - * @param rs ResultSet to use to fetch the data from underlying database - * @param root Arrow {@link VectorSchemaRoot} object to populate + * @param rs ResultSet to use to fetch the data from underlying database + * @param root Arrow {@link VectorSchemaRoot} object to populate * @param config The configuration to use when reading the data. * @throws SQLException on error * @throws JdbcConsumerException on error from VectorConsumer */ - public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) + public static void jdbcToArrowVectors( + ResultSet rs, VectorSchemaRoot root, JdbcToArrowConfig config) throws SQLException, IOException { ResultSetMetaData rsmd = rs.getMetaData(); @@ -405,8 +415,13 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT for (int i = 1; i <= columnCount; i++) { FieldVector vector = root.getVector(rsmd.getColumnLabel(i)); final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config); - consumers[i - 1] = getConsumer( - vector.getField().getType(), i, isColumnNullable(rsmd, i, columnFieldInfo), vector, config); + consumers[i - 1] = + getConsumer( + vector.getField().getType(), + i, + isColumnNullable(rsmd, i, columnFieldInfo), + vector, + config); } CompositeJdbcConsumer compositeConsumer = null; @@ -439,18 +454,22 @@ public static void jdbcToArrowVectors(ResultSet rs, VectorSchemaRoot root, JdbcT }
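The two entry points above combine into the usual read path: derive a schema from the ResultSetMetaData, allocate a VectorSchemaRoot, then let the per-column consumers fill it. A minimal sketch, assuming the whole result set fits into a single batch and that rs is an open ResultSet (allocator sizing and batching are elided):

    import java.sql.ResultSet;
    import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
    import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
    import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.types.pojo.Schema;

    static void readAll(ResultSet rs) throws Exception {
      try (BufferAllocator allocator = new RootAllocator()) {
        JdbcToArrowConfig config =
            new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()).build();
        Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rs.getMetaData(), config);
        try (VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) {
          // Walks the ResultSet once, routing each column through its JdbcConsumer.
          JdbcToArrowUtils.jdbcToArrowVectors(rs, root, config);
          System.out.println(root.contentToTSVString());
        }
      }
    }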
/** - * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the - * given column based on the Arrow type and provided vector. + * Default function used for JdbcConsumerFactory. This function gets a JdbcConsumer for the given + * column based on the Arrow type and provided vector. * - * @param arrowType Arrow type for the column. + * @param arrowType Arrow type for the column. * @param columnIndex Column index to fetch from the ResultSet - * @param nullable Whether the value is nullable or not - * @param vector Vector to store the consumed value - * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. + * @param nullable Whether the value is nullable or not + * @param vector Vector to store the consumed value + * @param config Associated JdbcToArrowConfig, used mainly for the Calendar. * @return {@link JdbcConsumer} */ - public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boolean nullable, - FieldVector vector, JdbcToArrowConfig config) { + public static JdbcConsumer getConsumer( + ArrowType arrowType, + int columnIndex, + boolean nullable, + FieldVector vector, + JdbcToArrowConfig config) { final Calendar calendar = config.getCalendar(); switch (arrowType.getTypeID()) { @@ -472,10 +491,11 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Decimal: final RoundingMode bigDecimalRoundingMode = config.getBigDecimalRoundingMode(); if (((ArrowType.Decimal) arrowType).getBitWidth() == 256) { - return Decimal256Consumer.createConsumer((Decimal256Vector) vector, columnIndex, nullable, - bigDecimalRoundingMode); + return Decimal256Consumer.createConsumer( + (Decimal256Vector) vector, columnIndex, nullable, bigDecimalRoundingMode); } else { - return DecimalConsumer.createConsumer((DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); + return DecimalConsumer.createConsumer( + (DecimalVector) vector, columnIndex, nullable, bigDecimalRoundingMode); } case FloatingPoint: switch (((ArrowType.FloatingPoint) arrowType).getPrecision()) { @@ -495,17 +515,25 @@ public static JdbcConsumer getConsumer(ArrowType arrowType, int columnIndex, boo case Date: return DateConsumer.createConsumer((DateDayVector) vector, columnIndex, nullable, calendar); case Time: - return TimeConsumer.createConsumer((TimeMilliVector) vector, columnIndex, nullable, calendar); + return TimeConsumer.createConsumer( + (TimeMilliVector) vector, columnIndex, nullable, calendar); case Timestamp: if (config.getCalendar() == null) { - return TimestampConsumer.createConsumer((TimeStampMilliVector) vector, columnIndex, nullable); + return TimestampConsumer.createConsumer( + (TimeStampMilliVector) vector, columnIndex, nullable); } else { - return TimestampTZConsumer.createConsumer((TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); + return TimestampTZConsumer.createConsumer( + (TimeStampMilliTZVector) vector, columnIndex, nullable, calendar); } case List: FieldVector childVector = ((ListVector) vector).getDataVector(); - JdbcConsumer delegate = getConsumer(childVector.getField().getType(), JDBC_ARRAY_VALUE_COLUMN, - childVector.getField().isNullable(), childVector, config); + JdbcConsumer delegate = + getConsumer( + childVector.getField().getType(), + JDBC_ARRAY_VALUE_COLUMN, + childVector.getField().isNullable(), + childVector, + config); return ArrayConsumer.createConsumer((ListVector) vector, delegate, columnIndex, nullable); case Map: return MapConsumer.createConsumer((MapVector) vector, columnIndex, nullable); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java index f24f409072c0d..d7b62c43acf6f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BaseColumnBinder.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import org.apache.arrow.vector.FieldVector; /** * Base class for ColumnBinder implementations. + * * @param <V> The concrete FieldVector subtype.
*/ public abstract class BaseColumnBinder<V extends FieldVector> implements ColumnBinder { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java index fde4642ef90a5..b9dfcb0d6c956 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BigIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BigIntVector; /** A column binder for 64-bit integers. */ @@ -34,7 +32,8 @@ public BigIntBinder(BigIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long value = vector.getDataBuffer().getLong((long) rowIndex * BigIntVector.TYPE_WIDTH); statement.setLong(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java index adae513e99e7c..c9db194f652ff 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/BitBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.BitVector; /** A column binder for booleans. */ @@ -34,7 +32,8 @@ public BitBinder(BitVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // See BitVector#getBit final int byteIndex = rowIndex >> 3; final byte b = vector.getDataBuffer().getByte(byteIndex); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java index c2b1259e1424b..c38db68234ecf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. - */ +/** A helper to bind values from a wrapped Arrow vector to a JDBC PreparedStatement. */ public interface ColumnBinder { /** * Bind the given row to the given parameter. @@ -43,14 +39,10 @@ public interface ColumnBinder { */ int getJdbcType(); - /** - * Get the vector used by this binder.
- */ + /** Get the vector used by this binder. */ FieldVector getVector(); - /** - * Create a column binder for a vector, using the default JDBC type code for null values. - */ + /** Create a column binder for a vector, using the default JDBC type code for null values. */ static ColumnBinder forVector(FieldVector vector) { return forVector(vector, /*jdbcType*/ null); } @@ -62,7 +54,8 @@ static ColumnBinder forVector(FieldVector vector) { * @param jdbcType The JDBC type code to use (or null to use the default). */ static ColumnBinder forVector(FieldVector vector, Integer jdbcType) { - final ColumnBinder binder = vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); + final ColumnBinder binder = + vector.getField().getType().accept(new ColumnBinderArrowTypeVisitor(vector, jdbcType)); if (vector.getField().isNullable()) { return new NullableColumnBinder(binder); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 7420a8c23dd48..30b2305f3f916 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Types; import java.time.ZoneId; import java.util.Calendar; import java.util.TimeZone; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; @@ -50,8 +48,8 @@ /** * Visitor to create the base ColumnBinder for a vector. - *
- * To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. + * + * <p>To handle null values, wrap the returned binder in a {@link NullableColumnBinder}. */ public class ColumnBinderArrowTypeVisitor implements ArrowType.ArrowTypeVisitor<ColumnBinder> { private final FieldVector vector; @@ -111,17 +109,21 @@ public ColumnBinder visit(ArrowType.Int type) { } switch (type.getBitWidth()) { case 8: - return jdbcType == null ? new TinyIntBinder((TinyIntVector) vector) : - new TinyIntBinder((TinyIntVector) vector, jdbcType); + return jdbcType == null + ? new TinyIntBinder((TinyIntVector) vector) + : new TinyIntBinder((TinyIntVector) vector, jdbcType); case 16: - return jdbcType == null ? new SmallIntBinder((SmallIntVector) vector) : - new SmallIntBinder((SmallIntVector) vector, jdbcType); + return jdbcType == null + ? new SmallIntBinder((SmallIntVector) vector) + : new SmallIntBinder((SmallIntVector) vector, jdbcType); case 32: - return jdbcType == null ? new IntBinder((IntVector) vector) : - new IntBinder((IntVector) vector, jdbcType); + return jdbcType == null + ? new IntBinder((IntVector) vector) + : new IntBinder((IntVector) vector, jdbcType); case 64: - return jdbcType == null ? new BigIntBinder((BigIntVector) vector) : - new BigIntBinder((BigIntVector) vector, jdbcType); + return jdbcType == null + ? new BigIntBinder((BigIntVector) vector) + : new BigIntBinder((BigIntVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -131,11 +133,13 @@ public ColumnBinder visit(ArrowType.Int type) { public ColumnBinder visit(ArrowType.FloatingPoint type) { switch (type.getPrecision()) { case SINGLE: - return jdbcType == null ? new Float4Binder((Float4Vector) vector) : - new Float4Binder((Float4Vector) vector, jdbcType); + return jdbcType == null + ? new Float4Binder((Float4Vector) vector) + : new Float4Binder((Float4Vector) vector, jdbcType); case DOUBLE: - return jdbcType == null ? new Float8Binder((Float8Vector) vector) : - new Float8Binder((Float8Vector) vector, jdbcType); + return jdbcType == null + ? new Float8Binder((Float8Vector) vector) + : new Float8Binder((Float8Vector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -144,61 +148,74 @@ public ColumnBinder visit(ArrowType.FloatingPoint type) { @Override public ColumnBinder visit(ArrowType.Utf8 type) { VarCharVector varChar = (VarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.VARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.VARCHAR) + : new VarCharBinder<>(varChar, jdbcType); } @Override public ColumnBinder visit(ArrowType.Utf8View type) { - throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeUtf8 type) { LargeVarCharVector varChar = (LargeVarCharVector) vector; - return jdbcType == null ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) : - new VarCharBinder<>(varChar, jdbcType); + return jdbcType == null + ? new VarCharBinder<>(varChar, Types.LONGVARCHAR) + : new VarCharBinder<>(varChar, jdbcType); } @Override public ColumnBinder visit(ArrowType.Binary type) { VarBinaryVector varBinary = (VarBinaryVector) vector; - return jdbcType == null ?
new VarBinaryBinder<>(varBinary, Types.VARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? new VarBinaryBinder<>(varBinary, Types.VARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); } @Override public ColumnBinder visit(ArrowType.BinaryView type) { - throw new UnsupportedOperationException("Column binder implemented for type " + type + " is not supported"); + throw new UnsupportedOperationException( + "Column binder implemented for type " + type + " is not supported"); } @Override public ColumnBinder visit(ArrowType.LargeBinary type) { LargeVarBinaryVector varBinary = (LargeVarBinaryVector) vector; - return jdbcType == null ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) : - new VarBinaryBinder<>(varBinary, jdbcType); + return jdbcType == null + ? new VarBinaryBinder<>(varBinary, Types.LONGVARBINARY) + : new VarBinaryBinder<>(varBinary, jdbcType); } @Override public ColumnBinder visit(ArrowType.FixedSizeBinary type) { FixedSizeBinaryVector binary = (FixedSizeBinaryVector) vector; - return jdbcType == null ? new FixedSizeBinaryBinder(binary, Types.BINARY) : - new FixedSizeBinaryBinder(binary, jdbcType); + return jdbcType == null + ? new FixedSizeBinaryBinder(binary, Types.BINARY) + : new FixedSizeBinaryBinder(binary, jdbcType); } @Override public ColumnBinder visit(ArrowType.Bool type) { - return jdbcType == null ? new BitBinder((BitVector) vector) : new BitBinder((BitVector) vector, jdbcType); + return jdbcType == null + ? new BitBinder((BitVector) vector) + : new BitBinder((BitVector) vector, jdbcType); } @Override public ColumnBinder visit(ArrowType.Decimal type) { if (type.getBitWidth() == 128) { DecimalVector decimalVector = (DecimalVector) vector; - return jdbcType == null ? new Decimal128Binder(decimalVector) : new Decimal128Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal128Binder(decimalVector) + : new Decimal128Binder(decimalVector, jdbcType); } else if (type.getBitWidth() == 256) { Decimal256Vector decimalVector = (Decimal256Vector) vector; - return jdbcType == null ? new Decimal256Binder(decimalVector) : new Decimal256Binder(decimalVector, jdbcType); + return jdbcType == null + ? new Decimal256Binder(decimalVector) + : new Decimal256Binder(decimalVector, jdbcType); } throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -207,11 +224,13 @@ public ColumnBinder visit(ArrowType.Decimal type) { public ColumnBinder visit(ArrowType.Date type) { switch (type.getUnit()) { case DAY: - return jdbcType == null ? new DateDayBinder((DateDayVector) vector) : - new DateDayBinder((DateDayVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateDayBinder((DateDayVector) vector) + : new DateDayBinder((DateDayVector) vector, /*calendar*/ null, jdbcType); case MILLISECOND: - return jdbcType == null ? new DateMilliBinder((DateMilliVector) vector) : - new DateMilliBinder((DateMilliVector) vector, /*calendar*/null, jdbcType); + return jdbcType == null + ? new DateMilliBinder((DateMilliVector) vector) + : new DateMilliBinder((DateMilliVector) vector, /*calendar*/ null, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); } @@ -221,17 +240,21 @@ public ColumnBinder visit(ArrowType.Date type) { public ColumnBinder visit(ArrowType.Time type) { switch (type.getUnit()) { case SECOND: - return jdbcType == null ? new Time32Binder((TimeSecVector) vector) : - new Time32Binder((TimeSecVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeSecVector) vector) + : new Time32Binder((TimeSecVector) vector, jdbcType); case MILLISECOND: - return jdbcType == null ? new Time32Binder((TimeMilliVector) vector) : - new Time32Binder((TimeMilliVector) vector, jdbcType); + return jdbcType == null + ? new Time32Binder((TimeMilliVector) vector) + : new Time32Binder((TimeMilliVector) vector, jdbcType); case MICROSECOND: - return jdbcType == null ? new Time64Binder((TimeMicroVector) vector) : - new Time64Binder((TimeMicroVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeMicroVector) vector) + : new Time64Binder((TimeMicroVector) vector, jdbcType); case NANOSECOND: - return jdbcType == null ? new Time64Binder((TimeNanoVector) vector) : - new Time64Binder((TimeNanoVector) vector, jdbcType); + return jdbcType == null + ? new Time64Binder((TimeNanoVector) vector) + : new Time64Binder((TimeNanoVector) vector, jdbcType); default: throw new UnsupportedOperationException("No column binder implemented for type " + type); }
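A note on how this visitor is reached: ColumnBinder.forVector accepts the vector's ArrowType with this visitor and, for nullable fields, wraps the result in a NullableColumnBinder. A minimal usage sketch, assuming an open JDBC connection conn and a single-column table t (both illustrative):

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import org.apache.arrow.adapter.jdbc.binder.ColumnBinder;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.IntVector;

    static void insertAll(Connection conn) throws Exception {
      try (BufferAllocator allocator = new RootAllocator();
          IntVector vector = new IntVector("x", allocator)) {
        vector.setSafe(0, 1);
        vector.setSafe(1, 2);
        vector.setValueCount(2);
        // The Int(32) case above selects IntBinder; because the field is
        // nullable, forVector also wraps it in a NullableColumnBinder.
        ColumnBinder binder = ColumnBinder.forVector(vector);
        try (PreparedStatement stmt = conn.prepareStatement("INSERT INTO t VALUES (?)")) {
          for (int row = 0; row < vector.getValueCount(); row++) {
            binder.bind(stmt, /*parameterIndex*/ 1, row);
            stmt.addBatch();
          }
          stmt.executeBatch();
        }
      }
    }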
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java index bc16790c8f391..b9eae464c8aa2 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateDayBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateDayVector; -/** - * A column binder for 32-bit dates. - */ +/** A column binder for 32-bit dates. */ public class DateDayBinder extends BaseColumnBinder<DateDayVector> { private static final long MILLIS_PER_DAY = 86_400_000; private final Calendar calendar; @@ -46,7 +42,8 @@ public DateDayBinder(DateDayVector vector, Calendar calendar, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow final long index = (long) rowIndex * DateDayVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getInt(index) * MILLIS_PER_DAY); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java index 5cb91b46ac179..f320391fbed5b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/DateMilliBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.Date; @@ -22,12 +21,9 @@ import java.sql.SQLException; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.DateMilliVector; -/** - * A column binder for 64-bit dates. - */ +/** A column binder for 64-bit dates. 
*/ public class DateMilliBinder extends BaseColumnBinder<DateMilliVector> { private final Calendar calendar; @@ -39,14 +35,14 @@ public DateMilliBinder(DateMilliVector vector, Calendar calendar) { this(vector, calendar, Types.DATE); } - public DateMilliBinder(DateMilliVector vector, Calendar calendar, int jdbcType) { super(vector, jdbcType); this.calendar = calendar; } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final long index = (long) rowIndex * DateMilliVector.TYPE_WIDTH; final Date value = new Date(vector.getDataBuffer().getLong(index)); if (calendar == null) { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java index 9e9d0e4fdb25b..07ef52f2e594c 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal128Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.DecimalVector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 128-bit decimals. - */ +/** A binder for 128-bit decimals. */ public class Decimal128Binder extends BaseColumnBinder<DecimalVector> { public Decimal128Binder(DecimalVector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal128Binder(DecimalVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), DecimalVector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java index bd29e083b4513..5a4222f6b84db 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Decimal256Binder.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.math.BigDecimal; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Decimal256Vector; import org.apache.arrow.vector.util.DecimalUtility; -/** - * A binder for 256-bit decimals. - */ +/** A binder for 256-bit decimals. */ public class Decimal256Binder extends BaseColumnBinder<Decimal256Vector> { public Decimal256Binder(Decimal256Vector vector) { this(vector, Types.DECIMAL); @@ -38,9 +34,11 @@ public Decimal256Binder(Decimal256Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final BigDecimal value = DecimalUtility.getBigDecimalFromArrowBuf( - vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final BigDecimal value = + DecimalUtility.getBigDecimalFromArrowBuf( + vector.getDataBuffer(), rowIndex, vector.getScale(), Decimal256Vector.TYPE_WIDTH); statement.setBigDecimal(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java index 7edc5e4532985..4f74b1fa8cfd4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/FixedSizeBinaryBinder.java @@ -14,22 +14,18 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FixedSizeBinaryVector; -/** - * A binder for fixed-width binary types. - */ +/** A binder for fixed-width binary types. */ public class FixedSizeBinaryBinder extends BaseColumnBinder<FixedSizeBinaryVector> { /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { @@ -37,9 +33,12 @@ public FixedSizeBinaryBinder(FixedSizeBinaryVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { byte[] binaryData = new byte[vector.getByteWidth()]; - vector.getDataBuffer().getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); + vector + .getDataBuffer() + .getBytes((long) rowIndex * binaryData.length, binaryData, 0, binaryData.length); statement.setBytes(parameterIndex, binaryData); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java index a471c1ebadd66..466a67a2dbc89 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float4Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float4Vector; -/** - * A binder for 32-bit floats. - */ +/** A binder for 32-bit floats. */ public class Float4Binder extends BaseColumnBinder<Float4Vector> { public Float4Binder(Float4Vector vector) { this(vector, Types.REAL); @@ -36,7 +32,8 @@ public Float4Binder(Float4Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final float value = vector.getDataBuffer().getFloat((long) rowIndex * Float4Vector.TYPE_WIDTH); statement.setFloat(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java index 4710c3b59860d..222bebf115372 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Float8Binder.java @@ -14,18 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.Float8Vector; -/** - * A binder for 64-bit floats. - */ +/** A binder for 64-bit floats. */ public class Float8Binder extends BaseColumnBinder<Float8Vector> { public Float8Binder(Float8Vector vector) { this(vector, Types.DOUBLE); @@ -36,8 +32,10 @@ public Float8Binder(Float8Vector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final double value = vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final double value = + vector.getDataBuffer().getDouble((long) rowIndex * Float8Vector.TYPE_WIDTH); statement.setDouble(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java index 7d47f585a39d9..6b49eeb5352b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/IntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.IntVector; /** A column binder for 32-bit integers. 
*/ @@ -34,7 +32,8 @@ public IntBinder(IntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final int value = vector.getDataBuffer().getInt((long) rowIndex * IntVector.TYPE_WIDTH); statement.setInt(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java index b8aa61234f4e9..25172c0c1f0aa 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ListBinder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.complex.ListVector; import org.apache.arrow.vector.complex.impl.UnionListReader; import org.apache.arrow.vector.util.Text; -/** - * A column binder for list of primitive values. - */ +/** A column binder for lists of primitive values. */ public class ListBinder extends BaseColumnBinder<ListVector> { private final UnionListReader listReader; @@ -52,7 +48,9 @@ public ListBinder(ListVector vector, int jdbcType) { try { arrayElementClass = dataVectorClass.getMethod("getObject", Integer.TYPE).getReturnType(); } catch (NoSuchMethodException e) { - final String message = String.format("Issue to determine type for getObject method of data vector class %s ", + final String message = + String.format( + "Issue to determine type for getObject method of data vector class %s ", dataVectorClass.getName()); throw new RuntimeException(message); } @@ -60,7 +58,8 @@ public ListBinder(ListVector vector, int jdbcType) { } @Override - public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex)throws java.sql.SQLException { + public void bind(java.sql.PreparedStatement statement, int parameterIndex, int rowIndex) + throws java.sql.SQLException { listReader.setPosition(rowIndex); ArrayList sourceArray = (ArrayList) listReader.readObject(); Object array; @@ -69,7 +68,9 @@ public void bind(java.sql.PreparedStatement statement, int parameterIndex, int r Arrays.setAll((Object[]) array, sourceArray::get); } else { array = new String[sourceArray.size()]; - Arrays.setAll((Object[]) array, idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); + Arrays.setAll( + (Object[]) array, + idx -> sourceArray.get(idx) != null ? sourceArray.get(idx).toString() : null); } statement.setObject(parameterIndex, array); }
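The ListBinder above flattens one Arrow list row into a plain Java array before calling PreparedStatement#setObject. A sketch of the equivalent hand-written call, to make that contract concrete; whether setObject accepts a bare array is driver-dependent (Connection#createArrayOf is the portable route), and the table is illustrative:

    import java.sql.Connection;
    import java.sql.PreparedStatement;

    static void insertOneListRow(Connection conn) throws Exception {
      // One Arrow list value becomes one array-valued JDBC parameter.
      Integer[] row = {1, 2, 3};
      try (PreparedStatement stmt =
          conn.prepareStatement("INSERT INTO t (int_list) VALUES (?)")) {
        stmt.setObject(1, row);
        stmt.executeUpdate();
      }
    }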
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java index 07391eb7cbfb4..e94f186453581 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/MapBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -23,16 +22,13 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Objects; - import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapReader; import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.util.JsonStringHashMap; -/** - * A column binder for map of primitive values. - */ +/** A column binder for maps of primitive values. */ public class MapBinder extends BaseColumnBinder<MapVector> { private UnionMapReader reader; @@ -58,8 +54,8 @@ public MapBinder(MapVector vector, int jdbcType) { } List<Field> keyValueFields = Objects.requireNonNull(structField.get(0)).getChildren(); if (keyValueFields.size() != 2) { - throw new IllegalArgumentException("Expected two children fields " + - "inside nested Struct field in Map"); + throw new IllegalArgumentException( + "Expected two children fields " + "inside nested Struct field in Map"); } ArrowType keyType = Objects.requireNonNull(keyValueFields.get(0)).getType(); ArrowType valueType = Objects.requireNonNull(keyValueFields.get(1)).getType(); @@ -68,15 +64,16 @@ public MapBinder(MapVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, - int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { reader.setPosition(rowIndex); LinkedHashMap<Object, Object> tags = new JsonStringHashMap<>(); while (reader.next()) { Object key = reader.key().readObject(); Object value = reader.value().readObject(); - tags.put(isTextKey && key != null ? key.toString() : key, - isTextValue && value != null ? value.toString() : value); + tags.put( + isTextKey && key != null ? key.toString() : key, + isTextValue && value != null ? value.toString() : value); } switch (jdbcType) { case Types.VARCHAR: diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java index 123b587ca50d4..bf5288b173341 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/NullableColumnBinder.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; -/** - * A ColumnBinder that checks for nullability before deferring to a type-specific binder. - */ +/** A ColumnBinder that checks for nullability before deferring to a type-specific binder. 
*/ public class NullableColumnBinder implements ColumnBinder { private final ColumnBinder wrapped; @@ -33,7 +29,8 @@ public NullableColumnBinder(ColumnBinder wrapped) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { if (wrapped.getVector().isNull(rowIndex)) { statement.setNull(parameterIndex, wrapped.getJdbcType()); } else { diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java index f9d744b9f5497..aa636c9336f55 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/SmallIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.SmallIntVector; /** A column binder for 16-bit integers. */ @@ -34,8 +32,10 @@ public SmallIntBinder(SmallIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { - final short value = vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { + final short value = + vector.getDataBuffer().getShort((short) rowIndex * SmallIntVector.TYPE_WIDTH); statement.setShort(parameterIndex, value); } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java index 5dc7e3f513f97..4e09c3be23264 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time32Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMilliVector; import org.apache.arrow.vector.TimeSecVector; -/** - * A binder for 32-bit time types. - */ +/** A binder for 32-bit time types. 
*/ public class Time32Binder extends BaseColumnBinder<BaseFixedWidthVector> { private static final long TYPE_WIDTH = 4; @@ -43,11 +39,11 @@ public Time32Binder(TimeMilliVector vector) { } public Time32Binder(TimeSecVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time32Binder(TimeMilliVector vector, int jdbcType) { - this(vector, /*factor*/1, jdbcType); + this(vector, /*factor*/ 1, jdbcType); } Time32Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time32Binder(TimeMilliVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: multiply with overflow // TODO: take in a Calendar as well? final Time value = new Time(vector.getDataBuffer().getInt(rowIndex * TYPE_WIDTH) * factor); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java index 8d62ae0eb36df..01c85fb32f1b5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/Time64Binder.java @@ -14,21 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Time; import java.sql.Types; - import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.TimeMicroVector; import org.apache.arrow.vector.TimeNanoVector; -/** - * A binder for 64-bit time types. - */ +/** A binder for 64-bit time types. */ public class Time64Binder extends BaseColumnBinder<BaseFixedWidthVector> { private static final long TYPE_WIDTH = 8; @@ -43,11 +39,11 @@ public Time64Binder(TimeNanoVector vector) { } public Time64Binder(TimeMicroVector vector, int jdbcType) { - this(vector, /*factor*/1_000, jdbcType); + this(vector, /*factor*/ 1_000, jdbcType); } public Time64Binder(TimeNanoVector vector, int jdbcType) { - this(vector, /*factor*/1_000_000, jdbcType); + this(vector, /*factor*/ 1_000_000, jdbcType); } Time64Binder(BaseFixedWidthVector vector, long factor, int jdbcType) { @@ -56,7 +52,8 @@ public Time64Binder(TimeNanoVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) final Time value = new Time(vector.getDataBuffer().getLong(rowIndex * TYPE_WIDTH) / factor); statement.setTime(parameterIndex, value); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java index 6677e5909901a..942d7ae58dcd5 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TimeStampBinder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; @@ -22,7 +21,6 @@ import java.sql.Timestamp; import java.sql.Types; import java.util.Calendar; - import org.apache.arrow.vector.TimeStampVector; import org.apache.arrow.vector.types.pojo.ArrowType; @@ -32,15 +30,17 @@ public class TimeStampBinder extends BaseColumnBinder<TimeStampVector> { private final long unitsPerSecond; private final long nanosPerUnit; - /** - * Create a binder for a timestamp vector using the default JDBC type code. - */ + /** Create a binder for a timestamp vector using the default JDBC type code. */ public TimeStampBinder(TimeStampVector vector, Calendar calendar) { - this(vector, calendar, isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); + this( + vector, + calendar, + isZoned(vector.getField().getType()) ? Types.TIMESTAMP_WITH_TIMEZONE : Types.TIMESTAMP); } /** * Create a binder for a timestamp vector. + * * @param vector The vector to pull values from. * @param calendar Optionally, the calendar to pass to JDBC. * @param jdbcType The JDBC type code to use for null values. */ @@ -73,19 +73,23 @@ public TimeStampBinder(TimeStampVector vector, Calendar calendar, int jdbcType) } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { // TODO: option to throw on truncation (vendor Guava IntMath#multiply) or overflow - final long rawValue = vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); + final long rawValue = + vector.getDataBuffer().getLong((long) rowIndex * TimeStampVector.TYPE_WIDTH); final long seconds = rawValue / unitsPerSecond; final int nanos = (int) ((rawValue - (seconds * unitsPerSecond)) * nanosPerUnit); final Timestamp value = new Timestamp(seconds * 1_000); value.setNanos(nanos); if (calendar != null) { - // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC value with a + // Timestamp == Date == UTC timestamp (confusingly). Arrow's timestamp with timezone is a UTC + // value with a // zone offset, so we don't need to do any conversion. statement.setTimestamp(parameterIndex, value, calendar); } else { - // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is technically wrong, + // Arrow timestamp without timezone isn't strictly convertible to any timezone. So this is + // technically wrong, // but there is no 'correct' interpretation here. The application should provide a calendar. statement.setTimestamp(parameterIndex, value); }
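The comments in TimeStampBinder#bind above carry the key subtlety: a zoned Arrow timestamp is a UTC instant, while a naive one has no defined zone, so the supplied calendar decides how the driver interprets the value. A minimal sketch of passing an explicit UTC calendar; the vector and statement are assumed to already exist:

    import java.util.Calendar;
    import java.util.TimeZone;
    import org.apache.arrow.adapter.jdbc.binder.ColumnBinder;
    import org.apache.arrow.adapter.jdbc.binder.TimeStampBinder;

    // TimeStampBinder picks Types.TIMESTAMP or TIMESTAMP_WITH_TIMEZONE from the
    // field type; the UTC calendar keeps the driver from applying the JVM zone.
    Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    ColumnBinder binder = new TimeStampBinder(timeStampVector, utc);
    binder.bind(statement, /*parameterIndex*/ 1, /*rowIndex*/ 0);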
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java index f51d139be863a..0580456d37983 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/TinyIntBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.vector.TinyIntVector; /** A column binder for 8-bit integers. */ @@ -34,7 +32,8 @@ public TinyIntBinder(TinyIntVector vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { final byte value = vector.getDataBuffer().getByte((long) rowIndex * TinyIntVector.TYPE_WIDTH); statement.setByte(parameterIndex, value); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java index a94cff6a00496..41807efc611b1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarBinaryBinder.java @@ -14,12 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.ElementAddressableVector; import org.apache.arrow.vector.FieldVector; @@ -29,13 +27,14 @@ * * @param <T> The binary vector. */ -public class VarBinaryBinder<T extends FieldVector & ElementAddressableVector> extends BaseColumnBinder<T> { +public class VarBinaryBinder<T extends FieldVector & ElementAddressableVector> + extends BaseColumnBinder<T> { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarBinaryBinder(T vector, int jdbcType) { @@ -44,15 +43,18 @@ public VarBinaryBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] binaryData = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java index 73bd55981490b..926e1da28c9a0 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/VarCharBinder.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.binder; import java.nio.charset.StandardCharsets; import java.sql.PreparedStatement; import java.sql.SQLException; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.VariableWidthVector; @@ -30,13 +28,14 @@ * * @param <T> The text vector. 
*/ -public class VarCharBinder<T extends FieldVector & VariableWidthVector> extends BaseColumnBinder<T> { +public class VarCharBinder<T extends FieldVector & VariableWidthVector> + extends BaseColumnBinder<T> { private final ArrowBufPointer element; /** * Create a binder for the given vector using the given JDBC type for null values. * - * @param vector The vector to draw values from. + * @param vector The vector to draw values from. * @param jdbcType The JDBC type code. */ public VarCharBinder(T vector, int jdbcType) { @@ -45,15 +44,18 @@ public VarCharBinder(T vector, int jdbcType) { } @Override - public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { + public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) + throws SQLException { vector.getDataPointer(rowIndex, element); if (element.getBuf() == null) { statement.setNull(parameterIndex, jdbcType); return; } if (element.getLength() > (long) Integer.MAX_VALUE) { - final String message = String.format("Length of value at index %d (%d) exceeds Integer.MAX_VALUE", - rowIndex, element.getLength()); + final String message = + String.format( + "Length of value at index %d (%d) exceeds Integer.MAX_VALUE", + rowIndex, element.getLength()); throw new RuntimeException(message); } byte[] utf8Bytes = new byte[(int) element.getLength()]; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java index 4f8936e0c27bf..945c3c9f84fa8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/package-info.java @@ -15,8 +15,5 @@ * limitations under the License. */ -/** - * Utilities to bind Arrow data as JDBC prepared statement parameters. - */ - +/** Utilities to bind Arrow data as JDBC prepared statement parameters. */ package org.apache.arrow.adapter.jdbc.binder; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java index 2f18b8a416d34..4676e8204eed4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ArrayConsumer.java @@ -14,29 +14,25 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Array; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.complex.ListVector; /** - * Consumer which consume array type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.complex.ListVector}. + * Consumer which consumes array type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.complex.ListVector}. */ public abstract class ArrayConsumer extends BaseConsumer<ListVector> { - /** - * Creates a consumer for {@link ListVector}. - */ + /** Creates a consumer for {@link ListVector}. 
*/ public static ArrayConsumer createConsumer( - ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { + ListVector vector, JdbcConsumer delegate, int index, boolean nullable) { if (nullable) { return new ArrayConsumer.NullableArrayConsumer(vector, delegate, index); } else { @@ -50,9 +46,7 @@ public static ArrayConsumer createConsumer( protected int innerVectorIndex = 0; - /** - * Instantiate a ArrayConsumer. - */ + /** Instantiate an ArrayConsumer. */ public ArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, index); this.delegate = delegate; @@ -81,14 +75,10 @@ void ensureInnerVectorCapacity(int targetCapacity) { } } - /** - * Nullable consumer for {@link ListVector}. - */ + /** Nullable consumer for {@link ListVector}. */ static class NullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a nullable array consumer. */ public NullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } @@ -113,14 +103,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Non-nullable consumer for {@link ListVector}. - */ + /** Non-nullable consumer for {@link ListVector}. */ static class NonNullableArrayConsumer extends ArrayConsumer { - /** - * Instantiate a nullable array consumer. - */ + /** Instantiate a non-nullable array consumer. */ public NonNullableArrayConsumer(ListVector vector, JdbcConsumer delegate, int index) { super(vector, delegate, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java index 2db128d3e2b2d..9ca3c98a7eb98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BaseConsumer.java @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.vector.ValueVector; /** * Base class for all consumers. + * * @param <V> vector type. */ public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsumer<V> { @@ -33,6 +33,7 @@ public abstract class BaseConsumer<V extends ValueVector> implements JdbcConsume /** * Constructs a new consumer. + * * @param vector the underlying vector for the consumer. * @param index the column id for the consumer. */ diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java index 19c8efa91719f..b7c547a9391b6 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BigIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BigIntVector; /** - * Consumer which consume bigint type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.BigIntVector}. + * Consumer which consumes bigint type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.BigIntVector}. 
*/ public class BigIntConsumer { - /** - * Creates a consumer for {@link BigIntVector}. - */ - public static JdbcConsumer createConsumer(BigIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BigIntVector}. */ + public static JdbcConsumer createConsumer( + BigIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableBigIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BigIntVector vector, int } } - /** - * Nullable consumer for big int. - */ + /** Nullable consumer for big int. */ static class NullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for big int. - */ + /** Non-nullable consumer for big int. */ static class NonNullableBigIntConsumer extends BaseConsumer { - /** - * Instantiate a BigIntConsumer. - */ + /** Instantiate a BigIntConsumer. */ public NonNullableBigIntConsumer(BigIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java index 538d161f9e9c7..edbc6360df6bf 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.io.InputStream; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume binary type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarBinaryVector}. + * Consumer which consume binary type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarBinaryVector}. */ public abstract class BinaryConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarBinaryVector}. - */ + /** Creates a consumer for {@link VarBinaryVector}. */ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, boolean nullable) { if (nullable) { return new NullableBinaryConsumer(vector, index); @@ -45,9 +41,7 @@ public static BinaryConsumer createConsumer(VarBinaryVector vector, int index, b private final byte[] reuseBytes = new byte[1024]; - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public BinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); if (vector != null) { @@ -55,9 +49,7 @@ public BinaryConsumer(VarBinaryVector vector, int index) { } } - /** - * consume a InputStream. - */ + /** consume a InputStream. 
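// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] Every scalar consumer
// in this package follows the factory shape shown for BigIntConsumer above:
// pick the nullable or non-nullable implementation once, then drive it row by
// row. The ResultSet and column index are hypothetical; as the comments in
// these classes note, fixed-width vectors must be pre-allocated by the caller.
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.consumer.BigIntConsumer;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.vector.BigIntVector;

class BigIntConsumerSketch {
  static void drain(ResultSet rs, BigIntVector vector) throws Exception {
    JdbcConsumer<BigIntVector> consumer =
        BigIntConsumer.createConsumer(vector, /*index=*/ 1, /*nullable=*/ true);
    int rows = 0;
    while (rs.next()) {
      consumer.consume(rs); // advances the consumer's internal currentIndex
      rows++;
    }
    vector.setValueCount(rows);
  }
}
// ---------------------------------------------------------------------------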
*/ public void consume(InputStream is) throws IOException { if (is != null) { while (currentIndex >= vector.getValueCapacity()) { @@ -74,7 +66,8 @@ public void consume(InputStream is) throws IOException { vector.getDataBuffer().setBytes(startOffset + dataLength, reuseBytes, 0, read); dataLength += read; } - offsetBuffer.setInt((currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); + offsetBuffer.setInt( + (currentIndex + 1) * ((long) VarBinaryVector.OFFSET_WIDTH), startOffset + dataLength); BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } @@ -91,14 +84,10 @@ public void resetValueVector(VarBinaryVector vector) { this.currentIndex = 0; } - /** - * Consumer for nullable binary data. - */ + /** Consumer for nullable binary data. */ static class NullableBinaryConsumer extends BinaryConsumer { - - /** - * Instantiate a BinaryConsumer. - */ + + /** Instantiate a BinaryConsumer. */ public NullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } @@ -113,14 +102,10 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } } - /** - * Consumer for non-nullable binary data. - */ + /** Consumer for non-nullable binary data. */ static class NonNullableBinaryConsumer extends BinaryConsumer { - /** - * Instantiate a BinaryConsumer. - */ + /** Instantiate a BinaryConsumer. */ public NonNullableBinaryConsumer(VarBinaryVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java index d2d94d0a40e2f..287b9509b5054 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BitConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.BitVector; /** - * Consumer which consume bit type values from {@link ResultSet}. - * Write the data to {@link BitVector}. + * Consumer which consume bit type values from {@link ResultSet}. Write the data to {@link + * BitVector}. */ public class BitConsumer { - /** - * Creates a consumer for {@link BitVector}. - */ - public static JdbcConsumer createConsumer(BitVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link BitVector}. */ + public static JdbcConsumer createConsumer( + BitVector vector, int index, boolean nullable) { if (nullable) { return new NullableBitConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(BitVector vector, int index } } - /** - * Nullable consumer for {@link BitVector}. - */ + /** Nullable consumer for {@link BitVector}. */ static class NullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. - */ + /** Instantiate a BitConsumer. */ public NullableBitConsumer(BitVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link BitVector}. - */ + /** Non-nullable consumer for {@link BitVector}. */ static class NonNullableBitConsumer extends BaseConsumer { - /** - * Instantiate a BitConsumer. 
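// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] BinaryConsumer also
// exposes the public consume(InputStream) reflowed above, so arbitrary byte
// streams can be appended to a VarBinaryVector; the 1024-byte chunking and
// offset/validity bookkeeping happen inside. The payload here is hypothetical.
import java.io.ByteArrayInputStream;
import java.io.IOException;
import org.apache.arrow.adapter.jdbc.consumer.BinaryConsumer;
import org.apache.arrow.vector.VarBinaryVector;

class BinaryConsumerSketch {
  static void append(VarBinaryVector vector, byte[] payload) throws IOException {
    BinaryConsumer consumer =
        BinaryConsumer.createConsumer(vector, /*index=*/ 1, /*nullable=*/ true);
    consumer.consume(new ByteArrayInputStream(payload));
  }
}
// ---------------------------------------------------------------------------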
- */ + /** Instantiate a BitConsumer. */ public NonNullableBitConsumer(BitVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java index e57ecdf91707a..a4fc789494e0f 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/BlobConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.Blob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarBinaryVector; /** - * Consumer which consume blob type values from {@link ResultSet}. - * Write the data to {@link VarBinaryVector}. + * Consumer which consume blob type values from {@link ResultSet}. Write the data to {@link + * VarBinaryVector}. */ public class BlobConsumer extends BaseConsumer { @@ -34,17 +32,12 @@ public class BlobConsumer extends BaseConsumer { private final boolean nullable; - /** - * Creates a consumer for {@link VarBinaryVector}. - */ - public static BlobConsumer createConsumer( - BinaryConsumer delegate, int index, boolean nullable) { + /** Creates a consumer for {@link VarBinaryVector}. */ + public static BlobConsumer createConsumer(BinaryConsumer delegate, int index, boolean nullable) { return new BlobConsumer(delegate, index, nullable); } - /** - * Instantiate a BlobConsumer. - */ + /** Instantiate a BlobConsumer. */ public BlobConsumer(BinaryConsumer delegate, int index, boolean nullable) { super(null, index); this.delegate = delegate; diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java index 3ed0c2d3cbb2f..7deba1cbffebd 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/ClobConsumer.java @@ -14,28 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.Clob; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.util.MemoryUtil; import org.apache.arrow.vector.BitVectorHelper; import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume clob type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume clob type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class ClobConsumer extends BaseConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ + /** Creates a consumer for {@link VarCharVector}. */ public static ClobConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableClobConsumer(vector, index); @@ -46,9 +42,7 @@ public static ClobConsumer createConsumer(VarCharVector vector, int index, boole private static final int BUFFER_SIZE = 256; - /** - * Instantiate a ClobConsumer. 
- */ + /** Instantiate a ClobConsumer. */ public ClobConsumer(VarCharVector vector, int index) { super(vector, index); if (vector != null) { @@ -63,14 +57,10 @@ public void resetValueVector(VarCharVector vector) { this.currentIndex = 0; } - /** - * Nullable consumer for clob data. - */ + /** Nullable consumer for clob data. */ static class NullableClobConsumer extends ClobConsumer { - - /** - * Instantiate a ClobConsumer. - */ + + /** Instantiate a ClobConsumer. */ public NullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -97,11 +87,11 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; @@ -115,14 +105,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for clob data. - */ + /** Non-nullable consumer for clob data. */ static class NonNullableClobConsumer extends ClobConsumer { - /** - * Instantiate a ClobConsumer. - */ + /** Instantiate a ClobConsumer. */ public NonNullableClobConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -148,11 +134,11 @@ public void consume(ResultSet resultSet) throws SQLException { vector.reallocDataBuffer(); } MemoryUtil.UNSAFE.copyMemory( - bytes, - MemoryUtil.BYTE_ARRAY_BASE_OFFSET, - null, - dataBuffer.memoryAddress() + startIndex + totalBytes, - bytes.length); + bytes, + MemoryUtil.BYTE_ARRAY_BASE_OFFSET, + null, + dataBuffer.memoryAddress() + startIndex + totalBytes, + bytes.length); totalBytes += bytes.length; read += readSize; @@ -161,7 +147,7 @@ public void consume(ResultSet resultSet) throws SQLException { BitVectorHelper.setBit(vector.getValidityBuffer(), currentIndex); vector.setLastSet(currentIndex); } - + currentIndex++; } } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java index e6d780956d538..2366116fd0d18 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/CompositeJdbcConsumer.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException; import org.apache.arrow.util.AutoCloseables; @@ -28,17 +26,12 @@ import org.apache.arrow.vector.VectorSchemaRoot; import org.apache.arrow.vector.types.pojo.ArrowType; -/** - * Composite consumer which hold all consumers. - * It manages the consume and cleanup process. - */ +/** Composite consumer which hold all consumers. It manages the consume and cleanup process. */ public class CompositeJdbcConsumer implements JdbcConsumer { private final JdbcConsumer[] consumers; - /** - * Construct an instance. - */ + /** Construct an instance. 
*/ public CompositeJdbcConsumer(JdbcConsumer[] consumers) { this.consumers = consumers; } @@ -51,9 +44,11 @@ public void consume(ResultSet rs) throws SQLException, IOException { } catch (Exception e) { if (consumers[i] instanceof BaseConsumer) { BaseConsumer consumer = (BaseConsumer) consumers[i]; - JdbcFieldInfo fieldInfo = new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); + JdbcFieldInfo fieldInfo = + new JdbcFieldInfo(rs.getMetaData(), consumer.columnIndexInResultSet); ArrowType arrowType = consumer.vector.getMinorType().getType(); - throw new JdbcConsumerException("Exception while consuming JDBC value", e, fieldInfo, arrowType); + throw new JdbcConsumerException( + "Exception while consuming JDBC value", e, fieldInfo, arrowType); } else { throw e; } @@ -70,17 +65,12 @@ public void close() { } catch (Exception e) { throw new RuntimeException("Error occurred while releasing resources.", e); } - } @Override - public void resetValueVector(ValueVector vector) { + public void resetValueVector(ValueVector vector) {} - } - - /** - * Reset inner consumers through vectors in the vector schema root. - */ + /** Reset inner consumers through vectors in the vector schema root. */ public void resetVectorSchemaRoot(VectorSchemaRoot root) { assert root.getFieldVectors().size() == consumers.length; for (int i = 0; i < consumers.length; i++) { @@ -88,4 +78,3 @@ public void resetVectorSchemaRoot(VectorSchemaRoot root) { } } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java index b9b83daccc25a..c271b900682a1 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DateConsumer.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.Date; @@ -22,19 +21,16 @@ import java.sql.SQLException; import java.util.Calendar; import java.util.concurrent.TimeUnit; - import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; /** - * Consumer which consume date type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DateDayVector}. + * Consumer which consume date type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DateDayVector}. */ public class DateConsumer { - /** - * Creates a consumer for {@link DateMilliVector}. - */ + /** Creates a consumer for {@link DateMilliVector}. */ public static JdbcConsumer createConsumer( DateDayVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { @@ -44,23 +40,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for date. - */ + /** Nullable consumer for date. */ static class NullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. 
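// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] The reformatted
// CompositeJdbcConsumer above wraps per-column failures in
// JdbcConsumerException, so a read loop can surface which column and Arrow
// type failed. The loop shape and names are assumptions.
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.consumer.CompositeJdbcConsumer;
import org.apache.arrow.adapter.jdbc.consumer.exceptions.JdbcConsumerException;

class CompositeConsumeSketch {
  static void drain(CompositeJdbcConsumer composite, ResultSet rs) throws Exception {
    try {
      while (rs.next()) {
        composite.consume(rs);
      }
    } catch (JdbcConsumerException e) {
      // built with the failing column's JdbcFieldInfo and ArrowType (see above)
      throw new RuntimeException("Column conversion failed: " + e.getMessage(), e);
    }
  }
}
// ---------------------------------------------------------------------------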
*/ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -68,8 +58,10 @@ public NullableDateConsumer(DateDayVector vector, int index, Calendar calendar) @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -79,23 +71,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for date. - */ + /** Non-nullable consumer for date. */ static class NonNullableDateConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a DateConsumer. - */ + /** Instantiate a DateConsumer. */ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -103,8 +89,10 @@ public NonNullableDateConsumer(DateDayVector vector, int index, Calendar calenda @Override public void consume(ResultSet resultSet) throws SQLException { - Date date = calendar == null ? resultSet.getDate(columnIndexInResultSet) : - resultSet.getDate(columnIndexInResultSet, calendar); + Date date = + calendar == null + ? resultSet.getDate(columnIndexInResultSet) + : resultSet.getDate(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()))); @@ -112,5 +100,3 @@ public void consume(ResultSet resultSet) throws SQLException { } } } - - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java index ad00d9b5a2492..eb33ea5038b98 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/Decimal256Consumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Decimal256Vector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Decimal256Vector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Decimal256Vector}. */ public abstract class Decimal256Consumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class Decimal256Consumer extends BaseConsumer * Constructs a new consumer. 
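// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] DateConsumer stores
// DateDayVector values as whole days since the UNIX epoch, using exactly the
// conversion in the consume() methods above. A quick check of the arithmetic:
import java.sql.Date;
import java.util.concurrent.TimeUnit;

class DateDaysSketch {
  static int toEpochDays(Date date) {
    // e.g. 1970-01-02T00:00:00Z -> 86_400_000 ms -> 1 day
    return Math.toIntExact(TimeUnit.MILLISECONDS.toDays(date.getTime()));
  }
}
// ---------------------------------------------------------------------------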
* * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public Decimal256Consumer(Decimal256Vector vector, int index) { this(vector, index, null); @@ -44,27 +42,23 @@ public Decimal256Consumer(Decimal256Vector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ - public Decimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + public Decimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); this.bigDecimalRoundingMode = bigDecimalRoundingMode; this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link Decimal256Vector}. - */ + /** Creates a consumer for {@link Decimal256Vector}. */ public static JdbcConsumer createConsumer( - Decimal256Vector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + Decimal256Vector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimal256Consumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +73,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. */ + public NullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +94,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimal256Consumer extends Decimal256Consumer { - /** - * Instantiate a Decimal256Consumer. - */ - public NonNullableDecimal256Consumer(Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a Decimal256Consumer. 
*/ + public NonNullableDecimal256Consumer( + Decimal256Vector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java index bed96dda8b65d..05b4d27de1022 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DecimalConsumer.java @@ -14,19 +14,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.math.BigDecimal; import java.math.RoundingMode; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.DecimalVector; /** - * Consumer which consume decimal type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.DecimalVector}. + * Consumer which consume decimal type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.DecimalVector}. */ public abstract class DecimalConsumer extends BaseConsumer { private final RoundingMode bigDecimalRoundingMode; @@ -36,7 +34,7 @@ public abstract class DecimalConsumer extends BaseConsumer { * Constructs a new consumer. * * @param vector the underlying vector for the consumer. - * @param index the column id for the consumer. + * @param index the column id for the consumer. */ public DecimalConsumer(DecimalVector vector, int index) { this(vector, index, null); @@ -44,11 +42,12 @@ public DecimalConsumer(DecimalVector vector, int index) { /** * Constructs a new consumer, with optional coercibility. + * * @param vector the underlying vector for the consumer. * @param index the column index for the consumer. - * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does not match that - * of the target vector. Set to null to retain strict matching behavior (scale of - * source and target vector must match exactly). + * @param bigDecimalRoundingMode java.math.RoundingMode to be applied if the BigDecimal scale does + * not match that of the target vector. Set to null to retain strict matching behavior (scale + * of source and target vector must match exactly). */ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index); @@ -56,15 +55,9 @@ public DecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalR this.scale = vector.getScale(); } - /** - * Creates a consumer for {@link DecimalVector}. - */ + /** Creates a consumer for {@link DecimalVector}. */ public static JdbcConsumer createConsumer( - DecimalVector vector, - int index, - boolean nullable, - RoundingMode bigDecimalRoundingMode - ) { + DecimalVector vector, int index, boolean nullable, RoundingMode bigDecimalRoundingMode) { if (nullable) { return new NullableDecimalConsumer(vector, index, bigDecimalRoundingMode); } else { @@ -79,16 +72,12 @@ protected void set(BigDecimal value) { vector.set(currentIndex, value); } - - /** - * Consumer for nullable decimal. - */ + /** Consumer for nullable decimal. */ static class NullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. 
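// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] What the optional
// bigDecimalRoundingMode documented above buys: with a null mode, a scale
// mismatch between the JDBC BigDecimal and the target vector is an error;
// with a mode set, the value is coerced. The set(...) branch doing this is
// elided from the hunk, so this is only a plausible equivalent of the
// described behavior, not the adapter's actual code path.
import java.math.BigDecimal;
import java.math.RoundingMode;

class DecimalScaleSketch {
  static BigDecimal coerce(BigDecimal fromJdbc, int vectorScale) {
    // e.g. 1.005 at vector scale 2 with HALF_UP -> 1.01
    return fromJdbc.setScale(vectorScale, RoundingMode.HALF_UP);
  }
}
// ---------------------------------------------------------------------------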
- */ - public NullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } @@ -104,15 +93,12 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Consumer for non-nullable decimal. - */ + /** Consumer for non-nullable decimal. */ static class NonNullableDecimalConsumer extends DecimalConsumer { - /** - * Instantiate a DecimalConsumer. - */ - public NonNullableDecimalConsumer(DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { + /** Instantiate a DecimalConsumer. */ + public NonNullableDecimalConsumer( + DecimalVector vector, int index, RoundingMode bigDecimalRoundingMode) { super(vector, index, bigDecimalRoundingMode); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java index e3db95d1535af..9cd31e9245472 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/DoubleConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float8Vector; /** - * Consumer which consume double type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float8Vector}. + * Consumer which consume double type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float8Vector}. */ public class DoubleConsumer { - /** - * Creates a consumer for {@link Float8Vector}. - */ - public static JdbcConsumer createConsumer(Float8Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float8Vector}. */ + public static JdbcConsumer createConsumer( + Float8Vector vector, int index, boolean nullable) { if (nullable) { return new NullableDoubleConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float8Vector vector, int } } - /** - * Nullable double consumer. - */ + /** Nullable double consumer. */ static class NullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. */ public NullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable double consumer. - */ + /** Non-nullable double consumer. */ static class NonNullableDoubleConsumer extends BaseConsumer { - /** - * Instantiate a DoubleConsumer. - */ + /** Instantiate a DoubleConsumer. 
*/ public NonNullableDoubleConsumer(Float8Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java index 830348fe94c6b..0f16a68da883e 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/FloatConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.Float4Vector; /** - * Consumer which consume float type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.Float4Vector}. + * Consumer which consume float type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.Float4Vector}. */ public class FloatConsumer { - /** - * Creates a consumer for {@link Float4Vector}. - */ - public static JdbcConsumer createConsumer(Float4Vector vector, int index, boolean nullable) { + /** Creates a consumer for {@link Float4Vector}. */ + public static JdbcConsumer createConsumer( + Float4Vector vector, int index, boolean nullable) { if (nullable) { return new NullableFloatConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(Float4Vector vector, int } } - /** - * Nullable float consumer. - */ + /** Nullable float consumer. */ static class NullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable float consumer. - */ + /** Non-nullable float consumer. */ static class NonNullableFloatConsumer extends BaseConsumer { - /** - * Instantiate a FloatConsumer. - */ + /** Instantiate a FloatConsumer. */ public NonNullableFloatConsumer(Float4Vector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java index 4e537d682ff7c..302be697fbf07 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/IntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.IntVector; /** - * Consumer which consume int type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.IntVector}. + * Consumer which consume int type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.IntVector}. */ public class IntConsumer { - /** - * Creates a consumer for {@link IntVector}. - */ - public static JdbcConsumer createConsumer(IntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link IntVector}. 
*/ + public static JdbcConsumer createConsumer( + IntVector vector, int index, boolean nullable) { if (nullable) { return new NullableIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(IntVector vector, int index } } - /** - * Nullable consumer for int. - */ + /** Nullable consumer for int. */ static class NullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NullableIntConsumer(IntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for int. - */ + /** Non-nullable consumer for int. */ static class NonNullableIntConsumer extends BaseConsumer { - /** - * Instantiate a IntConsumer. - */ + /** Instantiate a IntConsumer. */ public NonNullableIntConsumer(IntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java index 7c867c7ad64d3..1ec6ad7eb9266 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/JdbcConsumer.java @@ -14,34 +14,27 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.io.IOException; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.ValueVector; /** * An abstraction that is used to consume values from {@link ResultSet}. + * * @param The vector within consumer or its delegate, used for partially consume purpose. */ public interface JdbcConsumer extends AutoCloseable { - /** - * Consume a specific type value from {@link ResultSet} and write it to vector. - */ + /** Consume a specific type value from {@link ResultSet} and write it to vector. */ void consume(ResultSet resultSet) throws SQLException, IOException; - /** - * Close this consumer, do some clean work such as clear reuse ArrowBuf. - */ + /** Close this consumer, do some clean work such as clear reuse ArrowBuf. */ @Override void close() throws Exception; - /** - * Reset the vector within consumer for partial read purpose. - */ + /** Reset the vector within consumer for partial read purpose. */ void resetValueVector(T vector); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java index 07a071bfc096e..6223650ff2c04 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/MapConsumer.java @@ -14,46 +14,39 @@ * See the License for the specific language governing permissions and * limitations under the License. 
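// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] The JdbcConsumer
// lifecycle described by the interface above, for partial (batched) reads:
// resetValueVector lets one consumer refill a reused vector between batches.
// Batch size and names are hypothetical.
import java.sql.ResultSet;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.vector.IntVector;

class PartialReadSketch {
  static int fillBatch(
      JdbcConsumer<IntVector> consumer, ResultSet rs, IntVector vector, int batchSize)
      throws Exception {
    consumer.resetValueVector(vector); // rewind the internal index, reuse the vector
    int rows = 0;
    while (rows < batchSize && rs.next()) {
      consumer.consume(rs);
      rows++;
    }
    vector.setValueCount(rows);
    return rows;
  }
}
// ---------------------------------------------------------------------------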
*/ - package org.apache.arrow.adapter.jdbc.consumer; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; import java.util.Map; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.impl.UnionMapWriter; import org.apache.arrow.vector.util.ObjectMapperFactory; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * Consumer which consume map type values from {@link ResultSet}. - * Write the data into {@link org.apache.arrow.vector.complex.MapVector}. + * Consumer which consume map type values from {@link ResultSet}. Write the data into {@link + * org.apache.arrow.vector.complex.MapVector}. */ public class MapConsumer extends BaseConsumer { - private final UnionMapWriter writer; private final ObjectMapper objectMapper = ObjectMapperFactory.newObjectMapper(); - private final TypeReference> typeReference = new TypeReference>() {}; + private final TypeReference> typeReference = + new TypeReference>() {}; private int currentRow; - /** - * Creates a consumer for {@link MapVector}. - */ + /** Creates a consumer for {@link MapVector}. */ public static MapConsumer createConsumer(MapVector mapVector, int index, boolean nullable) { return new MapConsumer(mapVector, index); } - /** - * Instantiate a MapConsumer. - */ + /** Instantiate a MapConsumer. */ public MapConsumer(MapVector vector, int index) { super(vector, index); writer = vector.getWriter(); @@ -69,7 +62,8 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { } else if (map instanceof Map) { writeJavaMapIntoVector((Map) map); } else { - throw new IllegalArgumentException("Unknown type of map type column from JDBC " + map.getClass().getName()); + throw new IllegalArgumentException( + "Unknown type of map type column from JDBC " + map.getClass().getName()); } } else { writer.writeNull(); @@ -79,26 +73,25 @@ public void consume(ResultSet resultSet) throws SQLException, IOException { private void writeJavaMapIntoVector(Map map) { BufferAllocator allocator = vector.getAllocator(); writer.startMap(); - map.forEach((key, value) -> { - byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; - try ( - ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = valueBytes != null ? allocator.buffer(valueBytes.length) : null; - ) { - writer.startEntry(); - keyBuf.writeBytes(keyBytes); - writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - if (valueBytes != null) { - valueBuf.writeBytes(valueBytes); - writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - } else { - writer.value().varChar().writeNull(); - } - writer.endEntry(); - } - }); + map.forEach( + (key, value) -> { + byte[] keyBytes = key.getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = value != null ? value.getBytes(StandardCharsets.UTF_8) : null; + try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = + valueBytes != null ? 
allocator.buffer(valueBytes.length) : null; ) { + writer.startEntry(); + keyBuf.writeBytes(keyBytes); + writer.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + if (valueBytes != null) { + valueBuf.writeBytes(valueBytes); + writer.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + } else { + writer.value().varChar().writeNull(); + } + writer.endEntry(); + } + }); writer.endMap(); } } - diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java index a79a029f45d06..9d7a760f697a7 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/NullConsumer.java @@ -14,17 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.NullVector; /** - * Consumer which consume null type values from ResultSet. - * Corresponding to {@link org.apache.arrow.vector.NullVector}. + * Consumer which consume null type values from ResultSet. Corresponding to {@link + * org.apache.arrow.vector.NullVector}. */ public class NullConsumer extends BaseConsumer { @@ -33,6 +31,5 @@ public NullConsumer(NullVector vector) { } @Override - public void consume(ResultSet resultSet) throws SQLException { - } + public void consume(ResultSet resultSet) throws SQLException {} } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java index 2edb3605b177a..9f45c077ed0a8 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/SmallIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.SmallIntVector; /** - * Consumer which consume smallInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.SmallIntVector}. + * Consumer which consume smallInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.SmallIntVector}. */ public class SmallIntConsumer { - /** - * Creates a consumer for {@link SmallIntVector}. - */ - public static BaseConsumer createConsumer(SmallIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link SmallIntVector}. */ + public static BaseConsumer createConsumer( + SmallIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableSmallIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static BaseConsumer createConsumer(SmallIntVector vector, } } - /** - * Nullable consumer for small int. - */ + /** Nullable consumer for small int. */ static class NullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. 
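// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] MapConsumer.consume()
// above accepts the JDBC value either as a java.util.Map or as a JSON string,
// which it parses with Jackson into Map<String, String>. The equivalent of the
// String branch in isolation (the adapter obtains its mapper from
// ObjectMapperFactory; a plain ObjectMapper is used here for brevity):
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
import java.util.Map;

class MapColumnParseSketch {
  static Map<String, String> parse(String json) throws IOException {
    ObjectMapper mapper = new ObjectMapper();
    return mapper.readValue(json, new TypeReference<Map<String, String>>() {});
  }
}
// ---------------------------------------------------------------------------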
*/ public NullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for small int. - */ + /** Non-nullable consumer for small int. */ static class NonNullableSmallIntConsumer extends BaseConsumer { - /** - * Instantiate a SmallIntConsumer. - */ + /** Instantiate a SmallIntConsumer. */ public NonNullableSmallIntConsumer(SmallIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java index 4fa15ad79039e..bee19d0e4deab 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimeConsumer.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Time; import java.util.Calendar; - import org.apache.arrow.vector.TimeMilliVector; /** - * Consumer which consume time type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TimeMilliVector}. + * Consumer which consume time type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TimeMilliVector}. */ public abstract class TimeConsumer { - /** - * Creates a consumer for {@link TimeMilliVector}. - */ + /** Creates a consumer for {@link TimeMilliVector}. */ public static JdbcConsumer createConsumer( - TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { + TimeMilliVector vector, int index, boolean nullable, Calendar calendar) { if (nullable) { return new NullableTimeConsumer(vector, index, calendar); } else { @@ -42,23 +38,17 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for {@link TimeMilliVector}. - */ + /** Nullable consumer for {@link TimeMilliVector}. */ static class NullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -66,8 +56,10 @@ public NullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); if (!resultSet.wasNull()) { // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. @@ -77,23 +69,17 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for {@link TimeMilliVector}. - */ + /** Non-nullable consumer for {@link TimeMilliVector}. 
*/ static class NonNullableTimeConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index) { - this(vector, index, /* calendar */null); + this(vector, index, /* calendar */ null); } - /** - * Instantiate a TimeConsumer. - */ + /** Instantiate a TimeConsumer. */ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; @@ -101,8 +87,10 @@ public NonNullableTimeConsumer(TimeMilliVector vector, int index, Calendar calen @Override public void consume(ResultSet resultSet) throws SQLException { - Time time = calendar == null ? resultSet.getTime(columnIndexInResultSet) : - resultSet.getTime(columnIndexInResultSet, calendar); + Time time = + calendar == null + ? resultSet.getTime(columnIndexInResultSet) + : resultSet.getTime(columnIndexInResultSet, calendar); // for fixed width vectors, we have allocated enough memory proactively, // so there is no need to call the setSafe method here. vector.set(currentIndex, (int) time.getTime()); diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java index 3351e7e78a7e4..cc6269c21f04a 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampConsumer.java @@ -14,26 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; - import org.apache.arrow.vector.TimeStampMilliVector; /** - * Consumer which consume timestamp type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliVector}. + * Consumer which consume timestamp type values from {@link ResultSet}. Write the data to {@link + * TimeStampMilliVector}. */ public abstract class TimestampConsumer { - /** - * Creates a consumer for {@link TimeStampMilliVector}. - */ + /** Creates a consumer for {@link TimeStampMilliVector}. */ public static JdbcConsumer createConsumer( - TimeStampMilliVector vector, int index, boolean nullable) { + TimeStampMilliVector vector, int index, boolean nullable) { if (nullable) { return new NullableTimestampConsumer(vector, index); } else { @@ -41,14 +37,10 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp. - */ + /** Nullable consumer for timestamp. */ static class NullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. */ public NullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } @@ -65,14 +57,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp. - */ + /** Non-nullable consumer for timestamp. */ static class NonNullableTimestampConsumer extends BaseConsumer { - /** - * Instantiate a TimestampConsumer. - */ + /** Instantiate a TimestampConsumer. 
*/ public NonNullableTimestampConsumer(TimeStampMilliVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java index f08671f0be61a..3e4911ac1a161 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TimestampTZConsumer.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Timestamp; import java.util.Calendar; - import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.TimeStampMilliTZVector; /** - * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. - * Write the data to {@link TimeStampMilliTZVector}. + * Consumer which consume timestamp (with time zone) type values from {@link ResultSet}. Write the + * data to {@link TimeStampMilliTZVector}. */ public class TimestampTZConsumer { - /** - * Creates a consumer for {@link TimeStampMilliTZVector}. - */ + /** Creates a consumer for {@link TimeStampMilliTZVector}. */ public static JdbcConsumer createConsumer( TimeStampMilliTZVector vector, int index, boolean nullable, Calendar calendar) { Preconditions.checkArgument(calendar != null, "Calendar cannot be null"); @@ -43,17 +39,14 @@ public static JdbcConsumer createConsumer( } } - /** - * Nullable consumer for timestamp (with time zone). - */ + /** Nullable consumer for timestamp (with time zone). */ static class NullableTimestampTZConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NullableTimestampTZConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NullableTimestampTZConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } @@ -70,17 +63,14 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for timestamp (with time zone). - */ + /** Non-nullable consumer for timestamp (with time zone). */ static class NonNullableTimestampConsumer extends BaseConsumer { protected final Calendar calendar; - /** - * Instantiate a TimestampConsumer. - */ - public NonNullableTimestampConsumer(TimeStampMilliTZVector vector, int index, Calendar calendar) { + /** Instantiate a TimestampConsumer. */ + public NonNullableTimestampConsumer( + TimeStampMilliTZVector vector, int index, Calendar calendar) { super(vector, index); this.calendar = calendar; } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java index 40cf087a5ec66..b75b87dd81cc4 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/TinyIntConsumer.java @@ -14,24 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
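// ---------------------------------------------------------------------------
// [Editor's illustrative sketch, not part of this patch] Unlike the plain
// timestamp consumer, the time-zone-aware variant above requires a Calendar
// (enforced by Preconditions.checkArgument). A typical UTC setup:
import java.util.Calendar;
import java.util.TimeZone;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.adapter.jdbc.consumer.TimestampTZConsumer;
import org.apache.arrow.vector.TimeStampMilliTZVector;

class TimestampTZSketch {
  static JdbcConsumer<TimeStampMilliTZVector> forUtc(TimeStampMilliTZVector vector, int index) {
    Calendar utc = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
    return TimestampTZConsumer.createConsumer(vector, index, /*nullable=*/ true, utc);
  }
}
// ---------------------------------------------------------------------------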
*/ - package org.apache.arrow.adapter.jdbc.consumer; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.TinyIntVector; /** - * Consumer which consume tinyInt type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.TinyIntVector}. + * Consumer which consume tinyInt type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.TinyIntVector}. */ public abstract class TinyIntConsumer { - /** - * Creates a consumer for {@link TinyIntVector}. - */ - public static JdbcConsumer createConsumer(TinyIntVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link TinyIntVector}. */ + public static JdbcConsumer createConsumer( + TinyIntVector vector, int index, boolean nullable) { if (nullable) { return new NullableTinyIntConsumer(vector, index); } else { @@ -39,14 +36,10 @@ public static JdbcConsumer createConsumer(TinyIntVector vector, i } } - /** - * Nullable consumer for tiny int. - */ + /** Nullable consumer for tiny int. */ static class NullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for tiny int. - */ + /** Non-nullable consumer for tiny int. */ static class NonNullableTinyIntConsumer extends BaseConsumer { - /** - * Instantiate a TinyIntConsumer. - */ + /** Instantiate a TinyIntConsumer. */ public NonNullableTinyIntConsumer(TinyIntVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java index 05333715b8c2f..c81c4f0db124b 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/VarCharConsumer.java @@ -14,25 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import java.nio.charset.StandardCharsets; import java.sql.ResultSet; import java.sql.SQLException; - import org.apache.arrow.vector.VarCharVector; /** - * Consumer which consume varchar type values from {@link ResultSet}. - * Write the data to {@link org.apache.arrow.vector.VarCharVector}. + * Consumer which consume varchar type values from {@link ResultSet}. Write the data to {@link + * org.apache.arrow.vector.VarCharVector}. */ public abstract class VarCharConsumer { - /** - * Creates a consumer for {@link VarCharVector}. - */ - public static JdbcConsumer createConsumer(VarCharVector vector, int index, boolean nullable) { + /** Creates a consumer for {@link VarCharVector}. */ + public static JdbcConsumer createConsumer( + VarCharVector vector, int index, boolean nullable) { if (nullable) { return new NullableVarCharConsumer(vector, index); } else { @@ -40,14 +37,10 @@ public static JdbcConsumer createConsumer(VarCharVector vector, i } } - /** - * Nullable consumer for var char. - */ + /** Nullable consumer for var char. */ static class NullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. 
*/ public NullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } @@ -63,14 +56,10 @@ public void consume(ResultSet resultSet) throws SQLException { } } - /** - * Non-nullable consumer for var char. - */ + /** Non-nullable consumer for var char. */ static class NonNullableVarCharConsumer extends BaseConsumer { - /** - * Instantiate a VarCharConsumer. - */ + /** Instantiate a VarCharConsumer. */ public NonNullableVarCharConsumer(VarCharVector vector, int index) { super(vector, index); } diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java index b235be173cf10..04e26d640c04d 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/consumer/exceptions/JdbcConsumerException.java @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer.exceptions; import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.vector.types.pojo.ArrowType; /** - * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and the - * ArrowType for the corresponding vector for easier debugging. + * Exception while consuming JDBC data. This exception stores the JdbcFieldInfo for the column and + * the ArrowType for the corresponding vector for easier debugging. */ public class JdbcConsumerException extends RuntimeException { final JdbcFieldInfo fieldInfo; @@ -31,12 +30,13 @@ public class JdbcConsumerException extends RuntimeException { /** * Construct JdbcConsumerException with all fields. * - * @param message error message - * @param cause original exception + * @param message error message + * @param cause original exception * @param fieldInfo JdbcFieldInfo for the column * @param arrowType ArrowType for the corresponding vector */ - public JdbcConsumerException(String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { + public JdbcConsumerException( + String message, Throwable cause, JdbcFieldInfo fieldInfo, ArrowType arrowType) { super(message, cause); this.fieldInfo = fieldInfo; this.arrowType = arrowType; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java index 88a66a31aa2c9..1ad4492b35d18 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/AbstractJdbcToArrowTest.java @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. 
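All of the consumer classes above follow the same shape: a static `createConsumer` factory picks a nullability-specialized implementation once, so the per-row `consume` loop never re-checks the nullable flag. A minimal sketch of driving one of these consumers, assuming `resultSet` and `vector` come from the surrounding conversion code:

```java
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.arrow.adapter.jdbc.consumer.JdbcConsumer;
import org.apache.arrow.adapter.jdbc.consumer.TinyIntConsumer;
import org.apache.arrow.vector.TinyIntVector;

class ConsumerSketch {
  // Drains column 1 of the result set into the vector. The nullable flag
  // selects the Nullable or NonNullable implementation up front.
  static void drain(ResultSet resultSet, TinyIntVector vector) throws SQLException {
    JdbcConsumer<TinyIntVector> consumer =
        TinyIntConsumer.createConsumer(vector, /*index=*/ 1, /*nullable=*/ true);
    while (resultSet.next()) {
      consumer.consume(resultSet);
    }
  }
}
```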
*/ - package org.apache.arrow.adapter.jdbc; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import java.io.IOException; import java.sql.Connection; import java.sql.DriverManager; @@ -30,7 +31,6 @@ import java.util.Map; import java.util.TimeZone; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.util.Preconditions; @@ -41,12 +41,7 @@ import org.junit.Before; import org.junit.Test; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; - -/** - * Class to abstract out some common test functionality for testing JDBC to Arrow. - */ +/** Class to abstract out some common test functionality for testing JDBC to Arrow. */ public abstract class AbstractJdbcToArrowTest { protected static final String BIGINT = "BIGINT_FIELD5"; @@ -69,7 +64,8 @@ public abstract class AbstractJdbcToArrowTest { protected static final String TINYINT = "TINYINT_FIELD3"; protected static final String VARCHAR = "VARCHAR_FIELD13"; protected static final String NULL = "NULL_FIELD18"; - protected static final Map ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = new HashMap<>(); + protected static final Map ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP = + new HashMap<>(); static { ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP.put(LIST, new JdbcFieldInfo(Types.INTEGER)); @@ -86,12 +82,12 @@ public abstract class AbstractJdbcToArrowTest { * @return Table object * @throws IOException on error */ - protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) throws IOException { - return new ObjectMapper(new YAMLFactory()).readValue( - clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); + protected static Table getTable(String ymlFilePath, @SuppressWarnings("rawtypes") Class clss) + throws IOException { + return new ObjectMapper(new YAMLFactory()) + .readValue(clss.getClassLoader().getResourceAsStream(ymlFilePath), Table.class); } - /** * This method creates Connection object and DB table and also populate data into table for test. * @@ -105,7 +101,7 @@ public void setUp() throws SQLException, ClassNotFoundException { String driver = "org.h2.Driver"; Class.forName(driver); conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement();) { + try (Statement stmt = conn.createStatement(); ) { stmt.executeUpdate(table.getCreate()); for (String insert : table.getData()) { stmt.executeUpdate(insert); @@ -136,12 +132,13 @@ public void destroy() throws SQLException { * @throws ClassNotFoundException on error * @throws IOException on error */ - public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings("rawtypes") Class clss) + public static Object[][] prepareTestData( + String[] testFiles, @SuppressWarnings("rawtypes") Class clss) throws SQLException, ClassNotFoundException, IOException { Object[][] tableArr = new Object[testFiles.length][]; int i = 0; for (String testFile : testFiles) { - tableArr[i++] = new Object[]{getTable(testFile, clss)}; + tableArr[i++] = new Object[] {getTable(testFile, clss)}; } return tableArr; } @@ -159,86 +156,90 @@ public static Object[][] prepareTestData(String[] testFiles, @SuppressWarnings(" * Abstract method to implement logic to assert test various datatype values. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. 
- * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ public abstract void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector); /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. - * This method uses the default Calendar instance with default TimeZone and Locale as returned by the JVM. - * If you wish to use specific TimeZone or Locale for any Date, Time and Timestamp datasets, you may want use - * overloaded API that taken Calendar object instance. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. This method uses the default Calendar instance with default TimeZone and Locale as + * returned by the JVM. If you wish to use specific TimeZone or Locale for any Date, Time and + * Timestamp datasets, you may want use overloaded API that taken Calendar object instance. * - * This method is for test only. + *

    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. Since - * the caller has passed the connection object it's the responsibility of the caller to close or - * return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ public VectorSchemaRoot sqlToArrow(Connection connection, String query, BufferAllocator allocator) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(connection, query, config); } /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. Since - * the caller has passed the connection object it's the responsibility of the caller to close or - * return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param allocator Memory allocator - * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param allocator Memory allocator + * @param calendar Calendar object to use to handle Date, Time and Timestamp datasets. * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ public VectorSchemaRoot sqlToArrow( - Connection connection, - String query, - BufferAllocator allocator, - Calendar calendar) throws SQLException, IOException { + Connection connection, String query, BufferAllocator allocator, Calendar calendar) + throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory allocator object cannot be null"); Preconditions.checkNotNull(calendar, "Calendar object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(connection, query, config); } /** - * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow objects. + * For the given SQL query, execute and fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * - * @param connection Database connection to be used. This method will not close the passed connection object. - * Since the caller has passed the connection object it's the responsibility of the caller - * to close or return the connection to the pool. - * @param query The DB Query to fetch the data. - * @param config Configuration + * @param connection Database connection to be used. This method will not close the passed + * connection object. Since the caller has passed the connection object it's the + * responsibility of the caller to close or return the connection to the pool. + * @param query The DB Query to fetch the data. + * @param config Configuration * @return Arrow Data Objects {@link VectorSchemaRoot} - * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources opened such as - * ResultSet and Statement objects. + * @throws SQLException Propagate any SQL Exceptions to the caller after closing any resources + * opened such as ResultSet and Statement objects. */ - public static VectorSchemaRoot sqlToArrow(Connection connection, String query, JdbcToArrowConfig config) + public static VectorSchemaRoot sqlToArrow( + Connection connection, String query, JdbcToArrowConfig config) throws SQLException, IOException { Preconditions.checkNotNull(connection, "JDBC connection object cannot be null"); - Preconditions.checkArgument(query != null && query.length() > 0, "SQL query cannot be null or empty"); + Preconditions.checkArgument( + query != null && query.length() > 0, "SQL query cannot be null or empty"); try (Statement stmt = connection.createStatement()) { return sqlToArrow(stmt.executeQuery(query), config); @@ -246,10 +247,10 @@ public static VectorSchemaRoot sqlToArrow(Connection connection, String query, J } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. This - * method uses the default RootAllocator and Calendar object. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. This method uses the default RootAllocator and Calendar object. * - * This method is for test only. + *
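Taken together, these overloads reduce to one pattern: build a `JdbcToArrowConfig`, then hand it a connection plus query (or a `ResultSet`). A hedged sketch of that call sequence; the query string is a placeholder, and releasing the returned root and the allocator is left to the caller's try blocks:

```java
import java.sql.Connection;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.BufferAllocator;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

class SqlToArrowSketch {
  static void dump(Connection connection) throws Exception {
    try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) {
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()).build();
      // sqlToArrow executes the query and materializes the rows as Arrow vectors.
      try (VectorSchemaRoot root =
          AbstractJdbcToArrowTest.sqlToArrow(connection, "SELECT * FROM table1", config)) {
        System.out.println(root.contentToTSVString());
      }
    }
  }
}
```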

    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @return Arrow Data Objects {@link VectorSchemaRoot} @@ -262,9 +263,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet) throws SQLExcepti } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator @@ -275,62 +277,69 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, BufferAllocator a throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, JdbcToArrowUtils.getUtcCalendar()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ - public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) throws SQLException, IOException { + public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, Calendar calendar) + throws SQLException, IOException { Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database * @param allocator Memory allocator to use. - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ public static VectorSchemaRoot sqlToArrow( - ResultSet resultSet, - BufferAllocator allocator, - Calendar calendar) + ResultSet resultSet, BufferAllocator allocator, Calendar calendar) throws SQLException, IOException { Preconditions.checkNotNull(allocator, "Memory Allocator object cannot be null"); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(allocator, calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(allocator, calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); return sqlToArrow(resultSet, config); } /** - * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow objects. + * For the given JDBC {@link ResultSet}, fetch the data from Relational DB and convert it to Arrow + * objects. * - * This method is for test only. + *

    This method is for test only. * * @param resultSet ResultSet to use to fetch the data from underlying database - * @param config Configuration of the conversion from JDBC to Arrow. + * @param config Configuration of the conversion from JDBC to Arrow. * @return Arrow Data Objects {@link VectorSchemaRoot} * @throws SQLException on error */ @@ -339,8 +348,10 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig Preconditions.checkNotNull(resultSet, "JDBC ResultSet object cannot be null"); Preconditions.checkNotNull(config, "The configuration cannot be null"); - VectorSchemaRoot root = VectorSchemaRoot.create( - JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), config.getAllocator()); + VectorSchemaRoot root = + VectorSchemaRoot.create( + JdbcToArrowUtils.jdbcToArrowSchema(resultSet.getMetaData(), config), + config.getAllocator()); if (config.getTargetBatchSize() != JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) { ValueVectorUtility.preAllocate(root, config.getTargetBatchSize()); } @@ -350,12 +361,14 @@ public static VectorSchemaRoot sqlToArrow(ResultSet resultSet, JdbcToArrowConfig /** * Register MAP_FIELD20 as ArrowType.Map - * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null if none. + * + * @param calendar Calendar instance to use for Date, Time and Timestamp datasets, or null + * if none. * @param rsmd ResultSetMetaData to lookup column name from result set metadata * @return typeConverter instance with mapping column to Map type */ protected Function jdbcToArrowTypeConverter( - Calendar calendar, ResultSetMetaData rsmd) { + Calendar calendar, ResultSetMetaData rsmd) { return (jdbcFieldInfo) -> { String columnLabel = null; try { @@ -377,5 +390,4 @@ protected Function jdbcToArrowTypeConverter( protected ResultSetMetaData getQueryMetaData(String query) throws SQLException { return conn.createStatement().executeQuery(query).getMetaData(); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java index b1a8b8f226753..cd6a78eae2b1a 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcFieldInfoTest.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.*; import java.sql.Types; - import org.junit.Test; public class JdbcFieldInfoTest { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java index a94f0aa454f1d..a05130f18e4ac 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcParameterBinderTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.assertj.core.api.Assertions.assertThat; @@ -32,7 +31,6 @@ import java.util.List; import java.util.Map; import java.util.function.BiConsumer; - import org.apache.arrow.adapter.jdbc.binder.ColumnBinder; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; @@ -106,7 +104,7 @@ void bindOrder() throws SQLException { Field.nullable("ints1", new ArrowType.Int(32, true)), Field.nullable("ints2", new ArrowType.Int(32, true)))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) .bind(/*parameterIndex=*/ 1, /*columnIndex=*/ 2) @@ -161,17 +159,17 @@ void bindOrder() throws SQLException { @Test void customBinder() throws SQLException { final Schema schema = - new Schema(Collections.singletonList( - Field.nullable("ints0", new ArrowType.Int(32, true)))); + new Schema(Collections.singletonList(Field.nullable("ints0", new ArrowType.Int(32, true)))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root) .bind( /*parameterIndex=*/ 1, new ColumnBinder() { private final IntVector vector = (IntVector) root.getVector(0); + @Override public void bind(PreparedStatement statement, int parameterIndex, int rowIndex) throws SQLException { @@ -212,7 +210,9 @@ public FieldVector getVector() { @Test void bool() throws SQLException { - testSimpleType(ArrowType.Bool.INSTANCE, Types.BOOLEAN, + testSimpleType( + ArrowType.Bool.INSTANCE, + Types.BOOLEAN, (BitVector vector, Integer index, Boolean value) -> vector.setSafe(index, value ? 
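The `bindOrder` and `customBinder` tests drive the binder one row at a time; paired with standard JDBC batching, the same API sketches out as below, assuming `statement` is a prepared INSERT whose parameters line up with the columns of `root`:

```java
import java.sql.PreparedStatement;
import java.sql.SQLException;
import org.apache.arrow.adapter.jdbc.JdbcParameterBinder;
import org.apache.arrow.vector.VectorSchemaRoot;

class BinderSketch {
  static void insertAll(PreparedStatement statement, VectorSchemaRoot root)
      throws SQLException {
    JdbcParameterBinder binder =
        JdbcParameterBinder.builder(statement, root).bindAll().build();
    // next() binds one row's parameters and reports whether a row was available.
    while (binder.next()) {
      statement.addBatch();
    }
    statement.executeBatch();
  }
}
```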
1 : 0), BitVector::setNull, Arrays.asList(true, false, true)); @@ -220,53 +220,76 @@ void bool() throws SQLException { @Test void int8() throws SQLException { - testSimpleType(new ArrowType.Int(8, true), Types.TINYINT, - TinyIntVector::setSafe, TinyIntVector::setNull, + testSimpleType( + new ArrowType.Int(8, true), + Types.TINYINT, + TinyIntVector::setSafe, + TinyIntVector::setNull, Arrays.asList(Byte.MAX_VALUE, Byte.MIN_VALUE, (byte) 42)); } @Test void int16() throws SQLException { - testSimpleType(new ArrowType.Int(16, true), Types.SMALLINT, - SmallIntVector::setSafe, SmallIntVector::setNull, + testSimpleType( + new ArrowType.Int(16, true), + Types.SMALLINT, + SmallIntVector::setSafe, + SmallIntVector::setNull, Arrays.asList(Short.MAX_VALUE, Short.MIN_VALUE, (short) 42)); } @Test void int32() throws SQLException { - testSimpleType(new ArrowType.Int(32, true), Types.INTEGER, - IntVector::setSafe, IntVector::setNull, + testSimpleType( + new ArrowType.Int(32, true), + Types.INTEGER, + IntVector::setSafe, + IntVector::setNull, Arrays.asList(Integer.MAX_VALUE, Integer.MIN_VALUE, 42)); } @Test void int64() throws SQLException { - testSimpleType(new ArrowType.Int(64, true), Types.BIGINT, - BigIntVector::setSafe, BigIntVector::setNull, + testSimpleType( + new ArrowType.Int(64, true), + Types.BIGINT, + BigIntVector::setSafe, + BigIntVector::setNull, Arrays.asList(Long.MAX_VALUE, Long.MIN_VALUE, 42L)); } @Test void float32() throws SQLException { - testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), Types.REAL, - Float4Vector::setSafe, Float4Vector::setNull, + testSimpleType( + new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), + Types.REAL, + Float4Vector::setSafe, + Float4Vector::setNull, Arrays.asList(Float.MIN_VALUE, Float.MAX_VALUE, Float.POSITIVE_INFINITY)); } @Test void float64() throws SQLException { - testSimpleType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), Types.DOUBLE, - Float8Vector::setSafe, Float8Vector::setNull, + testSimpleType( + new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), + Types.DOUBLE, + Float8Vector::setSafe, + Float8Vector::setNull, Arrays.asList(Double.MIN_VALUE, Double.MAX_VALUE, Double.POSITIVE_INFINITY)); } @Test void time32() throws SQLException { - testSimpleType(new ArrowType.Time(TimeUnit.SECOND, 32), Types.TIME, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / 1_000)), + testSimpleType( + new ArrowType.Time(TimeUnit.SECOND, 32), + Types.TIME, + (valueVectors, index, value) -> + valueVectors.setSafe(index, (int) (value.getTime() / 1_000)), TimeSecVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType(new ArrowType.Time(TimeUnit.MILLISECOND, 32), Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.MILLISECOND, 32), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (int) value.getTime()), TimeMilliVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); @@ -274,11 +297,15 @@ void time32() throws SQLException { @Test void time64() throws SQLException { - testSimpleType(new ArrowType.Time(TimeUnit.MICROSECOND, 64), Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.MICROSECOND, 64), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000)), TimeMicroVector::setNull, Arrays.asList(new Time(-128_000), new Time(104_000), new Time(-42_000))); - testSimpleType(new ArrowType.Time(TimeUnit.NANOSECOND, 64), 
Types.TIME, + testSimpleType( + new ArrowType.Time(TimeUnit.NANOSECOND, 64), + Types.TIME, (valueVectors, index, value) -> valueVectors.setSafe(index, (value.getTime() * 1_000_000)), TimeNanoVector::setNull, Arrays.asList(new Time(-128), new Time(104), new Time(-42))); @@ -286,57 +313,92 @@ void time64() throws SQLException { @Test void date32() throws SQLException { - testSimpleType(new ArrowType.Date(DateUnit.DAY), Types.DATE, - (valueVectors, index, value) -> valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)), + testSimpleType( + new ArrowType.Date(DateUnit.DAY), + Types.DATE, + (valueVectors, index, value) -> + valueVectors.setSafe(index, (int) (value.getTime() / MILLIS_PER_DAY)), DateDayVector::setNull, - Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); + Arrays.asList( + new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); } @Test void date64() throws SQLException { - testSimpleType(new ArrowType.Date(DateUnit.MILLISECOND), Types.DATE, + testSimpleType( + new ArrowType.Date(DateUnit.MILLISECOND), + Types.DATE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), DateMilliVector::setNull, - Arrays.asList(new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); + Arrays.asList( + new Date(-5 * MILLIS_PER_DAY), new Date(2 * MILLIS_PER_DAY), new Date(MILLIS_PER_DAY))); } @Test void timestamp() throws SQLException { - List values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, null), Types.TIMESTAMP, + List values = + Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.SECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), Types.TIMESTAMP, + TimeStampSecVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MILLISECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - TimeStampMilliVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), Types.TIMESTAMP, + TimeStampMilliVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MICROSECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), Types.TIMESTAMP, + TimeStampMicroVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.NANOSECOND, null), + Types.TIMESTAMP, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoVector::setNull, values); + TimeStampNanoVector::setNull, + values); } @Test void timestampTz() throws SQLException { - List values = Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); - testSimpleType(new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + List values = + Arrays.asList(new Timestamp(-128_000), new Timestamp(104_000), new Timestamp(-42_000)); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.SECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, 
(valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() / 1_000), - TimeStampSecTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampSecTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MILLISECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime()), - TimeStampMilliTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampMilliTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.MICROSECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000), - TimeStampMicroTZVector::setNull, values); - testSimpleType(new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), Types.TIMESTAMP_WITH_TIMEZONE, + TimeStampMicroTZVector::setNull, + values); + testSimpleType( + new ArrowType.Timestamp(TimeUnit.NANOSECOND, "UTC"), + Types.TIMESTAMP_WITH_TIMEZONE, (valueVectors, index, value) -> valueVectors.setSafe(index, value.getTime() * 1_000_000), - TimeStampNanoTZVector::setNull, values); + TimeStampNanoTZVector::setNull, + values); } @Test void utf8() throws SQLException { - testSimpleType(ArrowType.Utf8.INSTANCE, Types.VARCHAR, + testSimpleType( + ArrowType.Utf8.INSTANCE, + Types.VARCHAR, (VarCharVector vector, Integer index, String value) -> vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), BaseVariableWidthVector::setNull, @@ -345,7 +407,9 @@ void utf8() throws SQLException { @Test void largeUtf8() throws SQLException { - testSimpleType(ArrowType.LargeUtf8.INSTANCE, Types.LONGVARCHAR, + testSimpleType( + ArrowType.LargeUtf8.INSTANCE, + Types.LONGVARCHAR, (LargeVarCharVector vector, Integer index, String value) -> vector.setSafe(index, value.getBytes(StandardCharsets.UTF_8)), BaseLargeVariableWidthVector::setNull, @@ -354,155 +418,200 @@ void largeUtf8() throws SQLException { @Test void binary() throws SQLException { - testSimpleType(ArrowType.Binary.INSTANCE, Types.VARBINARY, - (VarBinaryVector vector, Integer index, byte[] value) -> - vector.setSafe(index, value), + testSimpleType( + ArrowType.Binary.INSTANCE, + Types.VARBINARY, + (VarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), BaseVariableWidthVector::setNull, Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); } @Test void largeBinary() throws SQLException { - testSimpleType(ArrowType.LargeBinary.INSTANCE, Types.LONGVARBINARY, - (LargeVarBinaryVector vector, Integer index, byte[] value) -> - vector.setSafe(index, value), + testSimpleType( + ArrowType.LargeBinary.INSTANCE, + Types.LONGVARBINARY, + (LargeVarBinaryVector vector, Integer index, byte[] value) -> vector.setSafe(index, value), BaseLargeVariableWidthVector::setNull, Arrays.asList(new byte[0], new byte[] {2, -4}, new byte[] {0, -1, 127, -128})); } @Test void fixedSizeBinary() throws SQLException { - testSimpleType(new ArrowType.FixedSizeBinary(3), Types.BINARY, - FixedSizeBinaryVector::setSafe, FixedSizeBinaryVector::setNull, + testSimpleType( + new ArrowType.FixedSizeBinary(3), + Types.BINARY, + FixedSizeBinaryVector::setSafe, + FixedSizeBinaryVector::setNull, Arrays.asList(new byte[3], new byte[] {1, 2, -4}, new byte[] {-1, 127, -128})); } @Test void decimal128() throws SQLException { - testSimpleType(new 
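The scale factors in the timestamp tests deserve a note: `java.sql.Timestamp#getTime` always yields epoch milliseconds, so each Arrow `TimeUnit` needs its own conversion. A small worked example using one of the values above:

```java
import java.sql.Timestamp;

class TimestampScaleSketch {
  public static void main(String[] args) {
    Timestamp value = new Timestamp(104_000L); // 104 seconds after the epoch
    long seconds = value.getTime() / 1_000;    // TimeUnit.SECOND      -> 104
    long millis = value.getTime();             // TimeUnit.MILLISECOND -> 104_000
    long micros = value.getTime() * 1_000;     // TimeUnit.MICROSECOND -> 104_000_000
    long nanos = value.getTime() * 1_000_000;  // TimeUnit.NANOSECOND  -> 104_000_000_000
    System.out.printf("%d %d %d %d%n", seconds, millis, micros, nanos);
  }
}
```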
ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 128), Types.DECIMAL, - DecimalVector::setSafe, DecimalVector::setNull, - Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); + testSimpleType( + new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 128), + Types.DECIMAL, + DecimalVector::setSafe, + DecimalVector::setNull, + Arrays.asList( + new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); } @Test void decimal256() throws SQLException { - testSimpleType(new ArrowType.Decimal(/*precision*/ 12, /*scale*/3, 256), Types.DECIMAL, - Decimal256Vector::setSafe, Decimal256Vector::setNull, - Arrays.asList(new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); + testSimpleType( + new ArrowType.Decimal(/*precision*/ 12, /*scale*/ 3, 256), + Types.DECIMAL, + Decimal256Vector::setSafe, + Decimal256Vector::setNull, + Arrays.asList( + new BigDecimal("120.429"), new BigDecimal("-10590.123"), new BigDecimal("0.000"))); } @Test void listOfDouble() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Double[]{0.0, Math.PI}, new Double[]{1.1, -352346.2, 2355.6}, - new Double[]{-1024.3}, new Double[]{}); - testListType(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), setValue, ListVector::setNull, values); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(doubleValue -> writer.float8().writeFloat8(doubleValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Double[] {0.0, Math.PI}, new Double[] {1.1, -352346.2, 2355.6}, + new Double[] {-1024.3}, new Double[] {}); + testListType( + new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), + setValue, + ListVector::setNull, + values); } @Test void listOfInt64() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Long[]{1L, 2L, 3L}, new Long[]{4L, 5L}, - new Long[]{512L, 1024L, 2048L, 4096L}, new Long[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(longValue -> writer.bigInt().writeBigInt(longValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Long[] {1L, 2L, 3L}, + new Long[] {4L, 5L}, + new Long[] {512L, 1024L, 2048L, 4096L}, + new Long[] {}); testListType((ArrowType) new ArrowType.Int(64, true), setValue, ListVector::setNull, values); } @Test void listOfInt32() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = 
listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Integer[]{1, 2, 3}, new Integer[]{4, 5}, - new Integer[]{512, 1024, 2048, 4096}, new Integer[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values).forEach(integerValue -> writer.integer().writeInt(integerValue)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Integer[] {1, 2, 3}, + new Integer[] {4, 5}, + new Integer[] {512, 1024, 2048, 4096}, + new Integer[] {}); testListType((ArrowType) new ArrowType.Int(32, true), setValue, ListVector::setNull, values); } @Test void listOfBoolean() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0)); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new Boolean[]{true, false}, - new Boolean[]{false, false}, new Boolean[]{true, true, false, true}, new Boolean[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values) + .forEach(booleanValue -> writer.bit().writeBit(booleanValue ? 1 : 0)); + writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new Boolean[] {true, false}, + new Boolean[] {false, false}, + new Boolean[] {true, true, false, true}, + new Boolean[] {}); testListType((ArrowType) new ArrowType.Bool(), setValue, ListVector::setNull, values); } @Test void listOfString() throws SQLException { - TriConsumer setValue = (listVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); - writer.setPosition(index); - writer.startList(); - Arrays.stream(values).forEach(stringValue -> { - if (stringValue != null) { - byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); - try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { - stringBuffer.writeBytes(stringValueBytes); - writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); - } - } else { - writer.varChar().writeNull(); - } - }); - writer.endList(); - listVector.setLastSet(index); - }; - List values = Arrays.asList(new String[]{"aaaa", "b1"}, - new String[]{"c", null, "d"}, new String[]{"e", "f", "g", "h"}, new String[]{}); + TriConsumer setValue = + (listVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionListWriter writer = listVector.getWriter(); + writer.setPosition(index); + writer.startList(); + Arrays.stream(values) + .forEach( + stringValue -> { + if (stringValue != null) { + byte[] stringValueBytes = stringValue.getBytes(StandardCharsets.UTF_8); + try (ArrowBuf stringBuffer = allocator.buffer(stringValueBytes.length)) { + stringBuffer.writeBytes(stringValueBytes); + writer.varChar().writeVarChar(0, stringValueBytes.length, stringBuffer); + } + } else { + writer.varChar().writeNull(); + } + }); + 
writer.endList(); + listVector.setLastSet(index); + }; + List values = + Arrays.asList( + new String[] {"aaaa", "b1"}, + new String[] {"c", null, "d"}, + new String[] {"e", "f", "g", "h"}, + new String[] {}); testListType((ArrowType) new ArrowType.Utf8(), setValue, ListVector::setNull, values); } @Test void mapOfString() throws SQLException { - TriConsumer> setValue = (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values.entrySet().forEach(mapValue -> { - if (mapValue != null) { - byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); - byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); - try ( - ArrowBuf keyBuf = allocator.buffer(keyBytes.length); - ArrowBuf valueBuf = allocator.buffer(valueBytes.length); - ) { - mapWriter.startEntry(); - keyBuf.writeBytes(keyBytes); - valueBuf.writeBytes(valueBytes); - mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); - mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); - mapWriter.endEntry(); - } - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; + TriConsumer> setValue = + (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values + .entrySet() + .forEach( + mapValue -> { + if (mapValue != null) { + byte[] keyBytes = mapValue.getKey().getBytes(StandardCharsets.UTF_8); + byte[] valueBytes = mapValue.getValue().getBytes(StandardCharsets.UTF_8); + try (ArrowBuf keyBuf = allocator.buffer(keyBytes.length); + ArrowBuf valueBuf = allocator.buffer(valueBytes.length); ) { + mapWriter.startEntry(); + keyBuf.writeBytes(keyBytes); + valueBuf.writeBytes(valueBytes); + mapWriter.key().varChar().writeVarChar(0, keyBytes.length, keyBuf); + mapWriter.value().varChar().writeVarChar(0, valueBytes.length, valueBuf); + mapWriter.endEntry(); + } + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; JsonStringHashMap value1 = new JsonStringHashMap(); value1.put("a", "b"); @@ -514,28 +623,34 @@ void mapOfString() throws SQLException { JsonStringHashMap value3 = new JsonStringHashMap(); value3.put("y", "z"); value3.put("arrow", "cool"); - List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); - testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); + List> values = + Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType( + new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Utf8()); } @Test void mapOfInteger() throws SQLException { - TriConsumer> setValue = (mapVector, index, values) -> { - org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); - mapWriter.setPosition(index); - mapWriter.startMap(); - values.entrySet().forEach(mapValue -> { - if (mapValue != null) { - mapWriter.startEntry(); - mapWriter.key().integer().writeInt(mapValue.getKey()); - mapWriter.value().integer().writeInt(mapValue.getValue()); - mapWriter.endEntry(); - } else { - mapWriter.writeNull(); - } - }); - mapWriter.endMap(); - }; + TriConsumer> setValue = + (mapVector, index, values) -> { + org.apache.arrow.vector.complex.impl.UnionMapWriter mapWriter = mapVector.getWriter(); + mapWriter.setPosition(index); + mapWriter.startMap(); + values + .entrySet() + .forEach( + mapValue -> { + if 
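For readers new to `UnionListWriter`, the protocol the list tests rely on is: position the writer, open the list, emit elements through a typed sub-writer, close the list, then record the last written index. A minimal self-contained sketch for a single `BIGINT` list:

```java
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.complex.ListVector;
import org.apache.arrow.vector.complex.impl.UnionListWriter;

class ListWriterSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
        ListVector vector = ListVector.empty("field", allocator)) {
      UnionListWriter writer = vector.getWriter();
      writer.setPosition(0); // row 0
      writer.startList();
      writer.bigInt().writeBigInt(1L);
      writer.bigInt().writeBigInt(2L);
      writer.endList();
      vector.setLastSet(0);
      vector.setValueCount(1);
    }
  }
}
```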
(mapValue != null) { + mapWriter.startEntry(); + mapWriter.key().integer().writeInt(mapValue.getKey()); + mapWriter.value().integer().writeInt(mapValue.getValue()); + mapWriter.endEntry(); + } else { + mapWriter.writeNull(); + } + }); + mapWriter.endMap(); + }; JsonStringHashMap value1 = new JsonStringHashMap(); value1.put(1, 2); @@ -547,8 +662,10 @@ void mapOfInteger() throws SQLException { JsonStringHashMap value3 = new JsonStringHashMap(); value3.put(Integer.MIN_VALUE, Integer.MAX_VALUE); value3.put(0, 4096); - List> values = Arrays.asList(value1, value2, value3, Collections.emptyMap()); - testMapType(new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); + List> values = + Arrays.asList(value1, value2, value3, Collections.emptyMap()); + testMapType( + new ArrowType.Map(true), setValue, MapVector::setNull, values, new ArrowType.Int(32, true)); } @FunctionalInterface @@ -556,11 +673,16 @@ interface TriConsumer { void accept(T value1, U value2, V value3); } - void testSimpleType(ArrowType arrowType, int jdbcType, TriConsumer setValue, - BiConsumer setNull, List values) throws SQLException { + void testSimpleType( + ArrowType arrowType, + int jdbcType, + TriConsumer setValue, + BiConsumer setNull, + List values) + throws SQLException { Schema schema = new Schema(Collections.singletonList(Field.nullable("field", arrowType))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -610,7 +732,7 @@ void testSimpleType(ArrowType arrowType, int jdbcType // Non-nullable (since some types have a specialized binder) schema = new Schema(Collections.singletonList(Field.notNullable("field", arrowType))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -650,15 +772,23 @@ void testSimpleType(ArrowType arrowType, int jdbcType } } - void testListType(ArrowType arrowType, TriConsumer setValue, - BiConsumer setNull, List values) throws SQLException { + void testListType( + ArrowType arrowType, + TriConsumer setValue, + BiConsumer setNull, + List values) + throws SQLException { int jdbcType = Types.ARRAY; - Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable( - new ArrowType.List()), Collections.singletonList( - new Field("element", FieldType.notNullable(arrowType), null) - )))); + Schema schema = + new Schema( + Collections.singletonList( + new Field( + "field", + FieldType.nullable(new ArrowType.List()), + Collections.singletonList( + new Field("element", FieldType.notNullable(arrowType), null))))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -706,12 +836,16 @@ void testListType(ArrowType 
arrowType, TriConsumer void testListType(ArrowType arrowType, TriConsumer void testMapType(ArrowType arrowType, TriConsumer setValue, - BiConsumer setNull, List values, - ArrowType elementType) throws SQLException { + void testMapType( + ArrowType arrowType, + TriConsumer setValue, + BiConsumer setNull, + List values, + ArrowType elementType) + throws SQLException { int jdbcType = Types.VARCHAR; FieldType keyType = new FieldType(false, elementType, null, null); FieldType mapType = new FieldType(false, ArrowType.Struct.INSTANCE, null, null); - Schema schema = new Schema(Collections.singletonList(new Field("field", FieldType.nullable(arrowType), - Collections.singletonList(new Field(MapVector.KEY_NAME, mapType, - Arrays.asList(new Field(MapVector.KEY_NAME, keyType, null), - new Field(MapVector.VALUE_NAME, keyType, null))))))); + Schema schema = + new Schema( + Collections.singletonList( + new Field( + "field", + FieldType.nullable(arrowType), + Collections.singletonList( + new Field( + MapVector.KEY_NAME, + mapType, + Arrays.asList( + new Field(MapVector.KEY_NAME, keyType, null), + new Field(MapVector.VALUE_NAME, keyType, null))))))); try (final MockPreparedStatement statement = new MockPreparedStatement(); - final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { + final VectorSchemaRoot root = VectorSchemaRoot.create(schema, allocator)) { final JdbcParameterBinder binder = JdbcParameterBinder.builder(statement, root).bindAll().build(); assertThat(binder.next()).isFalse(); @@ -810,18 +957,31 @@ void testMapType(ArrowType arrowType, TriConsumer metadata) { + private static Field field( + String name, boolean nullable, ArrowType type, Map metadata) { return new Field(name, new FieldType(nullable, type, null, metadata), Collections.emptyList()); } @@ -90,16 +91,26 @@ private static Map metadata(String... 
entries) { public void schemaComment() throws Exception { boolean includeMetadata = false; Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema expectedSchema = new Schema(Arrays.asList( - field("ID", false, Types.MinorType.BIGINT.getType(), - metadata("comment", "Record identifier")), - field("NAME", true, Types.MinorType.VARCHAR.getType(), - metadata("comment", "Name of record")), - field("COLUMN1", true, Types.MinorType.BIT.getType(), - metadata()), - field("COLUMNN", true, Types.MinorType.INT.getType(), - metadata("comment", "Informative description of columnN")) - ), metadata("comment", "This is super special table with valuable data")); + Schema expectedSchema = + new Schema( + Arrays.asList( + field( + "ID", + false, + Types.MinorType.BIGINT.getType(), + metadata("comment", "Record identifier")), + field( + "NAME", + true, + Types.MinorType.VARCHAR.getType(), + metadata("comment", "Name of record")), + field("COLUMN1", true, Types.MinorType.BIT.getType(), metadata()), + field( + "COLUMNN", + true, + Types.MinorType.INT.getType(), + metadata("comment", "Informative description of columnN"))), + metadata("comment", "This is super special table with valuable data")); assertThat(schema).isEqualTo(expectedSchema); } @@ -107,47 +118,60 @@ public void schemaComment() throws Exception { public void schemaCommentWithDatabaseMetadata() throws Exception { boolean includeMetadata = true; Schema schema = getSchemaWithCommentFromQuery(includeMetadata); - Schema expectedSchema = new Schema(Arrays.asList( - field("ID", false, Types.MinorType.BIGINT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "ID", - "SQL_TYPE", "BIGINT", - "comment", "Record identifier" - )), - field("NAME", true, Types.MinorType.VARCHAR.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "NAME", - "SQL_TYPE", "CHARACTER VARYING", - "comment", "Name of record")), - field("COLUMN1", true, Types.MinorType.BIT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMN1", - "SQL_TYPE", "BOOLEAN")), - field("COLUMNN", true, Types.MinorType.INT.getType(), - metadata( - "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", - "SQL_SCHEMA_NAME", "PUBLIC", - "SQL_TABLE_NAME", "TABLE1", - "SQL_COLUMN_NAME", "COLUMNN", - "SQL_TYPE", "INTEGER", - "comment", "Informative description of columnN")) - ), metadata("comment", "This is super special table with valuable data")); + Schema expectedSchema = + new Schema( + Arrays.asList( + field( + "ID", + false, + Types.MinorType.BIGINT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "ID", + "SQL_TYPE", "BIGINT", + "comment", "Record identifier")), + field( + "NAME", + true, + Types.MinorType.VARCHAR.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "NAME", + "SQL_TYPE", "CHARACTER VARYING", + "comment", "Name of record")), + field( + "COLUMN1", + true, + Types.MinorType.BIT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", 
"PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMN1", + "SQL_TYPE", "BOOLEAN")), + field( + "COLUMNN", + true, + Types.MinorType.INT.getType(), + metadata( + "SQL_CATALOG_NAME", "JDBCTOARROWTEST?CHARACTERENCODING=UTF-8", + "SQL_SCHEMA_NAME", "PUBLIC", + "SQL_TABLE_NAME", "TABLE1", + "SQL_COLUMN_NAME", "COLUMNN", + "SQL_TYPE", "INTEGER", + "comment", "Informative description of columnN"))), + metadata("comment", "This is super special table with valuable data")); assertThat(schema).isEqualTo(expectedSchema); /* corresponding Apache Spark DDL after conversion: - ID BIGINT NOT NULL COMMENT 'Record identifier', - NAME STRING COMMENT 'Name of record', - COLUMN1 BOOLEAN, - COLUMNN INT COMMENT 'Informative description of columnN' - */ + ID BIGINT NOT NULL COMMENT 'Record identifier', + NAME STRING COMMENT 'Name of record', + COLUMN1 BOOLEAN, + COLUMNN INT COMMENT 'Informative description of columnN' + */ assertThat(schema).isEqualTo(expectedSchema); } @@ -156,19 +180,25 @@ private Schema getSchemaWithCommentFromQuery(boolean includeMetadata) throws SQL try (Statement statement = conn.createStatement()) { try (ResultSet resultSet = statement.executeQuery("select * from table1")) { ResultSetMetaData resultSetMetaData = resultSet.getMetaData(); - Map> columnCommentByColumnIndex = getColumnComments(metaData, resultSetMetaData); + Map> columnCommentByColumnIndex = + getColumnComments(metaData, resultSetMetaData); String tableName = getTableNameFromResultSetMetaData(resultSetMetaData); String tableComment = getTableComment(metaData, tableName); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder() - .setAllocator(new RootAllocator()).setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) - .setColumnMetadataByColumnIndex(columnCommentByColumnIndex).setIncludeMetadata(includeMetadata).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder() + .setAllocator(new RootAllocator()) + .setSchemaMetadata(Collections.singletonMap(COMMENT, tableComment)) + .setColumnMetadataByColumnIndex(columnCommentByColumnIndex) + .setIncludeMetadata(includeMetadata) + .build(); return JdbcToArrowUtils.jdbcToArrowSchema(resultSetMetaData, config); } } } - private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) throws SQLException { + private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMetaData) + throws SQLException { Set tablesFromQuery = new HashSet<>(); for (int idx = 1, columnCount = resultSetMetaData.getColumnCount(); idx <= columnCount; idx++) { String tableName = resultSetMetaData.getTableName(idx); @@ -182,11 +212,16 @@ private String getTableNameFromResultSetMetaData(ResultSetMetaData resultSetMeta throw new RuntimeException("Table metadata is absent or ambiguous"); } - private Map> getColumnComments(DatabaseMetaData metaData, - ResultSetMetaData resultSetMetaData) throws SQLException { + private Map> getColumnComments( + DatabaseMetaData metaData, ResultSetMetaData resultSetMetaData) throws SQLException { Map> columnCommentByColumnIndex = new HashMap<>(); - for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); columnIdx <= columnCount; columnIdx++) { - String columnComment = getColumnComment(metaData, resultSetMetaData.getTableName(columnIdx), + for (int columnIdx = 1, columnCount = resultSetMetaData.getColumnCount(); + columnIdx <= columnCount; + columnIdx++) { + String columnComment = + getColumnComment( + metaData, + resultSetMetaData.getTableName(columnIdx), 
resultSetMetaData.getColumnName(columnIdx));
       if (columnComment != null && !columnComment.isEmpty()) {
         columnCommentByColumnIndex.put(columnIdx, Collections.singletonMap(COMMENT, columnComment));
@@ -216,7 +251,8 @@ private String getTableComment(DatabaseMetaData metaData, String tableName) thro
     throw new RuntimeException("Table comment not found");
   }

-  private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName) throws SQLException {
+  private String getColumnComment(DatabaseMetaData metaData, String tableName, String columnName)
+      throws SQLException {
     try (ResultSet tableMetadata = metaData.getColumns(null, null, tableName, columnName)) {
       if (tableMetadata.next()) {
         return tableMetadata.getString("REMARKS");
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
index d4fb7c32997a7..85d6d89d036ff 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigTest.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc;

 import static org.junit.Assert.assertEquals;
@@ -28,7 +27,6 @@
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.TimeZone;
-
 import org.apache.arrow.memory.BufferAllocator;
 import org.apache.arrow.memory.RootAllocator;
 import org.junit.Test;
@@ -36,7 +34,8 @@
 public class JdbcToArrowConfigTest {

   private static final BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
-  private static final Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);
+  private static final Calendar calendar =
+      Calendar.getInstance(TimeZone.getTimeZone("UTC"), Locale.ROOT);

   @Test(expected = NullPointerException.class)
   public void testConfigNullArguments() {
@@ -116,13 +115,29 @@ public void testIncludeMetadata() {
     config = new JdbcToArrowConfigBuilder(allocator, calendar, true).build();
     assertTrue(config.shouldIncludeMetadata());

-    config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ true,
-        /* reuse vector schema root */ true, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+    config =
+        new JdbcToArrowConfig(
+            allocator,
+            calendar,
+            /* include metadata */ true,
+            /* reuse vector schema root */ true,
+            null,
+            null,
+            JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE,
+            null);
     assertTrue(config.shouldIncludeMetadata());
     assertTrue(config.isReuseVectorSchemaRoot());

-    config = new JdbcToArrowConfig(allocator, calendar, /* include metadata */ false,
-        /* reuse vector schema root */ false, null, null, JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE, null);
+    config =
+        new JdbcToArrowConfig(
+            allocator,
+            calendar,
+            /* include metadata */ false,
+            /* reuse vector schema root */ false,
+            null,
+            null,
+            JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE,
+            null);
     assertFalse(config.shouldIncludeMetadata());
     assertFalse(config.isReuseVectorSchemaRoot());
   }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
index 7dd881b3f7cec..375463d6fd5d4 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java
+++ 
b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/JdbcToArrowTestHelper.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertArrayEquals; @@ -22,6 +21,9 @@ import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import java.math.BigDecimal; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; @@ -32,7 +34,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.BitVector; @@ -58,12 +59,9 @@ import org.apache.arrow.vector.util.ObjectMapperFactory; import org.apache.arrow.vector.util.Text; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.core.type.TypeReference; -import com.fasterxml.jackson.databind.ObjectMapper; - /** - * This is a Helper class which has functionalities to read and assert the values from the given FieldVector object. + * This is a Helper class which has functionalities to read and assert the values from the given + * FieldVector object. */ public class JdbcToArrowTestHelper { @@ -79,7 +77,8 @@ public static void assertIntVectorValues(IntVector intVector, int rowCount, Inte } } - public static void assertBooleanVectorValues(BitVector bitVector, int rowCount, Boolean[] values) { + public static void assertBooleanVectorValues( + BitVector bitVector, int rowCount, Boolean[] values) { assertEquals(rowCount, bitVector.getValueCount()); for (int j = 0; j < bitVector.getValueCount(); j++) { @@ -103,7 +102,8 @@ public static void assertBitVectorValues(BitVector bitVector, int rowCount, Inte } } - public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int rowCount, Integer[] values) { + public static void assertTinyIntVectorValues( + TinyIntVector tinyIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, tinyIntVector.getValueCount()); for (int j = 0; j < tinyIntVector.getValueCount(); j++) { @@ -115,7 +115,8 @@ public static void assertTinyIntVectorValues(TinyIntVector tinyIntVector, int ro } } - public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int rowCount, Integer[] values) { + public static void assertSmallIntVectorValues( + SmallIntVector smallIntVector, int rowCount, Integer[] values) { assertEquals(rowCount, smallIntVector.getValueCount()); for (int j = 0; j < smallIntVector.getValueCount(); j++) { @@ -127,7 +128,8 @@ public static void assertSmallIntVectorValues(SmallIntVector smallIntVector, int } } - public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCount, Long[] values) { + public static void assertBigIntVectorValues( + BigIntVector bigIntVector, int rowCount, Long[] values) { assertEquals(rowCount, bigIntVector.getValueCount()); for (int j = 0; j < bigIntVector.getValueCount(); j++) { @@ -139,7 +141,8 @@ public static void assertBigIntVectorValues(BigIntVector bigIntVector, int rowCo } } - public static void assertDecimalVectorValues(DecimalVector decimalVector, int rowCount, BigDecimal[] values) { + public static void assertDecimalVectorValues( + DecimalVector decimalVector, int rowCount, BigDecimal[] values) { 
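    // Checks the declared row count first; the loop below then compares the vector's contents
    // element by element against the expected BigDecimal values.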
assertEquals(rowCount, decimalVector.getValueCount()); for (int j = 0; j < decimalVector.getValueCount(); j++) { @@ -151,7 +154,8 @@ public static void assertDecimalVectorValues(DecimalVector decimalVector, int ro } } - public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCount, Double[] values) { + public static void assertFloat8VectorValues( + Float8Vector float8Vector, int rowCount, Double[] values) { assertEquals(rowCount, float8Vector.getValueCount()); for (int j = 0; j < float8Vector.getValueCount(); j++) { @@ -163,7 +167,8 @@ public static void assertFloat8VectorValues(Float8Vector float8Vector, int rowCo } } - public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCount, Float[] values) { + public static void assertFloat4VectorValues( + Float4Vector float4Vector, int rowCount, Float[] values) { assertEquals(rowCount, float4Vector.getValueCount()); for (int j = 0; j < float4Vector.getValueCount(); j++) { @@ -175,7 +180,8 @@ public static void assertFloat4VectorValues(Float4Vector float4Vector, int rowCo } } - public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int rowCount, Long[] values) { + public static void assertTimeVectorValues( + TimeMilliVector timeMilliVector, int rowCount, Long[] values) { assertEquals(rowCount, timeMilliVector.getValueCount()); for (int j = 0; j < timeMilliVector.getValueCount(); j++) { @@ -187,7 +193,8 @@ public static void assertTimeVectorValues(TimeMilliVector timeMilliVector, int r } } - public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCount, Integer[] values) { + public static void assertDateVectorValues( + DateDayVector dateDayVector, int rowCount, Integer[] values) { assertEquals(rowCount, dateDayVector.getValueCount()); for (int j = 0; j < dateDayVector.getValueCount(); j++) { @@ -199,7 +206,8 @@ public static void assertDateVectorValues(DateDayVector dateDayVector, int rowCo } } - public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, int rowCount, Long[] values) { + public static void assertTimeStampVectorValues( + TimeStampVector timeStampVector, int rowCount, Long[] values) { assertEquals(rowCount, timeStampVector.getValueCount()); for (int j = 0; j < timeStampVector.getValueCount(); j++) { @@ -211,7 +219,8 @@ public static void assertTimeStampVectorValues(TimeStampVector timeStampVector, } } - public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { + public static void assertVarBinaryVectorValues( + VarBinaryVector varBinaryVector, int rowCount, byte[][] values) { assertEquals(rowCount, varBinaryVector.getValueCount()); for (int j = 0; j < varBinaryVector.getValueCount(); j++) { @@ -223,7 +232,8 @@ public static void assertVarBinaryVectorValues(VarBinaryVector varBinaryVector, } } - public static void assertVarcharVectorValues(VarCharVector varCharVector, int rowCount, byte[][] values) { + public static void assertVarcharVectorValues( + VarCharVector varCharVector, int rowCount, byte[][] values) { assertEquals(rowCount, varCharVector.getValueCount()); for (int j = 0; j < varCharVector.getValueCount(); j++) { @@ -239,7 +249,8 @@ public static void assertNullVectorValues(NullVector vector, int rowCount) { assertEquals(rowCount, vector.getValueCount()); } - public static void assertListVectorValues(ListVector listVector, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + ListVector listVector, int rowCount, Integer[][] values) { 
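    // As in the scalar helpers above, the value count is asserted first and each row's list
    // contents are then compared against the expected nested arrays.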
assertEquals(rowCount, listVector.getValueCount()); for (int j = 0; j < listVector.getValueCount(); j++) { @@ -252,7 +263,8 @@ public static void assertListVectorValues(ListVector listVector, int rowCount, I } } - public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map[] values) { + public static void assertMapVectorValues( + MapVector mapVector, int rowCount, Map[] values) { assertEquals(rowCount, mapVector.getValueCount()); for (int j = 0; j < mapVector.getValueCount(); j++) { @@ -263,10 +275,17 @@ public static void assertMapVectorValues(MapVector mapVector, int rowCount, Map< (JsonStringArrayList>) mapVector.getObject(j); Map actualMap = null; if (actualSource != null && !actualSource.isEmpty()) { - actualMap = actualSource.stream().map(entry -> - new AbstractMap.SimpleEntry<>(entry.get("key").toString(), - entry.get("value") != null ? entry.get("value").toString() : null)) - .collect(HashMap::new, (collector, val) -> collector.put(val.getKey(), val.getValue()), HashMap::putAll); + actualMap = + actualSource.stream() + .map( + entry -> + new AbstractMap.SimpleEntry<>( + entry.get("key").toString(), + entry.get("value") != null ? entry.get("value").toString() : null)) + .collect( + HashMap::new, + (collector, val) -> collector.put(val.getKey(), val.getValue()), + HashMap::putAll); } assertEquals(values[j], actualMap); } @@ -310,8 +329,8 @@ public static void assertFieldMetadataIsEmpty(VectorSchemaRoot schema) { } } - public static void assertFieldMetadataMatchesResultSetMetadata(ResultSetMetaData rsmd, Schema schema) - throws SQLException { + public static void assertFieldMetadataMatchesResultSetMetadata( + ResultSetMetaData rsmd, Schema schema) throws SQLException { assertNotNull(schema); assertNotNull(schema.getFields()); assertNotNull(rsmd); @@ -400,12 +419,14 @@ public static byte[][] getCharArray(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); + valueArr[i++] = + "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } - public static byte[][] getCharArrayWithCharSet(String[] values, String dataType, Charset charSet) { + public static byte[][] getCharArrayWithCharSet( + String[] values, String dataType, Charset charSet) { String[] dataArr = getValues(values, dataType); byte[][] valueArr = new byte[dataArr.length][]; int i = 0; @@ -420,7 +441,8 @@ public static byte[][] getBinaryValues(String[] values, String dataType) { byte[][] valueArr = new byte[dataArr.length][]; int i = 0; for (String data : dataArr) { - valueArr[i++] = "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); + valueArr[i++] = + "null".equals(data.trim()) ? null : data.trim().getBytes(StandardCharsets.UTF_8); } return valueArr; } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java index 4478cdfbee6f7..8dfc684e22f24 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/MockPreparedStatement.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import java.io.InputStream; @@ -231,8 +230,7 @@ public void setDate(int parameterIndex, Date x, Calendar cal) throws SQLExceptio } @Override - public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException { - } + public void setTime(int parameterIndex, Time x, Calendar cal) throws SQLException {} @Override public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws SQLException { @@ -242,8 +240,7 @@ public void setTimestamp(int parameterIndex, Timestamp x, Calendar cal) throws S } @Override - public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException { - } + public void setNull(int parameterIndex, int sqlType, String typeName) throws SQLException {} @Override public void setURL(int parameterIndex, URL x) throws SQLException { @@ -261,80 +258,62 @@ public void setRowId(int parameterIndex, RowId x) throws SQLException { } @Override - public void setNString(int parameterIndex, String value) throws SQLException { - } + public void setNString(int parameterIndex, String value) throws SQLException {} @Override public void setNCharacterStream(int parameterIndex, Reader value, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, NClob value) throws SQLException { - } + public void setNClob(int parameterIndex, NClob value) throws SQLException {} @Override - public void setClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override public void setBlob(int parameterIndex, InputStream inputStream, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException { - } + public void setNClob(int parameterIndex, Reader reader, long length) throws SQLException {} @Override - public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException { - } + public void setSQLXML(int parameterIndex, SQLXML xmlObject) throws SQLException {} @Override public void setObject(int parameterIndex, Object x, int targetSqlType, int scaleOrLength) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream x, long length) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x, long length) throws SQLException {} @Override public void setCharacterStream(int parameterIndex, Reader reader, long length) - throws SQLException { - } + throws SQLException {} @Override - public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setAsciiStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException { - } + public void setBinaryStream(int parameterIndex, InputStream x) throws SQLException {} @Override - public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException { - } + public void setCharacterStream(int parameterIndex, Reader reader) throws SQLException {} @Override - public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException { 
-  }
+  public void setNCharacterStream(int parameterIndex, Reader value) throws SQLException {}

   @Override
-  public void setClob(int parameterIndex, Reader reader) throws SQLException {
-  }
+  public void setClob(int parameterIndex, Reader reader) throws SQLException {}

   @Override
-  public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {
-  }
+  public void setBlob(int parameterIndex, InputStream inputStream) throws SQLException {}

   @Override
-  public void setNClob(int parameterIndex, Reader reader) throws SQLException {
-  }
+  public void setNClob(int parameterIndex, Reader reader) throws SQLException {}

   @Override
   public ResultSet executeQuery(String sql) throws SQLException {
@@ -347,8 +326,7 @@ public int executeUpdate(String sql) throws SQLException {
   }

   @Override
-  public void close() throws SQLException {
-  }
+  public void close() throws SQLException {}

   @Override
   public int getMaxFieldSize() throws SQLException {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
index b05a59a9a04d8..5f5f6dcb98d43 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc;

 import java.io.InputStream;
@@ -48,13 +47,11 @@ public class ResultSetUtility {
   public static ResultSet generateEmptyResultSet() throws SQLException {
     MockDataElement element = new MockDataElement("string_example");
     MockResultSetMetaData.MockColumnMetaData columnMetaData =
-            MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1);
+        MockResultSetMetaData.MockColumnMetaData.fromDataElement(element, 1);
     ArrayList cols = new ArrayList<>();
     cols.add(columnMetaData);
     ResultSetMetaData metadata = new MockResultSetMetaData(cols);
-    return MockResultSet.builder()
-        .setMetaData(metadata)
-        .build();
+    return MockResultSet.builder().setMetaData(metadata).build();
   }

   public static MockResultSet generateBasicResultSet(int rows) throws SQLException {
@@ -319,15 +316,19 @@ public String getColumnTypeName(int column) throws SQLException {
     }

     public static MockResultSetMetaData fromRows(List rows) throws SQLException {
-      // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a given result set.
-      // If there are now rows, or the result set contains no columns, this cannot be dynamically generated and
+      // Note: This attempts to dynamically construct ResultSetMetaData from the first row in a
+      // given result set.
+      // If there are no rows, or the result set contains no columns, this cannot be dynamically
+      // generated and
       // an exception will be thrown.
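      // Illustrative sketch of the intended use (names as defined in this class):
      //   MockResultSetMetaData metaData = MockResultSetMetaData.fromRows(rows);
      //   metaData.getColumnCount(); // == rows.get(0).dataElements.size()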
if (rows.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because row count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because row count is zero!"); } MockRow firstRow = rows.get(0); if (firstRow.dataElements.size() == 0) { - throw new SQLException("Unable to dynamically generate ResultSetMetaData because column count is zero!"); + throw new SQLException( + "Unable to dynamically generate ResultSetMetaData because column count is zero!"); } ArrayList columns = new ArrayList<>(); for (int i = 0; i < firstRow.dataElements.size(); i++) { @@ -346,9 +347,7 @@ public static class MockColumnMetaData { private String typeName; private int displaySize; - - private MockColumnMetaData() { - } + private MockColumnMetaData() {} private String getLabel() { return label; @@ -382,16 +381,17 @@ private int getDisplaySize() { return displaySize; } - public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException { + public static MockColumnMetaData fromDataElement(MockDataElement element, int i) + throws SQLException { return MockColumnMetaData.builder() - .sqlType(element.getSqlType()) - .precision(element.getPrecision()) - .scale(element.getScale()) - .nullable(element.isNullable()) - .setTypeName("TYPE") - .setDisplaySize(420) - .label("col_" + i) - .build(); + .sqlType(element.getSqlType()) + .precision(element.getPrecision()) + .scale(element.getScale()) + .nullable(element.isNullable()) + .setTypeName("TYPE") + .setDisplaySize(420) + .label("col_" + i) + .build(); } public static Builder builder() { @@ -440,9 +440,7 @@ public MockColumnMetaData build() { return this.columnMetaData; } } - } - } public static class MockRow { @@ -635,7 +633,6 @@ public short getShort() throws SQLException { } } - public static class ThrowingResultSet implements ResultSet { @Override @@ -1139,17 +1136,20 @@ public void updateTimestamp(String columnLabel, Timestamp x) throws SQLException } @Override - public void updateAsciiStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, int length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, int length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, int length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, int length) + throws SQLException { throw getExceptionToThrow(); } @@ -1439,7 +1439,8 @@ public void updateNCharacterStream(int columnIndex, Reader x, long length) throw } @Override - public void updateNCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateNCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1449,7 +1450,8 @@ public void updateAsciiStream(int columnIndex, InputStream x, long length) throw } @Override - public void updateBinaryStream(int columnIndex, InputStream x, long length) throws SQLException { + public void updateBinaryStream(int columnIndex, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1459,27 +1461,32 @@ public void updateCharacterStream(int columnIndex, Reader x, 
long length) throws } @Override - public void updateAsciiStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateAsciiStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBinaryStream(String columnLabel, InputStream x, long length) throws SQLException { + public void updateBinaryStream(String columnLabel, InputStream x, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateCharacterStream(String columnLabel, Reader reader, long length) throws SQLException { + public void updateCharacterStream(String columnLabel, Reader reader, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(int columnIndex, InputStream inputStream, long length) throws SQLException { + public void updateBlob(int columnIndex, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @Override - public void updateBlob(String columnLabel, InputStream inputStream, long length) throws SQLException { + public void updateBlob(String columnLabel, InputStream inputStream, long length) + throws SQLException { throw getExceptionToThrow(); } @@ -1584,13 +1591,14 @@ public T getObject(String columnLabel, Class type) throws SQLException { } @Override - public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) throws SQLException { + public void updateObject(int columnIndex, Object x, SQLType targetSqlType, int scaleOrLength) + throws SQLException { throw getExceptionToThrow(); } @Override public void updateObject(String columnLabel, Object x, SQLType targetSqlType, int scaleOrLength) - throws SQLException { + throws SQLException { throw getExceptionToThrow(); } @@ -1600,7 +1608,8 @@ public void updateObject(int columnIndex, Object x, SQLType targetSqlType) throw } @Override - public void updateObject(String columnLabel, Object x, SQLType targetSqlType) throws SQLException { + public void updateObject(String columnLabel, Object x, SQLType targetSqlType) + throws SQLException { throw getExceptionToThrow(); } @@ -1623,7 +1632,6 @@ private static SQLException getExceptionToThrow(String message) { return new SQLException(message); } - public static class ThrowingResultSetMetaData implements ResultSetMetaData { @Override public int getColumnCount() throws SQLException { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java index 2424ed625248d..8af2c06f4de54 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtilityTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -26,7 +25,6 @@ import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Types; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VectorSchemaRoot; @@ -36,20 +34,24 @@ public class ResultSetUtilityTest { @Test public void testZeroRowResultSet() throws Exception { - for (boolean reuseVectorSchemaRoot : new boolean[]{false, true}) { + for (boolean reuseVectorSchemaRoot : new boolean[] {false, true}) { try (BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE)) { ResultSet rs = ResultSetUtility.generateEmptyResultSet(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config); assertTrue("Iterator on zero row ResultSet should haveNext() before use", iter.hasNext()); VectorSchemaRoot root = iter.next(); assertNotNull("VectorSchemaRoot from first next() result should never be null", root); - assertEquals("VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); - assertFalse("hasNext() should return false on empty ResultSets after initial next() call", iter.hasNext()); + assertEquals( + "VectorSchemaRoot from empty ResultSet should have zero rows", 0, root.getRowCount()); + assertFalse( + "hasNext() should return false on empty ResultSets after initial next() call", + iter.hasNext()); } } } @@ -99,7 +101,8 @@ public void testBasicResultSet() throws Exception { @Test public void testMockDataTypes() throws SQLException { - ResultSetUtility.MockDataElement element = new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); + ResultSetUtility.MockDataElement element = + new ResultSetUtility.MockDataElement(1L, Types.NUMERIC); assertEquals(1L, element.getLong()); assertEquals(1, element.getInt()); assertEquals("1", element.getString()); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java index 50c4fe6db2a14..7fa8188a99158 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/Table.java @@ -14,17 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import java.math.BigDecimal; import java.nio.charset.StandardCharsets; -import com.fasterxml.jackson.annotation.JsonIgnoreProperties; - -/** - * POJO to handle the YAML data from the test YAML file. - */ +/** POJO to handle the YAML data from the test YAML file. 
*/ @JsonIgnoreProperties(ignoreUnknown = true) public class Table { private String name; @@ -39,8 +35,7 @@ public class Table { private String[] vectors; private int rowCount; - public Table() { - } + public Table() {} public String getName() { return name; diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java index 053604073fd66..93ba028e39629 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/UnreliableMetaDataTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc; import static org.junit.Assert.assertEquals; @@ -34,7 +33,6 @@ import java.util.Collections; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -48,9 +46,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. - */ +/** Test options for dealing with unreliable ResultSetMetaData from JDBC drivers. */ @RunWith(Parameterized.class) public class UnreliableMetaDataTest { private final boolean reuseVectorSchemaRoot; @@ -72,7 +68,7 @@ public void afterEach() { @Parameterized.Parameters(name = "reuseVectorSchemaRoot = {0}") public static Collection getTestData() { - return Arrays.asList(new Object[][] { {false}, {true} }); + return Arrays.asList(new Object[][] {{false}, {true}}); } @Test @@ -91,13 +87,15 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { // reset the ResultSet: rs.beforeFirst(); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); + assertThrows( + RuntimeException.class, iter::next, "Expected to fail due to mismatched metadata!"); } // reset the ResultSet: @@ -105,11 +103,12 @@ public void testUnreliableMetaDataPrecisionAndScale() throws Exception { JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2); Map explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { @@ -133,7 +132,8 @@ public void 
testInconsistentPrecisionAndScale() throws Exception { assertEquals("Value precision should be 18", 18, bd1.precision()); rs.next(); BigDecimal bd2 = rs.getBigDecimal(1); - assertEquals("Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); + assertEquals( + "Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2); assertEquals("Value scale should be 7", 7, bd2.scale()); assertEquals("Value precision should be 20", 20, bd2.precision()); rs.beforeFirst(); @@ -141,23 +141,27 @@ public void testInconsistentPrecisionAndScale() throws Exception { Map explicitMapping = new HashMap<>(); explicitMapping.put(1, explicitMappingField); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); - assertThrows(RuntimeException.class, iter::next, + assertThrows( + RuntimeException.class, + iter::next, "This is expected to fail due to inconsistent BigDecimal scales, while strict matching is enabled."); } // Reuse same ResultSet, with RoundingMode.UNNECESSARY set to coerce BigDecimal scale as needed: - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(explicitMapping) - .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(explicitMapping) + .setBigDecimalRoundingMode(RoundingMode.UNNECESSARY) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { VectorSchemaRoot root = iter.next(); @@ -174,23 +178,29 @@ public void testIncorrectNullability() throws Exception { .sqlType(Types.INTEGER) .nullable(ResultSetMetaData.columnNoNulls) .build(); - ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); - final ResultSetUtility.MockResultSet.Builder resultSetBuilder = ResultSetUtility.MockResultSet.builder() - .setMetaData(metadata) - .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) - .finishRow() - .addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) - .finishRow(); - final Schema notNullSchema = new Schema( - Collections.singletonList(Field.notNullable(/*name=*/null, new ArrowType.Int(32, true)))); - final Schema nullSchema = new Schema( - Collections.singletonList(Field.nullable(/*name=*/null, new ArrowType.Int(32, true)))); + ResultSetMetaData metadata = + new ResultSetUtility.MockResultSetMetaData(Collections.singletonList(columnMetaData)); + final ResultSetUtility.MockResultSet.Builder resultSetBuilder = + ResultSetUtility.MockResultSet.builder() + .setMetaData(metadata) + .addDataElement(new ResultSetUtility.MockDataElement(1024, Types.INTEGER)) + .finishRow() + 
.addDataElement(new ResultSetUtility.MockDataElement(null, Types.INTEGER)) + .finishRow(); + final Schema notNullSchema = + new Schema( + Collections.singletonList( + Field.notNullable(/*name=*/ null, new ArrowType.Int(32, true)))); + final Schema nullSchema = + new Schema( + Collections.singletonList(Field.nullable(/*name=*/ null, new ArrowType.Int(32, true)))); try (final ResultSet rs = resultSetBuilder.build()) { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -208,14 +218,16 @@ public void testIncorrectNullability() throws Exception { // Override the nullability to get the correct result final Map typeMapping = new HashMap<>(); - JdbcFieldInfo realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/0, /*scale*/0); + JdbcFieldInfo realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullable, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -231,14 +243,16 @@ public void testIncorrectNullability() throws Exception { rs.beforeFirst(); // columnNullableUnknown won't override the metadata - realFieldInfo = new JdbcFieldInfo( - Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/0, /*scale*/0); + realFieldInfo = + new JdbcFieldInfo( + Types.INTEGER, ResultSetMetaData.columnNullableUnknown, /*precision*/ 0, /*scale*/ 0); typeMapping.put(1, realFieldInfo); - config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setExplicitTypesByColumnIndex(typeMapping) - .build(); + config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setExplicitTypesByColumnIndex(typeMapping) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { assertTrue(iter.hasNext()); final VectorSchemaRoot root = iter.next(); @@ -266,8 +280,8 @@ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLE return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .build(); } @@ -285,12 +299,12 @@ private 
ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException { return ResultSetUtility.MockResultSet.builder() .setMetaData(metadata) .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000000000.01"), Types.DECIMAL)) .finishRow() .addDataElement( - new ResultSetUtility.MockDataElement(new BigDecimal("1000000000300.0000001"), Types.DECIMAL) - ) + new ResultSetUtility.MockDataElement( + new BigDecimal("1000000000300.0000001"), Types.DECIMAL)) .finishRow() .build(); } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java index 96bac42214cef..6a25c58fbde7e 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/AbstractConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.consumer; import org.apache.arrow.memory.BufferAllocator; @@ -35,5 +34,4 @@ public void setUp() { public void tearDown() { allocator.close(); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java index a368023d49005..255770ecdbf6d 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/consumer/BinaryConsumerTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.jdbc.consumer; import static org.junit.Assert.assertArrayEquals; @@ -23,7 +22,6 @@ import java.io.ByteArrayInputStream; import java.io.IOException; - import org.apache.arrow.vector.BaseValueVector; import org.apache.arrow.vector.VarBinaryVector; import org.junit.Test; @@ -37,7 +35,8 @@ interface InputStreamConsumer { void consume(BinaryConsumer consumer) throws IOException; } - protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) throws IOException { + protected void assertConsume(boolean nullable, InputStreamConsumer dataConsumer, byte[][] expect) + throws IOException { try (final VarBinaryVector vector = new VarBinaryVector("binary", allocator)) { BinaryConsumer consumer = BinaryConsumer.createConsumer(vector, 0, nullable); dataConsumer.consume(consumer); @@ -61,51 +60,59 @@ private byte[] createBytes(int length) { return bytes; } - public void testConsumeInputStream(byte[][] values, boolean nullable) throws IOException { - assertConsume(nullable, binaryConsumer -> { - for (byte[] value : values) { - binaryConsumer.consume(new ByteArrayInputStream(value)); - binaryConsumer.moveWriterPosition(); - } - }, values); + assertConsume( + nullable, + binaryConsumer -> { + for (byte[] value : values) { + binaryConsumer.consume(new ByteArrayInputStream(value)); + binaryConsumer.moveWriterPosition(); + } + }, + values); } @Test public void testConsumeInputStream() throws IOException { - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); - - testConsumeInputStream(new byte[][]{ - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), - createBytes(DEFAULT_RECORD_BYTE_COUNT), - createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) - }, false); + testConsumeInputStream(new byte[][] {createBytes(DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT), createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(DEFAULT_RECORD_BYTE_COUNT * 2), + createBytes(DEFAULT_RECORD_BYTE_COUNT), + createBytes(DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] {createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)}, false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT * 10), + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT) + }, + false); + + testConsumeInputStream( + new byte[][] { + createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT), + 
createBytes(DEFAULT_RECORD_BYTE_COUNT),
+          createBytes(INITIAL_VALUE_ALLOCATION * DEFAULT_RECORD_BYTE_COUNT)
+        },
+        false);

     byte[][] testRecords = new byte[INITIAL_VALUE_ALLOCATION * 2][];
     for (int i = 0; i < testRecords.length; i++) {
@@ -113,5 +120,4 @@ public void testConsumeInputStream() throws IOException {
     }
     testConsumeInputStream(testRecords, false);
   }
-
 }
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
index d32c2bbab91a8..e22686e890580 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcAliasToArrowTest.java
@@ -14,7 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 package org.apache.arrow.adapter.jdbc.h2;

 import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow;
@@ -28,7 +27,6 @@
 import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.List;
-
 import org.apache.arrow.memory.RootAllocator;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.types.pojo.Field;
@@ -40,10 +38,8 @@
 public class JdbcAliasToArrowTest {
   private Connection conn = null;

-  private static final String CREATE_STATEMENT =
-      "CREATE TABLE example_table (id INTEGER);";
-  private static final String INSERT_STATEMENT =
-      "INSERT INTO example_table (id) VALUES (?);";
+  private static final String CREATE_STATEMENT = "CREATE TABLE example_table (id INTEGER);";
+  private static final String INSERT_STATEMENT = "INSERT INTO example_table (id) VALUES (?);";
   private static final String QUERY = "SELECT id as a, id as b FROM example_table;";
   private static final String DROP_STATEMENT = "DROP TABLE example_table;";
   private static final String ORIGINAL_COLUMN_NAME = "ID";
@@ -62,10 +58,9 @@ public void setUp() throws Exception {
   }

   /**
-   * Test h2 database query with alias for column name and column label.
-   * To verify reading field alias from an H2 database works as expected.
-   * If this test fails, something is either wrong with the setup,
-   * or the H2 SQL behavior changed.
+   * Test h2 database query with alias for column name and column label. To verify reading field
+   * alias from an H2 database works as expected. If this test fails, something is either wrong with
+   * the setup, or the H2 SQL behavior changed.
    */
   @Test
   public void testReadH2Alias() throws Exception {
@@ -96,8 +91,8 @@ public void testReadH2Alias() throws Exception {
   }

   /**
-   * Test jdbc query results with alias to arrow works expected.
-   * Arrow result schema name should be field alias name.
+   * Test that jdbc query results with an alias convert to arrow as expected. The Arrow result
+   * schema name should be the field alias name.
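+   *
+   * <p>A sketch of the expectation (illustrative only, not code from this test): with {@code
+   * SELECT id as a, id as b}, the converted schema is expected to surface the aliases:
+   *
+   * <pre>{@code
+   * VectorSchemaRoot vector = sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE));
+   * vector.getSchema().getFields().get(0).getName(); // the first alias ("A" under H2 upper-casing)
+   * }</pre>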
*/ @Test public void testJdbcAliasToArrow() throws Exception { @@ -105,8 +100,7 @@ public void testJdbcAliasToArrow() throws Exception { insertRows(rowCount); try (ResultSet resultSet = conn.createStatement().executeQuery(QUERY)) { - final VectorSchemaRoot vector = - sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); + final VectorSchemaRoot vector = sqlToArrow(resultSet, new RootAllocator(Integer.MAX_VALUE)); assertEquals(rowCount, vector.getRowCount()); Schema vectorSchema = vector.getSchema(); diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java index eabbdc5a25e5d..895dab52ca534 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowArrayTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest.sqlToArrow; @@ -34,7 +33,6 @@ import java.sql.Types; import java.util.HashMap; import java.util.Map; - import org.apache.arrow.adapter.jdbc.JdbcFieldInfo; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -54,11 +52,12 @@ public class JdbcToArrowArrayTest { private Connection conn = null; private static final String CREATE_STATEMENT = - "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + - "string_array VARCHAR ARRAY);"; + "CREATE TABLE array_table (id INTEGER, int_array INTEGER ARRAY, float_array REAL ARRAY, " + + "string_array VARCHAR ARRAY);"; private static final String INSERT_STATEMENT = "INSERT INTO array_table (id, int_array, float_array, string_array) VALUES (?, ?, ?, ?);"; - private static final String QUERY = "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; + private static final String QUERY = + "SELECT int_array, float_array, string_array FROM array_table ORDER BY id;"; private static final String DROP_STATEMENT = "DROP TABLE array_table;"; private static Map arrayFieldMapping; @@ -158,7 +157,8 @@ public void testJdbcToArrow() throws Exception { insertRows(rowCount, intArrays, floatArrays, strArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -168,9 +168,12 @@ public void testJdbcToArrow() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) 
vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, strArrays); } } @@ -179,30 +182,22 @@ public void testJdbcToArrowWithNulls() throws Exception { int rowCount = 4; Integer[][] intArrays = { - null, - {0}, - {1}, - {}, + null, {0}, {1}, {}, }; Float[][] floatArrays = { - { 2.0f }, - null, - { 3.0f }, - {}, + {2.0f}, null, {3.0f}, {}, }; String[][] stringArrays = { - {"4"}, - null, - {"5"}, - {}, + {"4"}, null, {"5"}, {}, }; insertRows(rowCount, intArrays, floatArrays, stringArrays); final JdbcToArrowConfigBuilder builder = - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), JdbcToArrowUtils.getUtcCalendar(), false); builder.setArraySubTypeByColumnNameMap(arrayFieldMapping); final JdbcToArrowConfig config = builder.build(); @@ -212,13 +207,17 @@ public void testJdbcToArrowWithNulls() throws Exception { assertEquals(rowCount, vector.getRowCount()); - assertIntegerVectorEquals((ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); - assertFloatVectorEquals((ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); - assertStringVectorEquals((ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); + assertIntegerVectorEquals( + (ListVector) vector.getVector(INT_ARRAY_FIELD_NAME), rowCount, intArrays); + assertFloatVectorEquals( + (ListVector) vector.getVector(FLOAT_ARRAY_FIELD_NAME), rowCount, floatArrays); + assertStringVectorEquals( + (ListVector) vector.getVector(STRING_ARRAY_FIELD_NAME), rowCount, stringArrays); } } - private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Integer[][] expectedValues) { + private void assertIntegerVectorEquals( + ListVector listVector, int rowCount, Integer[][] expectedValues) { IntVector vector = (IntVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -243,7 +242,8 @@ private void assertIntegerVectorEquals(ListVector listVector, int rowCount, Inte } } - private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[][] expectedValues) { + private void assertFloatVectorEquals( + ListVector listVector, int rowCount, Float[][] expectedValues) { Float4Vector vector = (Float4Vector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -268,7 +268,8 @@ private void assertFloatVectorEquals(ListVector listVector, int rowCount, Float[ } } - private void assertStringVectorEquals(ListVector listVector, int rowCount, String[][] expectedValues) { + private void assertStringVectorEquals( + ListVector listVector, int rowCount, String[][] expectedValues) { VarCharVector vector = (VarCharVector) listVector.getDataVector(); ArrowBuf offsetBuffer = listVector.getOffsetBuffer(); @@ -285,7 +286,8 @@ private void assertStringVectorEquals(ListVector listVector, int rowCount, Strin assertEquals(1, listVector.isSet(row)); assertEquals(expectedValues[row].length, offset - prevOffset); for (int i = prevOffset; i < offset; ++i) { - assertArrayEquals(expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); + assertArrayEquals( + expectedValues[row][i - prevOffset].getBytes(StandardCharsets.UTF_8), vector.get(i)); } prevOffset = offset; @@ -309,7 +311,7 @@ private Integer[][] generateIntegerArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Integer[]{val, val + 1, val + 2, val + 3}; + result[i] = new Integer[] 
{val, val + 1, val + 2, val + 3}; } return result; @@ -317,10 +319,10 @@ private Integer[][] generateIntegerArrayField(int numRows) { private Float[][] generateFloatArrayField(int numRows) { Float[][] result = new Float[numRows][]; - + for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new Float[]{(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; + result[i] = new Float[] {(float) val, (float) val + 1, (float) val + 2, (float) val + 3}; } return result; @@ -331,22 +333,21 @@ private String[][] generateStringArrayField(int numRows) { for (int i = 0; i < numRows; ++i) { int val = i * 4; - result[i] = new String[]{ - String.valueOf(val), - String.valueOf(val + 1), - String.valueOf(val + 2), - String.valueOf(val + 3) }; + result[i] = + new String[] { + String.valueOf(val), + String.valueOf(val + 1), + String.valueOf(val + 2), + String.valueOf(val + 3) + }; } return result; } private void insertRows( - int numRows, - Integer[][] integerArrays, - Float[][] floatArrays, - String[][] strArrays) - throws SQLException { + int numRows, Integer[][] integerArrays, Float[][] floatArrays, String[][] strArrays) + throws SQLException { // Insert 4 Rows try (PreparedStatement stmt = conn.prepareStatement(INSERT_STATEMENT)) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java index ab1b4b7fc2fea..14de2d6dc8f3c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowCharSetTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertVarcharVectorValues; @@ -29,7 +28,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,8 +45,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with UTF-8 Charset, - * including the multi-byte CJK characters for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * UTF-8 Charset, including the multi-byte CJK characters for H2 database. 
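+ *
+ * <p>Values are compared as UTF-8 encoded byte arrays via {@code getCharArrayWithCharSet}; a
+ * two-character CJK string such as "你好", for example, occupies six bytes in UTF-8.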
*/ @RunWith(Parameterized.class) public class JdbcToArrowCharSetTest extends AbstractJdbcToArrowTest { @@ -82,7 +80,7 @@ public void setUp() throws SQLException, ClassNotFoundException { String driver = "org.h2.Driver"; Class.forName(driver); conn = DriverManager.getConnection(url); - try (Statement stmt = conn.createStatement();) { + try (Statement stmt = conn.createStatement(); ) { stmt.executeUpdate(table.getCreate()); for (String insert : table.getData()) { stmt.executeUpdate(insert); @@ -99,39 +97,59 @@ public void setUp() throws SQLException, ClassNotFoundException { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowCharSetTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 Charset, including - * the multi-byte CJK characters. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with UTF-8 + * Charset, including the multi-byte CJK characters. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()).build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new 
RootAllocator(0), Calendar.getInstance(), true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -141,20 +159,26 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CLOB, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), VARCHAR, StandardCharsets.UTF_8)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArrayWithCharSet(table.getValues(), CHAR, StandardCharsets.UTF_8)); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java index 54e7d5ffb27ed..d7c4be03b3542 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowDataTypesTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; @@ -40,7 +39,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -71,8 +69,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types - * for H2 database using multiple test data files. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * various data types for H2 database using multiple test data files. 
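The charset assertions above compare VarCharVector contents byte-for-byte against UTF-8 encodings: getBytes(StandardCharsets.UTF_8) on the expected side, vector.get(i) on the Arrow side. A tiny sketch of that round trip follows; the field name and the sample string are made up, everything else uses the same vector API the helpers rely on.

import java.nio.charset.StandardCharsets;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VarCharVector;

public class Utf8RoundTripSketch {
  public static void main(String[] args) {
    try (RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
        VarCharVector vector = new VarCharVector("clob_field", allocator)) {
      // Multi-byte CJK sample, as in the charset fixtures.
      byte[] encoded = "你好, Arrow".getBytes(StandardCharsets.UTF_8);
      vector.allocateNew();
      vector.setSafe(0, encoded);
      vector.setValueCount(1);
      // get(i) hands back the stored bytes; decoding with UTF-8 restores the text.
      String decoded = new String(vector.get(0), StandardCharsets.UTF_8);
      System.out.println(decoded.equals("你好, Arrow")); // true
    }
  }
}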
*/ @RunWith(Parameterized.class) public class JdbcToArrowDataTypesTest extends AbstractJdbcToArrowTest { @@ -137,43 +135,60 @@ public JdbcToArrowDataTypesTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowDataTypesTest.class)); } - /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. - */ + /** Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = 
conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -183,8 +198,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -192,69 +207,99 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { switch (table.getType()) { case BIGINT: - assertBigIntVectorValues((BigIntVector) root.getVector(table.getVector()), table.getValues().length, + assertBigIntVectorValues( + (BigIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case BINARY: case BLOB: - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(table.getVector()), table.getValues().length, + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(table.getVector()), + table.getValues().length, table.getBinaryValues()); break; case BIT: - assertBitVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBitVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case BOOL: - assertBooleanVectorValues((BitVector) root.getVector(table.getVector()), table.getValues().length, + assertBooleanVectorValues( + (BitVector) root.getVector(table.getVector()), + table.getValues().length, table.getBoolValues()); break; case CHAR: case VARCHAR: case CLOB: - assertVarcharVectorValues((VarCharVector) root.getVector(table.getVector()), table.getValues().length, + assertVarcharVectorValues( + (VarCharVector) root.getVector(table.getVector()), + table.getValues().length, table.getCharValues()); break; case DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case DECIMAL: - assertDecimalVectorValues((DecimalVector) root.getVector(table.getVector()), table.getValues().length, + assertDecimalVectorValues( + (DecimalVector) root.getVector(table.getVector()), + table.getValues().length, table.getBigDecimalValues()); break; case DOUBLE: - assertFloat8VectorValues((Float8Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat8VectorValues( + (Float8Vector) root.getVector(table.getVector()), + table.getValues().length, table.getDoubleValues()); break; case INT: - assertIntVectorValues((IntVector) 
root.getVector(table.getVector()), table.getValues().length, + assertIntVectorValues( + (IntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case SMALLINT: - assertSmallIntVectorValues((SmallIntVector) root.getVector(table.getVector()), table.getValues().length, + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case TINYINT: - assertTinyIntVectorValues((TinyIntVector) root.getVector(table.getVector()), table.getValues().length, + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case REAL: - assertFloat4VectorValues((Float4Vector) root.getVector(table.getVector()), table.getValues().length, + assertFloat4VectorValues( + (Float4Vector) root.getVector(table.getVector()), + table.getValues().length, table.getFloatValues()); break; case NULL: assertNullVectorValues((NullVector) root.getVector(table.getVector()), table.getRowCount()); break; case LIST: - assertListVectorValues((ListVector) root.getVector(table.getVector()), table.getValues().length, + assertListVectorValues( + (ListVector) root.getVector(table.getVector()), + table.getValues().length, table.getListValues()); break; default: @@ -263,4 +308,3 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { } } } - diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java index a5d1ffa3f64de..8bb3812637acb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowMapDataTypeTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertMapVectorValues; @@ -24,7 +23,6 @@ import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.util.Calendar; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; import org.apache.arrow.memory.RootAllocator; @@ -32,46 +30,48 @@ import org.apache.arrow.vector.complex.MapVector; import org.junit.Test; -/** - * Test MapConsumer with OTHER jdbc type. - */ +/** Test MapConsumer with OTHER jdbc type. 
*/ public class JdbcToArrowMapDataTypeTest extends AbstractJdbcToArrowTest { public JdbcToArrowMapDataTypeTest() throws IOException { this.table = getTable("h2/test1_map_h2.yml", JdbcToArrowMapDataTypeTest.class); } - /** - * Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column - */ + /** Test Method to test JdbcToArrow Functionality for Map form Types.OTHER column */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( + testDataSets( + sqlToArrow( conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( conn, table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } /** * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), table.getRowCount(), getMapValues(table.getValues(), MAP)); } } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java index 31d32bd648906..51394764e385c 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowNullTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertBigIntVectorValues; @@ -51,7 +50,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -82,8 +80,8 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with null values for - * H2 database. 
+ * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * null values for H2 database. */ @RunWith(Parameterized.class) public class JdbcToArrowNullTest extends AbstractJdbcToArrowTest { @@ -116,47 +114,67 @@ public JdbcToArrowNullTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowNullTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null values. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with null + * values. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + 
.setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -166,8 +184,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -178,7 +196,8 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { sqlToArrowTestNullValues(table.getVectors(), root, table.getRowCount()); break; case SELECTED_NULL_COLUMN: - sqlToArrowTestSelectedNullColumnsValues(table.getVectors(), root, table.getRowCount(), isIncludeMapVector); + sqlToArrowTestSelectedNullColumnsValues( + table.getVectors(), root, table.getRowCount(), isIncludeMapVector); break; case SELECTED_NULL_ROW: testAllVectorValues(root, isIncludeMapVector); @@ -192,62 +211,96 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { private void testAllVectorValues(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), 
table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -285,11 +338,11 @@ public void sqlToArrowTestNullValues(String[] vectors, VectorSchemaRoot root, in * @param vectors Vectors to test * @param root VectorSchemaRoot for test * @param rowCount number of rows - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. 
Jdbc type to 'map' + * mapping declared in configuration only manually */ - public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSchemaRoot root, int rowCount, - boolean isIncludeMapVector) { + public void sqlToArrowTestSelectedNullColumnsValues( + String[] vectors, VectorSchemaRoot root, int rowCount, boolean isIncludeMapVector) { assertNullValues((BigIntVector) root.getVector(vectors[0]), rowCount); assertNullValues((DecimalVector) root.getVector(vectors[1]), rowCount); assertNullValues((Float8Vector) root.getVector(vectors[2]), rowCount); @@ -308,5 +361,4 @@ public void sqlToArrowTestSelectedNullColumnsValues(String[] vectors, VectorSche assertNullValues((MapVector) root.getVector(vectors[14]), rowCount); } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java index 4d0bbfc7a993c..47713d9099da6 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowOptionalColumnsTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static junit.framework.TestCase.assertTrue; @@ -24,7 +23,6 @@ import java.sql.SQLException; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper; import org.apache.arrow.adapter.jdbc.Table; @@ -40,9 +38,7 @@ */ @RunWith(Parameterized.class) public class JdbcToArrowOptionalColumnsTest extends AbstractJdbcToArrowTest { - private static final String[] testFiles = { - "h2/test1_null_and_notnull.yml" - }; + private static final String[] testFiles = {"h2/test1_null_and_notnull.yml"}; /** * Constructor which populates the table object for each test iteration. @@ -57,17 +53,19 @@ public JdbcToArrowOptionalColumnsTest(Table table) { * Get the test data as a collection of Table objects for each test iteration. * * @return Collection of Table objects - * @throws SQLException on error + * @throws SQLException on error * @throws ClassNotFoundException on error - * @throws IOException on error + * @throws IOException on error */ @Parameterized.Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowOptionalColumnsTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable columns. + * Test Method to test JdbcToArrow Functionality for dealing with nullable and non-nullable + * columns. */ @Test @Override @@ -76,12 +74,13 @@ public void testJdbcToArrowValues() throws SQLException, IOException { } /** - * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column becomes - * nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes non-nullable. + * This method calls the assert methods for various DataSets. We verify that a SQL `NULL` column + * becomes nullable in the VectorSchemaRoot, and that a SQL `NOT NULL` column becomes + * non-nullable. 
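One step of reasoning worth spelling out before the override below: the nullable/non-nullable verification is purely schema-level, so it can be reproduced without converting any data. The sketch below assumes that jdbcToArrowSchema carries ResultSetMetaData nullability through to the Arrow fields, which is what the assertions below confirm on the converted root; the table definition is hypothetical.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSetMetaData;
import java.sql.Statement;
import java.util.Calendar;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.adapter.jdbc.JdbcToArrowUtils;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.types.pojo.Schema;

public class NullabilitySketch {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:nullability");
        Statement stmt = conn.createStatement()) {
      // Hypothetical table: one NULL-able column, one NOT NULL column.
      stmt.executeUpdate("CREATE TABLE t (a INT NULL, b INT NOT NULL)");
      ResultSetMetaData rsmd = stmt.executeQuery("SELECT a, b FROM t").getMetaData();
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(new RootAllocator(0), Calendar.getInstance(), true).build();
      Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config);
      System.out.println(schema.getFields().get(0).isNullable()); // true  (SQL NULL)
      System.out.println(schema.getFields().get(1).isNullable()); // false (SQL NOT NULL)
    }
  }
}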
* * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -90,5 +89,4 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { assertTrue(root.getSchema().getFields().get(0).isNullable()); assertFalse(root.getSchema().getFields().get(1).isNullable()); } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java index a925dd7ee32a8..d290b9bf08960 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.*; @@ -29,7 +28,6 @@ import java.util.Collection; import java.util.stream.Collectors; import java.util.stream.Stream; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcToArrow; @@ -64,8 +62,8 @@ import org.junit.runners.Parameterized; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with various data types - * for H2 database using single test data file. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * various data types for H2 database using single test data file. */ @RunWith(Parameterized.class) public class JdbcToArrowTest extends AbstractJdbcToArrowTest { @@ -92,53 +90,72 @@ public JdbcToArrowTest(Table table, boolean reuseVectorSchemaRoot) { * @throws IOException on error */ @Parameterized.Parameters(name = "table = {0}, reuse batch = {1}") - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { - return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)).flatMap(row -> - Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})).collect(Collectors.toList()); + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { + return Arrays.stream(prepareTestData(testFiles, JdbcToArrowTest.class)) + .flatMap(row -> Stream.of(new Object[] {row[0], true}, new Object[] {row[0], false})) + .collect(Collectors.toList()); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one test data file. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with only one + * test data file. 
*/ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance()), + false); testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery())), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE)), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance()), false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new RootAllocator(Integer.MAX_VALUE)), + false); + testDataSets( + sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), Calendar.getInstance()), + false); Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .build()), true); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), calendar) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .build()), + true); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(); ResultSetMetaData rsmd = getQueryMetaData(table.getQuery()); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setJdbcToArrowTypeConverter(jdbcToArrowTypeConverter(calendar, rsmd)) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); Schema schema = 
JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); } @@ -147,71 +164,105 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { JdbcToArrowTestHelper.assertFieldMetadataIsEmpty(root); - assertBigIntVectorValues((BigIntVector) root.getVector(BIGINT), table.getRowCount(), + assertBigIntVectorValues( + (BigIntVector) root.getVector(BIGINT), + table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues((TinyIntVector) root.getVector(TINYINT), table.getRowCount(), + assertTinyIntVectorValues( + (TinyIntVector) root.getVector(TINYINT), + table.getRowCount(), getIntValues(table.getValues(), TINYINT)); - assertSmallIntVectorValues((SmallIntVector) root.getVector(SMALLINT), table.getRowCount(), + assertSmallIntVectorValues( + (SmallIntVector) root.getVector(SMALLINT), + table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BINARY), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BINARY), + table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertVarBinaryVectorValues((VarBinaryVector) root.getVector(BLOB), table.getRowCount(), + assertVarBinaryVectorValues( + (VarBinaryVector) root.getVector(BLOB), + table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(CLOB), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CLOB), + table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarcharVectorValues((VarCharVector) root.getVector(VARCHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(VARCHAR), + table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarcharVectorValues((VarCharVector) root.getVector(CHAR), table.getRowCount(), + assertVarcharVectorValues( + (VarCharVector) root.getVector(CHAR), + table.getRowCount(), getCharArray(table.getValues(), CHAR)); - assertIntVectorValues((IntVector) root.getVector(INT), table.getRowCount(), - getIntValues(table.getValues(), INT)); + assertIntVectorValues( + (IntVector) root.getVector(INT), table.getRowCount(), getIntValues(table.getValues(), INT)); - assertBitVectorValues((BitVector) root.getVector(BIT), table.getRowCount(), - getIntValues(table.getValues(), BIT)); + assertBitVectorValues( + (BitVector) root.getVector(BIT), table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues((BitVector) root.getVector(BOOL), table.getRowCount(), + assertBooleanVectorValues( + (BitVector) root.getVector(BOOL), + table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateVectorValues((DateDayVector) root.getVector(DATE), table.getRowCount(), + assertDateVectorValues( + (DateDayVector) root.getVector(DATE), + table.getRowCount(), getIntValues(table.getValues(), DATE)); - assertTimeVectorValues((TimeMilliVector) root.getVector(TIME), 
table.getRowCount(), + assertTimeVectorValues( + (TimeMilliVector) root.getVector(TIME), + table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues((TimeStampVector) root.getVector(TIMESTAMP), table.getRowCount(), + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(TIMESTAMP), + table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues((DecimalVector) root.getVector(DECIMAL), table.getRowCount(), + assertDecimalVectorValues( + (DecimalVector) root.getVector(DECIMAL), + table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat8VectorValues((Float8Vector) root.getVector(DOUBLE), table.getRowCount(), + assertFloat8VectorValues( + (Float8Vector) root.getVector(DOUBLE), + table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertFloat4VectorValues((Float4Vector) root.getVector(REAL), table.getRowCount(), + assertFloat4VectorValues( + (Float4Vector) root.getVector(REAL), + table.getRowCount(), getFloatValues(table.getValues(), REAL)); assertNullVectorValues((NullVector) root.getVector(NULL), table.getRowCount()); - assertListVectorValues((ListVector) root.getVector(LIST), table.getRowCount(), + assertListVectorValues( + (ListVector) root.getVector(LIST), + table.getRowCount(), getListValues(table.getValues(), LIST)); if (isIncludeMapVector) { - assertMapVectorValues((MapVector) root.getVector(MAP), table.getRowCount(), - getMapValues(table.getValues(), MAP)); + assertMapVectorValues( + (MapVector) root.getVector(MAP), + table.getRowCount(), + getMapValues(table.getValues(), MAP)); } } @@ -221,11 +272,12 @@ public void runLargeNumberOfRows() throws IOException, SQLException { int x = 0; final int targetRows = 600000; ResultSet rs = ResultSetUtility.generateBasicResultSet(targetRows); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder( - allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder( + allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) { while (iter.hasNext()) { diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java index fe08db161c8ac..c4930c3ab6017 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTimeZoneTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
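The class below feeds calendars pinned to specific zones (the EST/GMT/PST fixtures) into every conversion path, because the calendar fixes how DATE, TIME, and TIMESTAMP columns are interpreted during conversion. A minimal sketch of the pattern, with a hypothetical zone id; the builder and iterator calls are the same ones exercised in this diff.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.util.Calendar;
import java.util.TimeZone;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class TimeZoneSketch {
  public static void main(String[] args) throws Exception {
    // Hypothetical zone; the tests parameterize over EST, GMT, and PST fixtures.
    Calendar est = Calendar.getInstance(TimeZone.getTimeZone("America/New_York"));
    JdbcToArrowConfig config =
        new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), est).build();
    try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:tz")) {
      ResultSet rs = conn.createStatement().executeQuery("SELECT CURRENT_TIMESTAMP AS ts");
      try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
        while (iter.hasNext()) {
          try (VectorSchemaRoot root = iter.next()) {
            System.out.println(root.contentToTSVString());
          }
        }
      }
    }
  }
}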
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.assertDateVectorValues; @@ -28,7 +27,6 @@ import java.util.Calendar; import java.util.Collection; import java.util.TimeZone; - import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder; @@ -47,10 +45,9 @@ import org.junit.runners.Parameterized.Parameters; /** - * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with TimeZone based Date, - * Time and Timestamp datatypes for H2 database. + * JUnit Test Class which contains methods to test JDBC to Arrow data conversion functionality with + * TimeZone based Date, Time and Timestamp datatypes for H2 database. */ - @RunWith(Parameterized.class) public class JdbcToArrowTimeZoneTest extends AbstractJdbcToArrowTest { @@ -94,40 +91,60 @@ public JdbcToArrowTimeZoneTest(Table table) { * @throws IOException on error */ @Parameters - public static Collection getTestData() throws SQLException, ClassNotFoundException, IOException { + public static Collection getTestData() + throws SQLException, ClassNotFoundException, IOException { return Arrays.asList(prepareTestData(testFiles, JdbcToArrowTimeZoneTest.class)); } /** - * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone based Date, - * Time and Timestamp datatype. + * Test Method to test JdbcToArrow Functionality for various H2 DB based datatypes with TimeZone + * based Date, Time and Timestamp datatype. */ @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - testDataSets(sqlToArrow(conn, table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow(conn.createStatement().executeQuery(table.getQuery()), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), false); - testDataSets(sqlToArrow( - conn.createStatement().executeQuery(table.getQuery()), - new JdbcToArrowConfigBuilder( + testDataSets( + sqlToArrow( + conn, + table.getQuery(), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); - testDataSets(sqlToArrow( - conn, - table.getQuery(), - new JdbcToArrowConfigBuilder( + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))).build()), false); + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))), + false); + testDataSets( + sqlToArrow( + conn.createStatement().executeQuery(table.getQuery()), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); + testDataSets( + sqlToArrow( + conn, + table.getQuery(), + new JdbcToArrowConfigBuilder( + new RootAllocator(Integer.MAX_VALUE), + 
Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone()))) + .build()), + false); } @Test public void testJdbcSchemaMetadata() throws SQLException { Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone(table.getTimezone())); - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(0), calendar, true).build(); ResultSetMetaData rsmd = conn.createStatement().executeQuery(table.getQuery()).getMetaData(); Schema schema = JdbcToArrowUtils.jdbcToArrowSchema(rsmd, config); JdbcToArrowTestHelper.assertFieldMetadataMatchesResultSetMetadata(rsmd, schema); @@ -137,8 +154,8 @@ public void testJdbcSchemaMetadata() throws SQLException { * This method calls the assert methods for various DataSets. * * @param root VectorSchemaRoot for test - * @param isIncludeMapVector is this dataset checks includes map column. - * Jdbc type to 'map' mapping declared in configuration only manually + * @param isIncludeMapVector is this dataset checks includes map column. Jdbc type to 'map' + * mapping declared in configuration only manually */ @Override public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { @@ -148,19 +165,25 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { case EST_DATE: case GMT_DATE: case PST_DATE: - assertDateVectorValues((DateDayVector) root.getVector(table.getVector()), table.getValues().length, + assertDateVectorValues( + (DateDayVector) root.getVector(table.getVector()), + table.getValues().length, table.getIntValues()); break; case EST_TIME: case GMT_TIME: case PST_TIME: - assertTimeVectorValues((TimeMilliVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeVectorValues( + (TimeMilliVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; case EST_TIMESTAMP: case GMT_TIMESTAMP: case PST_TIMESTAMP: - assertTimeStampVectorValues((TimeStampVector) root.getVector(table.getVector()), table.getValues().length, + assertTimeStampVectorValues( + (TimeStampVector) root.getVector(table.getVector()), + table.getValues().length, table.getLongValues()); break; default: @@ -168,5 +191,4 @@ public void testDataSets(VectorSchemaRoot root, boolean isIncludeMapVector) { break; } } - } diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java index 1d7e2760f843e..caa1c1d971adb 100644 --- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java +++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowVectorIteratorTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
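The iterator test below drives everything through setTargetBatchSize(3), so a 10-row table comes back as batches of 3, 3, 3, 1 rows (exactly the shape of the intValues fixture further down), while NO_LIMIT_BATCH_SIZE, also used below, disables the chunking entirely. A compact sketch of consuming such batches, with a hypothetical table:

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;
import java.util.Calendar;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class BatchingSketch {
  public static void main(String[] args) throws Exception {
    try (Connection conn = DriverManager.getConnection("jdbc:h2:mem:batches");
        Statement stmt = conn.createStatement()) {
      stmt.executeUpdate("CREATE TABLE t (x INT)"); // hypothetical 10-row table
      for (int i = 0; i < 10; i++) {
        stmt.executeUpdate("INSERT INTO t VALUES (" + i + ")");
      }
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(
                  new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance())
              .setTargetBatchSize(3) // each VectorSchemaRoot holds at most 3 rows
              .build();
      ResultSet rs = stmt.executeQuery("SELECT x FROM t");
      try (ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config)) {
        while (iter.hasNext()) {
          try (VectorSchemaRoot root = iter.next()) {
            System.out.println(root.getRowCount()); // prints 3, 3, 3, 1
          }
        }
      }
    }
  }
}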
*/ - package org.apache.arrow.adapter.jdbc.h2; import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getBinaryValues; @@ -42,7 +41,6 @@ import java.util.Arrays; import java.util.Calendar; import java.util.List; - import org.apache.arrow.adapter.jdbc.ArrowVectorIterator; import org.apache.arrow.adapter.jdbc.JdbcToArrow; import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig; @@ -91,14 +89,15 @@ public JdbcToArrowVectorIteratorTest(Table table, boolean reuseVectorSchemaRoot) @Test @Override public void testJdbcToArrowValues() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -106,27 +105,28 @@ public void testJdbcToArrowValues() throws SQLException, IOException { @Test public void testVectorSchemaRootReuse() throws SQLException, IOException { Integer[][] intValues = { - {101, 102, 103}, - {104, null, null}, - {107, 108, 109}, - {110} + {101, 102, 103}, + {104, null, null}, + {107, 108, 109}, + {110} }; Integer[][][] listValues = { - {{1, 2, 3}, {1, 2}, {1}}, - {{2, 3, 4}, {2, 3}, {2}}, - {{3, 4, 5}, {3, 4}, {3}}, - {{}} + {{1, 2, 3}, {1, 2}, {1}}, + {{2, 3, 4}, {2, 3}, {2}}, + {{3, 4, 5}, {3, 4}, {3}}, + {{}} }; - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); int batchCount = 0; VectorSchemaRoot prev = null; @@ -178,14 +178,15 @@ public void testVectorSchemaRootReuse() throws SQLException, IOException { @Test public void testJdbcToArrowValuesNoLimit() throws SQLException, IOException { - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(table.getQuery()), config); + 
JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery(table.getQuery()), config); validate(iterator); } @@ -195,12 +196,12 @@ public void testTimeStampConsumer() throws SQLException, IOException { final String sql = "select timestamp_field11 from table1"; // first experiment, with calendar and time zone. - JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + JdbcToArrowConfig config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNotNull(config.getCalendar()); try (ArrowVectorIterator iterator = @@ -213,16 +214,16 @@ public void testTimeStampConsumer() throws SQLException, IOException { } // second experiment, without calendar and time zone. - config = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - null) - .setTargetBatchSize(3) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) - .build(); + config = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), null) + .setTargetBatchSize(3) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP) + .build(); assertNull(config.getCalendar()); try (ArrowVectorIterator iterator = - JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { + JdbcToArrow.sqlToArrowVectorIterator(conn.createStatement().executeQuery(sql), config)) { VectorSchemaRoot root = iterator.next(); assertEquals(1, root.getFieldVectors().size()); @@ -278,24 +279,40 @@ private void validate(ArrowVectorIterator iterator) throws SQLException, IOExcep float8Vectors.add((Float8Vector) root.getVector(DOUBLE)); listVectors.add((ListVector) root.getVector(LIST)); } - assertBigIntVectorValues(bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); - assertTinyIntVectorValues(tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); + assertBigIntVectorValues( + bigIntVectors, table.getRowCount(), getLongValues(table.getValues(), BIGINT)); + assertTinyIntVectorValues( + tinyIntVectors, table.getRowCount(), getIntValues(table.getValues(), TINYINT)); assertIntVectorValues(intVectors, table.getRowCount(), getIntValues(table.getValues(), INT)); - assertSmallIntVectorValues(smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); - assertBinaryVectorValues(vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), BINARY)); - assertBinaryVectorValues(vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); - assertVarCharVectorValues(vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); - assertVarCharVectorValues(vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); - assertVarCharVectorValues(vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); + assertSmallIntVectorValues( + smallIntVectors, table.getRowCount(), getIntValues(table.getValues(), SMALLINT)); + assertBinaryVectorValues( + vectorsForBinary, table.getRowCount(), getBinaryValues(table.getValues(), 
BINARY)); + assertBinaryVectorValues( + vectorsForBlob, table.getRowCount(), getBinaryValues(table.getValues(), BLOB)); + assertVarCharVectorValues( + vectorsForClob, table.getRowCount(), getCharArray(table.getValues(), CLOB)); + assertVarCharVectorValues( + vectorsForVarChar, table.getRowCount(), getCharArray(table.getValues(), VARCHAR)); + assertVarCharVectorValues( + vectorsForChar, table.getRowCount(), getCharArray(table.getValues(), CHAR)); assertBitVectorValues(vectorsForBit, table.getRowCount(), getIntValues(table.getValues(), BIT)); - assertBooleanVectorValues(vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); - assertDateDayVectorValues(dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); - assertTimeMilliVectorValues(timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); - assertTimeStampVectorValues(timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); - assertDecimalVectorValues(decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); - assertFloat4VectorValues(float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); - assertFloat8VectorValues(float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); - assertListVectorValues(listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); + assertBooleanVectorValues( + vectorsForBool, table.getRowCount(), getBooleanValues(table.getValues(), BOOL)); + assertDateDayVectorValues( + dateDayVectors, table.getRowCount(), getLongValues(table.getValues(), DATE)); + assertTimeMilliVectorValues( + timeMilliVectors, table.getRowCount(), getLongValues(table.getValues(), TIME)); + assertTimeStampVectorValues( + timeStampVectors, table.getRowCount(), getLongValues(table.getValues(), TIMESTAMP)); + assertDecimalVectorValues( + decimalVectors, table.getRowCount(), getDecimalValues(table.getValues(), DECIMAL)); + assertFloat4VectorValues( + float4Vectors, table.getRowCount(), getFloatValues(table.getValues(), REAL)); + assertFloat8VectorValues( + float8Vectors, table.getRowCount(), getDoubleValues(table.getValues(), DOUBLE)); + assertListVectorValues( + listVectors, table.getRowCount(), getListValues(table.getValues(), LIST)); roots.forEach(root -> root.close()); } @@ -324,7 +341,8 @@ private void assertFloat4VectorValues(List vectors, int rowCount, } } - private void assertDecimalVectorValues(List vectors, int rowCount, BigDecimal[] values) { + private void assertDecimalVectorValues( + List vectors, int rowCount, BigDecimal[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -337,7 +355,8 @@ private void assertDecimalVectorValues(List vectors, int rowCount } } - private void assertTimeStampVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeStampVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -349,7 +368,8 @@ private void assertTimeStampVectorValues(List vectors, int rowC } } - private void assertTimeMilliVectorValues(List vectors, int rowCount, Long[] values) { + private void assertTimeMilliVectorValues( + List vectors, int rowCount, Long[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -397,7 +417,8 @@ private void 
assertBooleanVectorValues(List vectors, int rowCount, Bo } } - private void assertVarCharVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertVarCharVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -409,7 +430,8 @@ private void assertVarCharVectorValues(List vectors, int rowCount } } - private void assertBinaryVectorValues(List vectors, int rowCount, byte[][] values) { + private void assertBinaryVectorValues( + List vectors, int rowCount, byte[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -421,7 +443,8 @@ private void assertBinaryVectorValues(List vectors, int rowCoun } } - private void assertSmallIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertSmallIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -433,7 +456,8 @@ private void assertSmallIntVectorValues(List vectors, int rowCou } } - private void assertTinyIntVectorValues(List vectors, int rowCount, Integer[] values) { + private void assertTinyIntVectorValues( + List vectors, int rowCount, Integer[] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -474,7 +498,8 @@ private void assertIntVectorValues(List vectors, int rowCount, Intege } } - public static void assertListVectorValues(List vectors, int rowCount, Integer[][] values) { + public static void assertListVectorValues( + List vectors, int rowCount, Integer[][] values) { int valueCount = vectors.stream().mapToInt(ValueVector::getValueCount).sum(); assertEquals(rowCount, valueCount); @@ -492,12 +517,11 @@ public static void assertListVectorValues(List vectors, int rowCount } } - /** - * Runs a simple query, and encapsulates the result into a field vector. - */ + /** Runs a simple query, and encapsulates the result into a field vector. 
*/ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException, IOException { - ArrowVectorIterator iterator = JdbcToArrow.sqlToArrowVectorIterator( - conn.createStatement().executeQuery("select real_field8 from table1"), config); + ArrowVectorIterator iterator = + JdbcToArrow.sqlToArrowVectorIterator( + conn.createStatement().executeQuery("select real_field8 from table1"), config); VectorSchemaRoot root = iterator.next(); @@ -513,10 +537,11 @@ private FieldVector getQueryResult(JdbcToArrowConfig config) throws SQLException @Test public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOException { - JdbcToArrowConfigBuilder builder = new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), - Calendar.getInstance()).setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) - .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) - .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); + JdbcToArrowConfigBuilder builder = + new JdbcToArrowConfigBuilder(new RootAllocator(Integer.MAX_VALUE), Calendar.getInstance()) + .setTargetBatchSize(JdbcToArrowConfig.NO_LIMIT_BATCH_SIZE) + .setReuseVectorSchemaRoot(reuseVectorSchemaRoot) + .setArraySubTypeByColumnNameMap(ARRAY_SUB_TYPE_BY_COLUMN_NAME_MAP); // first experiment, using default type converter JdbcToArrowConfig config = builder.build(); @@ -527,15 +552,16 @@ public void testJdbcToArrowCustomTypeConversion() throws SQLException, IOExcepti } // second experiment, using customized type converter - builder.setJdbcToArrowTypeConverter((fieldInfo) -> { - switch (fieldInfo.getJdbcType()) { - case Types.REAL: - // this is different from the default type converter - return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - default: - return null; - } - }); + builder.setJdbcToArrowTypeConverter( + (fieldInfo) -> { + switch (fieldInfo.getJdbcType()) { + case Types.REAL: + // this is different from the default type converter + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + default: + return null; + } + }); config = builder.build(); try (FieldVector vector = getQueryResult(config)) { diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index bc89c4698eecf..7df08e1a98b36 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -24,9 +24,13 @@ jar Arrow Orc Adapter (Experimental/Contrib)A JNI wrapper for the C++ ORC reader implementation. + ../../../cpp/release-build/ + dev/checkstyle/checkstyle-spotless.xml + none + org.apache.arrow diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java index 716a13876608c..faf48e19445ae 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcFieldNode.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Metadata about Vectors/Arrays that is passed via JNI interface. - */ +/** Metadata about Vectors/Arrays that is passed via JNI interface. */ class OrcFieldNode { private final int length; @@ -27,6 +24,7 @@ class OrcFieldNode { /** * Construct a new instance. + * * @param length the number of values written. * @param nullCount the number of null values. 
*/ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java index d61799e990f77..692b0c061839c 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcJniUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.File; @@ -25,24 +24,21 @@ import java.nio.file.StandardCopyOption; import java.util.Locale; -/** - * Helper class for JNI related operations. - */ +/** Helper class for JNI related operations. */ class OrcJniUtils { private static final String LIBRARY_NAME = "arrow_orc_jni"; private static boolean isLoaded = false; - private OrcJniUtils() { - } + private OrcJniUtils() {} - static void loadOrcAdapterLibraryFromJar() - throws IOException, IllegalAccessException { + static void loadOrcAdapterLibraryFromJar() throws IOException, IllegalAccessException { synchronized (OrcJniUtils.class) { if (!isLoaded) { final String libraryToLoad = LIBRARY_NAME + "/" + getNormalizedArch() + "/" + System.mapLibraryName(LIBRARY_NAME); final File libraryFile = - moveFileFromJarToTemp(System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); + moveFileFromJarToTemp( + System.getProperty("java.io.tmpdir"), libraryToLoad, LIBRARY_NAME); System.load(libraryFile.getAbsolutePath()); isLoaded = true; } @@ -64,11 +60,11 @@ private static String getNormalizedArch() { return arch; } - private static File moveFileFromJarToTemp(final String tmpDir, String libraryToLoad, String libraryName) - throws IOException { + private static File moveFileFromJarToTemp( + final String tmpDir, String libraryToLoad, String libraryName) throws IOException { final File temp = File.createTempFile(tmpDir, libraryName); - try (final InputStream is = OrcReaderJniWrapper.class.getClassLoader() - .getResourceAsStream(libraryToLoad)) { + try (final InputStream is = + OrcReaderJniWrapper.class.getClassLoader().getResourceAsStream(libraryToLoad)) { if (is == null) { throw new FileNotFoundException(libraryToLoad); } else { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java index 473e8314243b1..70f2a655654c6 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcMemoryJniWrapper.java @@ -14,12 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * Wrapper for orc memory allocated by native code. - */ +/** Wrapper for orc memory allocated by native code. */ class OrcMemoryJniWrapper implements AutoCloseable { private final long nativeInstanceId; @@ -32,6 +29,7 @@ class OrcMemoryJniWrapper implements AutoCloseable { /** * Construct a new instance. + * * @param nativeInstanceId unique id of the underlying memory. * @param memoryAddress starting memory address of the underlying memory. * @param size size of the valid data. @@ -46,6 +44,7 @@ class OrcMemoryJniWrapper implements AutoCloseable { /** * Return the size of underlying chunk of memory that has valid data. 
+ * * @return valid data size */ long getSize() { @@ -54,6 +53,7 @@ long getSize() { /** * Return the size of underlying chunk of memory managed by this OrcMemoryJniWrapper. + * * @return underlying memory size */ long getCapacity() { @@ -62,6 +62,7 @@ long getCapacity() { /** * Return the memory address of underlying chunk of memory. + * * @return memory address */ long getMemoryAddress() { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java index 648e17e9c374c..ca9b44e7e8123 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReader.java @@ -14,44 +14,42 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; /** - * Orc Reader that allow accessing orc stripes in Orc file. - * This orc reader basically acts like an ArrowReader iterator that - * iterate over orc stripes. Each stripe will be accessed via an - * ArrowReader. + * Orc Reader that allow accessing orc stripes in Orc file. This orc reader basically acts like an + * ArrowReader iterator that iterate over orc stripes. Each stripe will be accessed via an + * ArrowReader. */ public class OrcReader implements AutoCloseable { private final OrcReaderJniWrapper jniWrapper; private BufferAllocator allocator; - /** - * reference to native reader instance. - */ + /** reference to native reader instance. */ private final long nativeInstanceId; /** * Create an OrcReader that iterate over orc stripes. + * * @param filePath file path to target file, currently only support local file. * @param allocator allocator provided to ArrowReader. * @throws IOException throws exception in case of file not found */ - public OrcReader(String filePath, BufferAllocator allocator) throws IOException, IllegalAccessException { + public OrcReader(String filePath, BufferAllocator allocator) + throws IOException, IllegalAccessException { this.allocator = allocator; this.jniWrapper = OrcReaderJniWrapper.getInstance(); this.nativeInstanceId = jniWrapper.open(filePath); } /** - * Seek to designated row. Invoke NextStripeReader() after seek - * will return stripe reader starting from designated row. + * Seek to designated row. Invoke NextStripeReader() after seek will return stripe reader starting + * from designated row. + * * @param rowNumber the rows number to seek * @return true if seek operation is succeeded */ diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java index ff449c343c4e7..be57485005fbf 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReaderJniWrapper.java @@ -14,14 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; -/** - * JNI wrapper for Orc reader. - */ +/** JNI wrapper for Orc reader. 
*/ class OrcReaderJniWrapper { private static volatile OrcReaderJniWrapper INSTANCE; @@ -41,21 +38,24 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Construct a orc file reader over the target file. + * * @param fileName absolute file path of target file - * @return id of the orc reader instance if file opened successfully, - * otherwise return error code * -1. + * @return id of the orc reader instance if file opened successfully, otherwise return error code + * * -1. */ native long open(String fileName); /** * Release resources associated with designated reader instance. + * * @param readerId id of the reader instance. */ native void close(long readerId); /** - * Seek to designated row. Invoke nextStripeReader() after seek - * will return id of stripe reader starting from designated row. + * Seek to designated row. Invoke nextStripeReader() after seek will return id of stripe reader + * starting from designated row. + * * @param readerId id of the reader instance * @param rowNumber the rows number to seek * @return true if seek operation is succeeded @@ -64,6 +64,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * The number of stripes in the file. + * * @param readerId id of the reader instance * @return number of stripes */ @@ -71,6 +72,7 @@ static OrcReaderJniWrapper getInstance() throws IOException, IllegalAccessExcept /** * Get a stripe level ArrowReader with specified batchSize in each record batch. + * * @param readerId id of the reader instance * @param batchSize the number of rows loaded on each iteration * @return id of the stripe reader instance. diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java index a006cacab98f2..f78898df2205d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcRecordBatch.java @@ -14,27 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.util.Arrays; import java.util.List; -/** - * Wrapper for record batch meta and native memory. - */ +/** Wrapper for record batch meta and native memory. */ class OrcRecordBatch { final int length; - /** - * Nodes correspond to the pre-ordered flattened logical schema. - */ + /** Nodes correspond to the pre-ordered flattened logical schema. */ final List nodes; final List buffers; /** * Construct a new instance. + * * @param length number of records included in current batch * @param nodes meta data for each fields * @param buffers buffers for underlying data diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java index fdec337e85d39..38233a0493bef 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcReferenceManager.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.adapter.orc; import java.util.concurrent.atomic.AtomicInteger; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.OwnershipTransferResult; @@ -26,8 +24,8 @@ import org.apache.arrow.util.Preconditions; /** - * A simple reference manager implementation for memory allocated by native code. - * The underlying memory will be released when reference count reach zero. + * A simple reference manager implementation for memory allocated by native code. The underlying + * memory will be released when reference count reach zero. */ public class OrcReferenceManager implements ReferenceManager { private final AtomicInteger bufRefCnt = new AtomicInteger(0); @@ -50,8 +48,8 @@ public boolean release() { @Override public boolean release(int decrement) { - Preconditions.checkState(decrement >= 1, - "ref count decrement should be greater than or equal to 1"); + Preconditions.checkState( + decrement >= 1, "ref count decrement should be greater than or equal to 1"); // decrement the ref count final int refCnt; synchronized (this) { @@ -89,18 +87,21 @@ public ArrowBuf deriveBuffer(ArrowBuf sourceBuffer, long index, long length) { final long derivedBufferAddress = sourceBuffer.memoryAddress() + index; // create new ArrowBuf - final ArrowBuf derivedBuf = new ArrowBuf( + final ArrowBuf derivedBuf = + new ArrowBuf( this, null, length, // length (in bytes) in the underlying memory chunk for this new ArrowBuf - derivedBufferAddress // starting byte address in the underlying memory for this new ArrowBuf, + derivedBufferAddress // starting byte address in the underlying memory for this new + // ArrowBuf, ); return derivedBuf; } @Override - public OwnershipTransferResult transferOwnership(ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { + public OwnershipTransferResult transferOwnership( + ArrowBuf sourceBuffer, BufferAllocator targetAllocator) { throw new UnsupportedOperationException(); } diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java index 484296d92e039..52f5cf429a48d 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReader.java @@ -14,13 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import java.io.IOException; import java.util.ArrayList; import java.util.stream.Collectors; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.ipc.ArrowReader; @@ -33,19 +31,16 @@ import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ByteArrayReadableSeekableByteChannel; -/** - * Orc stripe that load data into ArrowRecordBatch. - */ +/** Orc stripe that load data into ArrowRecordBatch. */ public class OrcStripeReader extends ArrowReader { - /** - * reference to native stripe reader instance. - */ + /** reference to native stripe reader instance. */ private final long nativeInstanceId; /** * Construct a new instance. 
- * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by - * calling nextStripeReader from OrcReaderJniWrapper + * + * @param nativeInstanceId nativeInstanceId of the stripe reader instance, obtained by calling + * nextStripeReader from OrcReaderJniWrapper * @param allocator memory allocator for accounting. */ OrcStripeReader(long nativeInstanceId, BufferAllocator allocator) { @@ -62,18 +57,20 @@ public boolean loadNextBatch() throws IOException { ArrayList buffers = new ArrayList<>(); for (OrcMemoryJniWrapper buffer : recordBatch.buffers) { - buffers.add(new ArrowBuf( + buffers.add( + new ArrowBuf( new OrcReferenceManager(buffer), null, (int) buffer.getSize(), buffer.getMemoryAddress())); } - loadRecordBatch(new ArrowRecordBatch( + loadRecordBatch( + new ArrowRecordBatch( recordBatch.length, recordBatch.nodes.stream() - .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) - .collect(Collectors.toList()), + .map(buf -> new ArrowFieldNode(buf.getLength(), buf.getNullCount())) + .collect(Collectors.toList()), buffers)); return true; } @@ -83,7 +80,6 @@ public long bytesRead() { return 0; } - @Override protected void closeReadSource() throws IOException { OrcStripeReaderJniWrapper.close(nativeInstanceId); @@ -94,9 +90,8 @@ protected Schema readSchema() throws IOException { byte[] schemaBytes = OrcStripeReaderJniWrapper.getSchema(nativeInstanceId); try (MessageChannelReader schemaReader = - new MessageChannelReader( - new ReadChannel( - new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { + new MessageChannelReader( + new ReadChannel(new ByteArrayReadableSeekableByteChannel(schemaBytes)), allocator)) { MessageResult result = schemaReader.readNext(); if (result == null) { diff --git a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java index 1dd96986108b4..e7b691087fb96 100644 --- a/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java +++ b/java/adapter/orc/src/main/java/org/apache/arrow/adapter/orc/OrcStripeReaderJniWrapper.java @@ -14,16 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; -/** - * JNI wrapper for orc stripe reader. - */ +/** JNI wrapper for orc stripe reader. */ class OrcStripeReaderJniWrapper { /** * Get the schema of current stripe. + * * @param readerId id of the stripe reader instance. * @return serialized schema. */ @@ -31,14 +29,15 @@ class OrcStripeReaderJniWrapper { /** * Load next record batch. + * * @param readerId id of the stripe reader instance. - * @return loaded record batch, return null when reached - * the end of current stripe. + * @return loaded record batch, return null when reached the end of current stripe. */ static native OrcRecordBatch next(long readerId); /** * Release resources of underlying reader. + * * @param readerId id of the stripe reader instance. 
*/ static native void close(long readerId); diff --git a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java index 4153a35a61c67..17098806be72a 100644 --- a/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java +++ b/java/adapter/orc/src/test/java/org/apache/arrow/adapter/orc/OrcReaderTest.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.adapter.orc; import static org.junit.Assert.assertEquals; @@ -24,8 +23,6 @@ import java.io.File; import java.nio.charset.StandardCharsets; import java.util.List; - - import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.IntVector; @@ -45,11 +42,9 @@ import org.junit.Test; import org.junit.rules.TemporaryFolder; - public class OrcReaderTest { - @Rule - public TemporaryFolder testFolder = new TemporaryFolder(); + @Rule public TemporaryFolder testFolder = new TemporaryFolder(); private static final int MAX_ALLOCATION = 8 * 1024; private static RootAllocator allocator; @@ -64,8 +59,10 @@ public void testOrcJniReader() throws Exception { TypeDescription schema = TypeDescription.fromString("struct"); File testFile = new File(testFolder.getRoot(), "test-orc"); - Writer writer = OrcFile.createWriter(new Path(testFile.getAbsolutePath()), - OrcFile.writerOptions(new Configuration()).setSchema(schema)); + Writer writer = + OrcFile.createWriter( + new Path(testFile.getAbsolutePath()), + OrcFile.writerOptions(new Configuration()).setSchema(schema)); VectorizedRowBatch batch = schema.createRowBatch(); LongColumnVector longColumnVector = (LongColumnVector) batch.cols[0]; BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[1]; diff --git a/java/dev/checkstyle/checkstyle-spotless.xml b/java/dev/checkstyle/checkstyle-spotless.xml index cbaec1a39bf2c..a2e9a60b12c72 100644 --- a/java/dev/checkstyle/checkstyle-spotless.xml +++ b/java/dev/checkstyle/checkstyle-spotless.xml @@ -89,7 +89,7 @@ - --> + @@ -99,7 +99,7 @@ value="WhitespaceAround: ''{0}'' is not followed by whitespace. Empty blocks may only be represented as '{}' when not part of a multi-block statement (4.1.3)"/> - + -->
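Taken together, the JDBC hunks above keep re-wrapping one and the same configuration pattern: build a JdbcToArrowConfig through JdbcToArrowConfigBuilder, then drain the ArrowVectorIterator returned by JdbcToArrow.sqlToArrowVectorIterator. A minimal sketch of that flow outside the test harness follows; the class name, the in-memory H2 URL, and the one-row query are illustrative assumptions, not part of this patch.

import java.sql.Connection;
import java.sql.DriverManager;
import java.util.Calendar;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.VectorSchemaRoot;

public class JdbcToArrowSketch {
  public static void main(String[] args) throws Exception {
    // Assumes the H2 driver is on the classpath; URL and query are placeholders.
    try (RootAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
        Connection conn = DriverManager.getConnection("jdbc:h2:mem:example")) {
      // Same builder pattern the reformatted tests use: the target batch size
      // caps how many rows land in each VectorSchemaRoot the iterator yields.
      JdbcToArrowConfig config =
          new JdbcToArrowConfigBuilder(allocator, Calendar.getInstance())
              .setTargetBatchSize(3)
              .setReuseVectorSchemaRoot(false)
              .build();
      try (ArrowVectorIterator iterator =
          JdbcToArrow.sqlToArrowVectorIterator(
              conn.createStatement().executeQuery("SELECT 1 AS x"), config)) {
        while (iterator.hasNext()) {
          // With setReuseVectorSchemaRoot(false), each batch is a fresh root
          // and the caller is responsible for closing it.
          try (VectorSchemaRoot root = iterator.next()) {
            System.out.println(root.contentToTSVString());
          }
        }
      }
    }
  }
}

With setReuseVectorSchemaRoot(true), the iterator instead refills a single root on each next() call, which is the behavior testVectorSchemaRootReuse above asserts; this sketch opts out so each batch can be closed independently.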