Commit 6e5c5b3

Merge branch 'main' into minor-enhance-ipc-write

mapleFU committed May 31, 2024
2 parents 7607379 + 706b3e0
Showing 244 changed files with 5,326 additions and 1,710 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/java.yml
@@ -86,11 +86,11 @@ jobs:
         env:
           ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
           ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: |
           archery docker run \
             -e CI=true \
-            -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
+            -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
             ${{ matrix.image }}
       - name: Docker Push
         if: >-
@@ -127,12 +127,12 @@ jobs:
       - name: Build
         shell: bash
         env:
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
       - name: Test
         shell: bash
         env:
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: ci/scripts/java_test.sh $(pwd) $(pwd)/build

   windows:
@@ -158,10 +158,10 @@ jobs:
       - name: Build
         shell: bash
         env:
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
       - name: Test
         shell: bash
         env:
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: ci/scripts/java_test.sh $(pwd) $(pwd)/build
4 changes: 2 additions & 2 deletions .github/workflows/java_jni.yml
@@ -120,11 +120,11 @@ jobs:
         env:
           ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
           ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
-          GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
+          DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
         run: |
           archery docker run \
             -e CI=true \
-            -e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
+            -e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
             conda-python-java-integration
       - name: Docker Push
         if: >-
5 changes: 3 additions & 2 deletions .github/workflows/r.yml
@@ -370,11 +370,12 @@ jobs:
             MAKEFLAGS = paste0("-j", parallel::detectCores()),
             ARROW_R_DEV = TRUE,
             "_R_CHECK_FORCE_SUGGESTS_" = FALSE,
-            "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE
+            "_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE,
+            "_R_CHECK_DONTTEST_EXAMPLES_" = TRUE
           )
           rcmdcheck::rcmdcheck(".",
             build_args = '--no-build-vignettes',
-            args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
+            args = c('--no-manual', '--as-cran', '--ignore-vignettes'),
             error_on = 'warning',
             check_dir = 'check',
             timeout = 3600
2 changes: 1 addition & 1 deletion .gitignore
@@ -102,4 +102,4 @@ __debug_bin
 .envrc

 # Develocity
-.mvn/.gradle-enterprise/
+.mvn/.develocity/
10 changes: 5 additions & 5 deletions .mvn/gradle-enterprise.xml → .mvn/develocity.xml
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="UTF-8"?>
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <!--
   Licensed to the Apache Software Foundation (ASF) under one
@@ -19,19 +19,19 @@
   under the License.
 -->
-<gradleEnterprise xmlns="https://www.gradle.com/gradle-enterprise-maven" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://www.gradle.com/gradle-enterprise-maven https://www.gradle.com/schema/gradle-enterprise-maven.xsd">
+<develocity xmlns="https://www.gradle.com/develocity-maven" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://www.gradle.com/develocity-maven https://www.gradle.com/schema/develocity-maven.xsd">
   <server>
     <url>https://ge.apache.org</url>
     <allowUntrusted>false</allowUntrusted>
   </server>
   <buildScan>
     <capture>
-      <goalInputFiles>true</goalInputFiles>
+      <fileFingerprints>true</fileFingerprints>
       <buildLogging>true</buildLogging>
       <testLogging>true</testLogging>
     </capture>
     <backgroundBuildScanUpload>#{isFalse(env['CI'])}</backgroundBuildScanUpload>
-    <publish>ALWAYS</publish>
+    <publishing><onlyIf>true</onlyIf></publishing>
     <publishIfAuthenticated>true</publishIfAuthenticated>
     <obfuscation>
       <ipAddresses>#{{'0.0.0.0'}}</ipAddresses>
@@ -42,4 +42,4 @@
       <enabled>false</enabled>
     </remote>
   </buildCache>
-</gradleEnterprise>
+</develocity>
6 changes: 3 additions & 3 deletions .mvn/extensions.xml
@@ -22,12 +22,12 @@
 <extensions xmlns="http://maven.apache.org/EXTENSIONS/1.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/EXTENSIONS/1.0.0 http://maven.apache.org/xsd/core-extensions-1.0.0.xsd">
   <extension>
     <groupId>com.gradle</groupId>
-    <artifactId>gradle-enterprise-maven-extension</artifactId>
-    <version>1.20</version>
+    <artifactId>develocity-maven-extension</artifactId>
+    <version>1.21.4</version>
   </extension>
   <extension>
     <groupId>com.gradle</groupId>
     <artifactId>common-custom-user-data-maven-extension</artifactId>
-    <version>1.12.5</version>
+    <version>2.0</version>
   </extension>
 </extensions>
37 changes: 36 additions & 1 deletion c_glib/arrow-dataset-glib/dataset.cpp
@@ -19,6 +19,7 @@

 #include <arrow-glib/error.hpp>
 #include <arrow-glib/file-system.hpp>
+#include <arrow-glib/reader.hpp>
 #include <arrow-glib/table.hpp>

 #include <arrow-dataset-glib/dataset-factory.hpp>
@@ -152,12 +153,46 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error)
   }
   auto arrow_scanner = *arrow_scanner_result;
   auto arrow_table_result = arrow_scanner->ToTable();
-  if (!garrow::check(error, arrow_scanner_result, "[dataset][to-table]")) {
+  if (!garrow::check(error, arrow_table_result, "[dataset][to-table]")) {
     return NULL;
   }
   return garrow_table_new_raw(&(*arrow_table_result));
 }

+/**
+ * gadataset_dataset_to_record_batch_reader:
+ * @dataset: A #GADatasetDataset.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error)
+{
+  auto arrow_dataset = gadataset_dataset_get_raw(dataset);
+  auto arrow_scanner_builder_result = arrow_dataset->NewScan();
+  if (!garrow::check(error,
+                     arrow_scanner_builder_result,
+                     "[dataset][to-record-batch-reader]")) {
+    return nullptr;
+  }
+  auto arrow_scanner_builder = *arrow_scanner_builder_result;
+  auto arrow_scanner_result = arrow_scanner_builder->Finish();
+  if (!garrow::check(error, arrow_scanner_result, "[dataset][to-record-batch-reader]")) {
+    return nullptr;
+  }
+  auto arrow_scanner = *arrow_scanner_result;
+  auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+  if (!garrow::check(error, arrow_reader_result, "[dataset][to-record-batch-reader]")) {
+    return nullptr;
+  }
+  auto sources = g_list_prepend(nullptr, dataset);
+  return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources);
+}
+
 /**
  * gadataset_dataset_get_type_name:
  * @dataset: A #GADatasetDataset.
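A brief usage sketch (not part of the commit) of how C code might consume the new gadataset_dataset_to_record_batch_reader() API. The dump_dataset() helper is hypothetical; it assumes dataset is a valid GADatasetDataset and relies on the existing garrow_record_batch_reader_read_next() and garrow_record_batch_get_n_rows() functions from arrow-glib:

#include <arrow-dataset-glib/arrow-dataset-glib.h>

/* Hypothetical helper: stream a dataset batch by batch via the new
 * gadataset_dataset_to_record_batch_reader() instead of materializing
 * everything with gadataset_dataset_to_table(). */
static void
dump_dataset(GADatasetDataset *dataset)
{
  GError *error = NULL;
  GArrowRecordBatchReader *reader =
    gadataset_dataset_to_record_batch_reader(dataset, &error);
  if (!reader) {
    g_printerr("to-record-batch-reader failed: %s\n", error->message);
    g_error_free(error);
    return;
  }
  while (TRUE) {
    GArrowRecordBatch *batch =
      garrow_record_batch_reader_read_next(reader, &error);
    if (error) {
      g_printerr("read failed: %s\n", error->message);
      g_clear_error(&error);
      break;
    }
    if (!batch) {
      break; /* NULL without an error means end of stream */
    }
    g_print("batch with %" G_GINT64_FORMAT " rows\n",
            garrow_record_batch_get_n_rows(batch));
    g_object_unref(batch);
  }
  /* Unref the reader so it releases the dataset and any open files. */
  g_object_unref(reader);
}

Because the implementation prepends the dataset to the reader's sources list, the reader keeps the dataset alive until it is itself unreffed, which is why the Ruby tests below release the reader explicitly.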
3 changes: 3 additions & 0 deletions c_glib/arrow-dataset-glib/dataset.h
@@ -34,6 +34,9 @@ gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error);
 GADATASET_AVAILABLE_IN_5_0
 gchar *
 gadataset_dataset_get_type_name(GADatasetDataset *dataset);
+GADATASET_AVAILABLE_IN_17_0
+GArrowRecordBatchReader *
+gadataset_dataset_to_record_batch_reader(GADatasetDataset *dataset, GError **error);

 #define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \
   (gadataset_file_system_dataset_write_options_get_type())
22 changes: 22 additions & 0 deletions c_glib/arrow-dataset-glib/scanner.cpp
@@ -128,6 +128,28 @@ gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error)
   }
 }

+/**
+ * gadataset_scanner_to_record_batch_reader:
+ * @scanner: A #GADatasetScanner.
+ * @error: (nullable): Return location for a #GError or %NULL.
+ *
+ * Returns: (transfer full) (nullable):
+ *   A #GArrowRecordBatchReader on success, %NULL on error.
+ *
+ * Since: 17.0.0
+ */
+GArrowRecordBatchReader *
+gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error)
+{
+  auto arrow_scanner = gadataset_scanner_get_raw(scanner);
+  auto arrow_reader_result = arrow_scanner->ToRecordBatchReader();
+  if (!garrow::check(error, arrow_reader_result, "[scanner][to-record-batch-reader]")) {
+    return nullptr;
+  }
+  auto sources = g_list_prepend(nullptr, scanner);
+  return garrow_record_batch_reader_new_raw(&(*arrow_reader_result), sources);
+}
+
 typedef struct GADatasetScannerBuilderPrivate_
 {
   std::shared_ptr<arrow::dataset::ScannerBuilder> scanner_builder;
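A comparable sketch (again, not part of the commit) at the scanner level. It assumes the existing gadataset_scanner_builder_new() and gadataset_scanner_builder_finish() entry points are used to obtain the scanner; scan_to_table() is a hypothetical helper:

#include <arrow-dataset-glib/arrow-dataset-glib.h>

/* Hypothetical helper: build a scanner for a dataset, then drain it
 * through the new gadataset_scanner_to_record_batch_reader(). The
 * builder step is where filters or projections would be configured. */
static GArrowTable *
scan_to_table(GADatasetDataset *dataset, GError **error)
{
  GADatasetScannerBuilder *builder =
    gadataset_scanner_builder_new(dataset, error);
  if (!builder) {
    return NULL;
  }
  GADatasetScanner *scanner =
    gadataset_scanner_builder_finish(builder, error);
  g_object_unref(builder);
  if (!scanner) {
    return NULL;
  }
  GArrowRecordBatchReader *reader =
    gadataset_scanner_to_record_batch_reader(scanner, error);
  /* The reader keeps the scanner alive via its sources list, so the
   * local reference can be dropped right away. */
  g_object_unref(scanner);
  if (!reader) {
    return NULL;
  }
  GArrowTable *table = garrow_record_batch_reader_read_all(reader, error);
  /* Unref the reader so it closes any files it holds open; the Ruby
   * tests below do the same via reader.unref. */
  g_object_unref(reader);
  return table;
}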
4 changes: 4 additions & 0 deletions c_glib/arrow-dataset-glib/scanner.h
@@ -37,6 +37,10 @@ GADATASET_AVAILABLE_IN_5_0
 GArrowTable *
 gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error);

+GADATASET_AVAILABLE_IN_17_0
+GArrowRecordBatchReader *
+gadataset_scanner_to_record_batch_reader(GADatasetScanner *scanner, GError **error);
+
 #define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type())
 GADATASET_AVAILABLE_IN_5_0
 G_DECLARE_DERIVABLE_TYPE(
24 changes: 21 additions & 3 deletions c_glib/test/dataset/test-file-system-dataset.rb
@@ -56,6 +56,22 @@ def test_partitioning
   end

   def test_read_write
+    dataset, expected_table = create_dataset
+    assert_equal(expected_table, dataset.to_table)
+  end
+
+  def test_to_record_batch_reader
+    dataset, expected_table = create_dataset
+    reader = dataset.to_record_batch_reader
+    begin
+      assert_equal(expected_table, reader.read_all)
+    ensure
+      # Unref to ensure the reader closes files and we can delete the temp directory
+      reader.unref
+    end
+  end
+
+  def create_dataset
     table = build_table(label: build_string_array(["a", "a", "b", "c"]),
                         count: build_int32_array([1, 10, 2, 3]))
     table_reader = Arrow::TableBatchReader.new(table)
@@ -73,7 +89,8 @@ end
     end
     @factory.partition_base_dir = @dir
     dataset = @factory.finish
-    assert_equal(build_table(count: [
+
+    expected_table = build_table(count: [
                    build_int32_array([1, 10]),
                    build_int32_array([2]),
                    build_int32_array([3]),
@@ -82,7 +99,8 @@
                    build_string_array(["a", "a"]),
                    build_string_array(["b"]),
                    build_string_array(["c"]),
-                 ]),
-                 dataset.to_table)
+                 ])
+
+    return dataset, expected_table
   end
 end
10 changes: 10 additions & 0 deletions c_glib/test/dataset/test-scanner.rb
@@ -45,4 +45,14 @@ def setup
   def test_to_table
     assert_equal(@table, @scanner.to_table)
   end
+
+  def test_to_record_batch_reader
+    reader = @scanner.to_record_batch_reader
+    begin
+      assert_equal(@table, reader.read_all)
+    ensure
+      # Unref to ensure the reader closes files and we can delete the temp directory
+      reader.unref
+    end
+  end
 end
27 changes: 17 additions & 10 deletions c_glib/test/parquet/test-arrow-file-reader.rb
@@ -20,16 +20,23 @@ class TestParquetArrowFileReader < Test::Unit::TestCase

   def setup
     omit("Parquet is required") unless defined?(::Parquet)
-    @file = Tempfile.open(["data", ".parquet"])
-    @a_array = build_string_array(["foo", "bar"])
-    @b_array = build_int32_array([123, 456])
-    @table = build_table("a" => @a_array,
-                         "b" => @b_array)
-    writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
-    chunk_size = 1
-    writer.write_table(@table, chunk_size)
-    writer.close
-    @reader = Parquet::ArrowFileReader.new(@file.path)
+    Tempfile.create(["data", ".parquet"]) do |file|
+      @file = file
+      @a_array = build_string_array(["foo", "bar"])
+      @b_array = build_int32_array([123, 456])
+      @table = build_table("a" => @a_array,
+                           "b" => @b_array)
+      writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
+      chunk_size = 1
+      writer.write_table(@table, chunk_size)
+      writer.close
+      @reader = Parquet::ArrowFileReader.new(@file.path)
+      begin
+        yield
+      ensure
+        @reader.unref
+      end
+    end
   end

   def test_schema
27 changes: 17 additions & 10 deletions c_glib/test/parquet/test-arrow-file-writer.rb
@@ -20,7 +20,10 @@ class TestParquetArrowFileWriter < Test::Unit::TestCase

   def setup
     omit("Parquet is required") unless defined?(::Parquet)
-    @file = Tempfile.open(["data", ".parquet"])
+    Tempfile.create(["data", ".parquet"]) do |file|
+      @file = file
+      yield
+    end
   end

   def test_write
@@ -33,14 +36,18 @@ def test_write
     writer.close

     reader = Parquet::ArrowFileReader.new(@file.path)
-    reader.use_threads = true
-    assert_equal([
-                   enabled_values.length / chunk_size,
-                   true,
-                 ],
-                 [
-                   reader.n_row_groups,
-                   table.equal_metadata(reader.read_table, false),
-                 ])
+    begin
+      reader.use_threads = true
+      assert_equal([
+                     enabled_values.length / chunk_size,
+                     true,
+                   ],
+                   [
+                     reader.n_row_groups,
+                     table.equal_metadata(reader.read_table, false),
+                   ])
+    ensure
+      reader.unref
+    end
   end
 end
24 changes: 16 additions & 8 deletions c_glib/test/parquet/test-boolean-statistics.rb
@@ -20,14 +20,22 @@ class TestParquetBooleanStatistics < Test::Unit::TestCase

   def setup
     omit("Parquet is required") unless defined?(::Parquet)
-    @file = Tempfile.open(["data", ".parquet"])
-    @table = build_table("boolean" => build_boolean_array([nil, false, true]))
-    writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
-    chunk_size = 1024
-    writer.write_table(@table, chunk_size)
-    writer.close
-    reader = Parquet::ArrowFileReader.new(@file.path)
-    @statistics = reader.metadata.get_row_group(0).get_column_chunk(0).statistics
+    Tempfile.create(["data", ".parquet"]) do |file|
+      @file = file
+      @table = build_table("boolean" => build_boolean_array([nil, false, true]))
+      writer = Parquet::ArrowFileWriter.new(@table.schema, @file.path)
+      chunk_size = 1024
+      writer.write_table(@table, chunk_size)
+      writer.close
+      reader = Parquet::ArrowFileReader.new(@file.path)
+      begin
+        @statistics =
+          reader.metadata.get_row_group(0).get_column_chunk(0).statistics
+        yield
+      ensure
+        reader.unref
+      end
+    end
   end

   test("#min") do
(Diff truncated; the remaining changed files are not shown.)