Skip to content

Commit

Permalink
Merge branch 'main' into parquet-export-kvmeta-to-arrow-and-python
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jun 4, 2024
2 parents a9155c8 + 4ec1c98 commit 090f9dd
Show file tree
Hide file tree
Showing 272 changed files with 16,807 additions and 11,495 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/csharp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -94,8 +94,8 @@ jobs:
run: ci/scripts/csharp_test.sh $(pwd)

macos:
name: ARM64 macOS 14 C# ${{ matrix.dotnet }}
runs-on: macos-latest
name: AMD64 macOS 13 C# ${{ matrix.dotnet }}
runs-on: macos-13 # Pending https://github.com/pythonnet/pythonnet/issues/2396
if: ${{ !contains(github.event.pull_request.title, 'WIP') }}
timeout-minutes: 15
strategy:
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/java.yml
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ jobs:
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: |
archery docker run \
-e CI=true \
-e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
-e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
${{ matrix.image }}
- name: Docker Push
if: >-
Expand Down Expand Up @@ -127,12 +127,12 @@ jobs:
- name: Build
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
- name: Test
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_test.sh $(pwd) $(pwd)/build

windows:
Expand All @@ -158,10 +158,10 @@ jobs:
- name: Build
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_build.sh $(pwd) $(pwd)/build
- name: Test
shell: bash
env:
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: ci/scripts/java_test.sh $(pwd) $(pwd)/build
4 changes: 2 additions & 2 deletions .github/workflows/java_jni.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ jobs:
env:
ARCHERY_DOCKER_USER: ${{ secrets.DOCKERHUB_USER }}
ARCHERY_DOCKER_PASSWORD: ${{ secrets.DOCKERHUB_TOKEN }}
GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
DEVELOCITY_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
run: |
archery docker run \
-e CI=true \
-e "GRADLE_ENTERPRISE_ACCESS_KEY=$GRADLE_ENTERPRISE_ACCESS_KEY" \
-e "DEVELOCITY_ACCESS_KEY=$DEVELOCITY_ACCESS_KEY" \
conda-python-java-integration
- name: Docker Push
if: >-
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/js.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,10 +106,10 @@ jobs:
node-version: ${{ matrix.node }}
- name: Build
shell: bash
run: ci/scripts/js_build.sh $(pwd)
run: ci/scripts/js_build.sh $(pwd) build
- name: Test
shell: bash
run: ci/scripts/js_test.sh $(pwd)
run: ci/scripts/js_test.sh $(pwd) build

windows:
name: AMD64 Windows NodeJS ${{ matrix.node }}
Expand All @@ -136,7 +136,7 @@ jobs:
node-version: ${{ matrix.node }}
- name: Build
shell: bash
run: ci/scripts/js_build.sh $(pwd)
run: ci/scripts/js_build.sh $(pwd) build
- name: Test
shell: bash
run: ci/scripts/js_test.sh $(pwd)
run: ci/scripts/js_test.sh $(pwd) build
5 changes: 3 additions & 2 deletions .github/workflows/r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,12 @@ jobs:
MAKEFLAGS = paste0("-j", parallel::detectCores()),
ARROW_R_DEV = TRUE,
"_R_CHECK_FORCE_SUGGESTS_" = FALSE,
"_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE
"_R_CHECK_STOP_ON_INVALID_NUMERIC_VERSION_INPUTS_" = TRUE,
"_R_CHECK_DONTTEST_EXAMPLES_" = TRUE
)
rcmdcheck::rcmdcheck(".",
build_args = '--no-build-vignettes',
args = c('--no-manual', '--as-cran', '--ignore-vignettes', '--run-donttest'),
args = c('--no-manual', '--as-cran', '--ignore-vignettes'),
error_on = 'warning',
check_dir = 'check',
timeout = 3600
Expand Down
35 changes: 28 additions & 7 deletions .github/workflows/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -313,15 +313,17 @@ jobs:
strategy:
fail-fast: false
env:
ARROW_ACERO: ON
ARROW_BOOST_USE_SHARED: OFF
ARROW_BUILD_BENCHMARKS: OFF
ARROW_BUILD_SHARED: ON
ARROW_BUILD_STATIC: OFF
ARROW_BUILD_TESTS: OFF
ARROW_ACERO: ON
ARROW_DATASET: ON
ARROW_FLIGHT: OFF
ARROW_FLIGHT_SQL: OFF
ARROW_DEPENDENCY_SOURCE: VCPKG
ARROW_DEPENDENCY_USE_SHARED: OFF
ARROW_FLIGHT: ON
ARROW_FLIGHT_SQL: ON
ARROW_GANDIVA: OFF
ARROW_HDFS: OFF
ARROW_HOME: "${{ github.workspace }}/dist"
Expand All @@ -337,13 +339,16 @@ jobs:
ARROW_WITH_LZ4: OFF
ARROW_WITH_OPENTELEMETRY: OFF
ARROW_WITH_SNAPPY: ON
ARROW_WITH_ZLIB: OFF
ARROW_WITH_ZLIB: ON
ARROW_WITH_ZSTD: ON
BOOST_SOURCE: BUNDLED
CMAKE_CXX_STANDARD: "17"
CMAKE_GENERATOR: Ninja
CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist"
CMAKE_UNITY_BUILD: ON
VCPKG_BINARY_SOURCES: 'clear;nuget,GitHub,readwrite'
VCPKG_ROOT: "${{ github.workspace }}/vcpkg"
permissions:
packages: write
steps:
- name: Disable Crash Dialogs
run: |
Expand All @@ -361,7 +366,7 @@ jobs:
- name: Install vcpkg
shell: bash
run: |
ci/scripts/install_vcpkg.sh ./vcpkg
ci/scripts/install_vcpkg.sh "${VCPKG_ROOT}"
- name: Install meson
run: |
python -m pip install meson
Expand All @@ -387,6 +392,22 @@ jobs:
env:
# We can invalidate the current cache by updating this.
CACHE_VERSION: "2024-05-09"
- name: Setup NuGet credentials for vcpkg caching
shell: bash
run: |
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
sources add \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json" \
-storepasswordincleartext \
-name "GitHub" \
-username "$GITHUB_REPOSITORY_OWNER" \
-password "${{ secrets.GITHUB_TOKEN }}"
$(vcpkg/vcpkg.exe fetch nuget | tail -n 1) \
setapikey "${{ secrets.GITHUB_TOKEN }}" \
-source "https://nuget.pkg.github.com/$GITHUB_REPOSITORY_OWNER/index.json"
- name: Build C++ vcpkg dependencies
run: |
vcpkg\vcpkg.exe install --triplet x64-windows --x-manifest-root cpp --x-install-root build\cpp\vcpkg_installed
- name: Build C++
shell: cmd
run: |
Expand All @@ -396,4 +417,4 @@ jobs:
shell: cmd
run: |
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
bash -c "VCPKG_ROOT=\"$(pwd)/vcpkg\" ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build"
bash -c "ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build"
7 changes: 6 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,9 @@ __debug_bin
.envrc

# Develocity
.mvn/.develocity.xml
.mvn/.gradle-enterprise/
.mvn/.develocity/

# rat
filtered_rat.txt
rat.txt
29 changes: 29 additions & 0 deletions .golangci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

linters:
  # Start from an empty set: disable every linter, including the ones
  # golangci-lint enables by default.
  # Default: false
  disable-all: true
  # Enable only the specific linters listed below.
  # https://golangci-lint.run/usage/linters/#enabled-by-default
  enable:
    - gofmt
    - goimports

issues:
  # Apply fixes for the reported issues in place (for linters that support it)
  # instead of only reporting them.
  fix: true
14 changes: 14 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,3 +141,17 @@ repos:
'--disable',
'dangling-hyphen,line-too-long',
]
- repo: https://github.com/golangci/golangci-lint
  rev: v1.59.0
  hooks:
    # golangci-lint has no built-in support for a repository with multiple
    # go.mod files, so the same hook is declared once per Go module
    # directory, each with its own name and a `cd` into that module.
    # https://github.com/golangci/golangci-lint/issues/828
    - id: golangci-lint-full
      name: golangci-lint-full-arrow
      entry: bash -c 'cd go/arrow && golangci-lint run'
    - id: golangci-lint-full
      name: golangci-lint-full-parquet
      entry: bash -c 'cd go/parquet && golangci-lint run'
    - id: golangci-lint-full
      name: golangci-lint-full-internal
      entry: bash -c 'cd go/internal && golangci-lint run'
2 changes: 1 addition & 1 deletion ci/docker/ubuntu-swift.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

FROM swift:5.7.3
FROM swift:5.9.0

# Go is needed for generating test data
RUN apt-get update -y -q && \
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ else
-DARROW_C_FLAGS_RELWITHDEBINFO="${ARROW_C_FLAGS_RELWITHDEBINFO:-}" \
-DARROW_DATASET=${ARROW_DATASET:-OFF} \
-DARROW_DEPENDENCY_SOURCE=${ARROW_DEPENDENCY_SOURCE:-AUTO} \
-DARROW_DEPENDENCY_USE_SHARED=${ARROW_DEPENDENCY_USE_SHARED:-ON} \
-DARROW_ENABLE_THREADING=${ARROW_ENABLE_THREADING:-ON} \
-DARROW_ENABLE_TIMING_TESTS=${ARROW_ENABLE_TIMING_TESTS:-ON} \
-DARROW_EXTRA_ERROR_CONTEXT=${ARROW_EXTRA_ERROR_CONTEXT:-OFF} \
Expand Down
43 changes: 22 additions & 21 deletions ci/scripts/r_install_system_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,30 @@ set -ex

: ${ARROW_SOURCE_HOME:=/arrow}

if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
# Figure out what package manager we have
if [ "`which dnf`" ]; then
PACKAGE_MANAGER=dnf
elif [ "`which yum`" ]; then
PACKAGE_MANAGER=yum
elif [ "`which zypper`" ]; then
PACKAGE_MANAGER=zypper
else
PACKAGE_MANAGER=apt-get
apt-get update
fi
# Figure out what package manager we have
if [ "`which dnf`" ]; then
PACKAGE_MANAGER=dnf
elif [ "`which yum`" ]; then
PACKAGE_MANAGER=yum
elif [ "`which zypper`" ]; then
PACKAGE_MANAGER=zypper
else
PACKAGE_MANAGER=apt-get
apt-get update
fi

# Install curl and OpenSSL for S3/GCS support
case "$PACKAGE_MANAGER" in
apt-get)
apt-get install -y libcurl4-openssl-dev libssl-dev
;;
*)
$PACKAGE_MANAGER install -y libcurl-devel openssl-devel
;;
esac
# Install curl and OpenSSL (technically, they are only needed for S3/GCS
# support, but installing the R curl package fails without them)
case "$PACKAGE_MANAGER" in
apt-get)
apt-get install -y libcurl4-openssl-dev libssl-dev
;;
*)
$PACKAGE_MANAGER install -y libcurl-devel openssl-devel
;;
esac

if [ "$ARROW_S3" == "ON" ] || [ "$ARROW_GCS" == "ON" ] || [ "$ARROW_R_DEV" == "TRUE" ]; then
# The Dockerfile should have put this file here
if [ "$ARROW_S3" == "ON" ] && [ -f "${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" ] && [ "`which wget`" ]; then
"${ARROW_SOURCE_HOME}/ci/scripts/install_minio.sh" latest /usr/local
Expand Down
9 changes: 4 additions & 5 deletions ci/scripts/r_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -110,16 +110,15 @@ SCRIPT="as_cran <- !identical(tolower(Sys.getenv('NOT_CRAN')), 'true')
on.exit(tools::pskill(pid_flight), add = TRUE)
}
run_donttest <- identical(tolower(Sys.getenv('_R_CHECK_DONTTEST_EXAMPLES_', 'true')), 'true')
if (run_donttest) {
args <- c(args, '--run-donttest')
}
install_args <- Sys.getenv('INSTALL_ARGS')
if (nzchar(install_args)) {
args <- c(args, paste0('--install-args=\"', install_args, '\"'))
}
message('Running rcmdcheck with:\n')
print(build_args)
print(args)
rcmdcheck::rcmdcheck(build_args = build_args, args = args, error_on = 'warning', check_dir = 'check', timeout = 3600)"
echo "$SCRIPT" | ${R_BIN} --no-save

Expand Down
3 changes: 3 additions & 0 deletions cpp/cmake_modules/Usevcpkg.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,9 @@ set(LZ4_ROOT
CACHE STRING "")

if(CMAKE_HOST_WIN32)
set(utf8proc_MSVC_STATIC_LIB_SUFFIX
""
CACHE STRING "")
set(LZ4_MSVC_LIB_PREFIX
""
CACHE STRING "")
Expand Down
21 changes: 5 additions & 16 deletions cpp/src/arrow/acero/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -173,13 +173,8 @@ add_arrow_acero_test(hash_join_node_test SOURCES hash_join_node_test.cc
bloom_filter_test.cc)
add_arrow_acero_test(pivot_longer_node_test SOURCES pivot_longer_node_test.cc)

# asof_join_node and sorted_merge_node use std::thread internally
# and don't use ThreadPool, so they will
# be broken if threading is turned off
if(ARROW_ENABLE_THREADING)
add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc)
add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc)
endif()
add_arrow_acero_test(asof_join_node_test SOURCES asof_join_node_test.cc)
add_arrow_acero_test(sorted_merge_node_test SOURCES sorted_merge_node_test.cc)

add_arrow_acero_test(tpch_node_test SOURCES tpch_node_test.cc)
add_arrow_acero_test(union_node_test SOURCES union_node_test.cc)
Expand Down Expand Up @@ -228,9 +223,7 @@ if(ARROW_BUILD_BENCHMARKS)
add_arrow_acero_benchmark(project_benchmark SOURCES benchmark_util.cc
project_benchmark.cc)

if(ARROW_ENABLE_THREADING)
add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc)
endif()
add_arrow_acero_benchmark(asof_join_benchmark SOURCES asof_join_benchmark.cc)

add_arrow_acero_benchmark(tpch_benchmark SOURCES tpch_benchmark.cc)

Expand All @@ -253,9 +246,7 @@ if(ARROW_BUILD_BENCHMARKS)
target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_static)
target_link_libraries(arrow-acero-filter-benchmark PUBLIC arrow_acero_static)
target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_static)
if(ARROW_ENABLE_THREADING)
target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_static)
endif()
target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_static)
target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_static)
if(ARROW_BUILD_OPENMP_BENCHMARKS)
target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_static)
Expand All @@ -264,9 +255,7 @@ if(ARROW_BUILD_BENCHMARKS)
target_link_libraries(arrow-acero-expression-benchmark PUBLIC arrow_acero_shared)
target_link_libraries(arrow-acero-filter-benchmark PUBLIC arrow_acero_shared)
target_link_libraries(arrow-acero-project-benchmark PUBLIC arrow_acero_shared)
if(ARROW_ENABLE_THREADING)
target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_shared)
endif()
target_link_libraries(arrow-acero-asof-join-benchmark PUBLIC arrow_acero_shared)
target_link_libraries(arrow-acero-tpch-benchmark PUBLIC arrow_acero_shared)
if(ARROW_BUILD_OPENMP_BENCHMARKS)
target_link_libraries(arrow-acero-hash-join-benchmark PUBLIC arrow_acero_shared)
Expand Down
Loading

0 comments on commit 090f9dd

Please sign in to comment.