Skip to content

Commit

Permalink
Merge branch 'main' into tiny-metadata-style-enhancement
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Jan 3, 2024
2 parents 5b4ccc0 + 98f677a commit acef18b
Show file tree
Hide file tree
Showing 137 changed files with 3,745 additions and 3,759 deletions.
2 changes: 0 additions & 2 deletions .github/workflows/cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -340,8 +340,6 @@ jobs:
fail-fast: false
matrix:
include:
- msystem_lower: mingw32
msystem_upper: MINGW32
- msystem_lower: mingw64
msystem_upper: MINGW64
- msystem_lower: clang64
Expand Down
2 changes: 2 additions & 0 deletions ci/scripts/python_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap
: ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}}
: ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}}
: ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}}
: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}}
: ${PYARROW_TEST_S3:=${ARROW_S3:-ON}}

export PYARROW_TEST_ACERO
Expand All @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS
export PYARROW_TEST_HDFS
export PYARROW_TEST_ORC
export PYARROW_TEST_PARQUET
export PYARROW_TEST_PARQUET_ENCRYPTION
export PYARROW_TEST_S3

# Testing PyArrow
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_unix_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON
export PYARROW_TEST_ORC=ON
export PYARROW_TEST_PANDAS=ON
export PYARROW_TEST_PARQUET=ON
export PYARROW_TEST_PARQUET_ENCRYPTION=ON
export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT}
export PYARROW_TEST_S3=${ARROW_S3}
export PYARROW_TEST_TENSORFLOW=ON
Expand Down
1 change: 1 addition & 0 deletions ci/scripts/python_wheel_windows_test.bat
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON
set PYARROW_TEST_HDFS=ON
set PYARROW_TEST_ORC=OFF
set PYARROW_TEST_PARQUET=ON
set PYARROW_TEST_PARQUET_ENCRYPTION=ON
set PYARROW_TEST_SUBSTRAIT=ON
set PYARROW_TEST_S3=OFF
set PYARROW_TEST_TENSORFLOW=ON
Expand Down
10 changes: 8 additions & 2 deletions cpp/cmake_modules/ThirdpartyToolchain.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -1015,6 +1015,10 @@ else()
endif()

include(FetchContent)
# Extra arguments that fetchcontent_declare() calls can splice in via
# ${FC_DECLARE_COMMON_OPTIONS}. The list starts empty and only gains
# "EXCLUDE_FROM_ALL TRUE" when the running CMake is 3.28 or newer, so older
# CMake versions never see that option.
set(FC_DECLARE_COMMON_OPTIONS)
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28)
list(APPEND FC_DECLARE_COMMON_OPTIONS EXCLUDE_FROM_ALL TRUE)
endif()

macro(prepare_fetchcontent)
set(BUILD_SHARED_LIBS OFF)
Expand Down Expand Up @@ -2146,6 +2150,9 @@ function(build_gtest)
message(STATUS "Building gtest from source")
set(GTEST_VENDORED TRUE)
fetchcontent_declare(googletest
# We should not specify "EXCLUDE_FROM_ALL TRUE" here
# because we install GTest with a custom path.
# ${FC_DECLARE_COMMON_OPTIONS}
URL ${GTEST_SOURCE_URL}
URL_HASH "SHA256=${ARROW_GTEST_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()
Expand Down Expand Up @@ -5096,8 +5103,7 @@ function(build_azure_sdk)
endif()
message(STATUS "Building Azure SDK for C++ from source")
fetchcontent_declare(azure_sdk
# EXCLUDE_FROM_ALL is available since CMake 3.28
# EXCLUDE_FROM_ALL TRUE
${FC_DECLARE_COMMON_OPTIONS}
URL ${ARROW_AZURE_SDK_URL}
URL_HASH "SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}")
prepare_fetchcontent()
Expand Down
2 changes: 1 addition & 1 deletion cpp/examples/arrow/compute_and_write_csv_example.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
// under the License.

#include <arrow/api.h>
#include <arrow/compute/api_aggregate.h>
#include <arrow/compute/api.h>
#include <arrow/csv/api.h>
#include <arrow/csv/writer.h>
#include <arrow/io/api.h>
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/acero/aggregate_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "arrow/acero/exec_plan.h"
#include "arrow/acero/options.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/grouper.h"
#include "arrow/datum.h"
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/acero/scalar_aggregate_node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "arrow/acero/options.h"
#include "arrow/acero/util.h"
#include "arrow/compute/exec.h"
#include "arrow/compute/function.h"
#include "arrow/compute/registry.h"
#include "arrow/compute/row/grouper.h"
#include "arrow/datum.h"
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,7 @@ struct ARROW_EXPORT ArraySpan {
util::span<const T> GetSpan(int i, int64_t length) const {
  // Size of buffer `i` expressed in whole elements of T. It is consumed only
  // by the assert below, hence the explicit ARROW_UNUSED afterwards.
  const int64_t buffer_length = buffers[i].size / static_cast<int64_t>(sizeof(T));
  assert(i > 0 && length + offset <= buffer_length);
  ARROW_UNUSED(buffer_length);
  // View `length` elements of buffer `i`, beginning `offset` elements in.
  const T* first = buffers[i].data_as<T>() + this->offset;
  return util::span<const T>(first, length);
}

Expand All @@ -466,6 +467,7 @@ struct ARROW_EXPORT ArraySpan {
util::span<T> GetSpan(int i, int64_t length) {
  // Size of buffer `i` expressed in whole elements of T. It is consumed only
  // by the assert below, hence the explicit ARROW_UNUSED afterwards.
  const int64_t buffer_length = buffers[i].size / static_cast<int64_t>(sizeof(T));
  assert(i > 0 && length + offset <= buffer_length);
  ARROW_UNUSED(buffer_length);
  // Mutable view of `length` elements of buffer `i`, beginning `offset`
  // elements in.
  T* first = buffers[i].mutable_data_as<T>() + this->offset;
  return util::span<T>(first, length);
}

Expand Down
21 changes: 13 additions & 8 deletions cpp/src/arrow/compute/api.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,23 @@

#pragma once

/// \defgroup compute-functions Abstract compute function API
/// @{
/// @}

/// \defgroup compute-concrete-options Concrete option classes for compute functions
/// @{
/// @}

#include "arrow/compute/api_aggregate.h" // IWYU pragma: export
#include "arrow/compute/api_scalar.h" // IWYU pragma: export
#include "arrow/compute/api_vector.h" // IWYU pragma: export
#include "arrow/compute/cast.h" // IWYU pragma: export
#include "arrow/compute/function.h" // IWYU pragma: export
#include "arrow/compute/kernel.h" // IWYU pragma: export
#include "arrow/compute/registry.h" // IWYU pragma: export
#include "arrow/datum.h" // IWYU pragma: export
#include "arrow/compute/api_aggregate.h" // IWYU pragma: export
#include "arrow/compute/api_scalar.h" // IWYU pragma: export
#include "arrow/compute/api_vector.h" // IWYU pragma: export
#include "arrow/compute/cast.h" // IWYU pragma: export
#include "arrow/compute/function.h" // IWYU pragma: export
#include "arrow/compute/function_options.h" // IWYU pragma: export
#include "arrow/compute/kernel.h" // IWYU pragma: export
#include "arrow/compute/registry.h" // IWYU pragma: export
#include "arrow/datum.h" // IWYU pragma: export

#include "arrow/compute/expression.h" // IWYU pragma: export

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

#include <vector>

#include "arrow/compute/function.h"
#include "arrow/compute/function_options.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/util/macros.h"
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <string>
#include <utility>

#include "arrow/compute/function.h"
#include "arrow/compute/function_options.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/datum.h"
#include "arrow/result.h"
Expand Down
3 changes: 1 addition & 2 deletions cpp/src/arrow/compute/api_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,8 @@
#include <memory>
#include <utility>

#include "arrow/compute/function.h"
#include "arrow/compute/function_options.h"
#include "arrow/compute/ordering.h"
#include "arrow/datum.h"
#include "arrow/result.h"
#include "arrow/type_fwd.h"

Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/cast.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <vector>

#include "arrow/compute/function.h"
#include "arrow/compute/function_options.h"
#include "arrow/compute/type_fwd.h"
#include "arrow/result.h"
#include "arrow/status.h"
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/compute/function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "arrow/compute/exec.h"
#include "arrow/compute/exec_internal.h"
#include "arrow/compute/function_internal.h"
#include "arrow/compute/function_options.h"
#include "arrow/compute/kernels/common_internal.h"
#include "arrow/compute/registry.h"
#include "arrow/datum.h"
Expand Down
46 changes: 1 addition & 45 deletions cpp/src/arrow/compute/function.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,53 +36,9 @@
namespace arrow {
namespace compute {

/// \defgroup compute-functions Abstract compute function API
///
/// \addtogroup compute-functions
/// @{

/// \brief Extension point for defining options outside libarrow (but
/// still within this project).
///
/// Abstract interface: every operation except Serialize() and Deserialize()
/// is pure virtual and must be supplied by the concrete options type.
class ARROW_EXPORT FunctionOptionsType {
public:
virtual ~FunctionOptionsType() = default;

/// \brief Name of this options type.
virtual const char* type_name() const = 0;
/// \brief Produce a string for the given options.
/// NOTE(review): presumably backs FunctionOptions::ToString() -- confirm.
virtual std::string Stringify(const FunctionOptions&) const = 0;
/// \brief Compare two options instances.
/// NOTE(review): presumably returns true when they are equal and backs
/// FunctionOptions::Equals() -- confirm against the implementation.
virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
/// \brief Serialize the given options to a buffer.
///
/// Not pure virtual, so overriding is optional; the base behavior is
/// defined outside this header.
virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
/// \brief Reconstruct an options instance from a buffer.
///
/// Not pure virtual, so overriding is optional; the base behavior is
/// defined outside this header.
virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
const Buffer& buffer) const;
/// \brief Return a newly allocated options object derived from the given one.
virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
};

/// \brief Base class for specifying options configuring a function's behavior,
/// such as error handling.
///
/// Instances can only be created through subclasses (the constructor is
/// protected) and carry a pointer to the FunctionOptionsType describing them.
class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
public:
virtual ~FunctionOptions() = default;

/// \brief The FunctionOptionsType pointer supplied at construction.
const FunctionOptionsType* options_type() const { return options_type_; }
/// \brief Shorthand for options_type()->type_name().
///
/// Dereferences the stored pointer without a null check, so the type passed
/// to the constructor must be non-null.
const char* type_name() const { return options_type()->type_name(); }

/// \brief Compare with another options object (defined outside this header).
bool Equals(const FunctionOptions& other) const;
/// \brief String form of these options (defined outside this header).
std::string ToString() const;
/// \brief Return a newly allocated options object (defined outside this header).
std::unique_ptr<FunctionOptions> Copy() const;
/// \brief Serialize an options struct to a buffer.
Result<std::shared_ptr<Buffer>> Serialize() const;
/// \brief Deserialize an options struct from a buffer.
/// Note: this will only look for `type_name` in the default FunctionRegistry;
/// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
/// call FunctionOptionsType::Deserialize().
static Result<std::unique_ptr<FunctionOptions>> Deserialize(
const std::string& type_name, const Buffer& buffer);

protected:
/// \brief Store the descriptor pointer; `explicit` prevents implicit
/// conversion from a FunctionOptionsType pointer.
explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
// Raw pointer; the defaulted destructor above never deletes it.
const FunctionOptionsType* options_type_;
};

/// \brief Print the given options to the given stream.
/// NOTE(review): the signature matches GoogleTest's PrintTo() value-printer
/// hook; presumably that is its purpose -- confirm against the definition.
ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);

/// \brief Contains the number of required arguments for the function.
///
/// Naming conventions taken from https://en.wikipedia.org/wiki/Arity.
Expand Down
81 changes: 81 additions & 0 deletions cpp/src/arrow/compute/function_options.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// NOTE: API is EXPERIMENTAL and will change without going through a
// deprecation cycle.

#pragma once

#include "arrow/compute/type_fwd.h"
#include "arrow/result.h"
#include "arrow/status.h"
#include "arrow/type_fwd.h"
#include "arrow/util/visibility.h"

namespace arrow {
namespace compute {

/// \addtogroup compute-functions
/// @{

/// \brief Extension point for defining options outside libarrow (but
/// still within this project).
///
/// Abstract interface: every operation except Serialize() and Deserialize()
/// is pure virtual and must be supplied by the concrete options type.
class ARROW_EXPORT FunctionOptionsType {
public:
virtual ~FunctionOptionsType() = default;

/// \brief Name of this options type.
virtual const char* type_name() const = 0;
/// \brief Produce a string for the given options.
/// NOTE(review): presumably backs FunctionOptions::ToString() -- confirm.
virtual std::string Stringify(const FunctionOptions&) const = 0;
/// \brief Compare two options instances.
/// NOTE(review): presumably returns true when they are equal and backs
/// FunctionOptions::Equals() -- confirm against the implementation.
virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0;
/// \brief Serialize the given options to a buffer.
///
/// Not pure virtual, so overriding is optional; the base behavior is
/// defined outside this header.
virtual Result<std::shared_ptr<Buffer>> Serialize(const FunctionOptions&) const;
/// \brief Reconstruct an options instance from a buffer.
///
/// Not pure virtual, so overriding is optional; the base behavior is
/// defined outside this header.
virtual Result<std::unique_ptr<FunctionOptions>> Deserialize(
const Buffer& buffer) const;
/// \brief Return a newly allocated options object derived from the given one.
virtual std::unique_ptr<FunctionOptions> Copy(const FunctionOptions&) const = 0;
};

/// \brief Base class for specifying options configuring a function's behavior,
/// such as error handling.
///
/// Instances can only be created through subclasses (the constructor is
/// protected) and carry a pointer to the FunctionOptionsType describing them.
class ARROW_EXPORT FunctionOptions : public util::EqualityComparable<FunctionOptions> {
public:
virtual ~FunctionOptions() = default;

/// \brief The FunctionOptionsType pointer supplied at construction.
const FunctionOptionsType* options_type() const { return options_type_; }
/// \brief Shorthand for options_type()->type_name().
///
/// Dereferences the stored pointer without a null check, so the type passed
/// to the constructor must be non-null.
const char* type_name() const { return options_type()->type_name(); }

/// \brief Compare with another options object (defined outside this header).
bool Equals(const FunctionOptions& other) const;
/// \brief String form of these options (defined outside this header).
std::string ToString() const;
/// \brief Return a newly allocated options object (defined outside this header).
std::unique_ptr<FunctionOptions> Copy() const;
/// \brief Serialize an options struct to a buffer.
Result<std::shared_ptr<Buffer>> Serialize() const;
/// \brief Deserialize an options struct from a buffer.
/// Note: this will only look for `type_name` in the default FunctionRegistry;
/// to use a custom FunctionRegistry, look up the FunctionOptionsType, then
/// call FunctionOptionsType::Deserialize().
static Result<std::unique_ptr<FunctionOptions>> Deserialize(
const std::string& type_name, const Buffer& buffer);

protected:
/// \brief Store the descriptor pointer; `explicit` prevents implicit
/// conversion from a FunctionOptionsType pointer.
explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {}
// Raw pointer; the defaulted destructor above never deletes it.
const FunctionOptionsType* options_type_;
};

/// \brief Print the given options to the given stream.
/// NOTE(review): the signature matches GoogleTest's PrintTo() value-printer
/// hook; presumably that is its purpose -- confirm against the definition.
ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*);

/// @}

} // namespace compute
} // namespace arrow
10 changes: 9 additions & 1 deletion cpp/src/arrow/compute/kernels/scalar_cast_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,7 @@ void AddTypeToTypeCast(CastFunction* func) {
kernel.exec = CastFunctor::Exec;
kernel.signature = KernelSignature::Make({InputType(SrcT::type_id)}, kOutputTargetType);
kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE;
DCHECK_OK(func->AddKernel(StructType::type_id, std::move(kernel)));
DCHECK_OK(func->AddKernel(SrcT::type_id, std::move(kernel)));
}

template <typename DestType>
Expand Down Expand Up @@ -480,14 +480,18 @@ std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
auto cast_list = std::make_shared<CastFunction>("cast_list", Type::LIST);
AddCommonCasts(Type::LIST, kOutputTargetType, cast_list.get());
AddListCast<ListType, ListType>(cast_list.get());
AddListCast<ListViewType, ListType>(cast_list.get());
AddListCast<LargeListType, ListType>(cast_list.get());
AddListCast<LargeListViewType, ListType>(cast_list.get());
AddTypeToTypeCast<CastFixedToVarList<ListType>, FixedSizeListType>(cast_list.get());

auto cast_large_list =
std::make_shared<CastFunction>("cast_large_list", Type::LARGE_LIST);
AddCommonCasts(Type::LARGE_LIST, kOutputTargetType, cast_large_list.get());
AddListCast<ListType, LargeListType>(cast_large_list.get());
AddListCast<ListViewType, LargeListType>(cast_large_list.get());
AddListCast<LargeListType, LargeListType>(cast_large_list.get());
AddListCast<LargeListViewType, LargeListType>(cast_large_list.get());
AddTypeToTypeCast<CastFixedToVarList<LargeListType>, FixedSizeListType>(
cast_large_list.get());

Expand All @@ -503,7 +507,11 @@ std::vector<std::shared_ptr<CastFunction>> GetNestedCasts() {
AddCommonCasts(Type::FIXED_SIZE_LIST, kOutputTargetType, cast_fsl.get());
AddTypeToTypeCast<CastFixedList, FixedSizeListType>(cast_fsl.get());
AddTypeToTypeCast<CastVarToFixedList<ListType>, ListType>(cast_fsl.get());
AddTypeToTypeCast<CastVarToFixedList<ListViewType>, ListViewType>(cast_fsl.get());
AddTypeToTypeCast<CastVarToFixedList<LargeListType>, LargeListType>(cast_fsl.get());
AddTypeToTypeCast<CastVarToFixedList<LargeListViewType>, LargeListViewType>(
cast_fsl.get());
AddTypeToTypeCast<CastVarToFixedList<ListType>, MapType>(cast_fsl.get());

// So is struct
auto cast_struct = std::make_shared<CastFunction>("cast_struct", Type::STRUCT);
Expand Down
Loading

0 comments on commit acef18b

Please sign in to comment.