diff --git a/.github/workflows/cpp.yml b/.github/workflows/cpp.yml index 2e3c2a355a884..3d4fb10b10c39 100644 --- a/.github/workflows/cpp.yml +++ b/.github/workflows/cpp.yml @@ -340,8 +340,6 @@ jobs: fail-fast: false matrix: include: - - msystem_lower: mingw32 - msystem_upper: MINGW32 - msystem_lower: mingw64 msystem_upper: MINGW64 - msystem_lower: clang64 diff --git a/ci/scripts/python_test.sh b/ci/scripts/python_test.sh index 8d818346faa6e..341c2dd0577ef 100755 --- a/ci/scripts/python_test.sh +++ b/ci/scripts/python_test.sh @@ -45,6 +45,7 @@ export ARROW_DEBUG_MEMORY_POOL=trap : ${PYARROW_TEST_HDFS:=${ARROW_HDFS:-ON}} : ${PYARROW_TEST_ORC:=${ARROW_ORC:-ON}} : ${PYARROW_TEST_PARQUET:=${ARROW_PARQUET:-ON}} +: ${PYARROW_TEST_PARQUET_ENCRYPTION:=${PARQUET_REQUIRE_ENCRYPTION:-ON}} : ${PYARROW_TEST_S3:=${ARROW_S3:-ON}} export PYARROW_TEST_ACERO @@ -56,6 +57,7 @@ export PYARROW_TEST_GCS export PYARROW_TEST_HDFS export PYARROW_TEST_ORC export PYARROW_TEST_PARQUET +export PYARROW_TEST_PARQUET_ENCRYPTION export PYARROW_TEST_S3 # Testing PyArrow diff --git a/ci/scripts/python_wheel_unix_test.sh b/ci/scripts/python_wheel_unix_test.sh index a6cc3bb7b29b7..01250ff7ef40c 100755 --- a/ci/scripts/python_wheel_unix_test.sh +++ b/ci/scripts/python_wheel_unix_test.sh @@ -46,6 +46,7 @@ export PYARROW_TEST_HDFS=ON export PYARROW_TEST_ORC=ON export PYARROW_TEST_PANDAS=ON export PYARROW_TEST_PARQUET=ON +export PYARROW_TEST_PARQUET_ENCRYPTION=ON export PYARROW_TEST_SUBSTRAIT=${ARROW_SUBSTRAIT} export PYARROW_TEST_S3=${ARROW_S3} export PYARROW_TEST_TENSORFLOW=ON diff --git a/ci/scripts/python_wheel_windows_test.bat b/ci/scripts/python_wheel_windows_test.bat index c73b0cfd1b9bd..b14bfddfb36d3 100755 --- a/ci/scripts/python_wheel_windows_test.bat +++ b/ci/scripts/python_wheel_windows_test.bat @@ -26,6 +26,7 @@ set PYARROW_TEST_GCS=ON set PYARROW_TEST_HDFS=ON set PYARROW_TEST_ORC=OFF set PYARROW_TEST_PARQUET=ON +set PYARROW_TEST_PARQUET_ENCRYPTION=ON set PYARROW_TEST_SUBSTRAIT=ON set PYARROW_TEST_S3=OFF set PYARROW_TEST_TENSORFLOW=ON diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 89d046945e5fe..3f327ed64ff00 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -1015,6 +1015,10 @@ else() endif() include(FetchContent) +set(FC_DECLARE_COMMON_OPTIONS) +if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.28) + list(APPEND FC_DECLARE_COMMON_OPTIONS EXCLUDE_FROM_ALL TRUE) +endif() macro(prepare_fetchcontent) set(BUILD_SHARED_LIBS OFF) @@ -2146,6 +2150,9 @@ function(build_gtest) message(STATUS "Building gtest from source") set(GTEST_VENDORED TRUE) fetchcontent_declare(googletest + # We should not specify "EXCLUDE_FROM_ALL TRUE" here. + # Because we install GTest with custom path. 
+ # ${FC_DECLARE_COMMON_OPTIONS} URL ${GTEST_SOURCE_URL} URL_HASH "SHA256=${ARROW_GTEST_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() @@ -5096,8 +5103,7 @@ function(build_azure_sdk) endif() message(STATUS "Building Azure SDK for C++ from source") fetchcontent_declare(azure_sdk - # EXCLUDE_FROM_ALL is available since CMake 3.28 - # EXCLUDE_FROM_ALL TRUE + ${FC_DECLARE_COMMON_OPTIONS} URL ${ARROW_AZURE_SDK_URL} URL_HASH "SHA256=${ARROW_AZURE_SDK_BUILD_SHA256_CHECKSUM}") prepare_fetchcontent() diff --git a/cpp/examples/arrow/compute_and_write_csv_example.cc b/cpp/examples/arrow/compute_and_write_csv_example.cc index edf21e45b2bb7..7e0f6cdf1ce16 100644 --- a/cpp/examples/arrow/compute_and_write_csv_example.cc +++ b/cpp/examples/arrow/compute_and_write_csv_example.cc @@ -16,7 +16,7 @@ // under the License. #include -#include +#include #include #include #include diff --git a/cpp/src/arrow/acero/aggregate_internal.cc b/cpp/src/arrow/acero/aggregate_internal.cc index 3cd5491720dcd..9c4b7fe5ae98c 100644 --- a/cpp/src/arrow/acero/aggregate_internal.cc +++ b/cpp/src/arrow/acero/aggregate_internal.cc @@ -25,6 +25,7 @@ #include "arrow/acero/exec_plan.h" #include "arrow/acero/options.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/acero/scalar_aggregate_node.cc b/cpp/src/arrow/acero/scalar_aggregate_node.cc index ae59aa692096a..c7805f4d24eb2 100644 --- a/cpp/src/arrow/acero/scalar_aggregate_node.cc +++ b/cpp/src/arrow/acero/scalar_aggregate_node.cc @@ -25,6 +25,7 @@ #include "arrow/acero/options.h" #include "arrow/acero/util.h" #include "arrow/compute/exec.h" +#include "arrow/compute/function.h" #include "arrow/compute/registry.h" #include "arrow/compute/row/grouper.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index f29f164d19973..edd443adc43c4 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -451,6 +451,7 @@ struct ARROW_EXPORT ArraySpan { util::span GetSpan(int i, int64_t length) const { const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); assert(i > 0 && length + offset <= buffer_length); + ARROW_UNUSED(buffer_length); return util::span(buffers[i].data_as() + this->offset, length); } @@ -466,6 +467,7 @@ struct ARROW_EXPORT ArraySpan { util::span GetSpan(int i, int64_t length) { const int64_t buffer_length = buffers[i].size / static_cast(sizeof(T)); assert(i > 0 && length + offset <= buffer_length); + ARROW_UNUSED(buffer_length); return util::span(buffers[i].mutable_data_as() + this->offset, length); } diff --git a/cpp/src/arrow/compute/api.h b/cpp/src/arrow/compute/api.h index 5b5dfdf69eb94..b701d9928691f 100644 --- a/cpp/src/arrow/compute/api.h +++ b/cpp/src/arrow/compute/api.h @@ -20,18 +20,23 @@ #pragma once +/// \defgroup compute-functions Abstract compute function API +/// @{ +/// @} + /// \defgroup compute-concrete-options Concrete option classes for compute functions /// @{ /// @} -#include "arrow/compute/api_aggregate.h" // IWYU pragma: export -#include "arrow/compute/api_scalar.h" // IWYU pragma: export -#include "arrow/compute/api_vector.h" // IWYU pragma: export -#include "arrow/compute/cast.h" // IWYU pragma: export -#include "arrow/compute/function.h" // IWYU pragma: export -#include "arrow/compute/kernel.h" // IWYU pragma: export -#include "arrow/compute/registry.h" // IWYU pragma: export -#include "arrow/datum.h" // IWYU pragma: export 
+#include "arrow/compute/api_aggregate.h" // IWYU pragma: export +#include "arrow/compute/api_scalar.h" // IWYU pragma: export +#include "arrow/compute/api_vector.h" // IWYU pragma: export +#include "arrow/compute/cast.h" // IWYU pragma: export +#include "arrow/compute/function.h" // IWYU pragma: export +#include "arrow/compute/function_options.h" // IWYU pragma: export +#include "arrow/compute/kernel.h" // IWYU pragma: export +#include "arrow/compute/registry.h" // IWYU pragma: export +#include "arrow/datum.h" // IWYU pragma: export #include "arrow/compute/expression.h" // IWYU pragma: export diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h index 3493c3146310d..4d2c814a69bbb 100644 --- a/cpp/src/arrow/compute/api_aggregate.h +++ b/cpp/src/arrow/compute/api_aggregate.h @@ -22,7 +22,7 @@ #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/util/macros.h" diff --git a/cpp/src/arrow/compute/api_scalar.h b/cpp/src/arrow/compute/api_scalar.h index 9f12471ddca14..26fbe64f74293 100644 --- a/cpp/src/arrow/compute/api_scalar.h +++ b/cpp/src/arrow/compute/api_scalar.h @@ -24,7 +24,7 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/datum.h" #include "arrow/result.h" diff --git a/cpp/src/arrow/compute/api_vector.h b/cpp/src/arrow/compute/api_vector.h index 0233090ef6fb9..759f9e5c1a408 100644 --- a/cpp/src/arrow/compute/api_vector.h +++ b/cpp/src/arrow/compute/api_vector.h @@ -20,9 +20,8 @@ #include #include -#include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/ordering.h" -#include "arrow/datum.h" #include "arrow/result.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/cast.h b/cpp/src/arrow/compute/cast.h index 613e8a55addd2..18e56092dda2a 100644 --- a/cpp/src/arrow/compute/cast.h +++ b/cpp/src/arrow/compute/cast.h @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/type_fwd.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/function.cc b/cpp/src/arrow/compute/function.cc index c0433145dd1d0..e1a2e8c5d8879 100644 --- a/cpp/src/arrow/compute/function.cc +++ b/cpp/src/arrow/compute/function.cc @@ -26,6 +26,7 @@ #include "arrow/compute/exec.h" #include "arrow/compute/exec_internal.h" #include "arrow/compute/function_internal.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/registry.h" #include "arrow/datum.h" diff --git a/cpp/src/arrow/compute/function.h b/cpp/src/arrow/compute/function.h index 333c9a65c56c4..be934a3c5abfc 100644 --- a/cpp/src/arrow/compute/function.h +++ b/cpp/src/arrow/compute/function.h @@ -36,53 +36,9 @@ namespace arrow { namespace compute { -/// \defgroup compute-functions Abstract compute function API -/// +/// \addtogroup compute-functions /// @{ -/// \brief Extension point for defining options outside libarrow (but -/// still within this project). 
-class ARROW_EXPORT FunctionOptionsType { - public: - virtual ~FunctionOptionsType() = default; - - virtual const char* type_name() const = 0; - virtual std::string Stringify(const FunctionOptions&) const = 0; - virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; - virtual Result> Serialize(const FunctionOptions&) const; - virtual Result> Deserialize( - const Buffer& buffer) const; - virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; -}; - -/// \brief Base class for specifying options configuring a function's behavior, -/// such as error handling. -class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { - public: - virtual ~FunctionOptions() = default; - - const FunctionOptionsType* options_type() const { return options_type_; } - const char* type_name() const { return options_type()->type_name(); } - - bool Equals(const FunctionOptions& other) const; - std::string ToString() const; - std::unique_ptr Copy() const; - /// \brief Serialize an options struct to a buffer. - Result> Serialize() const; - /// \brief Deserialize an options struct from a buffer. - /// Note: this will only look for `type_name` in the default FunctionRegistry; - /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then - /// call FunctionOptionsType::Deserialize(). - static Result> Deserialize( - const std::string& type_name, const Buffer& buffer); - - protected: - explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} - const FunctionOptionsType* options_type_; -}; - -ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); - /// \brief Contains the number of required arguments for the function. /// /// Naming conventions taken from https://en.wikipedia.org/wiki/Arity. diff --git a/cpp/src/arrow/compute/function_options.h b/cpp/src/arrow/compute/function_options.h new file mode 100644 index 0000000000000..88ec2fd2d0679 --- /dev/null +++ b/cpp/src/arrow/compute/function_options.h @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// NOTE: API is EXPERIMENTAL and will change without going through a +// deprecation cycle. + +#pragma once + +#include "arrow/compute/type_fwd.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/type_fwd.h" +#include "arrow/util/visibility.h" + +namespace arrow { +namespace compute { + +/// \addtogroup compute-functions +/// @{ + +/// \brief Extension point for defining options outside libarrow (but +/// still within this project). 
+class ARROW_EXPORT FunctionOptionsType { + public: + virtual ~FunctionOptionsType() = default; + + virtual const char* type_name() const = 0; + virtual std::string Stringify(const FunctionOptions&) const = 0; + virtual bool Compare(const FunctionOptions&, const FunctionOptions&) const = 0; + virtual Result> Serialize(const FunctionOptions&) const; + virtual Result> Deserialize( + const Buffer& buffer) const; + virtual std::unique_ptr Copy(const FunctionOptions&) const = 0; +}; + +/// \brief Base class for specifying options configuring a function's behavior, +/// such as error handling. +class ARROW_EXPORT FunctionOptions : public util::EqualityComparable { + public: + virtual ~FunctionOptions() = default; + + const FunctionOptionsType* options_type() const { return options_type_; } + const char* type_name() const { return options_type()->type_name(); } + + bool Equals(const FunctionOptions& other) const; + std::string ToString() const; + std::unique_ptr Copy() const; + /// \brief Serialize an options struct to a buffer. + Result> Serialize() const; + /// \brief Deserialize an options struct from a buffer. + /// Note: this will only look for `type_name` in the default FunctionRegistry; + /// to use a custom FunctionRegistry, look up the FunctionOptionsType, then + /// call FunctionOptionsType::Deserialize(). + static Result> Deserialize( + const std::string& type_name, const Buffer& buffer); + + protected: + explicit FunctionOptions(const FunctionOptionsType* type) : options_type_(type) {} + const FunctionOptionsType* options_type_; +}; + +ARROW_EXPORT void PrintTo(const FunctionOptions&, std::ostream*); + +/// @} + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc index 6fd449a931381..ec5291ef608a3 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_nested.cc @@ -401,7 +401,7 @@ void AddTypeToTypeCast(CastFunction* func) { kernel.exec = CastFunctor::Exec; kernel.signature = KernelSignature::Make({InputType(SrcT::type_id)}, kOutputTargetType); kernel.null_handling = NullHandling::COMPUTED_NO_PREALLOCATE; - DCHECK_OK(func->AddKernel(StructType::type_id, std::move(kernel))); + DCHECK_OK(func->AddKernel(SrcT::type_id, std::move(kernel))); } template @@ -480,14 +480,18 @@ std::vector> GetNestedCasts() { auto cast_list = std::make_shared("cast_list", Type::LIST); AddCommonCasts(Type::LIST, kOutputTargetType, cast_list.get()); AddListCast(cast_list.get()); + AddListCast(cast_list.get()); AddListCast(cast_list.get()); + AddListCast(cast_list.get()); AddTypeToTypeCast, FixedSizeListType>(cast_list.get()); auto cast_large_list = std::make_shared("cast_large_list", Type::LARGE_LIST); AddCommonCasts(Type::LARGE_LIST, kOutputTargetType, cast_large_list.get()); AddListCast(cast_large_list.get()); + AddListCast(cast_large_list.get()); AddListCast(cast_large_list.get()); + AddListCast(cast_large_list.get()); AddTypeToTypeCast, FixedSizeListType>( cast_large_list.get()); @@ -503,7 +507,11 @@ std::vector> GetNestedCasts() { AddCommonCasts(Type::FIXED_SIZE_LIST, kOutputTargetType, cast_fsl.get()); AddTypeToTypeCast(cast_fsl.get()); AddTypeToTypeCast, ListType>(cast_fsl.get()); + AddTypeToTypeCast, ListViewType>(cast_fsl.get()); AddTypeToTypeCast, LargeListType>(cast_fsl.get()); + AddTypeToTypeCast, LargeListViewType>( + cast_fsl.get()); + AddTypeToTypeCast, MapType>(cast_fsl.get()); // So is struct auto cast_struct = 
std::make_shared("cast_struct", Type::STRUCT); diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc index ebeb597207a81..a6576e4e4c26f 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_string.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_string.cc @@ -20,11 +20,14 @@ #include "arrow/array/array_base.h" #include "arrow/array/builder_binary.h" +#include "arrow/compute/kernels/base_arithmetic_internal.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/compute/kernels/scalar_cast_internal.h" #include "arrow/compute/kernels/temporal_internal.h" #include "arrow/result.h" +#include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/formatting.h" #include "arrow/util/int_util.h" #include "arrow/util/utf8_internal.h" @@ -284,9 +287,8 @@ Status CastBinaryToBinaryOffsets(KernelContext* ctx, } template -enable_if_base_binary BinaryToBinaryCastExec(KernelContext* ctx, - const ExecSpan& batch, - ExecResult* out) { +enable_if_t::value && !is_fixed_size_binary_type::value, Status> +BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { const CastOptions& options = checked_cast(*ctx->state()).options; const ArraySpan& input = batch[0].array; @@ -387,6 +389,33 @@ BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ou return ZeroCopyCastExec(ctx, batch, out); } +template +enable_if_t::value && std::is_same::value, + Status> +BinaryToBinaryCastExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { + const CastOptions& options = checked_cast(*ctx->state()).options; + FixedSizeBinaryBuilder builder(options.to_type.GetSharedPtr(), ctx->memory_pool()); + const ArraySpan& input = batch[0].array; + RETURN_NOT_OK(builder.Reserve(input.length)); + + RETURN_NOT_OK(VisitArraySpanInline( + input, + [&](std::string_view v) { + if (v.size() != static_cast(builder.byte_width())) { + return Status::Invalid("Failed casting from ", input.type->ToString(), " to ", + options.to_type.ToString(), ": widths must match"); + } + builder.UnsafeAppend(v); + return Status::OK(); + }, + [&] { + builder.UnsafeAppendNull(); + return Status::OK(); + })); + + return builder.FinishInternal(&std::get>(out->value)); +} + #if defined(_MSC_VER) #pragma warning(pop) #endif @@ -452,6 +481,26 @@ void AddBinaryToBinaryCast(CastFunction* func) { AddBinaryToBinaryCast(func); } +template +void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { + auto resolver_fsb = [](KernelContext* ctx, const std::vector&) { + const CastOptions& options = checked_cast(*ctx->state()).options; + return options.to_type; + }; + + DCHECK_OK(func->AddKernel(InType::type_id, {InputType(InType::type_id)}, resolver_fsb, + BinaryToBinaryCastExec, + NullHandling::COMPUTED_NO_PREALLOCATE)); +} + +void AddBinaryToFixedSizeBinaryCast(CastFunction* func) { + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); + AddBinaryToFixedSizeBinaryCast(func); +} + } // namespace std::vector> GetBinaryLikeCasts() { @@ -483,11 +532,7 @@ std::vector> GetBinaryLikeCasts() { std::make_shared("cast_fixed_size_binary", Type::FIXED_SIZE_BINARY); AddCommonCasts(Type::FIXED_SIZE_BINARY, OutputType(ResolveOutputFromOptions), cast_fsb.get()); - DCHECK_OK(cast_fsb->AddKernel( - Type::FIXED_SIZE_BINARY, {InputType(Type::FIXED_SIZE_BINARY)}, - OutputType(FirstType), - 
BinaryToBinaryCastExec, - NullHandling::COMPUTED_NO_PREALLOCATE)); + AddBinaryToFixedSizeBinaryCast(cast_fsb.get()); return {cast_binary, cast_large_binary, cast_string, cast_large_string, cast_fsb}; } diff --git a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc index c84125bbdd19e..b429c8175b020 100644 --- a/cpp/src/arrow/compute/kernels/scalar_cast_test.cc +++ b/cpp/src/arrow/compute/kernels/scalar_cast_test.cc @@ -2171,6 +2171,22 @@ TEST(Cast, StringToString) { } } +TEST(Cast, BinaryOrStringToFixedSizeBinary) { + for (auto in_type : {utf8(), large_utf8(), binary(), large_binary()}) { + auto valid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz", "quu"])"); + auto invalid_input = ArrayFromJSON(in_type, R"(["foo", null, "bar", "baz", "quux"])"); + + CheckCast(valid_input, ArrayFromJSON(fixed_size_binary(3), R"(["foo", null, "bar", + "baz", "quu"])")); + CheckCastFails(invalid_input, CastOptions::Safe(fixed_size_binary(3))); + CheckCastFails(valid_input, CastOptions::Safe(fixed_size_binary(5))); + + auto empty_input = ArrayFromJSON(in_type, "[]"); + CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(3), "[]")); + CheckCast(empty_input, ArrayFromJSON(fixed_size_binary(5), "[]")); + } +} + TEST(Cast, IntToString) { for (auto string_type : {utf8(), large_utf8()}) { CheckCast(ArrayFromJSON(int8(), "[0, 1, 127, -128, null]"), diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc index b72402bbccd4e..58bc560f52842 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc @@ -21,6 +21,7 @@ #include "arrow/array/concatenate.h" #include "arrow/array/util.h" #include "arrow/compute/api_scalar.h" +#include "arrow/compute/function.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/util/key_value_metadata.h" diff --git a/cpp/src/arrow/compute/kernels/vector_hash.cc b/cpp/src/arrow/compute/kernels/vector_hash.cc index 65e59d1a2eb14..800deba3a5ed2 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash.cc @@ -26,17 +26,20 @@ #include "arrow/array/concatenate.h" #include "arrow/array/dict_internal.h" #include "arrow/array/util.h" +#include "arrow/buffer.h" #include "arrow/compute/api_vector.h" #include "arrow/compute/cast.h" #include "arrow/compute/kernels/common_internal.h" #include "arrow/result.h" #include "arrow/util/hashing.h" +#include "arrow/util/int_util.h" #include "arrow/util/unreachable.h" namespace arrow { using internal::DictionaryTraits; using internal::HashTraits; +using internal::TransposeInts; namespace compute { namespace internal { @@ -448,9 +451,9 @@ class DictionaryHashKernel : public HashKernel { Status Append(const ArraySpan& arr) override { auto arr_dict = arr.dictionary().ToArray(); - if (!dictionary_) { - dictionary_ = arr_dict; - } else if (!dictionary_->Equals(*arr_dict)) { + if (!first_dictionary_) { + first_dictionary_ = arr_dict; + } else if (!first_dictionary_->Equals(*arr_dict)) { // NOTE: This approach computes a new dictionary unification per chunk. // This is in effect O(n*k) where n is the total chunked array length and // k is the number of chunks (therefore O(n**2) if chunks have a fixed size). 
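[Editor's note] The scalar_cast_string.cc and scalar_cast_test.cc hunks above add casting from binary/string arrays to fixed_size_binary, rejecting values whose width does not match the target width. The following is a minimal sketch (not part of the patch) of exercising that path through the public `arrow::compute::Cast` API; it assumes an Arrow build that includes this change, and the values are placeholders.

```cpp
// Editor's sketch: binary -> fixed_size_binary casting added by this patch.
#include <iostream>
#include <memory>

#include <arrow/api.h>
#include <arrow/compute/api.h>

arrow::Status CastToFixedSizeBinary() {
  // Build a variable-width binary array whose non-null values are all 3 bytes.
  arrow::BinaryBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Append("foo"));
  ARROW_RETURN_NOT_OK(builder.AppendNull());
  ARROW_RETURN_NOT_OK(builder.Append("bar"));
  std::shared_ptr<arrow::Array> input;
  ARROW_RETURN_NOT_OK(builder.Finish(&input));

  // Succeeds: every non-null value matches the target width of 3.
  ARROW_ASSIGN_OR_RAISE(
      arrow::Datum fixed,
      arrow::compute::Cast(input, arrow::fixed_size_binary(3),
                           arrow::compute::CastOptions::Safe()));
  std::cout << fixed.make_array()->ToString() << std::endl;

  // Fails with Status::Invalid: widths must match, as the new test asserts.
  auto mismatched = arrow::compute::Cast(input, arrow::fixed_size_binary(5),
                                         arrow::compute::CastOptions::Safe());
  std::cout << mismatched.status().ToString() << std::endl;
  return arrow::Status::OK();
}

int main() { return CastToFixedSizeBinary().ok() ? 0 : 1; }
```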
@@ -458,21 +461,23 @@ class DictionaryHashKernel : public HashKernel { // A better approach may be to run the kernel over each individual chunk, // and then hash-aggregate all results (for example sum-group-by for // the "value_counts" kernel). - auto out_dict_type = dictionary_->type(); + if (dictionary_unifier_ == nullptr) { + ARROW_ASSIGN_OR_RAISE(dictionary_unifier_, + DictionaryUnifier::Make(first_dictionary_->type())); + RETURN_NOT_OK(dictionary_unifier_->Unify(*first_dictionary_)); + } + auto out_dict_type = first_dictionary_->type(); std::shared_ptr transpose_map; - std::shared_ptr out_dict; - ARROW_ASSIGN_OR_RAISE(auto unifier, DictionaryUnifier::Make(out_dict_type)); - ARROW_CHECK_OK(unifier->Unify(*dictionary_)); - ARROW_CHECK_OK(unifier->Unify(*arr_dict, &transpose_map)); - ARROW_CHECK_OK(unifier->GetResult(&out_dict_type, &out_dict)); + RETURN_NOT_OK(dictionary_unifier_->Unify(*arr_dict, &transpose_map)); - dictionary_ = out_dict; auto transpose = reinterpret_cast(transpose_map->data()); - auto in_dict_array = arr.ToArray(); + auto in_array = arr.ToArray(); + const auto& in_dict_array = + arrow::internal::checked_cast(*in_array); ARROW_ASSIGN_OR_RAISE( - auto tmp, arrow::internal::checked_cast(*in_dict_array) - .Transpose(arr.type->GetSharedPtr(), out_dict, transpose)); + auto tmp, in_dict_array.Transpose(arr.type->GetSharedPtr(), + in_dict_array.dictionary(), transpose)); return indices_kernel_->Append(*tmp->data()); } @@ -495,12 +500,27 @@ class DictionaryHashKernel : public HashKernel { return dictionary_value_type_; } - std::shared_ptr dictionary() const { return dictionary_; } + /// This can't be called more than once because DictionaryUnifier::GetResult() + /// can't be called more than once and produce the same output. + Result> dictionary() const { + if (!first_dictionary_) { // Append was never called + return nullptr; + } + if (!dictionary_unifier_) { // Append was called only once + return first_dictionary_; + } + + auto out_dict_type = first_dictionary_->type(); + std::shared_ptr out_dict; + RETURN_NOT_OK(dictionary_unifier_->GetResult(&out_dict_type, &out_dict)); + return out_dict; + } private: std::unique_ptr indices_kernel_; - std::shared_ptr dictionary_; + std::shared_ptr first_dictionary_; std::shared_ptr dictionary_value_type_; + std::unique_ptr dictionary_unifier_; }; // ---------------------------------------------------------------------- @@ -630,8 +650,9 @@ Status ValueCountsFinalize(KernelContext* ctx, std::vector* out) { // hence have no dictionary. 
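[Editor's note] The DictionaryHashKernel changes above keep a single DictionaryUnifier across chunks instead of re-unifying every previously seen dictionary on each new chunk; the ValueCountsDictionaryChunks benchmark added below measures exactly this case. A rough sketch (not part of the patch) of the affected scenario using only public compute APIs follows; the helper name and sample values are illustrative only.

```cpp
// Editor's sketch: value_counts over a chunked dictionary array whose chunks
// were dictionary-encoded independently and therefore carry different dictionaries.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <arrow/api.h>
#include <arrow/compute/api.h>

arrow::Status ValueCountsOverDictionaryChunks() {
  namespace cp = arrow::compute;

  // Encode each chunk separately so the chunks end up with distinct dictionaries
  // ("a","b") vs ("b","c"), forcing unification inside the hash kernel.
  auto encode_chunk = [](const std::vector<std::string>& values)
      -> arrow::Result<std::shared_ptr<arrow::Array>> {
    arrow::StringBuilder builder;
    ARROW_RETURN_NOT_OK(builder.AppendValues(values));
    ARROW_ASSIGN_OR_RAISE(auto plain, builder.Finish());
    ARROW_ASSIGN_OR_RAISE(arrow::Datum encoded, cp::DictionaryEncode(plain));
    return encoded.make_array();
  };

  arrow::ArrayVector chunks(2);
  ARROW_ASSIGN_OR_RAISE(chunks[0], encode_chunk({"a", "b", "a"}));
  ARROW_ASSIGN_OR_RAISE(chunks[1], encode_chunk({"b", "c", "c"}));
  auto chunked = std::make_shared<arrow::ChunkedArray>(chunks);

  // Returns a struct array of {values, counts}; with this patch the per-chunk
  // dictionaries are unified incrementally rather than from scratch each time.
  ARROW_ASSIGN_OR_RAISE(auto counts, cp::ValueCounts(chunked));
  std::cout << counts->ToString() << std::endl;
  return arrow::Status::OK();
}

int main() { return ValueCountsOverDictionaryChunks().ok() ? 0 : 1; }
```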
Result> EnsureHashDictionary(KernelContext* ctx, DictionaryHashKernel* hash) { - if (hash->dictionary()) { - return hash->dictionary()->data(); + ARROW_ASSIGN_OR_RAISE(auto dict, hash->dictionary()); + if (dict) { + return dict->data(); } ARROW_ASSIGN_OR_RAISE(auto null, MakeArrayOfNull(hash->dictionary_value_type(), /*length=*/0, ctx->memory_pool())); diff --git a/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc index e9548e133aa00..472f50db8cf92 100644 --- a/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_hash_benchmark.cc @@ -25,6 +25,7 @@ #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" +#include "arrow/util/logging.h" #include "arrow/compute/api.h" @@ -226,6 +227,33 @@ static void UniqueString100bytes(benchmark::State& state) { BenchUnique(state, HashParams{general_bench_cases[state.range(0)], 100}); } +template +void BenchValueCountsDictionaryChunks(benchmark::State& state, const ParamType& params) { + std::shared_ptr arr; + params.GenerateTestData(&arr); + // chunk arr to 100 slices + std::vector> chunks; + const int64_t chunk_size = arr->length() / 100; + for (int64_t i = 0; i < 100; ++i) { + auto slice = arr->Slice(i * chunk_size, chunk_size); + auto datum = DictionaryEncode(slice).ValueOrDie(); + ARROW_CHECK(datum.is_array()); + chunks.push_back(datum.make_array()); + } + auto chunked_array = std::make_shared(chunks); + + while (state.KeepRunning()) { + ABORT_NOT_OK(ValueCounts(chunked_array).status()); + } + params.SetMetadata(state); +} + +static void ValueCountsDictionaryChunks(benchmark::State& state) { + // Dictionary of byte strings with 10 bytes each + BenchValueCountsDictionaryChunks( + state, HashParams{general_bench_cases[state.range(0)], 10}); +} + void HashSetArgs(benchmark::internal::Benchmark* bench) { for (int i = 0; i < static_cast(general_bench_cases.size()); ++i) { bench->Arg(i); @@ -239,6 +267,14 @@ BENCHMARK(UniqueInt64)->Apply(HashSetArgs); BENCHMARK(UniqueString10bytes)->Apply(HashSetArgs); BENCHMARK(UniqueString100bytes)->Apply(HashSetArgs); +void DictionaryChunksHashSetArgs(benchmark::internal::Benchmark* bench) { + for (int i = 0; i < static_cast(general_bench_cases.size()); ++i) { + bench->Arg(i); + } +} + +BENCHMARK(ValueCountsDictionaryChunks)->Apply(DictionaryChunksHashSetArgs); + void UInt8SetArgs(benchmark::internal::Benchmark* bench) { for (int i = 0; i < static_cast(uint8_bench_cases.size()); ++i) { bench->Arg(i); diff --git a/cpp/src/arrow/compute/kernels/vector_rank.cc b/cpp/src/arrow/compute/kernels/vector_rank.cc index 780ae25d96360..0cea7246e516c 100644 --- a/cpp/src/arrow/compute/kernels/vector_rank.cc +++ b/cpp/src/arrow/compute/kernels/vector_rank.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. 
+#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc index 719969d46ea7c..971a841de0773 100644 --- a/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc +++ b/cpp/src/arrow/compute/kernels/vector_replace_benchmark.cc @@ -18,6 +18,7 @@ #include #include "arrow/array.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc index 0bd8e3386e7cc..f02aee1b35996 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_encode_test.cc @@ -21,6 +21,7 @@ #include "arrow/array/validate.h" #include "arrow/builder.h" #include "arrow/compute/api_vector.h" +#include "arrow/datum.h" #include "arrow/testing/gtest_util.h" #include "arrow/type_fwd.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/kernels/vector_select_k.cc b/cpp/src/arrow/compute/kernels/vector_select_k.cc index 5000de8996280..1740a9b7f0bb4 100644 --- a/cpp/src/arrow/compute/kernels/vector_select_k.cc +++ b/cpp/src/arrow/compute/kernels/vector_select_k.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index 8ddcbb9905cb2..e08a2bc10372f 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -17,6 +17,7 @@ #include +#include "arrow/compute/function.h" #include "arrow/compute/kernels/vector_sort_internal.h" #include "arrow/compute/registry.h" diff --git a/cpp/src/arrow/compute/light_array.cc b/cpp/src/arrow/compute/light_array.cc index 4e8b2b2d7cc3a..93a054de1957c 100644 --- a/cpp/src/arrow/compute/light_array.cc +++ b/cpp/src/arrow/compute/light_array.cc @@ -398,9 +398,12 @@ int ExecBatchBuilder::NumRowsToSkip(const std::shared_ptr& column, } else { --num_rows_left; int row_id_removed = row_ids[num_rows_left]; - const uint32_t* offsets = - reinterpret_cast(column->buffers[1]->data()); + const int32_t* offsets = column->GetValues(1); num_bytes_skipped += offsets[row_id_removed + 1] - offsets[row_id_removed]; + // Skip consecutive rows with the same id + while (num_rows_left > 0 && row_id_removed == row_ids[num_rows_left - 1]) { + --num_rows_left; + } } } diff --git a/cpp/src/arrow/compute/light_array.h b/cpp/src/arrow/compute/light_array.h index 87f6b6c76a12c..84aa86d64bb62 100644 --- a/cpp/src/arrow/compute/light_array.h +++ b/cpp/src/arrow/compute/light_array.h @@ -416,7 +416,9 @@ class ARROW_EXPORT ExecBatchBuilder { // without checking buffer bounds (useful with SIMD or fixed size memory loads // and stores). // - // The sequence of row_ids provided must be non-decreasing. + // The sequence of row_ids provided must be non-decreasing. In case of consecutive rows + // with the same row id, they are skipped all at once because they occupy the same + // space. 
// static int NumRowsToSkip(const std::shared_ptr& column, int num_rows, const uint16_t* row_ids, int num_tail_bytes_to_skip); diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 4e33f7b578ea8..52121530fe91d 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -471,6 +471,32 @@ TEST(ExecBatchBuilder, AppendBatchesSomeRows) { ASSERT_EQ(0, pool->bytes_allocated()); } +TEST(ExecBatchBuilder, AppendBatchDupRows) { + std::unique_ptr owned_pool = MemoryPool::CreateDefault(); + MemoryPool* pool = owned_pool.get(); + // Case of cross-word copying for the last row, which may exceed the buffer boundary. + // This is a simplified case of GH-32570 + { + // 64-byte data fully occupying one minimal 64-byte aligned memory region. + ExecBatch batch_string = JSONToExecBatch({binary()}, R"([["123456789ABCDEF0"], + ["123456789ABCDEF0"], + ["123456789ABCDEF0"], + ["ABCDEF0"], + ["123456789"]])"); // 9-byte tail row, larger than a word. + ASSERT_EQ(batch_string[0].array()->buffers[1]->capacity(), 64); + ASSERT_EQ(batch_string[0].array()->buffers[2]->capacity(), 64); + ExecBatchBuilder builder; + uint16_t row_ids[2] = {4, 4}; + ASSERT_OK(builder.AppendSelected(pool, batch_string, 2, row_ids, /*num_cols=*/1)); + ExecBatch built = builder.Flush(); + ExecBatch batch_string_appended = + JSONToExecBatch({binary()}, R"([["123456789"], ["123456789"]])"); + ASSERT_EQ(batch_string_appended, built); + ASSERT_NE(0, pool->bytes_allocated()); + } + ASSERT_EQ(0, pool->bytes_allocated()); +} + TEST(ExecBatchBuilder, AppendBatchesSomeCols) { std::unique_ptr owned_pool = MemoryPool::CreateDefault(); MemoryPool* pool = owned_pool.get(); diff --git a/cpp/src/arrow/compute/registry_test.cc b/cpp/src/arrow/compute/registry_test.cc index 7fee136de7a0b..2d69f119df1f4 100644 --- a/cpp/src/arrow/compute/registry_test.cc +++ b/cpp/src/arrow/compute/registry_test.cc @@ -22,6 +22,7 @@ #include #include "arrow/compute/function.h" +#include "arrow/compute/function_options.h" #include "arrow/compute/registry.h" #include "arrow/result.h" #include "arrow/status.h" diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h index 3f990b1814311..89f32ceb0f906 100644 --- a/cpp/src/arrow/compute/type_fwd.h +++ b/cpp/src/arrow/compute/type_fwd.h @@ -27,6 +27,7 @@ struct TypeHolder; namespace compute { class Function; +class ScalarAggregateFunction; class FunctionExecutor; class FunctionOptions; class FunctionRegistry; diff --git a/cpp/src/arrow/dataset/dataset_writer_test.cc b/cpp/src/arrow/dataset/dataset_writer_test.cc index e62e779f71797..1ac0ec3f39e97 100644 --- a/cpp/src/arrow/dataset/dataset_writer_test.cc +++ b/cpp/src/arrow/dataset/dataset_writer_test.cc @@ -290,12 +290,12 @@ TEST_F(DatasetWriterTestFixture, MaxRowsOneWriteBackpresure) { write_options_.max_open_files = 2; write_options_.min_rows_per_group = kFileSizeLimit - 1; auto dataset_writer = MakeDatasetWriter(/*max_rows=*/kFileSizeLimit); - for (int i = 0; i < 20; ++i) { - dataset_writer->WriteRecordBatch(MakeBatch(kFileSizeLimit * 5), ""); + for (int i = 0; i < 5; ++i) { + dataset_writer->WriteRecordBatch(MakeBatch(kFileSizeLimit * 2), ""); } EndWriterChecked(dataset_writer.get()); std::vector expected_files; - for (int i = 0; i < 100; ++i) { + for (int i = 0; i < 10; ++i) { expected_files.emplace_back("testdir/chunk-" + std::to_string(i) + ".arrow", kFileSizeLimit * i, kFileSizeLimit); } diff --git a/cpp/src/arrow/dataset/file_parquet.cc 
b/cpp/src/arrow/dataset/file_parquet.cc index 3afe4ec85cf49..1c2fd2dea6307 100644 --- a/cpp/src/arrow/dataset/file_parquet.cc +++ b/cpp/src/arrow/dataset/file_parquet.cc @@ -24,6 +24,7 @@ #include #include +#include "arrow/compute/cast.h" #include "arrow/compute/exec.h" #include "arrow/dataset/dataset_internal.h" #include "arrow/dataset/parquet_encryption_config.h" @@ -58,6 +59,8 @@ using parquet::arrow::SchemaField; using parquet::arrow::SchemaManifest; using parquet::arrow::StatisticsAsScalars; +using compute::Cast; + namespace { parquet::ReaderProperties MakeReaderProperties( @@ -370,12 +373,12 @@ std::optional ParquetFileFragment::EvaluateStatisticsAsExpr return std::nullopt; } - auto maybe_min = min->CastTo(field.type()); - auto maybe_max = max->CastTo(field.type()); + auto maybe_min = Cast(min, field.type()); + auto maybe_max = Cast(max, field.type()); if (maybe_min.ok() && maybe_max.ok()) { - min = maybe_min.MoveValueUnsafe(); - max = maybe_max.MoveValueUnsafe(); + min = maybe_min.MoveValueUnsafe().scalar(); + max = maybe_max.MoveValueUnsafe().scalar(); if (min->Equals(*max)) { auto single_value = compute::equal(field_expr, compute::literal(std::move(min))); diff --git a/cpp/src/arrow/filesystem/azurefs.cc b/cpp/src/arrow/filesystem/azurefs.cc index d72ead92ed111..21350a490411a 100644 --- a/cpp/src/arrow/filesystem/azurefs.cc +++ b/cpp/src/arrow/filesystem/azurefs.cc @@ -58,7 +58,7 @@ bool AzureOptions::Equals(const AzureOptions& other) const { blob_storage_scheme == other.blob_storage_scheme && dfs_storage_scheme == other.dfs_storage_scheme && default_metadata == other.default_metadata && - account_name_ == other.account_name_ && + account_name == other.account_name && credential_kind_ == other.credential_kind_; if (!equals) { return false; @@ -104,23 +104,39 @@ std::string AzureOptions::AccountDfsUrl(const std::string& account_name) const { return BuildBaseUrl(dfs_storage_scheme, dfs_storage_authority, account_name); } -Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_name, - const std::string& account_key) { +Status AzureOptions::ConfigureAccountKeyCredential(const std::string& account_key) { credential_kind_ = CredentialKind::kStorageSharedKeyCredential; - account_name_ = account_name; + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } storage_shared_key_credential_ = std::make_shared(account_name, account_key); return Status::OK(); } -Status AzureOptions::ConfigureDefaultCredential(const std::string& account_name) { +Status AzureOptions::ConfigureClientSecretCredential(const std::string& tenant_id, + const std::string& client_id, + const std::string& client_secret) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = std::make_shared( + tenant_id, client_id, client_secret); + return Status::OK(); +} + +Status AzureOptions::ConfigureDefaultCredential() { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = std::make_shared(); return Status::OK(); } -Status AzureOptions::ConfigureWorkloadIdentityCredential( - const std::string& account_name) { +Status AzureOptions::ConfigureManagedIdentityCredential(const std::string& client_id) { + credential_kind_ = CredentialKind::kTokenCredential; + token_credential_ = + std::make_shared(client_id); + return Status::OK(); +} + +Status AzureOptions::ConfigureWorkloadIdentityCredential() { credential_kind_ = CredentialKind::kTokenCredential; token_credential_ = std::make_shared(); return Status::OK(); @@ 
-128,14 +144,17 @@ Status AzureOptions::ConfigureWorkloadIdentityCredential( Result> AzureOptions::MakeBlobServiceClient() const { + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: - return std::make_unique(AccountBlobUrl(account_name_), + return std::make_unique(AccountBlobUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKeyCredential: - return std::make_unique(AccountBlobUrl(account_name_), + return std::make_unique(AccountBlobUrl(account_name), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); @@ -143,15 +162,18 @@ Result> AzureOptions::MakeBlobServiceC Result> AzureOptions::MakeDataLakeServiceClient() const { + if (account_name.empty()) { + return Status::Invalid("AzureOptions doesn't contain a valid account name"); + } switch (credential_kind_) { case CredentialKind::kAnonymous: break; case CredentialKind::kTokenCredential: return std::make_unique( - AccountDfsUrl(account_name_), token_credential_); + AccountDfsUrl(account_name), token_credential_); case CredentialKind::kStorageSharedKeyCredential: return std::make_unique( - AccountDfsUrl(account_name_), storage_shared_key_credential_); + AccountDfsUrl(account_name), storage_shared_key_credential_); } return Status::Invalid("AzureOptions doesn't contain a valid auth configuration"); } @@ -941,14 +963,38 @@ class AzureFileSystem::Impl { break; } ARROW_ASSIGN_OR_RAISE( - cached_hns_support_, + auto hns_support, internal::CheckIfHierarchicalNamespaceIsEnabled(adlfs_client, options_)); - DCHECK_NE(cached_hns_support_, HNSSupport::kUnknown); - // Caller should handle kContainerNotFound case appropriately. - return cached_hns_support_; + DCHECK_NE(hns_support, HNSSupport::kUnknown); + if (hns_support == HNSSupport::kContainerNotFound) { + // Caller should handle kContainerNotFound case appropriately as it knows the + // container this refers to, but the cached value in that case should remain + // kUnknown before we get a CheckIfHierarchicalNamespaceIsEnabled result that + // is not kContainerNotFound. + cached_hns_support_ = HNSSupport::kUnknown; + } else { + cached_hns_support_ = hns_support; + } + return hns_support; } public: + /// This is used from unit tests to ensure we perform operations on all the + /// possible states of cached_hns_support_. + void ForceCachedHierarchicalNamespaceSupport(int support) { + auto hns_support = static_cast(support); + switch (hns_support) { + case HNSSupport::kUnknown: + case HNSSupport::kContainerNotFound: + case HNSSupport::kDisabled: + case HNSSupport::kEnabled: + cached_hns_support_ = hns_support; + return; + } + // This is reachable if an invalid int is cast to enum class HNSSupport. 
+ DCHECK(false) << "Invalid enum HierarchicalNamespaceSupport value."; + } + Result GetFileInfo(const AzureLocation& location) { if (location.container.empty()) { DCHECK(location.path.empty()); @@ -1560,6 +1606,10 @@ AzureFileSystem::AzureFileSystem(std::unique_ptr&& impl) default_async_is_sync_ = false; } +void AzureFileSystem::ForceCachedHierarchicalNamespaceSupport(int hns_support) { + impl_->ForceCachedHierarchicalNamespaceSupport(hns_support); +} + Result> AzureFileSystem::Make( const AzureOptions& options, const io::IOContext& io_context) { ARROW_ASSIGN_OR_RAISE(auto impl, AzureFileSystem::Impl::Make(options, io_context)); diff --git a/cpp/src/arrow/filesystem/azurefs.h b/cpp/src/arrow/filesystem/azurefs.h index be3ca5ba238ae..78e0a8148c616 100644 --- a/cpp/src/arrow/filesystem/azurefs.h +++ b/cpp/src/arrow/filesystem/azurefs.h @@ -44,8 +44,13 @@ class DataLakeServiceClient; namespace arrow::fs { +class TestAzureFileSystem; + /// Options for the AzureFileSystem implementation. struct ARROW_EXPORT AzureOptions { + /// \brief account name of the Azure Storage account. + std::string account_name; + /// \brief hostname[:port] of the Azure Blob Storage Service. /// /// If the hostname is a relative domain name (one that starts with a '.'), then storage @@ -92,7 +97,6 @@ struct ARROW_EXPORT AzureOptions { kStorageSharedKeyCredential, } credential_kind_ = CredentialKind::kAnonymous; - std::string account_name_; std::shared_ptr token_credential_; std::shared_ptr storage_shared_key_credential_; @@ -101,12 +105,17 @@ struct ARROW_EXPORT AzureOptions { AzureOptions(); ~AzureOptions(); - Status ConfigureDefaultCredential(const std::string& account_name); + Status ConfigureDefaultCredential(); + + Status ConfigureManagedIdentityCredential(const std::string& client_id = std::string()); - Status ConfigureWorkloadIdentityCredential(const std::string& account_name); + Status ConfigureWorkloadIdentityCredential(); - Status ConfigureAccountKeyCredential(const std::string& account_name, - const std::string& account_key); + Status ConfigureAccountKeyCredential(const std::string& account_key); + + Status ConfigureClientSecretCredential(const std::string& tenant_id, + const std::string& client_id, + const std::string& client_secret); bool Equals(const AzureOptions& other) const; @@ -156,6 +165,9 @@ class ARROW_EXPORT AzureFileSystem : public FileSystem { explicit AzureFileSystem(std::unique_ptr&& impl); + friend class TestAzureFileSystem; + void ForceCachedHierarchicalNamespaceSupport(int hns_support); + public: ~AzureFileSystem() override = default; diff --git a/cpp/src/arrow/filesystem/azurefs_test.cc b/cpp/src/arrow/filesystem/azurefs_test.cc index ecf7522b98eef..f6af9f722dbac 100644 --- a/cpp/src/arrow/filesystem/azurefs_test.cc +++ b/cpp/src/arrow/filesystem/azurefs_test.cc @@ -62,7 +62,6 @@ namespace arrow { using internal::TemporaryDir; namespace fs { using internal::ConcatAbstractPath; -namespace { namespace bp = boost::process; using ::testing::IsEmpty; @@ -272,15 +271,44 @@ class AzureHierarchicalNSEnv : public AzureEnvImpl { bool WithHierarchicalNamespace() const final { return true; } }; +TEST(AzureFileSystem, InitializingFilesystemWithoutAccountNameFails) { + AzureOptions options; + ASSERT_RAISES(Invalid, options.ConfigureAccountKeyCredential("account_key")); + + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + ASSERT_RAISES(Invalid, AzureFileSystem::Make(options)); +} + +TEST(AzureFileSystem, 
InitializeFilesystemWithClientSecretCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK( + options.ConfigureClientSecretCredential("tenant_id", "client_id", "client_secret")); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + TEST(AzureFileSystem, InitializeFilesystemWithDefaultCredential) { AzureOptions options; - ARROW_EXPECT_OK(options.ConfigureDefaultCredential("dummy-account-name")); + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureDefaultCredential()); + EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); +} + +TEST(AzureFileSystem, InitializeFilesystemWithManagedIdentityCredential) { + AzureOptions options; + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); + + ARROW_EXPECT_OK(options.ConfigureManagedIdentityCredential("specific-client-id")); + EXPECT_OK_AND_ASSIGN(fs, AzureFileSystem::Make(options)); } TEST(AzureFileSystem, InitializeFilesystemWithWorkloadIdentityCredential) { AzureOptions options; - ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential("dummy-account-name")); + options.account_name = "dummy-account-name"; + ARROW_EXPECT_OK(options.ConfigureWorkloadIdentityCredential()); EXPECT_OK_AND_ASSIGN(auto fs, AzureFileSystem::Make(options)); } @@ -354,7 +382,7 @@ class TestAzureFileSystem : public ::testing::Test { bool set_up_succeeded_ = false; AzureOptions options_; - std::shared_ptr fs_; + std::shared_ptr fs_dont_use_directly_; // use fs() std::unique_ptr blob_service_client_; std::unique_ptr datalake_service_client_; @@ -362,9 +390,22 @@ class TestAzureFileSystem : public ::testing::Test { TestAzureFileSystem() : rng_(std::random_device()()) {} virtual Result GetAzureEnv() const = 0; + virtual HNSSupport CachedHNSSupport(const BaseAzureEnv& env) const = 0; + + FileSystem* fs(HNSSupport cached_hns_support) const { + auto* fs_ptr = fs_dont_use_directly_.get(); + fs_ptr->ForceCachedHierarchicalNamespaceSupport(static_cast(cached_hns_support)); + return fs_ptr; + } + + FileSystem* fs() const { + EXPECT_OK_AND_ASSIGN(auto env, GetAzureEnv()); + return fs(CachedHNSSupport(*env)); + } static Result MakeOptions(BaseAzureEnv* env) { AzureOptions options; + options.account_name = env->account_name(); switch (env->backend()) { case AzureBackend::kAzurite: options.blob_storage_authority = "127.0.0.1:10000"; @@ -376,8 +417,7 @@ class TestAzureFileSystem : public ::testing::Test { // Use the default values break; } - ARROW_EXPECT_OK( - options.ConfigureAccountKeyCredential(env->account_name(), env->account_key())); + ARROW_EXPECT_OK(options.ConfigureAccountKeyCredential(env->account_key())); return options; } @@ -395,7 +435,7 @@ class TestAzureFileSystem : public ::testing::Test { EXPECT_OK_AND_ASSIGN(options_, options_res); } - ASSERT_OK_AND_ASSIGN(fs_, AzureFileSystem::Make(options_)); + ASSERT_OK_AND_ASSIGN(fs_dont_use_directly_, AzureFileSystem::Make(options_)); EXPECT_OK_AND_ASSIGN(blob_service_client_, options_.MakeBlobServiceClient()); EXPECT_OK_AND_ASSIGN(datalake_service_client_, options_.MakeDataLakeServiceClient()); set_up_succeeded_ = true; @@ -435,7 +475,7 @@ class TestAzureFileSystem : public ::testing::Test { void UploadLines(const std::vector& lines, const std::string& path, int total_size) { - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, 
{})); const auto all_lines = std::accumulate(lines.begin(), lines.end(), std::string("")); ASSERT_OK(output->Write(all_lines)); ASSERT_OK(output->Close()); @@ -461,19 +501,19 @@ class TestAzureFileSystem : public ::testing::Test { const auto sub_directory_path = ConcatAbstractPath(directory_path, "new-sub"); const auto sub_blob_path = ConcatAbstractPath(sub_directory_path, "sub.txt"); const auto top_blob_path = ConcatAbstractPath(directory_path, "top.txt"); - ASSERT_OK(fs_->CreateDir(sub_directory_path, true)); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(sub_blob_path)); + ASSERT_OK(fs()->CreateDir(sub_directory_path, true)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(sub_blob_path)); ASSERT_OK(output->Write(std::string_view("sub"))); ASSERT_OK(output->Close()); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(top_blob_path)); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream(top_blob_path)); ASSERT_OK(output->Write(std::string_view("top"))); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); - AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - AssertFileInfo(fs_.get(), sub_directory_path, FileType::Directory); - AssertFileInfo(fs_.get(), sub_blob_path, FileType::File); - AssertFileInfo(fs_.get(), top_blob_path, FileType::File); + AssertFileInfo(fs(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), directory_path, FileType::Directory); + AssertFileInfo(fs(), sub_directory_path, FileType::Directory); + AssertFileInfo(fs(), sub_blob_path, FileType::File); + AssertFileInfo(fs(), top_blob_path, FileType::File); paths->container = data.container_name; paths->directory = directory_path; @@ -538,52 +578,52 @@ class TestAzureFileSystem : public ::testing::Test { const auto directory_path = data.RandomDirectoryPath(rng_); if (WithHierarchicalNamespace()) { - ASSERT_OK(fs_->CreateDir(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() and DeleteDir() do nothing. - ASSERT_OK(fs_->CreateDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } } void TestCreateDirSuccessContainerAndDirectory() { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs_->CreateDir(path, false)); + ASSERT_OK(fs()->CreateDir(path, false)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. 
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); + AssertFileInfo(fs(), path, FileType::NotFound); } } void TestCreateDirRecursiveSuccessContainerOnly() { auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name, true)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + ASSERT_OK(fs()->CreateDir(container_name, true)); + AssertFileInfo(fs(), container_name, FileType::Directory); } void TestCreateDirRecursiveSuccessDirectoryOnly() { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); + ASSERT_OK(fs()->CreateDir(path, true)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } } @@ -591,31 +631,31 @@ class TestAzureFileSystem : public ::testing::Test { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); + ASSERT_OK(fs()->CreateDir(path, true)); if (WithHierarchicalNamespace()) { - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), path, FileType::Directory); + AssertFileInfo(fs(), parent, FileType::Directory); + AssertFileInfo(fs(), data.container_name, FileType::Directory); } else { // There is only virtual directory without hierarchical namespace // support. So the CreateDir() does nothing. 
- arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); + AssertFileInfo(fs(), data.container_name, FileType::Directory); } } void TestDeleteDirContentsSuccessNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - ASSERT_OK(fs_->DeleteDirContents(directory_path, true)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDirContents(directory_path, true)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } void TestDeleteDirContentsFailureNonexistent() { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs_->DeleteDirContents(directory_path, false)); + ASSERT_RAISES(IOError, fs()->DeleteDirContents(directory_path, false)); } }; @@ -672,12 +712,12 @@ void TestAzureFileSystem::TestGetFileInfoObject() { .GetProperties() .Value; - AssertFileInfo(fs_.get(), data.ObjectPath(), FileType::File, + AssertFileInfo(fs(), data.ObjectPath(), FileType::File, std::chrono::system_clock::time_point{object_properties.LastModified}, static_cast(object_properties.BlobSize)); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + std::string{data.kObjectName})); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + std::string{data.kObjectName})); } void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { @@ -685,37 +725,37 @@ void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { // Adds detailed tests to handle cases of different edge cases // with directory naming conventions (e.g. with and without slashes). const std::string kObjectName = "test-object-dir/some_other_dir/another_dir/foo"; - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(data.ContainerPath(kObjectName), - /*metadata=*/{})); + ASSERT_OK_AND_ASSIGN(auto output, + fs()->OpenOutputStream(data.ContainerPath(kObjectName), + /*metadata=*/{})); const std::string_view lorem_ipsum(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); // 0 is immediately after "/" lexicographically, ensure that this doesn't // cause unexpected issues. 
- ASSERT_OK_AND_ASSIGN( - output, fs_->OpenOutputStream(data.ContainerPath("test-object-dir/some_other_dir0"), - /*metadata=*/{})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream( + data.ContainerPath("test-object-dir/some_other_dir0"), + /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); ASSERT_OK_AND_ASSIGN(output, - fs_->OpenOutputStream(data.ContainerPath(kObjectName + "0"), - /*metadata=*/{})); + fs()->OpenOutputStream(data.ContainerPath(kObjectName + "0"), + /*metadata=*/{})); ASSERT_OK(output->Write(lorem_ipsum)); ASSERT_OK(output->Close()); - AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName), FileType::File); - AssertFileInfo(fs_.get(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir"), FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir") + "/", - FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir"), + AssertFileInfo(fs(), data.ContainerPath(kObjectName), FileType::File); + AssertFileInfo(fs(), data.ContainerPath(kObjectName) + "/", FileType::NotFound); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir"), FileType::Directory); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir") + "/", FileType::Directory); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_dir"), FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_dir") + "/", + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_dir") + "/", FileType::Directory); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-di"), FileType::NotFound); - AssertFileInfo(fs_.get(), data.ContainerPath("test-object-dir/some_other_di"), + AssertFileInfo(fs(), data.ContainerPath("test-object-di"), FileType::NotFound); + AssertFileInfo(fs(), data.ContainerPath("test-object-dir/some_other_di"), FileType::NotFound); if (WithHierarchicalNamespace()) { @@ -723,17 +763,45 @@ void TestAzureFileSystem::TestGetFileInfoObjectWithNestedStructure() { .GetDirectoryClient("test-empty-object-dir") .Create(); - AssertFileInfo(fs_.get(), data.ContainerPath("test-empty-object-dir"), + AssertFileInfo(fs(), data.ContainerPath("test-empty-object-dir"), FileType::Directory); } } -template +template +struct TestingScenario { + using AzureEnvClass = AzureEnv; + static constexpr bool kHNSSupportShouldBeKnown = HNSSupportShouldBeKnown; +}; + +template class AzureFileSystemTestImpl : public TestAzureFileSystem { public: + using AzureEnvClass = typename TestingScenario::AzureEnvClass; + using TestAzureFileSystem::TestAzureFileSystem; Result GetAzureEnv() const final { return AzureEnvClass::GetInstance(); } + + /// \brief HNSSupport value that should be assumed as the cached + /// HNSSupport on every fs()->Operation(...) call in tests. + /// + /// If TestingScenario::kHNSSupportShouldBeKnown is true, this value + /// will be HNSSupport::kEnabled or HNSSupport::kDisabled, depending + /// on the environment. Otherwise, this value will be HNSSupport::kUnknown. + /// + /// This ensures all the branches in the AzureFileSystem code operations are tested. + /// For instance, many operations executed on a missing container, wouldn't + /// get a HNSSupport::kContainerNotFound error if the cached HNSSupport was + /// already known due to a previous operation that cached the HNSSupport value. 
+ HNSSupport CachedHNSSupport(const BaseAzureEnv& env) const final { + if constexpr (TestingScenario::kHNSSupportShouldBeKnown) { + return env.WithHierarchicalNamespace() ? HNSSupport::kEnabled + : HNSSupport::kDisabled; + } else { + return HNSSupport::kUnknown; + } + } }; // How to enable the non-Azurite tests: @@ -762,54 +830,71 @@ class AzureFileSystemTestImpl : public TestAzureFileSystem { // [1]: https://azure.microsoft.com/en-gb/free/ // [2]: // https://learn.microsoft.com/en-us/azure/storage/blobs/create-data-lake-storage-account -using TestAzureFlatNSFileSystem = AzureFileSystemTestImpl; -using TestAzureHierarchicalNSFileSystem = AzureFileSystemTestImpl; -using TestAzuriteFileSystem = AzureFileSystemTestImpl; +using TestAzureFlatNSFileSystem = + AzureFileSystemTestImpl>; +using TestAzureHierarchicalNSFileSystem = + AzureFileSystemTestImpl>; +using TestAzuriteFileSystem = AzureFileSystemTestImpl>; -// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) - -template -using AzureFileSystemTestOnAllEnvs = AzureFileSystemTestImpl; +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS). +template +using TestAzureFileSystemOnAllEnvs = AzureFileSystemTestImpl; using AllEnvironments = - ::testing::Types; + ::testing::Types, TestingScenario, + TestingScenario>; -TYPED_TEST_SUITE(AzureFileSystemTestOnAllEnvs, AllEnvironments); +TYPED_TEST_SUITE(TestAzureFileSystemOnAllEnvs, AllEnvironments); -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespace) { +TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespace) { this->TestDetectHierarchicalNamespace(true); this->TestDetectHierarchicalNamespace(false); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DetectHierarchicalNamespaceOnMissingContainer) { +TYPED_TEST(TestAzureFileSystemOnAllEnvs, DetectHierarchicalNamespaceOnMissingContainer) { this->TestDetectHierarchicalNamespaceOnMissingContainer(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObject) { +// Tests using all the 3 environments (Azurite, Azure w/o HNS (flat), Azure w/ HNS) +// combined with the two scenarios for AzureFileSystem::cached_hns_support_ -- unknown and +// known according to the environment. 
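Editor's note: for readers unfamiliar with the pattern used above, here is a minimal, self-contained sketch (hypothetical names, not the Arrow test code) of how a scenario trait that pairs an environment type with a compile-time flag lets one typed fixture exercise both the "cached HNS support already known" and "unknown" branches via `if constexpr`.

```cpp
// Sketch of the compile-time "scenario" pattern: both branches are compiled,
// and each scenario instantiation runs the full typed test suite.
#include <gtest/gtest.h>

enum class Support { kUnknown, kEnabled, kDisabled };

template <class Env, bool kKnown>
struct Scenario {
  using EnvClass = Env;
  static constexpr bool kSupportShouldBeKnown = kKnown;
};

template <class TScenario>
class FixtureImpl : public ::testing::Test {
 public:
  Support CachedSupport(bool env_has_feature) const {
    if constexpr (TScenario::kSupportShouldBeKnown) {
      return env_has_feature ? Support::kEnabled : Support::kDisabled;
    } else {
      return Support::kUnknown;
    }
  }
};

// A ::testing::Types<Scenario<..., false>, Scenario<..., true>, ...> list would
// then feed TYPED_TEST_SUITE so every test body runs under every scenario.
```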
+template +using TestAzureFileSystemOnAllScenarios = AzureFileSystemTestImpl; + +using AllScenarios = ::testing::Types< + TestingScenario, TestingScenario, + TestingScenario, TestingScenario, + TestingScenario, + TestingScenario>; + +TYPED_TEST_SUITE(TestAzureFileSystemOnAllScenarios, AllScenarios); + +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObject) { this->TestGetFileInfoObject(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, DeleteDirSuccessEmpty) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, DeleteDirSuccessEmpty) { this->TestDeleteDirSuccessEmpty(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, GetFileInfoObjectWithNestedStructure) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, GetFileInfoObjectWithNestedStructure) { this->TestGetFileInfoObjectWithNestedStructure(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirSuccessContainerAndDirectory) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirSuccessContainerAndDirectory) { this->TestCreateDirSuccessContainerAndDirectory(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerOnly) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessContainerOnly) { this->TestCreateDirRecursiveSuccessContainerOnly(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessDirectoryOnly) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, CreateDirRecursiveSuccessDirectoryOnly) { this->TestCreateDirRecursiveSuccessDirectoryOnly(); } -TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDirectory) { +TYPED_TEST(TestAzureFileSystemOnAllScenarios, + CreateDirRecursiveSuccessContainerAndDirectory) { this->TestCreateDirRecursiveSuccessContainerAndDirectory(); } @@ -818,41 +903,41 @@ TYPED_TEST(AzureFileSystemTestOnAllEnvs, CreateDirRecursiveSuccessContainerAndDi TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirFailureNonexistent) { auto data = SetUpPreexistingData(); const auto path = data.RandomDirectoryPath(rng_); - ASSERT_RAISES(IOError, fs_->DeleteDir(path)); + ASSERT_RAISES(IOError, fs()->DeleteDir(path)); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveBlob) { auto data = SetUpPreexistingData(); const auto directory_path = data.RandomDirectoryPath(rng_); const auto blob_path = ConcatAbstractPath(directory_path, "hello.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view("hello"))); ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); + AssertFileInfo(fs(), blob_path, FileType::File); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirSuccessHaveDirectory) { auto data = SetUpPreexistingData(); const auto parent = data.RandomDirectoryPath(rng_); const auto path = ConcatAbstractPath(parent, "new-sub"); - ASSERT_OK(fs_->CreateDir(path, true)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(parent)); - arrow::fs::AssertFileInfo(fs_.get(), path, FileType::NotFound); - arrow::fs::AssertFileInfo(fs_.get(), parent, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(path, true)); + AssertFileInfo(fs(), path, FileType::Directory); + 
AssertFileInfo(fs(), parent, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(parent)); + AssertFileInfo(fs(), path, FileType::NotFound); + AssertFileInfo(fs(), parent, FileType::NotFound); } TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsSuccessExist) { auto preexisting_data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::Directory); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); + AssertFileInfo(fs(), paths.directory, FileType::Directory); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -867,20 +952,20 @@ TEST_F(TestAzureHierarchicalNSFileSystem, DeleteDirContentsFailureNonexistent) { // Tests using Azurite (the local Azure emulator) TEST_F(TestAzuriteFileSystem, GetFileInfoAccount) { - AssertFileInfo(fs_.get(), "", FileType::Directory); + AssertFileInfo(fs(), "", FileType::Directory); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://")); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://")); } TEST_F(TestAzuriteFileSystem, GetFileInfoContainer) { auto data = SetUpPreexistingData(); - AssertFileInfo(fs_.get(), data.container_name, FileType::Directory); + AssertFileInfo(fs(), data.container_name, FileType::Directory); - AssertFileInfo(fs_.get(), "nonexistent-container", FileType::NotFound); + AssertFileInfo(fs(), "nonexistent-container", FileType::NotFound); // URI - ASSERT_RAISES(Invalid, fs_->GetFileInfo("abfs://" + data.container_name)); + ASSERT_RAISES(Invalid, fs()->GetFileInfo("abfs://" + data.container_name)); } TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { @@ -891,7 +976,7 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Root dir select.base_dir = ""; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 2); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container", FileType::Directory); @@ -899,18 +984,18 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Empty container select.base_dir = "empty-container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Nonexistent container select.base_dir = "nonexistent-container"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.allow_not_found = true; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.allow_not_found = false; // Non-empty container select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); @@ -920,33 +1005,33 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelector) { // Empty "directory" select.base_dir = "container/emptydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty "directories" select.base_dir = "container/somedir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, 
fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); select.base_dir = "container/somedir/subdir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/somedir/subdir/subfile", FileType::File, 8); // Nonexistent select.base_dir = "container/nonexistent"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.allow_not_found = true; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.allow_not_found = false; // Trailing slashes select.base_dir = "empty-container/"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "nonexistent-container/"; - ASSERT_RAISES(IOError, fs_->GetFileInfo(select)); + ASSERT_RAISES(IOError, fs()->GetFileInfo(select)); select.base_dir = "container/"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); } @@ -960,19 +1045,19 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { std::vector infos; // Root dir select.base_dir = ""; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 12); ASSERT_EQ(infos, SortedInfos(infos)); AssertInfoAllContainersRecursive(infos); // Empty container select.base_dir = "empty-container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty container select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 10); AssertFileInfo(infos[0], "container/emptydir", FileType::Directory); @@ -988,19 +1073,19 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorRecursive) { // Empty "directory" select.base_dir = "container/emptydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); // Non-empty "directories" select.base_dir = "container/somedir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 2); AssertFileInfo(infos[0], "container/somedir/subdir", FileType::Directory); AssertFileInfo(infos[1], "container/somedir/subdir/subfile", FileType::File, 8); select.base_dir = "container/otherdir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos, SortedInfos(infos)); ASSERT_EQ(infos.size(), 4); AssertFileInfo(infos[0], "container/otherdir/1", FileType::Directory); @@ -1023,13 +1108,13 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { FileSelector select; // non-recursive select.base_dir = "container"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); ASSERT_EQ(infos, SortedInfos(infos)); 
AssertFileInfo(infos[0], "container/mydir", FileType::Directory); select.base_dir = "container/mydir"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 4); ASSERT_EQ(infos, SortedInfos(infos)); AssertFileInfo(infos[0], "container/mydir/emptydir1", FileType::Directory); @@ -1038,55 +1123,55 @@ TEST_F(TestAzuriteFileSystem, GetFileInfoSelectorExplicitImplicitDirDedup) { AssertFileInfo(infos[3], "container/mydir/nonemptydir2", FileType::Directory); select.base_dir = "container/mydir/emptydir1"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "container/mydir/emptydir2"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 0); select.base_dir = "container/mydir/nonemptydir1"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/mydir/nonemptydir1/somefile", FileType::File); select.base_dir = "container/mydir/nonemptydir2"; - ASSERT_OK_AND_ASSIGN(infos, fs_->GetFileInfo(select)); + ASSERT_OK_AND_ASSIGN(infos, fs()->GetFileInfo(select)); ASSERT_EQ(infos.size(), 1); AssertFileInfo(infos[0], "container/mydir/nonemptydir2/somefile", FileType::File); } TEST_F(TestAzuriteFileSystem, CreateDirFailureNoContainer) { - ASSERT_RAISES(Invalid, fs_->CreateDir("", false)); + ASSERT_RAISES(Invalid, fs()->CreateDir("", false)); } TEST_F(TestAzuriteFileSystem, CreateDirSuccessContainerOnly) { auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name, false)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); + ASSERT_OK(fs()->CreateDir(container_name, false)); + AssertFileInfo(fs(), container_name, FileType::Directory); } TEST_F(TestAzuriteFileSystem, CreateDirFailureDirectoryWithMissingContainer) { const auto path = std::string("not-a-container/new-directory"); - ASSERT_RAISES(IOError, fs_->CreateDir(path, false)); + ASSERT_RAISES(IOError, fs()->CreateDir(path, false)); } TEST_F(TestAzuriteFileSystem, CreateDirRecursiveFailureNoContainer) { - ASSERT_RAISES(Invalid, fs_->CreateDir("", true)); + ASSERT_RAISES(Invalid, fs()->CreateDir("", true)); } TEST_F(TestAzuriteFileSystem, CreateDirUri) { ASSERT_RAISES( Invalid, - fs_->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); + fs()->CreateDir("abfs://" + PreexistingData::RandomContainerName(rng_), true)); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessContainer) { const auto container_name = PreexistingData::RandomContainerName(rng_); - ASSERT_OK(fs_->CreateDir(container_name)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::Directory); - ASSERT_OK(fs_->DeleteDir(container_name)); - arrow::fs::AssertFileInfo(fs_.get(), container_name, FileType::NotFound); + ASSERT_OK(fs()->CreateDir(container_name)); + AssertFileInfo(fs(), container_name, FileType::Directory); + ASSERT_OK(fs()->DeleteDir(container_name)); + AssertFileInfo(fs(), container_name, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { @@ -1094,8 +1179,8 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessNonexistent) { const auto directory_path = data.RandomDirectoryPath(rng_); // There is only virtual directory without hierarchical namespace // 
support. So the DeleteDir() for nonexistent directory does nothing. - ASSERT_OK(fs_->DeleteDir(directory_path)); - arrow::fs::AssertFileInfo(fs_.get(), directory_path, FileType::NotFound); + ASSERT_OK(fs()->DeleteDir(directory_path)); + AssertFileInfo(fs(), directory_path, FileType::NotFound); } TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { @@ -1110,21 +1195,21 @@ TEST_F(TestAzuriteFileSystem, DeleteDirSuccessHaveBlobs) { int64_t n_blobs = 257; for (int64_t i = 0; i < n_blobs; ++i) { const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(blob_path)); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(blob_path)); ASSERT_OK(output->Write(std::string_view(std::to_string(i)))); ASSERT_OK(output->Close()); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::File); + AssertFileInfo(fs(), blob_path, FileType::File); } - ASSERT_OK(fs_->DeleteDir(directory_path)); + ASSERT_OK(fs()->DeleteDir(directory_path)); for (int64_t i = 0; i < n_blobs; ++i) { const auto blob_path = ConcatAbstractPath(directory_path, std::to_string(i) + ".txt"); - arrow::fs::AssertFileInfo(fs_.get(), blob_path, FileType::NotFound); + AssertFileInfo(fs(), blob_path, FileType::NotFound); } } TEST_F(TestAzuriteFileSystem, DeleteDirUri) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(Invalid, fs_->DeleteDir("abfs://" + data.container_name + "/")); + ASSERT_RAISES(Invalid, fs()->DeleteDir("abfs://" + data.container_name + "/")); } TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { @@ -1135,11 +1220,11 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessContainer) { auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.container)); - arrow::fs::AssertFileInfo(fs_.get(), paths.container, FileType::Directory); - arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + ASSERT_OK(fs()->DeleteDirContents(paths.container)); + AssertFileInfo(fs(), paths.container, FileType::Directory); + AssertFileInfo(fs(), paths.directory, FileType::NotFound); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -1151,11 +1236,11 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsSuccessDirectory) { auto data = SetUpPreexistingData(); HierarchicalPaths paths; CreateHierarchicalData(&paths); - ASSERT_OK(fs_->DeleteDirContents(paths.directory)); + ASSERT_OK(fs()->DeleteDirContents(paths.directory)); // GH-38772: We may change this to FileType::Directory. 
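Editor's note: the 257-blob count in DeleteDirSuccessHaveBlobs above is presumably chosen to cross a one-request batch boundary (Azure Blob Batch accepts at most 256 subrequests per request). A generic sketch of the chunking such a deletion needs, with a hypothetical `delete_batch` callback standing in for the actual bulk-delete call:

```cpp
// Sketch only: delete a directory's blobs in batches of at most kMaxBatch.
#include <algorithm>
#include <cstddef>
#include <string>
#include <vector>

constexpr std::size_t kMaxBatch = 256;  // assumed per-request limit

template <typename DeleteBatchFn>
void DeleteAll(const std::vector<std::string>& blob_paths,
               DeleteBatchFn&& delete_batch) {
  for (std::size_t begin = 0; begin < blob_paths.size(); begin += kMaxBatch) {
    const std::size_t end = std::min(begin + kMaxBatch, blob_paths.size());
    // Each call issues one batched request with at most kMaxBatch deletions.
    delete_batch(std::vector<std::string>(blob_paths.begin() + begin,
                                          blob_paths.begin() + end));
  }
}
```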
- arrow::fs::AssertFileInfo(fs_.get(), paths.directory, FileType::NotFound); + AssertFileInfo(fs(), paths.directory, FileType::NotFound); for (const auto& sub_path : paths.sub_paths) { - arrow::fs::AssertFileInfo(fs_.get(), sub_path, FileType::NotFound); + AssertFileInfo(fs(), sub_path, FileType::NotFound); } } @@ -1170,52 +1255,52 @@ TEST_F(TestAzuriteFileSystem, DeleteDirContentsFailureNonexistent) { TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_OK(fs_->CopyFile(data.ObjectPath(), destination_path)); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(destination_path)); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); + ASSERT_OK(fs()->CopyFile(data.ObjectPath(), destination_path)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(destination_path)); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(TestAzuriteFileSystem, CopyFileSuccessDestinationSame) { auto data = SetUpPreexistingData(); - ASSERT_OK(fs_->CopyFile(data.ObjectPath(), data.ObjectPath())); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(info)); + ASSERT_OK(fs()->CopyFile(data.ObjectPath(), data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(PreexistingData::kLoremIpsum, buffer->ToString()); } TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationTrailingSlash) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), - internal::EnsureTrailingSlash(data.ObjectPath()))); + ASSERT_RAISES(IOError, fs()->CopyFile(data.ObjectPath(), internal::EnsureTrailingSlash( + data.ObjectPath()))); } TEST_F(TestAzuriteFileSystem, CopyFileFailureSourceNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(data.NotFoundObjectPath(), destination_path)); + ASSERT_RAISES(IOError, fs()->CopyFile(data.NotFoundObjectPath(), destination_path)); } TEST_F(TestAzuriteFileSystem, CopyFileFailureDestinationParentNonexistent) { auto data = SetUpPreexistingData(); const auto destination_path = ConcatAbstractPath(PreexistingData::RandomContainerName(rng_), "copy-destionation"); - ASSERT_RAISES(IOError, fs_->CopyFile(data.ObjectPath(), destination_path)); + ASSERT_RAISES(IOError, fs()->CopyFile(data.ObjectPath(), destination_path)); } TEST_F(TestAzuriteFileSystem, CopyFileUri) { auto data = SetUpPreexistingData(); const auto destination_path = data.ContainerPath("copy-destionation"); - ASSERT_RAISES(Invalid, fs_->CopyFile("abfs://" + data.ObjectPath(), destination_path)); - ASSERT_RAISES(Invalid, fs_->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); + ASSERT_RAISES(Invalid, fs()->CopyFile("abfs://" + data.ObjectPath(), destination_path)); + ASSERT_RAISES(Invalid, fs()->CopyFile(data.ObjectPath(), "abfs://" + destination_path)); } TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, 
fs()->OpenInputStream(data.ObjectPath())); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); @@ -1224,7 +1309,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamString) { TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(data.ObjectPath())); std::string contents; std::shared_ptr buffer; @@ -1238,10 +1323,10 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamStringBuffers) { TEST_F(TestAzuriteFileSystem, OpenInputStreamInfo) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(info)); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(info)); ASSERT_OK_AND_ASSIGN(auto buffer, stream->Read(1024)); EXPECT_EQ(buffer->ToString(), PreexistingData::kLoremIpsum); @@ -1255,7 +1340,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { .GetBlockBlobClient(path_to_file) .UploadFrom(nullptr, 0); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(path)); std::array buffer{}; std::int64_t size; ASSERT_OK_AND_ASSIGN(size, stream->Read(buffer.size(), buffer.data())); @@ -1264,26 +1349,26 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamEmpty) { TEST_F(TestAzuriteFileSystem, OpenInputStreamNotFound) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputStream(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputStream(data.NotFoundObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputStreamInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name + "/")); - ASSERT_RAISES(IOError, fs_->OpenInputStream(info)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.container_name + "/")); + ASSERT_RAISES(IOError, fs()->OpenInputStream(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); - ASSERT_RAISES(IOError, fs_->OpenInputStream(info2)); + ASSERT_OK_AND_ASSIGN(auto info2, fs()->GetFileInfo(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputStream(info2)); } TEST_F(TestAzuriteFileSystem, OpenInputStreamUri) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + data.ObjectPath())); + ASSERT_RAISES(Invalid, fs()->OpenInputStream("abfs://" + data.ObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputStreamTrailingSlash) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputStream(data.ObjectPath() + '/')); + ASSERT_RAISES(IOError, fs()->OpenInputStream(data.ObjectPath() + '/')); } namespace { @@ -1324,7 +1409,7 @@ std::shared_ptr NormalizerKeyValueMetadata( TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { auto data = SetUpPreexistingData(); std::shared_ptr stream; - ASSERT_OK_AND_ASSIGN(stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(stream, fs()->OpenInputStream(data.ObjectPath())); std::shared_ptr actual; ASSERT_OK_AND_ASSIGN(actual, stream->ReadMetadata()); @@ -1354,7 +1439,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputStreamReadMetadata) { TEST_F(TestAzuriteFileSystem, OpenInputStreamClosed) { auto data = 
SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputStream(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputStream(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Read(buffer.size(), buffer.data())); @@ -1399,13 +1484,13 @@ TEST_F(TestAzuriteFileSystem, WriteMetadata) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected)); ASSERT_OK(output->Close()); // Verify we can read the object back. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); @@ -1416,7 +1501,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamSmall) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); std::array sizes{257 * 1024, 258 * 1024, 259 * 1024}; std::array buffers{ std::string(sizes[0], 'A'), @@ -1432,7 +1517,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { ASSERT_OK(output->Close()); // Verify we can read the object back. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::string contents; std::shared_ptr buffer; @@ -1448,26 +1533,26 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamLarge) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); ASSERT_OK(output->Close()); // Check that the initial content has been written - if not this test is not achieving // what it's meant to. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected0, std::string_view(inbuf.data(), size)); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenOutputStream(path, {})); const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); // Verify that the initial content has been overwritten. 
- ASSERT_OK_AND_ASSIGN(input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(input, fs()->OpenInputStream(path)); ASSERT_OK_AND_ASSIGN(size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected1, std::string_view(inbuf.data(), size)); } @@ -1475,27 +1560,27 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamTruncatesExistingFile) { TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("test-write-object"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); const std::string_view expected0("Existing blob content"); ASSERT_OK(output->Write(expected0)); ASSERT_OK(output->Close()); // Check that the initial content has been written - if not this test is not achieving // what it's meant to. - ASSERT_OK_AND_ASSIGN(auto input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(auto input, fs()->OpenInputStream(path)); std::array inbuf{}; ASSERT_OK_AND_ASSIGN(auto size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(expected0, std::string_view(inbuf.data())); - ASSERT_OK_AND_ASSIGN(output, fs_->OpenAppendStream(path, {})); + ASSERT_OK_AND_ASSIGN(output, fs()->OpenAppendStream(path, {})); const std::string_view expected1(PreexistingData::kLoremIpsum); ASSERT_OK(output->Write(expected1)); ASSERT_OK(output->Close()); // Verify that the initial content has not been overwritten and that the block from // the other client was not committed. - ASSERT_OK_AND_ASSIGN(input, fs_->OpenInputStream(path)); + ASSERT_OK_AND_ASSIGN(input, fs()->OpenInputStream(path)); ASSERT_OK_AND_ASSIGN(size, input->Read(inbuf.size(), inbuf.data())); EXPECT_EQ(std::string(inbuf.data(), size), std::string(expected0) + std::string(expected1)); @@ -1504,7 +1589,7 @@ TEST_F(TestAzuriteFileSystem, OpenAppendStreamDoesNotTruncateExistingFile) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-closed.txt"); - ASSERT_OK_AND_ASSIGN(auto output, fs_->OpenOutputStream(path, {})); + ASSERT_OK_AND_ASSIGN(auto output, fs()->OpenOutputStream(path, {})); ASSERT_OK(output->Close()); ASSERT_RAISES(Invalid, output->Write(PreexistingData::kLoremIpsum, std::strlen(PreexistingData::kLoremIpsum))); @@ -1515,7 +1600,7 @@ TEST_F(TestAzuriteFileSystem, OpenOutputStreamClosed) { TEST_F(TestAzuriteFileSystem, OpenOutputStreamUri) { auto data = SetUpPreexistingData(); const auto path = data.ContainerPath("open-output-stream-uri.txt"); - ASSERT_RAISES(Invalid, fs_->OpenInputStream("abfs://" + path)); + ASSERT_RAISES(Invalid, fs()->OpenInputStream("abfs://" + path)); } TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { @@ -1534,7 +1619,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileMixedReadVsReadAt) { UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. 
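Editor's note: the OpenInputFile tests above build the object from fixed-width lines, so line i starts at byte i * kLineWidth and a positional read of kLineWidth bytes at that offset should reproduce lines[i] regardless of where the sequential cursor is. A small sketch of that invariant against a generic positional-read stand-in (hypothetical, not the Arrow io API):

```cpp
// Sketch only: fixed-width lines make any line addressable by offset arithmetic.
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

struct PositionalReader {
  std::string contents;
  // Positional read: does not depend on, or move, any sequential cursor.
  std::string ReadAt(int64_t position, int64_t nbytes) const {
    return contents.substr(static_cast<std::size_t>(position),
                           static_cast<std::size_t>(nbytes));
  }
};

bool CheckRandomAccessByLine(const PositionalReader& file,
                             const std::vector<std::string>& lines,
                             int64_t line_width) {
  for (std::size_t i = 0; i < lines.size(); ++i) {
    if (file.ReadAt(static_cast<int64_t>(i) * line_width, line_width) != lines[i]) {
      return false;  // line i must live at offset i * line_width
    }
  }
  return true;
}
```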
@@ -1582,7 +1667,7 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileRandomSeek) { UploadLines(lines, path, kLineCount * kLineWidth); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); for (int i = 0; i != 32; ++i) { SCOPED_TRACE("Iteration " + std::to_string(i)); // Verify sequential reads work as expected. @@ -1607,16 +1692,16 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileIoContext) { contents.length()); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(path)); - EXPECT_EQ(fs_->io_context().external_id(), file->io_context().external_id()); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(path)); + EXPECT_EQ(fs()->io_context().external_id(), file->io_context().external_id()); } TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.ObjectPath())); std::shared_ptr file; - ASSERT_OK_AND_ASSIGN(file, fs_->OpenInputFile(info)); + ASSERT_OK_AND_ASSIGN(file, fs()->OpenInputFile(info)); std::array buffer{}; std::int64_t size; @@ -1629,21 +1714,21 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileInfo) { TEST_F(TestAzuriteFileSystem, OpenInputFileNotFound) { auto data = SetUpPreexistingData(); - ASSERT_RAISES(IOError, fs_->OpenInputFile(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputFile(data.NotFoundObjectPath())); } TEST_F(TestAzuriteFileSystem, OpenInputFileInfoInvalid) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto info, fs_->GetFileInfo(data.container_name)); - ASSERT_RAISES(IOError, fs_->OpenInputFile(info)); + ASSERT_OK_AND_ASSIGN(auto info, fs()->GetFileInfo(data.container_name)); + ASSERT_RAISES(IOError, fs()->OpenInputFile(info)); - ASSERT_OK_AND_ASSIGN(auto info2, fs_->GetFileInfo(data.NotFoundObjectPath())); - ASSERT_RAISES(IOError, fs_->OpenInputFile(info2)); + ASSERT_OK_AND_ASSIGN(auto info2, fs()->GetFileInfo(data.NotFoundObjectPath())); + ASSERT_RAISES(IOError, fs()->OpenInputFile(info2)); } TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { auto data = SetUpPreexistingData(); - ASSERT_OK_AND_ASSIGN(auto stream, fs_->OpenInputFile(data.ObjectPath())); + ASSERT_OK_AND_ASSIGN(auto stream, fs()->OpenInputFile(data.ObjectPath())); ASSERT_OK(stream->Close()); std::array buffer{}; ASSERT_RAISES(Invalid, stream->Tell()); @@ -1654,6 +1739,5 @@ TEST_F(TestAzuriteFileSystem, OpenInputFileClosed) { ASSERT_RAISES(Invalid, stream->Seek(2)); } -} // namespace } // namespace fs } // namespace arrow diff --git a/cpp/src/arrow/scalar_test.cc b/cpp/src/arrow/scalar_test.cc index ac740f92c8527..e8b8784e7a314 100644 --- a/cpp/src/arrow/scalar_test.cc +++ b/cpp/src/arrow/scalar_test.cc @@ -1077,7 +1077,8 @@ std::shared_ptr MakeListType( template void CheckListCast(const ScalarType& scalar, const std::shared_ptr& to_type) { - EXPECT_OK_AND_ASSIGN(auto cast_scalar, scalar.CastTo(to_type)); + EXPECT_OK_AND_ASSIGN(auto cast_scalar_datum, Cast(scalar, to_type)); + const auto& cast_scalar = cast_scalar_datum.scalar(); ASSERT_OK(cast_scalar->ValidateFull()); ASSERT_EQ(*cast_scalar->type, *to_type); @@ -1087,11 +1088,25 @@ void CheckListCast(const ScalarType& scalar, const std::shared_ptr& to *checked_cast(*cast_scalar).value); } -void CheckInvalidListCast(const Scalar& scalar, const std::shared_ptr& to_type, - const std::string& expected_message) { - EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(StatusCode::Invalid, - 
::testing::HasSubstr(expected_message), - scalar.CastTo(to_type)); +template +void CheckListCastError(const ScalarType& scalar, + const std::shared_ptr& to_type) { + StatusCode code; + std::string expected_message; + if (scalar.type->id() == Type::FIXED_SIZE_LIST) { + code = StatusCode::TypeError; + expected_message = + "Size of FixedSizeList is not the same. input list: " + scalar.type->ToString() + + " output list: " + to_type->ToString(); + } else { + code = StatusCode::Invalid; + expected_message = + "ListType can only be casted to FixedSizeListType if the lists are all the " + "expected size."; + } + + EXPECT_RAISES_WITH_CODE_AND_MESSAGE_THAT(code, ::testing::HasSubstr(expected_message), + Cast(scalar, to_type)); } template @@ -1178,10 +1193,8 @@ class TestListLikeScalar : public ::testing::Test { CheckListCast( scalar, fixed_size_list(value_->type(), static_cast(value_->length()))); - CheckInvalidListCast(scalar, fixed_size_list(value_->type(), 5), - "Cannot cast " + scalar.type->ToString() + " of length " + - std::to_string(value_->length()) + - " to fixed size list of length 5"); + auto invalid_cast_type = fixed_size_list(value_->type(), 5); + CheckListCastError(scalar, invalid_cast_type); } protected: @@ -1238,10 +1251,8 @@ TEST(TestMapScalar, Cast) { CheckListCast(scalar, large_list(key_value_type)); CheckListCast(scalar, fixed_size_list(key_value_type, 2)); - CheckInvalidListCast(scalar, fixed_size_list(key_value_type, 5), - "Cannot cast " + scalar.type->ToString() + " of length " + - std::to_string(value->length()) + - " to fixed size list of length 5"); + auto invalid_cast_type = fixed_size_list(key_value_type, 5); + CheckListCastError(scalar, invalid_cast_type); } TEST(TestStructScalar, FieldAccess) { diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc index 9ad1ee6efc12a..840efa12cc3c1 100644 --- a/cpp/src/parquet/encoding.cc +++ b/cpp/src/parquet/encoding.cc @@ -1080,9 +1080,7 @@ inline int DecodePlain(const uint8_t* data, int64_t data_size ParquetException::EofException(); } for (int i = 0; i < num_values; ++i) { - out[i].ptr = data; - data += type_length; - data_size -= type_length; + out[i].ptr = data + i * type_length; } return static_cast(bytes_to_decode); } diff --git a/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs b/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs index 5dc0d1b434b6d..a7c459935c240 100644 --- a/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs +++ b/csharp/src/Apache.Arrow.Flight/Client/FlightClient.cs @@ -16,10 +16,8 @@ using System.Threading.Tasks; using Apache.Arrow.Flight.Internal; using Apache.Arrow.Flight.Protocol; -using Apache.Arrow.Flight.Server; using Apache.Arrow.Flight.Server.Internal; using Grpc.Core; -using Grpc.Net.Client; namespace Apache.Arrow.Flight.Client { @@ -29,7 +27,7 @@ public class FlightClient private readonly FlightService.FlightServiceClient _client; - public FlightClient(GrpcChannel grpcChannel) + public FlightClient(ChannelBase grpcChannel) { _client = new FlightService.FlightServiceClient(grpcChannel); } diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs index 463ca49e29c94..698d74e4bac84 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs @@ -14,6 +14,7 @@ // limitations under the License. 
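Editor's note: the parquet change above (in DecodePlain for FIXED_LEN_BYTE_ARRAY) stops mutating the data cursor inside the loop and instead derives each element pointer from the base, which is safe because the total byte count is validated once before the loop. A self-contained sketch of the resulting shape, with hypothetical types rather than the parquet-cpp signatures:

```cpp
// Sketch only: plain-decode fixed-length byte arrays by indexing from the base
// pointer after a single up-front bounds check.
#include <cstdint>
#include <stdexcept>

struct ByteArrayRef { const uint8_t* ptr; };

int DecodeFixedLenPlain(const uint8_t* data, int64_t data_size, int num_values,
                        int type_length, ByteArrayRef* out) {
  const int64_t bytes_to_decode = int64_t{type_length} * num_values;
  if (bytes_to_decode > data_size) {
    throw std::runtime_error("EOF: not enough bytes for the requested values");
  }
  for (int i = 0; i < num_values; ++i) {
    out[i].ptr = data + i * int64_t{type_length};  // no cursor mutation needed
  }
  return static_cast<int>(bytes_to_decode);
}
```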
using Apache.Arrow.Memory; +using Apache.Arrow.Scalars; using Apache.Arrow.Types; using System; using System.Collections.Generic; @@ -46,8 +47,11 @@ private class ArrayDataConcatenationVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -84,17 +88,50 @@ public void Visit(FixedWidthType type) { CheckData(type, 2); ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); - ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(type); + ArrowBuffer valueBuffer = ConcatenateFixedWidthTypeValueBuffer(1, type); Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, valueBuffer }); } public void Visit(BinaryType type) => ConcatenateVariableBinaryArrayData(type); + public void Visit(BinaryViewType type) => ConcatenateBinaryViewArrayData(type); + public void Visit(StringType type) => ConcatenateVariableBinaryArrayData(type); + public void Visit(StringViewType type) => ConcatenateBinaryViewArrayData(type); + public void Visit(ListType type) => ConcatenateLists(type); + public void Visit(ListViewType type) + { + CheckData(type, 3); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + + var offsetsBuilder = new ArrowBuffer.Builder(_totalLength); + int baseOffset = 0; + + foreach (ArrayData arrayData in _arrayDataList) + { + if (arrayData.Length > 0) + { + ReadOnlySpan span = arrayData.Buffers[1].Span.CastTo().Slice(0, arrayData.Length); + foreach (int offset in span) + { + offsetsBuilder.Append(baseOffset + offset); + } + } + + baseOffset += arrayData.Children[0].Length; + } + + ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator); + ArrowBuffer sizesBuffer = ConcatenateFixedWidthTypeValueBuffer(2, Int32Type.Default); + ArrayData child = Concatenate(SelectChildren(0), _allocator); + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, sizesBuffer }, new[] { child }); + } + public void Visit(FixedSizeListType type) { CheckData(type, 1); @@ -161,6 +198,15 @@ private void CheckData(IArrowType type, int expectedBufferCount) } } + private void CheckDataVariadicCount(IArrowType type, int expectedBufferCount) + { + foreach (ArrayData arrayData in _arrayDataList) + { + arrayData.EnsureDataType(type.TypeId); + arrayData.EnsureVariadicBufferCount(expectedBufferCount); + } + } + private void ConcatenateVariableBinaryArrayData(IArrowType type) { CheckData(type, 3); @@ -171,6 +217,26 @@ private void ConcatenateVariableBinaryArrayData(IArrowType type) Result = new ArrayData(type, _totalLength, _totalNullCount, 0, new ArrowBuffer[] { validityBuffer, offsetBuffer, valueBuffer }); } + private void ConcatenateBinaryViewArrayData(IArrowType type) + { + CheckDataVariadicCount(type, 2); + ArrowBuffer validityBuffer = ConcatenateValidityBuffer(); + ArrowBuffer viewBuffer = ConcatenateViewBuffer(out int variadicBufferCount); + ArrowBuffer[] buffers = new ArrowBuffer[2 + variadicBufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewBuffer; + int index = 2; + foreach (ArrayData arrayData in _arrayDataList) + { + for (int i = 2; i < arrayData.Buffers.Length; i++) + { + buffers[index++] = arrayData.Buffers[i]; + } + } + + Result = new ArrayData(type, _totalLength, _totalNullCount, 0, buffers); + } + private void ConcatenateLists(NestedType type) { CheckData(type, 2); @@ -206,7 +272,7 @@ private ArrowBuffer 
ConcatenateBitmapBuffer(int bufferIndex) return builder.Build(_allocator); } - private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type) + private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(int bufferIndex, FixedWidthType type) { int typeByteWidth = type.BitWidth / 8; var builder = new ArrowBuffer.Builder(_totalLength * typeByteWidth); @@ -216,7 +282,7 @@ private ArrowBuffer ConcatenateFixedWidthTypeValueBuffer(FixedWidthType type) int length = arrayData.Length; int byteLength = length * typeByteWidth; - builder.Append(arrayData.Buffers[1].Span.Slice(0, byteLength)); + builder.Append(arrayData.Buffers[bufferIndex].Span.Slice(0, byteLength)); } return builder.Build(_allocator); @@ -265,6 +331,36 @@ private ArrowBuffer ConcatenateOffsetBuffer() return builder.Build(_allocator); } + private ArrowBuffer ConcatenateViewBuffer(out int variadicBufferCount) + { + var builder = new ArrowBuffer.Builder(_totalLength); + variadicBufferCount = 0; + foreach (ArrayData arrayData in _arrayDataList) + { + if (arrayData.Length == 0) + { + continue; + } + + ReadOnlySpan span = arrayData.Buffers[1].Span.CastTo().Slice(0, arrayData.Length); + foreach (BinaryView view in span) + { + if (view.Length > BinaryView.MaxInlineLength) + { + builder.Append(view.AdjustBufferIndex(variadicBufferCount)); + } + else + { + builder.Append(view); + } + } + + variadicBufferCount += (arrayData.Buffers.Length - 2); + } + + return builder.Build(_allocator); + } + private ArrowBuffer ConcatenateUnionTypeBuffer() { var builder = new ArrowBuffer.Builder(_totalLength); diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs index af5a524798396..f8367102082f5 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayBuilderFactory.cs @@ -54,8 +54,12 @@ internal static IArrowArrayBuilder> return new DoubleArray.Builder(); case ArrowTypeId.String: return new StringArray.Builder(); + case ArrowTypeId.StringView: + return new StringViewArray.Builder(); case ArrowTypeId.Binary: return new BinaryArray.Builder(); + case ArrowTypeId.BinaryView: + return new BinaryViewArray.Builder(); case ArrowTypeId.Timestamp: return new TimestampArray.Builder(); case ArrowTypeId.Date64: @@ -70,6 +74,8 @@ internal static IArrowArrayBuilder> return new DurationArray.Builder(dataType as DurationType); case ArrowTypeId.List: return new ListArray.Builder(dataType as ListType); + case ArrowTypeId.ListView: + return new ListViewArray.Builder(dataType as ListViewType); case ArrowTypeId.FixedSizeList: return new FixedSizeListArray.Builder(dataType as FixedSizeListType); case ArrowTypeId.Decimal128: diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs index d6577260bb82d..3d2ab1d2129f1 100644 --- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs +++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs @@ -51,14 +51,20 @@ public static IArrowArray BuildArray(ArrayData data) return new DoubleArray(data); case ArrowTypeId.String: return new StringArray(data); + case ArrowTypeId.StringView: + return new StringViewArray(data); case ArrowTypeId.FixedSizedBinary: return new FixedSizeBinaryArray(data); case ArrowTypeId.Binary: return new BinaryArray(data); + case ArrowTypeId.BinaryView: + return new BinaryViewArray(data); case ArrowTypeId.Timestamp: return new TimestampArray(data); case ArrowTypeId.List: return new ListArray(data); + case 
ArrowTypeId.ListView: + return new ListViewArray(data); case ArrowTypeId.Map: return new MapArray(data); case ArrowTypeId.Struct: diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs new file mode 100644 index 0000000000000..4f62dffd1ddeb --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/BinaryViewArray.cs @@ -0,0 +1,344 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Memory; +using Apache.Arrow.Scalars; +using Apache.Arrow.Types; +using System; +using System.Collections.Generic; +using System.Runtime.CompilerServices; +using System.Collections; + +namespace Apache.Arrow +{ + public class BinaryViewArray : Array, IReadOnlyList + { + public class Builder : BuilderBase + { + public Builder() : base(BinaryViewType.Default) { } + public Builder(IArrowType dataType) : base(dataType) { } + + protected override BinaryViewArray Build(ArrayData data) + { + return new BinaryViewArray(data); + } + } + + public BinaryViewArray(ArrayData data) + : base(data) + { + data.EnsureDataType(ArrowTypeId.BinaryView); + data.EnsureVariadicBufferCount(2); + } + + public BinaryViewArray(ArrowTypeId typeId, ArrayData data) + : base(data) + { + data.EnsureDataType(typeId); + data.EnsureVariadicBufferCount(2); + } + + public abstract class BuilderBase : IArrowArrayBuilder + where TArray : IArrowArray + where TBuilder : class, IArrowArrayBuilder + { + protected IArrowType DataType { get; } + protected TBuilder Instance => this as TBuilder; + protected ArrowBuffer.Builder BinaryViews { get; } + protected ArrowBuffer.Builder ValueBuffer { get; } + protected ArrowBuffer.BitmapBuilder ValidityBuffer { get; } + protected int NullCount => this.ValidityBuffer.UnsetBitCount; + + protected BuilderBase(IArrowType dataType) + { + DataType = dataType; + BinaryViews = new ArrowBuffer.Builder(); + ValueBuffer = new ArrowBuffer.Builder(); + ValidityBuffer = new ArrowBuffer.BitmapBuilder(); + } + + protected abstract TArray Build(ArrayData data); + + /// + /// Gets the length of the array built so far. + /// + public int Length => BinaryViews.Length; + + /// + /// Build an Arrow array from the appended contents so far. + /// + /// Optional memory allocator. + /// Returns an array of type . + public TArray Build(MemoryAllocator allocator = default) + { + bool hasValues = ValueBuffer.Length > 0; + var bufs = new ArrowBuffer[hasValues ? 3 : 2]; + bufs[0] = NullCount > 0 ? ValidityBuffer.Build(allocator) : ArrowBuffer.Empty; + bufs[1] = BinaryViews.Build(allocator); + if (hasValues) { bufs[2] = ValueBuffer.Build(allocator); } + + var data = new ArrayData( + DataType, + length: Length, + NullCount, + offset: 0, + bufs); + + return Build(data); + } + + /// + /// Append a single null value to the array. 
+ /// + /// Returns the builder (for fluent-style composition). + public TBuilder AppendNull() + { + // Do not add to the value buffer in the case of a null. + // Note that we do not need to increment the offset as a result. + ValidityBuffer.Append(false); + BinaryViews.Append(default(BinaryView)); + return Instance; + } + + /// + /// Appends a value, consisting of a single byte, to the array. + /// + /// Byte value to append. + /// Returns the builder (for fluent-style composition). + public TBuilder Append(byte value) + { + ValidityBuffer.Append(true); + Span buf = stackalloc[] { value }; + BinaryViews.Append(new BinaryView(buf)); + return Instance; + } + + /// + /// Append a value, consisting of a span of bytes, to the array. + /// + /// + /// Note that a single value is added, which consists of arbitrarily many bytes. If multiple values are + /// to be added, use the method. + /// + /// Span of bytes to add. + /// Returns the builder (for fluent-style composition). + public TBuilder Append(ReadOnlySpan span) + { + if (span.Length > BinaryView.MaxInlineLength) + { + int offset = ValueBuffer.Length; + ValueBuffer.Append(span); + BinaryViews.Append(new BinaryView(span.Length, span.Slice(0, 4), 0, offset)); + } + else + { + BinaryViews.Append(new BinaryView(span)); + } + ValidityBuffer.Append(true); + return Instance; + } + + /// + /// Append an enumerable collection of single-byte values to the array. + /// + /// + /// Note that this method appends multiple values, each of which is a single byte + /// + /// Single-byte values to add. + /// Returns the builder (for fluent-style composition). + public TBuilder AppendRange(IEnumerable values) + { + if (values == null) + { + throw new ArgumentNullException(nameof(values)); + } + + foreach (byte b in values) + { + Append(b); + } + + return Instance; + } + + /// + /// Append an enumerable collection of values to the array. + /// + /// Values to add. + /// Returns the builder (for fluent-style composition). + public TBuilder AppendRange(IEnumerable values) + { + if (values == null) + { + throw new ArgumentNullException(nameof(values)); + } + + foreach (byte[] arr in values) + { + if (arr == null) + { + AppendNull(); + } + else + { + Append((ReadOnlySpan)arr); + } + } + + return Instance; + } + + public TBuilder Reserve(int capacity) + { + // TODO: [ARROW-9366] Reserve capacity in the value buffer in a more sensible way. + BinaryViews.Reserve(capacity); + ValueBuffer.Reserve(capacity); + ValidityBuffer.Reserve(capacity); + return Instance; + } + + public TBuilder Resize(int length) + { + // TODO: [ARROW-9366] Resize the value buffer to a safe length based on offsets, not `length`. + BinaryViews.Resize(length); + ValueBuffer.Resize(length); + ValidityBuffer.Resize(length); + return Instance; + } + + public TBuilder Swap(int i, int j) + { + ValidityBuffer.Swap(i, j); + BinaryView view = BinaryViews.Span[i]; + BinaryViews.Span[i] = BinaryViews.Span[j]; + BinaryViews.Span[j] = view; + return Instance; + } + + public TBuilder Set(int index, byte value) + { + // TODO: Implement + throw new NotImplementedException(); + } + + /// + /// Clear all contents appended so far. + /// + /// Returns the builder (for fluent-style composition). 
+ public TBuilder Clear() + { + BinaryViews.Clear(); + ValueBuffer.Clear(); + ValidityBuffer.Clear(); + return Instance; + } + } + + public BinaryViewArray(IArrowType dataType, int length, + ArrowBuffer binaryViewsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, binaryViewsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public ArrowBuffer ViewsBuffer => Data.Buffers[1]; + + public int DataBufferCount => Data.Buffers.Length - 2; + + public ArrowBuffer DataBuffer(int index) => Data.Buffers[index + 2]; + + public ReadOnlySpan Views => ViewsBuffer.Span.CastTo().Slice(Offset, Length); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + if (!IsValid(index)) + { + return 0; + } + + return Views[index].Length; + } + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// + /// Note that this method cannot reliably identify null values, which are indistinguishable from empty byte + /// collection values when seen in the context of this method's return type of . + /// Use the method or the overload instead + /// to reliably determine null values. + /// + /// Index at which to get bytes. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index) => GetBytes(index, out _); + + /// + /// Get the collection of bytes, as a read-only span, at a given index in the array. + /// + /// Index at which to get bytes. + /// Set to if the value at the given index is null. + /// Returns a object. + /// If the index is negative or beyond the length of the array. + /// + public ReadOnlySpan GetBytes(int index, out bool isNull) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + isNull = IsNull(index); + + if (isNull) + { + // Note that `return null;` is valid syntax, but would be misleading as `null` in the context of a span + // is actually returned as an empty span. + return ReadOnlySpan.Empty; + } + + BinaryView binaryView = Views[index]; + if (binaryView.IsInline) + { + return ViewsBuffer.Span.Slice(16 * index + 4, binaryView.Length); + } + + return DataBuffer(binaryView._bufferIndex).Span.Slice(binaryView._bufferOffset, binaryView.Length); + } + + int IReadOnlyCollection.Count => Length; + byte[] IReadOnlyList.this[int index] => GetBytes(index).ToArray(); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetBytes(index).ToArray(); + } + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs new file mode 100644 index 0000000000000..081385d9211a4 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/ListViewArray.cs @@ -0,0 +1,217 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. 
+// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using Apache.Arrow.Memory; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + public class ListViewArray : Array + { + public class Builder : IArrowArrayBuilder + { + public IArrowArrayBuilder> ValueBuilder { get; } + + public int Length => ValueOffsetsBufferBuilder.Length; + + private ArrowBuffer.Builder ValueOffsetsBufferBuilder { get; } + + private ArrowBuffer.Builder SizesBufferBuilder { get; } + + private ArrowBuffer.BitmapBuilder ValidityBufferBuilder { get; } + + public int NullCount { get; protected set; } + + private IArrowType DataType { get; } + + private int Start { get; set; } + + public Builder(IArrowType valueDataType) : this(new ListViewType(valueDataType)) + { + } + + public Builder(Field valueField) : this(new ListViewType(valueField)) + { + } + + internal Builder(ListViewType dataType) + { + ValueBuilder = ArrowArrayBuilderFactory.Build(dataType.ValueDataType); + ValueOffsetsBufferBuilder = new ArrowBuffer.Builder(); + SizesBufferBuilder = new ArrowBuffer.Builder(); + ValidityBufferBuilder = new ArrowBuffer.BitmapBuilder(); + DataType = dataType; + Start = -1; + } + + /// + /// Start a new variable-length list slot + /// + /// This function should be called before beginning to append elements to the + /// value builder. TODO: Consider adding builder APIs to support construction + /// of overlapping lists. + /// + public Builder Append() + { + AppendPrevious(); + + ValidityBufferBuilder.Append(true); + + return this; + } + + public Builder AppendNull() + { + AppendPrevious(); + + ValidityBufferBuilder.Append(false); + ValueOffsetsBufferBuilder.Append(Start); + SizesBufferBuilder.Append(0); + NullCount++; + Start = -1; + + return this; + } + + private void AppendPrevious() + { + if (Start >= 0) + { + ValueOffsetsBufferBuilder.Append(Start); + SizesBufferBuilder.Append(ValueBuilder.Length - Start); + } + Start = ValueBuilder.Length; + } + + public ListViewArray Build(MemoryAllocator allocator = default) + { + AppendPrevious(); + + ArrowBuffer validityBuffer = NullCount > 0 + ? 
ValidityBufferBuilder.Build(allocator) + : ArrowBuffer.Empty; + + return new ListViewArray(DataType, Length, + ValueOffsetsBufferBuilder.Build(allocator), SizesBufferBuilder.Build(allocator), + ValueBuilder.Build(allocator), + validityBuffer, NullCount, 0); + } + + public Builder Reserve(int capacity) + { + ValueOffsetsBufferBuilder.Reserve(capacity); + SizesBufferBuilder.Reserve(capacity); + ValidityBufferBuilder.Reserve(capacity); + return this; + } + + public Builder Resize(int length) + { + ValueOffsetsBufferBuilder.Resize(length); + SizesBufferBuilder.Resize(length); + ValidityBufferBuilder.Resize(length); + return this; + } + + public Builder Clear() + { + ValueOffsetsBufferBuilder.Clear(); + SizesBufferBuilder.Clear(); + ValueBuilder.Clear(); + ValidityBufferBuilder.Clear(); + return this; + } + + } + + public IArrowArray Values { get; } + + public ArrowBuffer ValueOffsetsBuffer => Data.Buffers[1]; + + public ReadOnlySpan ValueOffsets => ValueOffsetsBuffer.Span.CastTo().Slice(Offset, Length); + + public ArrowBuffer SizesBuffer => Data.Buffers[2]; + + public ReadOnlySpan Sizes => SizesBuffer.Span.CastTo().Slice(Offset, Length); + + public ListViewArray(IArrowType dataType, int length, + ArrowBuffer valueOffsetsBuffer, ArrowBuffer sizesBuffer, IArrowArray values, + ArrowBuffer nullBitmapBuffer, int nullCount = 0, int offset = 0) + : this(new ArrayData(dataType, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, sizesBuffer }, new[] { values.Data }), + values) + { + } + + public ListViewArray(ArrayData data) + : this(data, ArrowArrayFactory.BuildArray(data.Children[0])) + { + } + + private ListViewArray(ArrayData data, IArrowArray values) : base(data) + { + data.EnsureBufferCount(3); + data.EnsureDataType(ArrowTypeId.ListView); + Values = values; + } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public int GetValueLength(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return 0; + } + + return Sizes[index]; + } + + public IArrowArray GetSlicedValues(int index) + { + if (index < 0 || index >= Length) + { + throw new ArgumentOutOfRangeException(nameof(index)); + } + + if (IsNull(index)) + { + return null; + } + + if (!(Values is Array array)) + { + return default; + } + + return array.Slice(ValueOffsets[index], GetValueLength(index)); + } + + protected override void Dispose(bool disposing) + { + if (disposing) + { + Values?.Dispose(); + } + base.Dispose(disposing); + } + } +} diff --git a/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs new file mode 100644 index 0000000000000..88644761535d9 --- /dev/null +++ b/csharp/src/Apache.Arrow/Arrays/StringViewArray.cs @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
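A short sketch of the ListView builder flow above: Append() opens a new list slot, child values go to ValueBuilder, and AppendNull() records a null slot with size zero. The cast of ValueBuilder to Int64Array.Builder is an assumption about the builder that ArrowArrayBuilderFactory returns for an Int64 child; that factory is not shown in this hunk.

using System;
using Apache.Arrow;
using Apache.Arrow.Types;

// Build the logical value [[1, 2], null, [3]].
var builder = new ListViewArray.Builder(Int64Type.Default);
var values = (Int64Array.Builder)builder.ValueBuilder;   // assumed concrete child builder

builder.Append();            // open slot 0
values.Append(1).Append(2);

builder.AppendNull();        // slot 1: null, size 0

builder.Append();            // open slot 2
values.Append(3);

ListViewArray array = builder.Build();

Console.WriteLine(array.GetValueLength(0));   // 2
Console.WriteLine(array.IsNull(1));           // True
var last = (Int64Array)array.GetSlicedValues(2);
Console.WriteLine(last.GetValue(0));          // 3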
+// See the License for the specific language governing permissions and +// limitations under the License. + +using Apache.Arrow.Types; +using System; +using System.Collections; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Apache.Arrow +{ + public class StringViewArray: BinaryViewArray, IReadOnlyList + { + public static readonly Encoding DefaultEncoding = Encoding.UTF8; + + public new class Builder : BuilderBase + { + public Builder() : base(StringViewType.Default) { } + + protected override StringViewArray Build(ArrayData data) + { + return new StringViewArray(data); + } + + public Builder Append(string value, Encoding encoding = null) + { + if (value == null) + { + return AppendNull(); + } + encoding = encoding ?? DefaultEncoding; + byte[] span = encoding.GetBytes(value); + return Append(span.AsSpan()); + } + + public Builder AppendRange(IEnumerable values, Encoding encoding = null) + { + foreach (string value in values) + { + Append(value, encoding); + } + + return this; + } + } + + public StringViewArray(ArrayData data) + : base(ArrowTypeId.StringView, data) { } + + public StringViewArray(int length, + ArrowBuffer valueOffsetsBuffer, + ArrowBuffer dataBuffer, + ArrowBuffer nullBitmapBuffer, + int nullCount = 0, int offset = 0) + : this(new ArrayData(StringViewType.Default, length, nullCount, offset, + new[] { nullBitmapBuffer, valueOffsetsBuffer, dataBuffer })) + { } + + public override void Accept(IArrowArrayVisitor visitor) => Accept(this, visitor); + + public string GetString(int index, Encoding encoding = default) + { + encoding ??= DefaultEncoding; + + ReadOnlySpan bytes = GetBytes(index, out bool isNull); + + if (isNull) + { + return null; + } + if (bytes.Length == 0) + { + return string.Empty; + } + + unsafe + { + fixed (byte* data = &MemoryMarshal.GetReference(bytes)) + return encoding.GetString(data, bytes.Length); + } + } + + int IReadOnlyCollection.Count => Length; + + string IReadOnlyList.this[int index] => GetString(index); + + IEnumerator IEnumerable.GetEnumerator() + { + for (int index = 0; index < Length; index++) + { + yield return GetString(index); + }; + } + + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + } +} diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs index 2d9febea33f54..03059eaf5d4df 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayExporter.cs @@ -15,10 +15,12 @@ using System; +using System.Buffers; using System.Diagnostics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using Apache.Arrow.Memory; +using Apache.Arrow.Types; namespace Apache.Arrow.C { @@ -121,7 +123,16 @@ private unsafe static void ConvertArray(ExportedAllocationOwner sharedOwner, Arr cArray->buffers = null; if (cArray->n_buffers > 0) { - cArray->buffers = (byte**)sharedOwner.Allocate(array.Buffers.Length * IntPtr.Size); + long* lengths = null; + int bufferCount = array.Buffers.Length; + if (array.DataType.TypeId == ArrowTypeId.BinaryView || array.DataType.TypeId == ArrowTypeId.StringView) + { + lengths = (long*)sharedOwner.Allocate(8 * bufferCount); // overallocation to avoid edge case + bufferCount++; + cArray->n_buffers++; + } + + cArray->buffers = (byte**)sharedOwner.Allocate(bufferCount * IntPtr.Size); for (int i = 0; i < array.Buffers.Length; i++) { ArrowBuffer buffer = array.Buffers[i]; @@ -131,6 +142,15 @@ private unsafe static void 
ConvertArray(ExportedAllocationOwner sharedOwner, Arr throw new NotSupportedException($"An ArrowArray of type {array.DataType.TypeId} could not be exported: failed on buffer #{i}"); } cArray->buffers[i] = (byte*)ptr; + if (lengths != null && i >= 2) + { + lengths[i - 2] = array.Buffers[i].Length; + } + } + + if (lengths != null) + { + cArray->buffers[array.Buffers.Length] = (byte*)lengths; } } diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs index 1b40ec49658bb..fbb2be661fc5d 100644 --- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs @@ -157,10 +157,18 @@ private ArrayData GetAsArrayData(CArrowArray* cArray, IArrowType type) case ArrowTypeId.Binary: buffers = ImportByteArrayBuffers(cArray); break; + case ArrowTypeId.StringView: + case ArrowTypeId.BinaryView: + buffers = ImportByteArrayViewBuffers(cArray); + break; case ArrowTypeId.List: children = ProcessListChildren(cArray, ((ListType)type).ValueDataType); buffers = ImportListBuffers(cArray); break; + case ArrowTypeId.ListView: + children = ProcessListChildren(cArray, ((ListViewType)type).ValueDataType); + buffers = ImportListViewBuffers(cArray); + break; case ArrowTypeId.FixedSizeList: children = ProcessListChildren(cArray, ((FixedSizeListType)type).ValueDataType); buffers = ImportFixedSizeListBuffers(cArray); @@ -268,6 +276,28 @@ private ArrowBuffer[] ImportByteArrayBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportByteArrayViewBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers < 3) + { + throw new InvalidOperationException("Byte array views are expected to have at least three buffers"); + } + + int length = checked((int)cArray->length); + int viewsLength = length * 16; + + long* bufferLengths = (long*)cArray->buffers[cArray->n_buffers - 1]; + ArrowBuffer[] buffers = new ArrowBuffer[cArray->n_buffers - 1]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, viewsLength)); + for (int i = 2; i < buffers.Length; i++) + { + buffers[i] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[i], 0, checked((int)bufferLengths[i - 2]))); + } + + return buffers; + } + private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 2) @@ -285,6 +315,24 @@ private ArrowBuffer[] ImportListBuffers(CArrowArray* cArray) return buffers; } + private ArrowBuffer[] ImportListViewBuffers(CArrowArray* cArray) + { + if (cArray->n_buffers != 3) + { + throw new InvalidOperationException("List view arrays are expected to have exactly three buffers"); + } + + int length = checked((int)cArray->length); + int offsetsLength = length * 4; + + ArrowBuffer[] buffers = new ArrowBuffer[3]; + buffers[0] = ImportValidityBuffer(cArray); + buffers[1] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength)); + buffers[2] = new ArrowBuffer(AddMemory((IntPtr)cArray->buffers[2], 0, offsetsLength)); + + return buffers; + } + private ArrowBuffer[] ImportFixedSizeListBuffers(CArrowArray* cArray) { if (cArray->n_buffers != 1) diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs index c9b45a8eb2d87..3bb7134af3ba9 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs @@ -167,7 +167,9 @@ private static string GetFormat(IArrowType datatype) return $"d:{decimalType.Precision},{decimalType.Scale},256"; // 
Binary case BinaryType _: return "z"; + case BinaryViewType _: return "vz"; case StringType _: return "u"; + case StringViewType _: return "vu"; case FixedSizeBinaryType binaryType: return $"w:{binaryType.ByteWidth}"; // Date @@ -196,6 +198,7 @@ private static string GetFormat(IArrowType datatype) }; // Nested case ListType _: return "+l"; + case ListViewType _: return "+vl"; case FixedSizeListType fixedListType: return $"+w:{fixedListType.ListSize}"; case StructType _: return "+s"; diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs index 9c81195771bae..f1acc007bcef7 100644 --- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs +++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs @@ -165,7 +165,7 @@ public ArrowType GetAsType() } // Special handling for nested types - if (format == "+l") + if (format == "+l" || format == "+vl") { if (_cSchema->n_children != 1) { @@ -180,7 +180,7 @@ public ArrowType GetAsType() Field childField = childSchema.GetAsField(); - return new ListType(childField); + return format[1] == 'v' ? new ListViewType(childField) : new ListType(childField); } else if (format == "+s") { @@ -303,8 +303,10 @@ public ArrowType GetAsType() "g" => DoubleType.Default, // Binary data "z" => BinaryType.Default, + "vz" => BinaryViewType.Default, //"Z" => new LargeBinaryType() // Not yet implemented "u" => StringType.Default, + "vu" => StringViewType.Default, //"U" => new LargeStringType(), // Not yet implemented // Date and time "tdD" => Date32Type.Default, diff --git a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs index 399d9bf5e6bf1..2b6742a3d0cb2 100644 --- a/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs +++ b/csharp/src/Apache.Arrow/Extensions/ArrayDataExtensions.cs @@ -23,6 +23,17 @@ internal static class ArrayDataExtensions public static void EnsureBufferCount(this ArrayData data, int count) { if (data.Buffers.Length != count) + { + // TODO: Use localizable string resource + throw new ArgumentException( + $"Buffer count <{data.Buffers.Length}> must be at exactly <{count}>", + nameof(data.Buffers.Length)); + } + } + + public static void EnsureVariadicBufferCount(this ArrayData data, int count) + { + if (data.Buffers.Length < count) { // TODO: Use localizable string resource throw new ArgumentException( diff --git a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs index 5f39680b90ebc..b44c02d854077 100644 --- a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs +++ b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs @@ -19,25 +19,6 @@ namespace Apache.Arrow { internal static class FlatbufExtensions { - public static bool IsFixedPrimitive(this Flatbuf.Type t) - { - if (t == Flatbuf.Type.Utf8 || t == Flatbuf.Type.Binary) - return false; - return true; - } - - public static bool IsFixedPrimitive(this Types.IArrowType t) - { - return t.TypeId.IsFixedPrimitive(); - } - - public static bool IsFixedPrimitive(this Types.ArrowTypeId t) - { - if (t == Types.ArrowTypeId.String || t == Types.ArrowTypeId.Binary) - return false; - return true; - } - public static Types.IntervalUnit ToArrow(this Flatbuf.IntervalUnit unit) { switch (unit) diff --git a/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs new file mode 100644 index 0000000000000..2f9cca51737f8 --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/BinaryView.cs @@ -0,0 
+1,47 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Logically the same as Binary, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +internal struct BinaryView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static BinaryView GetRootAsBinaryView(ByteBuffer _bb) { return GetRootAsBinaryView(_bb, new BinaryView()); } + public static BinaryView GetRootAsBinaryView(ByteBuffer _bb, BinaryView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public BinaryView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartBinaryView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndBinaryView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class BinaryViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs index 1e893e8cb6ffc..13b5315805dc9 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/MetadataVersion.cs @@ -8,21 +8,21 @@ namespace Apache.Arrow.Flatbuf internal enum MetadataVersion : short { /// 0.1.0 (October 2016). - V1 = 0, + V1 = 0, /// 0.2.0 (February 2017). Non-backwards compatible with V1. - V2 = 1, + V2 = 1, /// 0.3.0 -> 0.7.1 (May - December 2017). Non-backwards compatible with V2. - V3 = 2, + V3 = 2, /// >= 0.8.0 (December 2017). Non-backwards compatible with V3. - V4 = 3, - /// >= 1.0.0 (July 2020. Backwards compatible with V4 (V5 readers can read V4 + V4 = 3, + /// >= 1.0.0 (July 2020). Backwards compatible with V4 (V5 readers can read V4 /// metadata and IPC messages). Implementations are recommended to provide a /// V4 compatibility mode with V5 format changes disabled. /// /// Incompatible changes between V4 and V5: /// - Union buffer layout has changed. In V5, Unions don't have a validity /// bitmap buffer. 
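For reference, the new C Data Interface format strings wired up above are "vz" for BinaryView, "vu" for Utf8View and "+vl" for ListView. A hedged round-trip sketch follows; the CArrowSchema.Create/Free, ExportType and ImportType entry points are assumed from the existing C interface surface and are not part of this change.

using System;
using Apache.Arrow.C;
using Apache.Arrow.Types;

unsafe
{
    // Export a listview<utf8view> type and import it back through the C interface.
    CArrowSchema* cSchema = CArrowSchema.Create();   // assumed existing helper
    try
    {
        CArrowSchemaExporter.ExportType(new ListViewType(StringViewType.Default), cSchema);
        ArrowType imported = CArrowSchemaImporter.ImportType(cSchema);    // assumed entry point
        Console.WriteLine(imported.TypeId);                               // ListView
        Console.WriteLine(((ListViewType)imported).ValueDataType.TypeId); // StringView
    }
    finally
    {
        CArrowSchema.Free(cSchema);   // assumed existing helper
    }
}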
- V5 = 4, + V5 = 4, }; diff --git a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs index 10f852efb9b96..9c04288648dea 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Enums/Type.cs @@ -33,6 +33,10 @@ internal enum Type : byte LargeUtf8 = 20, LargeList = 21, RunEndEncoded = 22, + BinaryView = 23, + Utf8View = 24, + ListView = 25, + LargeListView = 26, }; @@ -110,6 +114,18 @@ static public bool Verify(Google.FlatBuffers.Verifier verifier, byte typeId, uin case Type.RunEndEncoded: result = RunEndEncodedVerify.Verify(verifier, tablePos); break; + case Type.BinaryView: + result = BinaryViewVerify.Verify(verifier, tablePos); + break; + case Type.Utf8View: + result = Utf8ViewVerify.Verify(verifier, tablePos); + break; + case Type.ListView: + result = ListViewVerify.Verify(verifier, tablePos); + break; + case Type.LargeListView: + result = LargeListViewVerify.Verify(verifier, tablePos); + break; default: result = true; break; } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Field.cs b/csharp/src/Apache.Arrow/Flatbuf/Field.cs index c5c6c0a165598..efbc6afb06d03 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Field.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Field.cs @@ -57,6 +57,10 @@ internal struct Field : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// Present only if the field is dictionary encoded. public DictionaryEncoding? Dictionary { get { int o = __p.__offset(12); return o != 0 ? (DictionaryEncoding?)(new DictionaryEncoding()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } /// children apply only to nested data types like Struct, List and Union. For diff --git a/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs new file mode 100644 index 0000000000000..685e91333c38c --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/LargeListView.cs @@ -0,0 +1,42 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Same as ListView, but with 64-bit offsets and sizes, allowing to represent +/// extremely large data values. 
+internal struct LargeListView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static LargeListView GetRootAsLargeListView(ByteBuffer _bb) { return GetRootAsLargeListView(_bb, new LargeListView()); } + public static LargeListView GetRootAsLargeListView(ByteBuffer _bb, LargeListView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public LargeListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartLargeListView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndLargeListView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class LargeListViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/ListView.cs b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs new file mode 100644 index 0000000000000..d2e54e428524b --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/ListView.cs @@ -0,0 +1,43 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Represents the same logical types that List can, but contains offsets and +/// sizes allowing for writes in any order and sharing of child values among +/// list values. +internal struct ListView : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static ListView GetRootAsListView(ByteBuffer _bb) { return GetRootAsListView(_bb, new ListView()); } + public static ListView GetRootAsListView(ByteBuffer _bb, ListView obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public ListView __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartListView(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndListView(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class ListViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs index 9ab9715165ddc..2df8716bc1655 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/RecordBatch.cs @@ -38,27 +38,57 @@ internal struct RecordBatch : IFlatbufferObject public int BuffersLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } /// Optional compression of the message body public BodyCompression? Compression { get { int o = __p.__offset(10); return o != 0 ? 
(BodyCompression?)(new BodyCompression()).__assign(__p.__indirect(o + __p.bb_pos), __p.bb) : null; } } + /// Some types such as Utf8View are represented using a variable number of buffers. + /// For each such Field in the pre-ordered flattened logical schema, there will be + /// an entry in variadicBufferCounts to indicate the number of number of variadic + /// buffers which belong to that Field in the current RecordBatch. + /// + /// For example, the schema + /// col1: Struct + /// col2: Utf8View + /// contains two Fields with variadic buffers so variadicBufferCounts will have + /// two entries, the first counting the variadic buffers of `col1.beta` and the + /// second counting `col2`'s. + /// + /// This field may be omitted if and only if the schema contains no Fields with + /// a variable number of buffers, such as BinaryView and Utf8View. + public long VariadicBufferCounts(int j) { int o = __p.__offset(12); return o != 0 ? __p.bb.GetLong(__p.__vector(o) + j * 8) : (long)0; } + public int VariadicBufferCountsLength { get { int o = __p.__offset(12); return o != 0 ? __p.__vector_len(o) : 0; } } +#if ENABLE_SPAN_T + public Span GetVariadicCountsBytes() { return __p.__vector_as_span(12, 8); } +#else + public ArraySegment? GetVariadicCountsBytes() { return __p.__vector_as_arraysegment(12); } +#endif + public long[] GetVariadicCountsArray() { return __p.__vector_as_array(12); } public static Offset CreateRecordBatch(FlatBufferBuilder builder, long length = 0, VectorOffset nodesOffset = default(VectorOffset), VectorOffset buffersOffset = default(VectorOffset), - Offset compressionOffset = default(Offset)) { - builder.StartTable(4); + Offset compressionOffset = default(Offset), + VectorOffset variadicCountsOffset = default(VectorOffset)) { + builder.StartTable(5); RecordBatch.AddLength(builder, length); + RecordBatch.AddVariadicCounts(builder, variadicCountsOffset); RecordBatch.AddCompression(builder, compressionOffset); RecordBatch.AddBuffers(builder, buffersOffset); RecordBatch.AddNodes(builder, nodesOffset); return RecordBatch.EndRecordBatch(builder); } - public static void StartRecordBatch(FlatBufferBuilder builder) { builder.StartTable(4); } + public static void StartRecordBatch(FlatBufferBuilder builder) { builder.StartTable(5); } public static void AddLength(FlatBufferBuilder builder, long length) { builder.AddLong(0, length, 0); } public static void AddNodes(FlatBufferBuilder builder, VectorOffset nodesOffset) { builder.AddOffset(1, nodesOffset.Value, 0); } public static void StartNodesVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } public static void AddBuffers(FlatBufferBuilder builder, VectorOffset buffersOffset) { builder.AddOffset(2, buffersOffset.Value, 0); } public static void StartBuffersVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(16, numElems, 8); } public static void AddCompression(FlatBufferBuilder builder, Offset compressionOffset) { builder.AddOffset(3, compressionOffset.Value, 0); } + public static void AddVariadicCounts(FlatBufferBuilder builder, VectorOffset variadicCountsOffset) { builder.AddOffset(4, variadicCountsOffset.Value, 0); } + public static VectorOffset CreateVariadicCountsVector(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, data.Length, 8); for (int i = data.Length - 1; i >= 0; i--) builder.AddLong(data[i]); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, long[] data) { builder.StartVector(8, 
data.Length, 8); builder.Add(data); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, ArraySegment data) { builder.StartVector(8, data.Count, 8); builder.Add(data); return builder.EndVector(); } + public static VectorOffset CreateVariadicCountsVectorBlock(FlatBufferBuilder builder, IntPtr dataPtr, int sizeInBytes) { builder.StartVector(1, sizeInBytes, 1); builder.Add(dataPtr, sizeInBytes); return builder.EndVector(); } + public static void StartVariadicCountsVector(FlatBufferBuilder builder, int numElems) { builder.StartVector(8, numElems, 8); } public static Offset EndRecordBatch(FlatBufferBuilder builder) { int o = builder.EndTable(); return new Offset(o); @@ -75,6 +105,7 @@ static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) && verifier.VerifyVectorOfData(tablePos, 6 /*Nodes*/, 16 /*FieldNode*/, false) && verifier.VerifyVectorOfData(tablePos, 8 /*Buffers*/, 16 /*Buffer*/, false) && verifier.VerifyTable(tablePos, 10 /*Compression*/, BodyCompressionVerify.Verify, false) + && verifier.VerifyVectorOfData(tablePos, 12 /*VariadicCounts*/, 8 /*long*/, false) && verifier.VerifyTableEnd(tablePos); } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs index 3f9e1de7c00a9..099950fafe4ee 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/SparseTensor.cs @@ -47,6 +47,10 @@ internal struct SparseTensor : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// The dimensions of the tensor, optionally named. public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ? (TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ? __p.__vector_len(o) : 0; } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs index f8c213768a3fc..eb39257d861ca 100644 --- a/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs +++ b/csharp/src/Apache.Arrow/Flatbuf/Tensor.cs @@ -46,6 +46,10 @@ internal struct Tensor : IFlatbufferObject public LargeUtf8 TypeAsLargeUtf8() { return Type().Value; } public LargeList TypeAsLargeList() { return Type().Value; } public RunEndEncoded TypeAsRunEndEncoded() { return Type().Value; } + public BinaryView TypeAsBinaryView() { return Type().Value; } + public Utf8View TypeAsUtf8View() { return Type().Value; } + public ListView TypeAsListView() { return Type().Value; } + public LargeListView TypeAsLargeListView() { return Type().Value; } /// The dimensions of the tensor, optionally named public TensorDim? Shape(int j) { int o = __p.__offset(8); return o != 0 ? (TensorDim?)(new TensorDim()).__assign(__p.__indirect(__p.__vector(o) + j * 4), __p.bb) : null; } public int ShapeLength { get { int o = __p.__offset(8); return o != 0 ? 
__p.__vector_len(o) : 0; } } diff --git a/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs new file mode 100644 index 0000000000000..e85c5374a9acc --- /dev/null +++ b/csharp/src/Apache.Arrow/Flatbuf/Utf8View.cs @@ -0,0 +1,47 @@ +// +// automatically generated by the FlatBuffers compiler, do not modify +// + +namespace Apache.Arrow.Flatbuf +{ + +using global::System; +using global::System.Collections.Generic; +using global::Google.FlatBuffers; + +/// Logically the same as Utf8, but the internal representation uses a view +/// struct that contains the string length and either the string's entire data +/// inline (for small strings) or an inlined prefix, an index of another buffer, +/// and an offset pointing to a slice in that buffer (for non-small strings). +/// +/// Since it uses a variable number of data buffers, each Field with this type +/// must have a corresponding entry in `variadicBufferCounts`. +internal struct Utf8View : IFlatbufferObject +{ + private Table __p; + public ByteBuffer ByteBuffer { get { return __p.bb; } } + public static void ValidateVersion() { FlatBufferConstants.FLATBUFFERS_23_5_9(); } + public static Utf8View GetRootAsUtf8View(ByteBuffer _bb) { return GetRootAsUtf8View(_bb, new Utf8View()); } + public static Utf8View GetRootAsUtf8View(ByteBuffer _bb, Utf8View obj) { return (obj.__assign(_bb.GetInt(_bb.Position) + _bb.Position, _bb)); } + public void __init(int _i, ByteBuffer _bb) { __p = new Table(_i, _bb); } + public Utf8View __assign(int _i, ByteBuffer _bb) { __init(_i, _bb); return this; } + + + public static void StartUtf8View(FlatBufferBuilder builder) { builder.StartTable(0); } + public static Offset EndUtf8View(FlatBufferBuilder builder) { + int o = builder.EndTable(); + return new Offset(o); + } +} + + +static internal class Utf8ViewVerify +{ + static public bool Verify(Google.FlatBuffers.Verifier verifier, uint tablePos) + { + return verifier.VerifyTableStart(tablePos) + && verifier.VerifyTableEnd(tablePos); + } +} + +} diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs index d3115da52cc6c..eb7349a570786 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs @@ -191,9 +191,7 @@ private List BuildArrays( Field field = schema.GetFieldByIndex(schemaFieldIndex++); Flatbuf.FieldNode fieldNode = recordBatchEnumerator.CurrentNode; - ArrayData arrayData = field.DataType.IsFixedPrimitive() - ? LoadPrimitiveField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator) - : LoadVariableField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator); + ArrayData arrayData = LoadField(version, ref recordBatchEnumerator, field, in fieldNode, messageBuffer, bufferCreator); arrays.Add(ArrowArrayFactory.BuildArray(arrayData)); } while (recordBatchEnumerator.MoveNextNode()); @@ -229,7 +227,7 @@ private IBufferCreator GetBufferCreator(BodyCompression? 
compression) return new DecompressingBufferCreator(decompressor, _allocator); } - private ArrayData LoadPrimitiveField( + private ArrayData LoadField( MetadataVersion version, ref RecordBatchEnumerator recordBatchEnumerator, Field field, @@ -276,6 +274,16 @@ private ArrayData LoadPrimitiveField( case ArrowTypeId.FixedSizeList: buffers = 1; break; + case ArrowTypeId.String: + case ArrowTypeId.Binary: + case ArrowTypeId.ListView: + buffers = 3; + break; + case ArrowTypeId.StringView: + case ArrowTypeId.BinaryView: + buffers = checked((int)(2 + recordBatchEnumerator.CurrentVariadicCount)); + recordBatchEnumerator.MoveNextVariadicCount(); + break; default: buffers = 2; break; @@ -300,54 +308,6 @@ private ArrayData LoadPrimitiveField( return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, dictionary?.Data); } - private ArrayData LoadVariableField( - MetadataVersion version, - ref RecordBatchEnumerator recordBatchEnumerator, - Field field, - in Flatbuf.FieldNode fieldNode, - ByteBuffer bodyData, - IBufferCreator bufferCreator) - { - - ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - if (!recordBatchEnumerator.MoveNextBuffer()) - { - throw new Exception("Unable to move to the next buffer."); - } - ArrowBuffer offsetArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - if (!recordBatchEnumerator.MoveNextBuffer()) - { - throw new Exception("Unable to move to the next buffer."); - } - ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData, recordBatchEnumerator.CurrentBuffer, bufferCreator); - recordBatchEnumerator.MoveNextBuffer(); - - int fieldLength = (int)fieldNode.Length; - int fieldNullCount = (int)fieldNode.NullCount; - - if (fieldLength < 0) - { - throw new InvalidDataException("Field length must be >= 0"); // TODO: Localize exception message - } - - if (fieldNullCount < 0) - { - throw new InvalidDataException("Null count length must be >= 0"); //TODO: Localize exception message - } - - ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer, offsetArrowBuffer, valueArrowBuffer }; - ArrayData[] children = GetChildren(version, ref recordBatchEnumerator, field, bodyData, bufferCreator); - - IArrowArray dictionary = null; - if (field.DataType.TypeId == ArrowTypeId.Dictionary) - { - long id = DictionaryMemo.GetId(field); - dictionary = DictionaryMemo.GetDictionary(id); - } - - return new ArrayData(field.DataType, fieldLength, fieldNullCount, 0, arrowBuff, children, dictionary?.Data); - } - private ArrayData[] GetChildren( MetadataVersion version, ref RecordBatchEnumerator recordBatchEnumerator, @@ -365,11 +325,7 @@ private ArrayData[] GetChildren( Flatbuf.FieldNode childFieldNode = recordBatchEnumerator.CurrentNode; Field childField = type.Fields[index]; - ArrayData child = childField.DataType.IsFixedPrimitive() - ? 
LoadPrimitiveField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator) - : LoadVariableField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator); - - children[index] = child; + children[index] = LoadField(version, ref recordBatchEnumerator, childField, in childFieldNode, bodyData, bufferCreator); } return children; } @@ -394,11 +350,14 @@ internal struct RecordBatchEnumerator private Flatbuf.RecordBatch RecordBatch { get; } internal int CurrentBufferIndex { get; private set; } internal int CurrentNodeIndex { get; private set; } + internal int CurrentVariadicCountIndex { get; private set; } internal Flatbuf.Buffer CurrentBuffer => RecordBatch.Buffers(CurrentBufferIndex).GetValueOrDefault(); internal Flatbuf.FieldNode CurrentNode => RecordBatch.Nodes(CurrentNodeIndex).GetValueOrDefault(); + internal long CurrentVariadicCount => RecordBatch.VariadicBufferCounts(CurrentVariadicCountIndex); + internal bool MoveNextBuffer() { return ++CurrentBufferIndex < RecordBatch.BuffersLength; @@ -409,11 +368,17 @@ internal bool MoveNextNode() return ++CurrentNodeIndex < RecordBatch.NodesLength; } + internal bool MoveNextVariadicCount() + { + return ++CurrentVariadicCountIndex < RecordBatch.VariadicBufferCountsLength; + } + internal RecordBatchEnumerator(in Flatbuf.RecordBatch recordBatch) { RecordBatch = recordBatch; CurrentBufferIndex = 0; CurrentNodeIndex = 0; + CurrentVariadicCountIndex = 0; } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index 5f490019b2133..07d1dcfdb171d 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -54,9 +54,12 @@ internal class ArrowRecordBatchFlatBufferBuilder : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -81,6 +84,7 @@ public Buffer(ArrowBuffer buffer, int offset) public IReadOnlyList Buffers => _buffers; + public List VariadicCounts { get; private set; } public int TotalLength { get; private set; } public ArrowRecordBatchFlatBufferBuilder() @@ -121,6 +125,15 @@ public void Visit(ListArray array) array.Values.Accept(this); } + public void Visit(ListViewArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ValueOffsetsBuffer)); + _buffers.Add(CreateBuffer(array.SizesBuffer)); + + array.Values.Accept(this); + } + public void Visit(FixedSizeListArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -130,6 +143,8 @@ public void Visit(FixedSizeListArray array) public void Visit(StringArray array) => Visit(array as BinaryArray); + public void Visit(StringViewArray array) => Visit(array as BinaryViewArray); + public void Visit(BinaryArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -137,6 +152,18 @@ public void Visit(BinaryArray array) _buffers.Add(CreateBuffer(array.ValueBuffer)); } + public void Visit(BinaryViewArray array) + { + _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); + _buffers.Add(CreateBuffer(array.ViewsBuffer)); + for (int i = 0; i < array.DataBufferCount; i++) + { + _buffers.Add(CreateBuffer(array.DataBuffer(i))); + } + VariadicCounts = VariadicCounts ?? 
new List(); + VariadicCounts.Add(array.DataBufferCount); + } + public void Visit(FixedSizeBinaryArray array) { _buffers.Add(CreateBuffer(array.NullBitmapBuffer)); @@ -328,7 +355,7 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) HasWrittenDictionaryBatch = true; } - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = PreparingWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -339,7 +366,9 @@ private protected void WriteRecordBatchInternal(RecordBatch recordBatch) Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); long metadataLength = WriteMessage(Flatbuf.MessageHeader.RecordBatch, recordBatchOffset, recordBatchBuilder.TotalLength); @@ -367,7 +396,7 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat HasWrittenDictionaryBatch = true; } - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = PreparingWritingRecordBatch(recordBatch); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -378,7 +407,9 @@ private protected async Task WriteRecordBatchInternalAsync(RecordBatch recordBat Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, recordBatch.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); long metadataLength = await WriteMessageAsync(Flatbuf.MessageHeader.RecordBatch, recordBatchOffset, recordBatchBuilder.TotalLength, @@ -451,12 +482,12 @@ private async ValueTask WriteBufferDataAsync(IReadOnlyList PreparingWritingRecordBatch(RecordBatch recordBatch) + private Tuple PreparingWritingRecordBatch(RecordBatch recordBatch) { return PreparingWritingRecordBatch(recordBatch.Schema.FieldsList, recordBatch.ArrayList); } - private Tuple PreparingWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) + private Tuple PreparingWritingRecordBatch(IReadOnlyList fields, IReadOnlyList arrays) { Builder.Clear(); @@ -483,6 +514,12 @@ private Tuple PreparingWritingR fieldArray.Accept(recordBatchBuilder); } + VectorOffset variadicCountOffset = default; + if (recordBatchBuilder.VariadicCounts != null) + { + variadicCountOffset = Flatbuf.RecordBatch.CreateVariadicCountsVectorBlock(Builder, recordBatchBuilder.VariadicCounts.ToArray()); + } + IReadOnlyList buffers = recordBatchBuilder.Buffers; Flatbuf.RecordBatch.StartBuffersVector(Builder, buffers.Count); @@ -494,7 +531,7 @@ private Tuple PreparingWritingR buffers[i].Offset, buffers[i].DataBuffer.Length); } - return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset); + return Tuple.Create(recordBatchBuilder, fieldNodesVectorOffset, variadicCountOffset); } private protected virtual void StartingWritingDictionary() @@ -561,7 +598,7 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, var arrays = new List { dictionary }; - (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset) = + (ArrowRecordBatchFlatBufferBuilder recordBatchBuilder, VectorOffset fieldNodesVectorOffset, VectorOffset variadicCountsOffset) = 
PreparingWritingRecordBatch(fields, arrays); VectorOffset buffersVectorOffset = Builder.EndVector(); @@ -569,7 +606,9 @@ private protected async Task WriteDictionaryAsync(long id, IArrowType valueType, // Serialize record batch Offset recordBatchOffset = Flatbuf.RecordBatch.CreateRecordBatch(Builder, dictionary.Length, fieldNodesVectorOffset, - buffersVectorOffset); + buffersVectorOffset, + default, + variadicCountsOffset); // TODO: Support delta. Offset dictionaryBatchOffset = Flatbuf.DictionaryBatch.CreateDictionaryBatch(Builder, id, recordBatchOffset, false); diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs index 84ff4f9cc7202..473e18968f8cb 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs @@ -50,9 +50,13 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, +#if NET5_0_OR_GREATER + IArrowTypeVisitor, +#endif IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -60,8 +64,10 @@ class TypeVisitor : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -106,6 +112,14 @@ public void Visit(BinaryType type) Flatbuf.Binary.EndBinary(Builder)); } + public void Visit(BinaryViewType type) + { + Flatbuf.BinaryView.StartBinaryView(Builder); + Offset offset = Flatbuf.BinaryView.EndBinaryView(Builder); + Result = FieldType.Build( + Flatbuf.Type.BinaryView, offset); + } + public void Visit(ListType type) { Flatbuf.List.StartList(Builder); @@ -114,6 +128,14 @@ public void Visit(ListType type) Flatbuf.List.EndList(Builder)); } + public void Visit(ListViewType type) + { + Flatbuf.ListView.StartListView(Builder); + Result = FieldType.Build( + Flatbuf.Type.ListView, + Flatbuf.ListView.EndListView(Builder)); + } + public void Visit(FixedSizeListType type) { Result = FieldType.Build( @@ -136,6 +158,14 @@ public void Visit(StringType type) Flatbuf.Type.Utf8, offset); } + public void Visit(StringViewType type) + { + Flatbuf.Utf8View.StartUtf8View(Builder); + Offset offset = Flatbuf.Utf8View.EndUtf8View(Builder); + Result = FieldType.Build( + Flatbuf.Type.Utf8View, offset); + } + public void Visit(TimestampType type) { StringOffset timezoneStringOffset = default; @@ -169,6 +199,15 @@ public void Visit(Time32Type type) Flatbuf.Time.CreateTime(Builder, ToFlatBuffer(type.Unit))); } +#if NET5_0_OR_GREATER + public void Visit(HalfFloatType type) + { + Result = FieldType.Build( + Flatbuf.Type.FloatingPoint, + Flatbuf.FloatingPoint.CreateFloatingPoint(Builder, Precision.HALF)); + } +#endif + public void Visit(FloatType type) { Result = FieldType.Build( diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs index 633554fc53261..0e6f330aef091 100644 --- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs +++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs @@ -184,17 +184,27 @@ private static Types.IArrowType GetFieldArrowType(Flatbuf.Field field, Field[] c return Types.IntervalType.FromIntervalUnit(intervalMetadata.Unit.ToArrow()); case Flatbuf.Type.Utf8: return Types.StringType.Default; + case Flatbuf.Type.Utf8View: + return Types.StringViewType.Default; case Flatbuf.Type.FixedSizeBinary: Flatbuf.FixedSizeBinary fixedSizeBinaryMetadata = 
field.Type().Value; return new Types.FixedSizeBinaryType(fixedSizeBinaryMetadata.ByteWidth); case Flatbuf.Type.Binary: return Types.BinaryType.Default; + case Flatbuf.Type.BinaryView: + return Types.BinaryViewType.Default; case Flatbuf.Type.List: if (childFields == null || childFields.Length != 1) { throw new InvalidDataException($"List type must have exactly one child."); } return new Types.ListType(childFields[0]); + case Flatbuf.Type.ListView: + if (childFields == null || childFields.Length != 1) + { + throw new InvalidDataException($"List view type must have exactly one child."); + } + return new Types.ListViewType(childFields[0]); case Flatbuf.Type.FixedSizeList: if (childFields == null || childFields.Length != 1) { diff --git a/csharp/src/Apache.Arrow/Scalars/BinaryView.cs b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs new file mode 100644 index 0000000000000..eaba89c7a3a8e --- /dev/null +++ b/csharp/src/Apache.Arrow/Scalars/BinaryView.cs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Apache.Arrow.Scalars +{ + [StructLayout(LayoutKind.Explicit)] + public unsafe struct BinaryView : IEquatable + { + public const int PrefixLength = 4; + public const int MaxInlineLength = 12; + + [FieldOffset(0)] + public readonly int Length; + + [FieldOffset(4)] + internal readonly int _prefix; + + [FieldOffset(8)] + internal readonly int _bufferIndex; + + [FieldOffset(12)] + internal readonly int _bufferOffset; + + [FieldOffset(4)] + internal fixed byte _inline[MaxInlineLength]; + + public unsafe BinaryView(ReadOnlySpan inline) : this() + { + if (inline.Length > MaxInlineLength) + { + throw new ArgumentException("invalid inline data length", nameof(inline)); + } + + Length = inline.Length; + fixed (byte* dest = _inline) + fixed (byte* src = inline) + { + Buffer.MemoryCopy(src, dest, MaxInlineLength, inline.Length); + } + } + + public BinaryView(int length, ReadOnlySpan prefix, int bufferIndex, int bufferOffset) + { + if (length < MaxInlineLength) + { + throw new ArgumentException("invalid length", nameof(length)); + } + if (prefix.Length != PrefixLength) + { + throw new ArgumentException("invalid prefix length", nameof(prefix)); + } + + Length = length; + _bufferIndex = bufferIndex; + _bufferOffset = bufferOffset; + _prefix = prefix.CastTo()[0]; + } + + private BinaryView(int length, int prefix, int bufferIndex, int offset) + { + Length = length; + _prefix = prefix; + _bufferIndex = bufferIndex; + _bufferOffset = offset; + } + + public bool IsInline => Length <= MaxInlineLength; + +#if NET5_0_OR_GREATER + public ReadOnlySpan Bytes => MemoryMarshal.CreateReadOnlySpan(ref Unsafe.AsRef(_inline[0]), IsInline ? 
Length : PrefixLength); +#else + public unsafe ReadOnlySpan Bytes => new ReadOnlySpan(Unsafe.AsPointer(ref _inline[0]), IsInline ? Length : PrefixLength); +#endif + + public int BufferIndex => IsInline ? -1 : _bufferIndex; + + public int BufferOffset => IsInline ? -1 : _bufferOffset; + + public override int GetHashCode() => Length ^ _prefix ^ _bufferIndex ^ _bufferOffset; + + public override bool Equals(object obj) + { + BinaryView? other = obj as BinaryView?; + return other != null && Equals(other.Value); + } + + public bool Equals(BinaryView other) => + Length == other.Length && _prefix == other._prefix && _bufferIndex == other._bufferIndex && _bufferOffset == other._bufferOffset; + + internal BinaryView AdjustBufferIndex(int bufferOffset) + { + return new BinaryView(Length, _prefix, _bufferIndex + bufferOffset, _bufferOffset); + } + } +} diff --git a/csharp/src/Apache.Arrow/Types/BinaryViewType.cs b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs new file mode 100644 index 0000000000000..f5cfc034dc967 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/BinaryViewType.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +namespace Apache.Arrow.Types +{ + public class BinaryViewType: ArrowType + { + public static readonly BinaryViewType Default = new BinaryViewType(); + + public override ArrowTypeId TypeId => ArrowTypeId.BinaryView; + public override string Name => "binaryview"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/IArrowType.cs b/csharp/src/Apache.Arrow/Types/IArrowType.cs index 5e107813be828..cf520391fe1e6 100644 --- a/csharp/src/Apache.Arrow/Types/IArrowType.cs +++ b/csharp/src/Apache.Arrow/Types/IArrowType.cs @@ -50,6 +50,9 @@ public enum ArrowTypeId FixedSizeList, Duration, RecordBatch, + BinaryView, + StringView, + ListView, } public interface IArrowType diff --git a/csharp/src/Apache.Arrow/Types/ListViewType.cs b/csharp/src/Apache.Arrow/Types/ListViewType.cs new file mode 100644 index 0000000000000..ecf745723c4ae --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/ListViewType.cs @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +namespace Apache.Arrow.Types +{ + public sealed class ListViewType : NestedType + { + public override ArrowTypeId TypeId => ArrowTypeId.ListView; + public override string Name => "listview"; + + public Field ValueField => Fields[0]; + + public IArrowType ValueDataType => Fields[0].DataType; + + public ListViewType(Field valueField) + : base(valueField) { } + + public ListViewType(IArrowType valueDataType) + : this(new Field("item", valueDataType, true)) { } + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/src/Apache.Arrow/Types/StringViewType.cs b/csharp/src/Apache.Arrow/Types/StringViewType.cs new file mode 100644 index 0000000000000..0c539a56b03b5 --- /dev/null +++ b/csharp/src/Apache.Arrow/Types/StringViewType.cs @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
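The BinaryView scalar introduced earlier in this patch (Scalars/BinaryView.cs) is the 16-byte struct that the view arrays and types here are built on. A small sketch of its two shapes, based on the constructors shown above:

using System;
using Apache.Arrow.Scalars;

// Inline view: up to BinaryView.MaxInlineLength (12) bytes live inside the struct.
var inline = new BinaryView(new byte[] { 10, 20, 30, 40, 50 });
Console.WriteLine(inline.IsInline);     // True
Console.WriteLine(inline.Length);       // 5
Console.WriteLine(inline.BufferIndex);  // -1 (no data buffer referenced)

// Out-of-line view: only the length, a 4-byte prefix, a data-buffer index and an
// offset are stored; the 20 value bytes themselves live in variadic data buffer 0.
byte[] prefix = { 1, 2, 3, 4 };
var longView = new BinaryView(20, prefix, bufferIndex: 0, bufferOffset: 128);
Console.WriteLine(longView.IsInline);      // False
Console.WriteLine(longView.BufferOffset);  // 128
Console.WriteLine(longView.Bytes.Length);  // 4 (just the prefix)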
+ + +namespace Apache.Arrow.Types +{ + public sealed class StringViewType : ArrowType + { + public static StringViewType Default = new StringViewType(); + + public override ArrowTypeId TypeId => ArrowTypeId.StringView; + public override string Name => "utf8view"; + + public override void Accept(IArrowTypeVisitor visitor) => Accept(this, visitor); + } +} diff --git a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs index c791c9969356a..f35c2a5d78d79 100644 --- a/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs +++ b/csharp/test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs @@ -38,7 +38,7 @@ public class ArrowWriterBenchmark [GlobalSetup] public void GlobalSetup() { - _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount, false); + _batch = TestData.CreateSampleRecordBatch(BatchLength, ColumnSetCount); _memoryStream = new MemoryStream(); } diff --git a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj index 94ef4b5f3c5f5..dd2c75dd3df90 100644 --- a/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj +++ b/csharp/test/Apache.Arrow.Compression.Tests/Apache.Arrow.Compression.Tests.csproj @@ -8,8 +8,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj index 46d0a59b5d8e1..0e9c02d61977c 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/Apache.Arrow.Flight.Sql.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj index 99c772770d6c6..d38413ba45b3a 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj +++ b/csharp/test/Apache.Arrow.Flight.Tests/Apache.Arrow.Flight.Tests.csproj @@ -7,8 +7,8 @@ - - + + diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index f3fe73588a7bb..31a5676f01315 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -21,6 +21,7 @@ using System.Numerics; using System.Text; using System.Text.Json; +using System.Text.Json.Nodes; using System.Text.Json.Serialization; using System.Threading.Tasks; using Apache.Arrow.Arrays; @@ -175,7 +176,9 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "floatingpoint" => ToFloatingPointArrowType(type), "decimal" => ToDecimalArrowType(type), "binary" => BinaryType.Default, + "binaryview" => BinaryViewType.Default, "utf8" => StringType.Default, + "utf8view" => StringViewType.Default, "fixedsizebinary" => new FixedSizeBinaryType(type.ByteWidth), "date" => ToDateArrowType(type), "time" => ToTimeArrowType(type), @@ -184,6 +187,7 @@ private static IArrowType ToArrowType(JsonArrowType type, Field[] children) "interval_mdn" => ToIntervalArrowType(type), "timestamp" => ToTimestampArrowType(type), "list" => ToListArrowType(type, children), + "listview" => ToListViewArrowType(type, children), "fixedsizelist" => ToFixedSizeListArrowType(type, children), "struct" => ToStructArrowType(type, children), "union" => ToUnionArrowType(type, children), @@ -294,6 +298,11 
@@ private static IArrowType ToListArrowType(JsonArrowType type, Field[] children) return new ListType(children[0]); } + private static IArrowType ToListViewArrowType(JsonArrowType type, Field[] children) + { + return new ListViewType(children[0]); + } + private static IArrowType ToFixedSizeListArrowType(JsonArrowType type, Field[] children) { return new FixedSizeListType(children[0], type.ListSize); @@ -451,9 +460,12 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -652,6 +664,38 @@ public void Visit(StringType type) Array = new StringArray(JsonFieldData.Count, offsetBuffer, valueBuffer, validityBuffer, nullCount); } + public void Visit(StringViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + + // ArrowBuffer viewsBuffer = GetViewsBuffer(); + ArrowBuffer viewsBuffer = ArrowBuffer.Empty; + if (JsonFieldData.Views != null) + { + ArrowBuffer.Builder viewBuilder = new ArrowBuffer.Builder(JsonFieldData.Views.Count); + foreach (JsonView jsonView in JsonFieldData.Views) + { + BinaryView view = (jsonView.BufferIndex == null) ? + new BinaryView(Encoding.UTF8.GetBytes(jsonView.Inlined)) : + new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value); + viewBuilder.Append(view); + } + viewsBuffer = viewBuilder.Build(); + } + + int bufferCount = JsonFieldData.VariadicDataBuffers?.Count ?? 0; + ArrowBuffer[] buffers = new ArrowBuffer[2 + bufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewsBuffer; + for (int i = 0; i < bufferCount; i++) + { + buffers[i + 2] = new ArrowBuffer(Convert.FromHexString(JsonFieldData.VariadicDataBuffers[i])).Clone(); + } + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers); + Array = new StringViewArray(arrayData); + } + public void Visit(BinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -671,6 +715,38 @@ public void Visit(BinaryType type) Array = new BinaryArray(arrayData); } + public void Visit(BinaryViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + + // ArrowBuffer viewsBuffer = GetViewsBuffer(); + ArrowBuffer viewsBuffer = ArrowBuffer.Empty; + if (JsonFieldData.Views != null) + { + ArrowBuffer.Builder viewBuilder = new ArrowBuffer.Builder(JsonFieldData.Views.Count); + foreach (JsonView jsonView in JsonFieldData.Views) + { + BinaryView view = (jsonView.BufferIndex == null) ? + new BinaryView(Convert.FromHexString(jsonView.Inlined)) : + new BinaryView(jsonView.Size, Convert.FromHexString(jsonView.PrefixHex), jsonView.BufferIndex.Value, jsonView.Offset.Value); + viewBuilder.Append(view); + } + viewsBuffer = viewBuilder.Build(); + } + + int bufferCount = JsonFieldData.VariadicDataBuffers?.Count ?? 
0; + ArrowBuffer[] buffers = new ArrowBuffer[2 + bufferCount]; + buffers[0] = validityBuffer; + buffers[1] = viewsBuffer; + for (int i = 0; i < bufferCount; i++) + { + buffers[i + 2] = new ArrowBuffer(Convert.FromHexString(JsonFieldData.VariadicDataBuffers[i])).Clone(); + } + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, buffers); + Array = new BinaryViewArray(arrayData); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -704,6 +780,22 @@ public void Visit(ListType type) Array = new ListArray(arrayData); } + public void Visit(ListViewType type) + { + ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); + ArrowBuffer offsetBuffer = GetOffsetBuffer(); + ArrowBuffer sizeBuffer = GetSizeBuffer(); + + var data = JsonFieldData; + JsonFieldData = data.Children[0]; + type.ValueDataType.Accept(this); + JsonFieldData = data; + + ArrayData arrayData = new ArrayData(type, JsonFieldData.Count, nullCount, 0, + new[] { validityBuffer, offsetBuffer, sizeBuffer }, new[] { Array.Data }); + Array = new ListViewArray(arrayData); + } + public void Visit(FixedSizeListType type) { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -878,11 +970,18 @@ private void GenerateArray(Func valueOffsets = new ArrowBuffer.Builder(JsonFieldData.Offset.Length); - valueOffsets.AppendRange(JsonFieldData.Offset); + ArrowBuffer.Builder valueOffsets = new ArrowBuffer.Builder(JsonFieldData.Offset.Count); + valueOffsets.AppendRange(JsonFieldData.IntOffset); return valueOffsets.Build(default); } + private ArrowBuffer GetSizeBuffer() + { + ArrowBuffer.Builder valueSizes = new ArrowBuffer.Builder(JsonFieldData.Size.Count); + valueSizes.AppendRange(JsonFieldData.IntSize); + return valueSizes.Build(default); + } + private ArrowBuffer GetTypeIdBuffer() { ArrowBuffer.Builder typeIds = new ArrowBuffer.Builder(JsonFieldData.TypeId.Length); @@ -920,10 +1019,61 @@ public class JsonFieldData public string Name { get; set; } public int Count { get; set; } public bool[] Validity { get; set; } - public int[] Offset { get; set; } + public JsonArray Offset { get; set; } + + [JsonPropertyName("SIZE")] + public JsonArray Size { get; set; } public int[] TypeId { get; set; } public JsonElement Data { get; set; } public List Children { get; set; } + + [JsonPropertyName("VIEWS")] + public List Views { get; set; } + + [JsonPropertyName("VARIADIC_DATA_BUFFERS")] + public List VariadicDataBuffers { get; set; } + + [JsonIgnore] + public IEnumerable IntOffset + { + get { return Offset.Select(GetInt); } + } + + [JsonIgnore] + public IEnumerable IntSize + { + get { return Size.Select(GetInt); } + } + + static int GetInt(JsonNode node) + { + try + { + return node.GetValue(); + } + catch + { + return int.Parse(node.GetValue()); + } + } + } + + public class JsonView + { + [JsonPropertyName("SIZE")] + public int Size { get; set; } + + [JsonPropertyName("INLINED")] + public string Inlined { get; set; } + + [JsonPropertyName("PREFIX_HEX")] + public string PrefixHex { get; set; } + + [JsonPropertyName("BUFFER_INDEX")] + public int? BufferIndex { get; set; } + + [JsonPropertyName("OFFSET")] + public int? 
Offset { get; set; } } internal sealed class ValidityConverter : JsonConverter diff --git a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj index fde30a90e6479..0afd1490e7b69 100644 --- a/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj +++ b/csharp/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj @@ -15,8 +15,8 @@ - - + + all runtime; build; native; contentfiles; analyzers diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs index 137dc16d473a4..25ef289f0dc25 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs @@ -64,13 +64,16 @@ private static IEnumerable, IArrowArray>> GenerateTestDa FloatType.Default, DoubleType.Default, BinaryType.Default, + BinaryViewType.Default, StringType.Default, + StringViewType.Default, Date32Type.Default, Date64Type.Default, TimestampType.Default, new Decimal128Type(14, 10), new Decimal256Type(14,10), new ListType(Int64Type.Default), + new ListViewType(Int64Type.Default), new StructType(new List{ new Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(), new Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build() @@ -122,7 +125,9 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -131,6 +136,7 @@ private class TestDataGenerator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, @@ -368,6 +374,34 @@ public void Visit(BinaryType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(BinaryViewType type) + { + BinaryViewArray.Builder resultBuilder = new BinaryViewArray.Builder().Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + BinaryViewArray.Builder builder = new BinaryViewArray.Builder().Reserve(dataList.Count); + + foreach (byte? value in dataList) + { + if (value.HasValue) + { + builder.Append(value.Value); + resultBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(StringType type) { StringArray.Builder resultBuilder = new StringArray.Builder().Reserve(_baseDataTotalElementCount); @@ -388,6 +422,26 @@ public void Visit(StringType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(StringViewType type) + { + StringViewArray.Builder resultBuilder = new StringViewArray.Builder().Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + StringViewArray.Builder builder = new StringViewArray.Builder().Reserve(dataList.Count); + + foreach (string value in dataList.Select(_ => _.ToString() ?? 
null)) + { + builder.Append(value); + resultBuilder.Append(value); + } + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(ListType type) { ListArray.Builder resultBuilder = new ListArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); @@ -423,6 +477,41 @@ public void Visit(ListType type) ExpectedArray = resultBuilder.Build(); } + public void Visit(ListViewType type) + { + ListViewArray.Builder resultBuilder = new ListViewArray.Builder(type.ValueDataType).Reserve(_baseDataTotalElementCount); + Int64Array.Builder resultValueBuilder = (Int64Array.Builder)resultBuilder.ValueBuilder.Reserve(_baseDataTotalElementCount); + + for (int i = 0; i < _baseDataListCount; i++) + { + List dataList = _baseData[i]; + + ListViewArray.Builder builder = new ListViewArray.Builder(type.ValueField).Reserve(dataList.Count); + Int64Array.Builder valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(dataList.Count); + + foreach (long? value in dataList) + { + if (value.HasValue) + { + builder.Append(); + resultBuilder.Append(); + + valueBuilder.Append(value.Value); + resultValueBuilder.Append(value.Value); + } + else + { + builder.AppendNull(); + resultBuilder.AppendNull(); + } + } + + TestTargetArrayList.Add(builder.Build()); + } + + ExpectedArray = resultBuilder.Build(); + } + public void Visit(FixedSizeListType type) { FixedSizeListArray.Builder resultBuilder = new FixedSizeListArray.Builder(type.ValueDataType, type.ListSize).Reserve(_baseDataTotalElementCount); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs index 2aaffe7835258..10315ff287c0b 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs @@ -20,6 +20,7 @@ using System.Threading.Tasks; using Apache.Arrow.Arrays; using Xunit; +using System.Diagnostics; namespace Apache.Arrow.Tests { @@ -90,10 +91,13 @@ private class ArrayComparer : IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, + IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, IArrowArrayVisitor, @@ -136,12 +140,15 @@ public ArrayComparer(IArrowArray expectedArray, bool strictCompare) public void Visit(DayTimeIntervalArray array) => CompareArrays(array); public void Visit(MonthDayNanosecondIntervalArray array) => CompareArrays(array); public void Visit(ListArray array) => CompareArrays(array); + public void Visit(ListViewArray array) => CompareArrays(array); public void Visit(FixedSizeListArray array) => CompareArrays(array); public void Visit(FixedSizeBinaryArray array) => CompareArrays(array); public void Visit(Decimal128Array array) => CompareArrays(array); public void Visit(Decimal256Array array) => CompareArrays(array); public void Visit(StringArray array) => CompareBinaryArrays(array); + public void Visit(StringViewArray array) => CompareVariadicArrays(array); public void Visit(BinaryArray array) => CompareBinaryArrays(array); + public void Visit(BinaryViewArray array) => CompareVariadicArrays(array); public void Visit(StructArray array) { @@ -230,6 +237,32 @@ private void CompareBinaryArrays(BinaryArray actualArray) } } + private void CompareVariadicArrays(BinaryViewArray actualArray) + where T : IArrowArray + { + Assert.IsAssignableFrom(_expectedArray); + Assert.IsAssignableFrom(actualArray); + + var expectedArray = 
(BinaryViewArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + Assert.Equal(expectedArray.Offset, actualArray.Offset); + + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + + Assert.True(expectedArray.Views.SequenceEqual(actualArray.Views)); + + for (int i = 0; i < expectedArray.Length; i++) + { + Assert.True( + expectedArray.GetBytes(i).SequenceEqual(actualArray.GetBytes(i)), + $"BinaryArray values do not match at index {i}."); + } + } + private void CompareArrays(FixedSizeBinaryArray actualArray) { Assert.IsAssignableFrom(_expectedArray); @@ -346,6 +379,34 @@ private void CompareArrays(ListArray actualArray) actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); } + private void CompareArrays(ListViewArray actualArray) + { + Assert.IsAssignableFrom(_expectedArray); + ListViewArray expectedArray = (ListViewArray)_expectedArray; + + actualArray.Data.DataType.Accept(_arrayTypeComparer); + + Assert.Equal(expectedArray.Length, actualArray.Length); + Assert.Equal(expectedArray.NullCount, actualArray.NullCount); + Assert.Equal(expectedArray.Offset, actualArray.Offset); + + CompareValidityBuffer(expectedArray.NullCount, _expectedArray.Length, expectedArray.NullBitmapBuffer, actualArray.NullBitmapBuffer); + + if (_strictCompare) + { + Assert.True(expectedArray.ValueOffsetsBuffer.Span.SequenceEqual(actualArray.ValueOffsetsBuffer.Span)); + Assert.True(expectedArray.SizesBuffer.Span.SequenceEqual(actualArray.SizesBuffer.Span)); + } + else + { + int length = expectedArray.Length * sizeof(int); + Assert.True(expectedArray.ValueOffsetsBuffer.Span.Slice(0, length).SequenceEqual(actualArray.ValueOffsetsBuffer.Span.Slice(0, length))); + Assert.True(expectedArray.SizesBuffer.Span.Slice(0, length).SequenceEqual(actualArray.SizesBuffer.Span.Slice(0, length))); + } + + actualArray.Values.Accept(new ArrayComparer(expectedArray.Values, _strictCompare)); + } + private void CompareArrays(FixedSizeListArray actualArray) { Assert.IsAssignableFrom(_expectedArray); diff --git a/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs new file mode 100644 index 0000000000000..eb617b4dedc75 --- /dev/null +++ b/csharp/test/Apache.Arrow.Tests/BinaryViewTests.cs @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
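[Editorial aside, not part of the patch.] Before the BinaryView tests that follow, here is a minimal sketch of the builder-level API the updated tests exercise for the three new array kinds. Every member used below (Reserve, Append, AppendNull, Build, ValueBuilder, GetBytes) appears in the test code in this diff; the assumption that Build() returns the concrete array type mirrors the existing Binary/String builders.

using System;
using Apache.Arrow;
using Apache.Arrow.Types;

class ViewArraySketch
{
    static void Main()
    {
        // StringViewArray: strings of 12 bytes or less are stored inline in the
        // views buffer, longer ones go into variadic data buffers.
        var stringBuilder = new StringViewArray.Builder().Reserve(3);
        stringBuilder.Append("length=ten");
        stringBuilder.Append("length=tenlength=ten");
        stringBuilder.AppendNull();
        StringViewArray strings = stringBuilder.Build();
        Console.WriteLine($"{strings.Length} strings, {strings.NullCount} null");

        // BinaryViewArray: same pattern for raw bytes.
        var binaryBuilder = new BinaryViewArray.Builder().Reserve(2);
        ReadOnlySpan<byte> payload = new byte[] { 1, 2, 3, 4 };
        binaryBuilder.Append(payload);
        binaryBuilder.AppendNull();
        BinaryViewArray binaries = binaryBuilder.Build();
        Console.WriteLine($"first value has {binaries.GetBytes(0).Length} bytes");

        // ListViewArray: each Append() opens a list view whose elements are fed
        // through the child ValueBuilder, as in the concatenator tests above.
        var listBuilder = new ListViewArray.Builder(Int64Type.Default).Reserve(2);
        var valueBuilder = (Int64Array.Builder)listBuilder.ValueBuilder.Reserve(2);
        listBuilder.Append();
        valueBuilder.Append(1L);
        listBuilder.AppendNull();
        ListViewArray lists = listBuilder.Build();
        Console.WriteLine($"{lists.Length} lists, {lists.NullCount} null");
    }
}

Under the hood each of these arrays carries a validity buffer plus either a views buffer and any number of variadic data buffers (binary/string views) or offsets and sizes buffers (list views), which is the same ArrayData layout the JSON integration reader above assembles.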
+ +using System; +using Apache.Arrow.Scalars; +using Xunit; + +namespace Apache.Arrow.Tests +{ + public class BinaryViewTests + { + private static readonly byte[] empty = new byte[0]; + private static readonly byte[] oneByte = new byte[1]; + private static readonly byte[] fourBytes = new byte[] { 1, 2, 3, 4 }; + private static readonly byte[] fiveBytes = new byte[] { 5, 4, 3, 2, 1 }; + private static readonly byte[] twelveBytes = new byte[] { 1, 2, 3, 4, 8, 7, 6, 5, 9, 10, 11, 12 }; + private static readonly byte[] thirteenBytes = new byte[13]; + + [Fact] + public void Equality() + { + BinaryView one = new BinaryView(oneByte); + BinaryView four = new BinaryView(fourBytes); + BinaryView twelve = new BinaryView(twelveBytes); + BinaryView twelvePlus = new BinaryView(13, fourBytes, 0, 0); + Assert.Equal(one, one); + Assert.NotEqual(one, four); + Assert.NotEqual(four, twelve); + Assert.NotEqual(four, twelvePlus); + } + + [Fact] + public void ConstructorThrows() + { + Assert.Throws(() => new BinaryView(thirteenBytes)); + Assert.Throws(() => new BinaryView(20, empty, 0, 0)); + Assert.Throws(() => new BinaryView(20, fiveBytes, 0, 0)); + Assert.Throws(() => new BinaryView(13, thirteenBytes, 0, 0)); + Assert.Throws(() => new BinaryView(4, fourBytes, 0, 0)); + } + + [Fact] + public void ConstructInline() + { + BinaryView zero = new BinaryView(empty); + Assert.Equal(-1, zero.BufferIndex); + Assert.Equal(-1, zero.BufferOffset); + Assert.Equal(0, zero.Length); + Assert.Equal(0, zero.Bytes.Length); + + BinaryView one = new BinaryView(oneByte); + Assert.Equal(-1, one.BufferIndex); + Assert.Equal(-1, one.BufferOffset); + Assert.Equal(1, one.Length); + Assert.Equal(1, one.Bytes.Length); + Assert.Equal((byte)0, one.Bytes[0]); + + BinaryView twelve = new BinaryView(twelveBytes); + Assert.Equal(-1, one.BufferIndex); + Assert.Equal(-1, one.BufferOffset); + Assert.Equal(12, twelve.Length); + Assert.Equal(12, twelve.Bytes.Length); + Assert.Equal((byte)8, twelve.Bytes[4]); + } + + [Fact] + public void ConstructPrefix() + { + BinaryView four = new BinaryView(14, fourBytes, 2, 3); + Assert.Equal(2, four.BufferIndex); + Assert.Equal(3, four.BufferOffset); + Assert.Equal(14, four.Length); + Assert.Equal(4, four.Bytes.Length); + Assert.Equal((byte)2, four.Bytes[1]); + } + } +} diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs index 83902d8d93c70..274434e4bab09 100644 --- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs +++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs @@ -741,7 +741,9 @@ public unsafe void ExportBatch() [SkippableFact] public unsafe void RoundTripTestBatch() { - RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, createDictionaryArray: true); + // TODO: Enable these once this the version of pyarrow referenced during testing supports them + HashSet unsupported = new HashSet { ArrowTypeId.ListView, ArrowTypeId.BinaryView, ArrowTypeId.StringView }; + RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, excludedTypes: unsupported); RecordBatch batch2 = batch1.Clone(); CArrowArray* cExportArray = CArrowArray.Create(); diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs index d52b514e092d9..83c88265d172b 100644 --- a/csharp/test/Apache.Arrow.Tests/TableTests.cs +++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs @@ -62,7 +62,11 @@ public void TestTableFromRecordBatches() Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema, 
recordBatches); Assert.Equal(20, table1.RowCount); - Assert.Equal(30, table1.ColumnCount); +#if NET5_0_OR_GREATER + Assert.Equal(35, table1.ColumnCount); +#else + Assert.Equal(34, table1.ColumnCount); +#endif Assert.Equal("ChunkedArray: Length=20, DataType=list", table1.Column(0).Data.ToString()); FixedSizeBinaryType type = new FixedSizeBinaryType(17); diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs index b43321abd7499..29ddef2864862 100644 --- a/csharp/test/Apache.Arrow.Tests/TestData.cs +++ b/csharp/test/Apache.Arrow.Tests/TestData.cs @@ -24,53 +24,66 @@ namespace Apache.Arrow.Tests { public static class TestData { - public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray = true) + public static RecordBatch CreateSampleRecordBatch(int length, bool createDictionaryArray) { - return CreateSampleRecordBatch(length, columnSetCount: 1, createDictionaryArray); + HashSet excluded = createDictionaryArray ? null : new HashSet { ArrowTypeId.Dictionary }; + return CreateSampleRecordBatch(length, columnSetCount: 1, excluded); } - public static RecordBatch CreateSampleRecordBatch(int length, int columnSetCount, bool createAdvancedTypeArrays) + public static RecordBatch CreateSampleRecordBatch( + int length, + int columnSetCount = 1, + HashSet excludedTypes = null) { Schema.Builder builder = new Schema.Builder(); - for (int i = 0; i < columnSetCount; i++) + + void AddField(Field field) { - builder.Field(CreateField(new ListType(Int64Type.Default), i)); - builder.Field(CreateField(BooleanType.Default, i)); - builder.Field(CreateField(UInt8Type.Default, i)); - builder.Field(CreateField(Int8Type.Default, i)); - builder.Field(CreateField(UInt16Type.Default, i)); - builder.Field(CreateField(Int16Type.Default, i)); - builder.Field(CreateField(UInt32Type.Default, i)); - builder.Field(CreateField(Int32Type.Default, i)); - builder.Field(CreateField(UInt64Type.Default, i)); - builder.Field(CreateField(Int64Type.Default, i)); - builder.Field(CreateField(FloatType.Default, i)); - builder.Field(CreateField(DoubleType.Default, i)); - builder.Field(CreateField(Date32Type.Default, i)); - builder.Field(CreateField(Date64Type.Default, i)); - builder.Field(CreateField(Time32Type.Default, i)); - builder.Field(CreateField(Time64Type.Default, i)); - builder.Field(CreateField(TimestampType.Default, i)); - builder.Field(CreateField(StringType.Default, i)); - builder.Field(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); - builder.Field(CreateField(new Decimal128Type(10, 6), i)); - builder.Field(CreateField(new Decimal256Type(16, 8), i)); - builder.Field(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); - builder.Field(CreateField(IntervalType.YearMonth, i)); - builder.Field(CreateField(IntervalType.DayTime, i)); - builder.Field(CreateField(IntervalType.MonthDayNanosecond, i)); - - if (createAdvancedTypeArrays) + if (excludedTypes == null || !excludedTypes.Contains(field.DataType.TypeId)) { - builder.Field(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); - builder.Field(CreateField(new FixedSizeBinaryType(16), i)); - builder.Field(CreateField(new FixedSizeListType(Int32Type.Default, 3), i)); - builder.Field(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); - builder.Field(CreateField(new UnionType(new[] { 
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); + builder.Field(field); } + } - //builder.Field(CreateField(HalfFloatType.Default)); - //builder.Field(CreateField(StringType.Default)); + for (int i = 0; i < columnSetCount; i++) + { + AddField(CreateField(new ListType(Int64Type.Default), i)); + AddField(CreateField(new ListViewType(Int64Type.Default), i)); + AddField(CreateField(BooleanType.Default, i)); + AddField(CreateField(UInt8Type.Default, i)); + AddField(CreateField(Int8Type.Default, i)); + AddField(CreateField(UInt16Type.Default, i)); + AddField(CreateField(Int16Type.Default, i)); + AddField(CreateField(UInt32Type.Default, i)); + AddField(CreateField(Int32Type.Default, i)); + AddField(CreateField(UInt64Type.Default, i)); + AddField(CreateField(Int64Type.Default, i)); +#if NET5_0_OR_GREATER + AddField(CreateField(HalfFloatType.Default, i)); +#endif + AddField(CreateField(FloatType.Default, i)); + AddField(CreateField(DoubleType.Default, i)); + AddField(CreateField(Date32Type.Default, i)); + AddField(CreateField(Date64Type.Default, i)); + AddField(CreateField(Time32Type.Default, i)); + AddField(CreateField(Time64Type.Default, i)); + AddField(CreateField(TimestampType.Default, i)); + AddField(CreateField(StringType.Default, i)); + AddField(CreateField(StringViewType.Default, i)); + AddField(CreateField(new StructType(new List { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }), i)); + AddField(CreateField(new Decimal128Type(10, 6), i)); + AddField(CreateField(new Decimal256Type(16, 8), i)); + AddField(CreateField(new MapType(StringType.Default, Int32Type.Default), i)); + AddField(CreateField(IntervalType.YearMonth, i)); + AddField(CreateField(IntervalType.DayTime, i)); + AddField(CreateField(IntervalType.MonthDayNanosecond, i)); + AddField(CreateField(BinaryType.Default, i)); + AddField(CreateField(BinaryViewType.Default, i)); + AddField(CreateField(new FixedSizeBinaryType(16), i)); + AddField(CreateField(new FixedSizeListType(Int32Type.Default, 3), i)); + AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Sparse), i)); + AddField(CreateField(new UnionType(new[] { CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[] { 0, 1 }, UnionMode.Dense), -i)); + AddField(CreateField(new DictionaryType(Int32Type.Default, StringType.Default, false), i)); } Schema schema = builder.Build(); @@ -130,16 +143,23 @@ private class ArrayCreator : IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, + IArrowTypeVisitor, + IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, IArrowTypeVisitor, +#if NET5_0_OR_GREATER + IArrowTypeVisitor, +#endif IArrowTypeVisitor { private int Length { get; } @@ -160,6 +180,9 @@ public ArrayCreator(int length) public void Visit(UInt32Type type) => GenerateArray(new UInt32Array.Builder(), x => (uint)x); public void Visit(UInt64Type type) => GenerateArray(new UInt64Array.Builder(), x => (ulong)x); public void Visit(FloatType type) => GenerateArray(new FloatArray.Builder(), x => ((float)x / Length)); +#if NET5_0_OR_GREATER + public void Visit(HalfFloatType type) => GenerateArray(new HalfFloatArray.Builder(), x => ((Half)x / (Half)Length)); +#endif public void Visit(DoubleType type) => 
GenerateArray(new DoubleArray.Builder(), x => ((double)x / Length)); public void Visit(Decimal128Type type) { @@ -277,6 +300,30 @@ public void Visit(StringType type) Array = builder.Build(); } + public void Visit(StringViewType type) + { + var str = "length=ten"; + var builder = new StringViewArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(str); + break; + case 2: + builder.Append(str + str); + break; + } + } + + Array = builder.Build(); + } + public void Visit(ListType type) { var builder = new ListArray.Builder(type.ValueField).Reserve(Length); @@ -294,6 +341,23 @@ public void Visit(ListType type) Array = builder.Build(); } + public void Visit(ListViewType type) + { + var builder = new ListViewArray.Builder(type.ValueField).Reserve(Length); + + var valueBuilder = (Int64Array.Builder)builder.ValueBuilder.Reserve(Length + 1); + + for (var i = 0; i < Length; i++) + { + builder.Append(); + valueBuilder.Append(i); + } + //Add a value to check if Values.Length can exceed ListArray.Length + valueBuilder.Append(0); + + Array = builder.Build(); + } + public void Visit(FixedSizeListType type) { var builder = new FixedSizeListArray.Builder(type.ValueField, type.ListSize).Reserve(Length); @@ -411,6 +475,64 @@ public void Visit(DictionaryType type) Array = new DictionaryArray(type, indicesBuilder.Build(), valueBuilder.Build()); } + public void Visit(BinaryType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var builder = new BinaryArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(shortData); + break; + case 2: + builder.Append(longData); + break; + } + } + + Array = builder.Build(); + } + + public void Visit(BinaryViewType type) + { + ReadOnlySpan shortData = new[] { (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9 }; + ReadOnlySpan longData = new[] + { + (byte)0, (byte)1, (byte)2, (byte)3, (byte)4, (byte)5, (byte)6, (byte)7, (byte)8, (byte)9, + (byte)10, (byte)11, (byte)12, (byte)13, (byte)14, (byte)15, (byte)16, (byte)17, (byte)18, (byte)19 + }; + var builder = new BinaryViewArray.Builder(); + + for (var i = 0; i < Length; i++) + { + switch (i % 3) + { + case 0: + builder.AppendNull(); + break; + case 1: + builder.Append(shortData); + break; + case 2: + builder.Append(longData); + break; + } + } + + Array = builder.Build(); + } + public void Visit(FixedSizeBinaryType type) { ArrowBuffer.Builder valueBuilder = new ArrowBuffer.Builder(); diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py index 2bbc843836af9..230ec5b3effff 100644 --- a/dev/archery/archery/integration/datagen.py +++ b/dev/archery/archery/integration/datagen.py @@ -1932,13 +1932,12 @@ def _temp_path(): .skip_tester('Rust'), generate_binary_view_case() - .skip_tester('C#') .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), generate_list_view_case() - .skip_tester('C#') + .skip_tester('C#') # Doesn't support large list views .skip_tester('Java') .skip_tester('JS') .skip_tester('Rust'), diff --git 
a/docs/source/format/CDataInterface/PyCapsuleInterface.rst b/docs/source/format/CDataInterface/PyCapsuleInterface.rst index 0c1a01d7c6778..03095aa2e9356 100644 --- a/docs/source/format/CDataInterface/PyCapsuleInterface.rst +++ b/docs/source/format/CDataInterface/PyCapsuleInterface.rst @@ -16,6 +16,8 @@ .. under the License. +.. _arrow-pycapsule-interface: + ============================= The Arrow PyCapsule Interface ============================= diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index ee92cebcb549c..b7261005e66ee 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -21,6 +21,38 @@ Extending pyarrow ================= +Controlling conversion to (Py)Arrow with the PyCapsule Interface +---------------------------------------------------------------- + +The :ref:`Arrow C data interface ` allows moving Arrow data between +different implementations of Arrow. This is a generic, cross-language interface not +specific to Python, but for Python libraries this interface is extended with a Python +specific layer: :ref:`arrow-pycapsule-interface`. + +This Python interface ensures that different libraries that support the C Data interface +can export Arrow data structures in a standard way and recognize each other's objects. + +If you have a Python library providing data structures that hold Arrow-compatible data +under the hood, you can implement the following methods on those objects: + +- ``__arrow_c_schema__`` for schema or type-like objects. +- ``__arrow_c_array__`` for arrays and record batches (contiguous tables). +- ``__arrow_c_stream__`` for chunked tables or streams of data. + +Those methods return `PyCapsule `__ +objects, and more details on the exact semantics can be found in the +:ref:`specification `. + +When your data structures have those methods defined, the PyArrow constructors +(such as :func:`pyarrow.array` or :func:`pyarrow.table`) will recognize those objects as +supporting this protocol, and convert them to PyArrow data structures zero-copy. The +same can be true for any other library that supports this protocol when ingesting data. + +Similarly, if your library has functions that accept user-provided data, you can add +support for this protocol by checking for the presence of those methods, and +therefore accept any Arrow data (instead of hardcoding support for a specific +Arrow producer such as PyArrow). + .. _arrow_array_protocol: Controlling conversion to pyarrow.Array with the ``__arrow_array__`` protocol diff --git a/docs/source/python/parquet.rst b/docs/source/python/parquet.rst index 85a9674a689ca..d4717897660b6 100644 --- a/docs/source/python/parquet.rst +++ b/docs/source/python/parquet.rst @@ -511,36 +511,20 @@ from a remote filesystem into a pandas dataframe you may need to run ``sort_index`` to maintain row ordering (as long as the ``preserve_index`` option was enabled on write). -.. note:: - - The ParquetDataset is being reimplemented based on the new generic Dataset - API (see the :ref:`dataset` docs for an overview). This is not yet the - default, but can already be enabled by passing the ``use_legacy_dataset=False`` - keyword to :class:`ParquetDataset` or :func:`read_table`:: - - pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) - - Enabling this gives the following new features: - - - Filtering on all columns (using row group statistics) instead of only on - the partition keys.
- - More fine-grained partitioning: support for a directory partitioning scheme - in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of - "/year=2019/month=11/day=15/"), and the ability to specify a schema for - the partition keys. - - General performance improvement and bug fixes. +Other features: - It also has the following changes in behaviour: +- Filtering on all columns (using row group statistics) instead of only on + the partition keys. +- Fine-grained partitioning: support for a directory partitioning scheme + in addition to the Hive-like partitioning (e.g. "/2019/11/15/" instead of + "/year=2019/month=11/day=15/"), and the ability to specify a schema for + the partition keys. - - The partition keys need to be explicitly included in the ``columns`` - keyword when you want to include them in the result while reading a - subset of the columns +Note: - This new implementation is already enabled in ``read_table``, and in the - future, this will be turned on by default for ``ParquetDataset``. The new - implementation does not yet cover all existing ParquetDataset features (e.g. - specifying the ``metadata``, or the ``pieces`` property API). Feedback is - very welcome. +- The partition keys need to be explicitly included in the ``columns`` + keyword when you want to include them in the result while reading a + subset of the columns Using with Spark diff --git a/docs/source/status.rst b/docs/source/status.rst index e860aceb76e15..03a87012342c2 100644 --- a/docs/source/status.rst +++ b/docs/source/status.rst @@ -68,9 +68,13 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large Utf8 | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| Binary View | ✓ | | ✓ | | | | | | +| Binary View | ✓ | | ✓ | | ✓ | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| String View | ✓ | | ✓ | | | | | | +| Large Binary View | ✓ | | ✓ | | | | | | ++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Utf8 View | ✓ | | ✓ | | ✓ | | | | ++-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +| Large Utf8 View | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ @@ -83,7 +87,7 @@ Data Types +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large List | ✓ | ✓ | ✓ | | | ✓ | ✓ | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ -| List View | ✓ | | ✓ | | | | | | +| List View | ✓ | | ✓ | | ✓ | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ | Large List View | ✓ | | ✓ | | | | | | +-------------------+-------+-------+-------+------------+-------+-------+-------+-------+ diff --git a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java index daa35b7e15be6..e8b780638e2c1 100644 --- a/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java +++ b/java/compression/src/main/java/org/apache/arrow/compression/Lz4CompressionCodec.java @@ -79,6 +79,7 @@ protected ArrowBuf doDecompress(BufferAllocator allocator, ArrowBuf compressedBu 
byte[] outBytes = out.toByteArray(); ArrowBuf decompressedBuffer = allocator.buffer(outBytes.length); decompressedBuffer.setBytes(/*index=*/0, outBytes); + decompressedBuffer.writerIndex(decompressedLength); return decompressedBuffer; } diff --git a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java index 403130edba52e..01156fa2b0e0b 100644 --- a/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java +++ b/java/compression/src/test/java/org/apache/arrow/compression/TestCompressionCodec.java @@ -117,6 +117,12 @@ private List deCompressBuffers(CompressionCodec codec, List return outputBuffers; } + private void assertWriterIndex(List decompressedBuffers) { + for (ArrowBuf decompressedBuf : decompressedBuffers) { + assertTrue(decompressedBuf.writerIndex() > 0); + } + } + @ParameterizedTest @MethodSource("codecs") void testCompressFixedWidthBuffers(int vectorLength, CompressionCodec codec) throws Exception { @@ -139,6 +145,7 @@ void testCompressFixedWidthBuffers(int vectorLength, CompressionCodec codec) thr List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); assertEquals(2, decompressedBuffers.size()); + assertWriterIndex(decompressedBuffers); // orchestrate new vector IntVector newVec = new IntVector("new vec", allocator); @@ -180,6 +187,7 @@ void testCompressVariableWidthBuffers(int vectorLength, CompressionCodec codec) List decompressedBuffers = deCompressBuffers(codec, compressedBuffers); assertEquals(3, decompressedBuffers.size()); + assertWriterIndex(decompressedBuffers); // orchestrate new vector VarCharVector newVec = new VarCharVector("new vec", allocator); diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java index d50dc385a62e1..ffb0048181c7c 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtils.java @@ -115,6 +115,16 @@ static KeyStore getKeyStoreInstance(String instance) return keyStore; } + @VisibleForTesting + static KeyStore getDefaultKeyStoreInstance(String password) + throws KeyStoreException, CertificateException, NoSuchAlgorithmException, IOException { + try (InputStream fileInputStream = getKeystoreInputStream()) { + KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); + keyStore.load(fileInputStream, password == null ? 
null : password.toCharArray()); + return keyStore; + } + } + static String getOperatingSystem() { return System.getProperty("os.name"); } @@ -156,16 +166,9 @@ public static InputStream getCertificateInputStreamFromSystem(String password) t keyStoreList.add(getKeyStoreInstance("Windows-MY")); } else if (isMac()) { keyStoreList.add(getKeyStoreInstance("KeychainStore")); + keyStoreList.add(getDefaultKeyStoreInstance(password)); } else { - try (InputStream fileInputStream = getKeystoreInputStream()) { - KeyStore keyStore = KeyStore.getInstance(KeyStore.getDefaultType()); - if (password == null) { - keyStore.load(fileInputStream, null); - } else { - keyStore.load(fileInputStream, password.toCharArray()); - } - keyStoreList.add(keyStore); - } + keyStoreList.add(getDefaultKeyStoreInstance(password)); } return getCertificatesInputStream(keyStoreList); diff --git a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java index 27bba64587367..b7977462e9c01 100644 --- a/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java +++ b/java/flight/flight-sql-jdbc-core/src/test/java/org/apache/arrow/driver/jdbc/client/utils/ClientAuthenticationUtilsTest.java @@ -77,6 +77,33 @@ public void testGetKeyStoreInstance() throws IOException, } } + @Test + public void testGetDefaultKeyStoreInstancePassword() throws IOException, + KeyStoreException, CertificateException, NoSuchAlgorithmException { + try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { + + keyStoreMockedStatic + .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit")) + .thenReturn(keyStoreMock); + KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance("changeit"); + Assert.assertEquals(receiveKeyStore, keyStoreMock); + } + } + + @Test + public void testGetDefaultKeyStoreInstanceNoPassword() throws IOException, + KeyStoreException, CertificateException, NoSuchAlgorithmException { + try (MockedStatic keyStoreMockedStatic = Mockito.mockStatic(KeyStore.class)) { + + keyStoreMockedStatic + .when(() -> ClientAuthenticationUtils.getDefaultKeyStoreInstance(null)) + .thenReturn(keyStoreMock); + KeyStore receiveKeyStore = ClientAuthenticationUtils.getDefaultKeyStoreInstance(null); + Assert.assertEquals(receiveKeyStore, keyStoreMock); + } + } + + @Test public void testGetCertificateInputStreamFromMacSystem() throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { @@ -90,11 +117,18 @@ public void testGetCertificateInputStreamFromMacSystem() throws IOException, keyStoreMockedStatic.when(() -> ClientAuthenticationUtils .getKeyStoreInstance("KeychainStore")) .thenReturn(keyStoreMock); + keyStoreMockedStatic.when(() -> ClientAuthenticationUtils + .getDefaultKeyStoreInstance("changeit")) + .thenReturn(keyStoreMock); + clientAuthenticationUtilsMockedStatic + .when(ClientAuthenticationUtils::getKeystoreInputStream) + .thenCallRealMethod(); + keyStoreMockedStatic.when(KeyStore::getDefaultType).thenCallRealMethod(); keyStoreMockedStatic.when(() -> ClientAuthenticationUtils .getCertificatesInputStream(Mockito.any())) .thenReturn(mock); - InputStream inputStream = ClientAuthenticationUtils.getCertificateInputStreamFromSystem("test"); + InputStream inputStream = 
ClientAuthenticationUtils.getCertificateInputStreamFromSystem("changeit"); Assert.assertEquals(inputStream, mock); } } @@ -136,9 +170,11 @@ public void testGetCertificateInputStreamFromLinuxSystem() throws IOException, setOperatingSystemMock(clientAuthenticationUtilsMockedStatic, false, false); keyStoreMockedStatic.when(() -> ClientAuthenticationUtils - .getCertificatesInputStream(Mockito.any())) + .getCertificatesInputStream(Mockito.any())) .thenReturn(mock); - + keyStoreMockedStatic.when(() -> ClientAuthenticationUtils + .getDefaultKeyStoreInstance(Mockito.any())) + .thenReturn(keyStoreMock); clientAuthenticationUtilsMockedStatic .when(ClientAuthenticationUtils::getKeystoreInputStream) .thenCallRealMethod(); diff --git a/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java b/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java index 06c6669cfd162..ba9aba353c351 100644 --- a/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java +++ b/java/memory/memory-netty/src/main/java/io/netty/buffer/PooledByteBufAllocatorL.java @@ -71,7 +71,7 @@ public UnsafeDirectLittleEndian allocate(long size) { } public int getChunkSize() { - return allocator.chunkSize; + return allocator.chunkSize(); } public long getHugeBufferSize() { @@ -137,7 +137,6 @@ private class InnerAllocator extends PooledByteBufAllocator { private final PoolArena[] directArenas; private final MemoryStatusThread statusThread; - private final int chunkSize; public InnerAllocator() { super(true); @@ -150,8 +149,6 @@ public InnerAllocator() { throw new RuntimeException("Failure while initializing allocator. Unable to retrieve direct arenas field.", e); } - this.chunkSize = directArenas[0].chunkSize; - if (memoryLogger.isTraceEnabled()) { statusThread = new MemoryStatusThread(this); statusThread.start(); @@ -166,7 +163,7 @@ private UnsafeDirectLittleEndian newDirectBufferL(int initialCapacity, int maxCa if (directArena != null) { - if (initialCapacity > directArena.chunkSize) { + if (initialCapacity > chunkSize()) { // This is beyond chunk size so we'll allocate separately. 
ByteBuf buf = UnpooledByteBufAllocator.DEFAULT.directBuffer(initialCapacity, maxCapacity); diff --git a/java/performance/pom.xml b/java/performance/pom.xml index a3e4da85b4321..4d449af46b6b1 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -199,7 +199,7 @@ maven-resources-plugin - 2.6 + 3.3.1 maven-site-plugin @@ -211,7 +211,7 @@ maven-surefire-plugin - 3.0.0-M7 + 3.2.3 diff --git a/java/pom.xml b/java/pom.xml index 75e0946f10811..523e5642720cd 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -33,7 +33,7 @@ 5.10.1 2.0.9 32.1.3-jre - 4.1.100.Final + 4.1.104.Final 1.60.0 3.23.1 2.16.0 @@ -412,7 +412,7 @@ org.apache.maven.plugins maven-resources-plugin - 2.6 + 3.3.1 org.apache.maven.plugins @@ -442,7 +442,7 @@ maven-surefire-plugin - 3.0.0-M7 + 3.2.3 org.junit.jupiter @@ -609,7 +609,7 @@ org.assertj assertj-core - 3.23.1 + 3.24.2 test diff --git a/js/src/builder.ts b/js/src/builder.ts index a4e2d4d89325c..1880db3818ca5 100644 --- a/js/src/builder.ts +++ b/js/src/builder.ts @@ -342,7 +342,7 @@ export abstract class Builder { export abstract class FixedWidthBuilder extends Builder { constructor(opts: BuilderOptions) { super(opts); - this._values = new DataBufferBuilder(new this.ArrayType(0), this.stride); + this._values = new DataBufferBuilder(this.ArrayType, 0, this.stride); } public setValue(index: number, value: T['TValue']) { const values = this._values; diff --git a/js/src/builder/binary.ts b/js/src/builder/binary.ts index 3c12ddf34abb0..fa9a11b24ec39 100644 --- a/js/src/builder/binary.ts +++ b/js/src/builder/binary.ts @@ -16,15 +16,15 @@ // under the License. import { Binary } from '../type.js'; -import { toUint8Array } from '../util/buffer.js'; import { BufferBuilder } from './buffer.js'; import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; +import { toUint8Array } from '../util/buffer.js'; /** @ignore */ export class BinaryBuilder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/buffer.ts b/js/src/builder/buffer.ts index 402172059682c..18c6dcda738b9 100644 --- a/js/src/builder/buffer.ts +++ b/js/src/builder/buffer.ts @@ -24,20 +24,36 @@ function roundLengthUpToNearest64Bytes(len: number, BPE: number) { const bytesMinus1 = Math.ceil(len) * BPE - 1; return ((bytesMinus1 - bytesMinus1 % 64 + 64) || 64) / BPE; } + /** @ignore */ -const sliceOrExtendArray = (arr: T, len = 0) => ( - arr.length >= len ? arr.subarray(0, len) : memcpy(new (arr.constructor as any)(len), arr, 0) -) as T; +function resizeArray(arr: T, len = 0): T { + // TODO: remove when https://github.com/microsoft/TypeScript/issues/54636 is fixed + const buffer = arr.buffer as ArrayBufferLike & { resizable: boolean; resize: (byteLength: number) => void; maxByteLength: number }; + const byteLength = len * arr.BYTES_PER_ELEMENT; + if (buffer.resizable && byteLength <= buffer.maxByteLength) { + buffer.resize(byteLength); + return arr; + } + + // Fallback for non-resizable buffers + return arr.length >= len ? 
+ arr.subarray(0, len) as T : + memcpy(new (arr.constructor as any)(len), arr, 0); +} + +/** @ignore */ +export const SAFE_ARRAY_SIZE = 2 ** 32 - 1; /** @ignore */ export class BufferBuilder { - constructor(buffer: T, stride = 1) { - this.buffer = buffer; + constructor(bufferType: ArrayCtor, initialSize = 0, stride = 1) { + this.length = Math.ceil(initialSize / stride); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new bufferType(new (ArrayBuffer as any)(this.length * bufferType.BYTES_PER_ELEMENT, { maxByteLength: SAFE_ARRAY_SIZE })) as T; this.stride = stride; - this.BYTES_PER_ELEMENT = buffer.BYTES_PER_ELEMENT; - this.ArrayType = buffer.constructor as ArrayCtor; - this._resize(this.length = Math.ceil(buffer.length / stride)); + this.BYTES_PER_ELEMENT = bufferType.BYTES_PER_ELEMENT; + this.ArrayType = bufferType; } public buffer: T; @@ -72,17 +88,18 @@ export class BufferBuilder { } public flush(length = this.length) { length = roundLengthUpToNearest64Bytes(length * this.stride, this.BYTES_PER_ELEMENT); - const array = sliceOrExtendArray(this.buffer, length); + const array = resizeArray(this.buffer, length); this.clear(); return array; } public clear() { this.length = 0; - this._resize(0); + // TODO: remove as any when https://github.com/microsoft/TypeScript/issues/54636 is fixed + this.buffer = new this.ArrayType(new (ArrayBuffer as any)(0, { maxByteLength: SAFE_ARRAY_SIZE })) as T; return this; } protected _resize(newLength: number) { - return this.buffer = memcpy(new this.ArrayType(newLength), this.buffer); + return this.buffer = resizeArray(this.buffer, newLength); } } @@ -100,7 +117,7 @@ export class DataBufferBuilder extends Buffe /** @ignore */ export class BitmapBufferBuilder extends DataBufferBuilder { - constructor(data = new Uint8Array(0)) { super(data, 1 / 8); } + constructor() { super(Uint8Array, 0, 1 / 8); } public numValid = 0; public get numInvalid() { return this.length - this.numValid; } @@ -123,9 +140,8 @@ export class BitmapBufferBuilder extends DataBufferBuilder { /** @ignore */ export class OffsetsBufferBuilder extends DataBufferBuilder { constructor(type: T) { - super(new type.OffsetArrayType(1), 1); + super(type.OffsetArrayType as ArrayCtor, 1, 1); } - public append(value: T['TOffsetArray'][0]) { return this.set(this.length - 1, value); } diff --git a/js/src/builder/largeutf8.ts b/js/src/builder/largeutf8.ts index 51890100095c1..90a0bde9f3443 100644 --- a/js/src/builder/largeutf8.ts +++ b/js/src/builder/largeutf8.ts @@ -25,7 +25,7 @@ import { LargeBinaryBuilder } from './largebinary.js'; export class LargeUtf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/builder/union.ts b/js/src/builder/union.ts index ac8a13191a549..7bee460a77de1 100644 --- a/js/src/builder/union.ts +++ b/js/src/builder/union.ts @@ -31,7 +31,7 @@ export abstract class UnionBuilder extends Builder constructor(options: UnionBuilderOptions) { super(options); - this._typeIds = new DataBufferBuilder(new Int8Array(0), 1); + this._typeIds = new DataBufferBuilder(Int8Array, 0, 1); if (typeof options['valueToChildTypeId'] === 'function') { this._valueToChildTypeId = options['valueToChildTypeId']; } @@ -84,7 +84,7 @@ export class DenseUnionBuilder extends UnionB constructor(options: 
UnionBuilderOptions) { super(options); - this._offsets = new DataBufferBuilder(new Int32Array(0)); + this._offsets = new DataBufferBuilder(Int32Array); } /** @ignore */ diff --git a/js/src/builder/utf8.ts b/js/src/builder/utf8.ts index 53b8306cbaffd..aac0aec54fe90 100644 --- a/js/src/builder/utf8.ts +++ b/js/src/builder/utf8.ts @@ -25,7 +25,7 @@ import { VariableWidthBuilder, BuilderOptions } from '../builder.js'; export class Utf8Builder extends VariableWidthBuilder { constructor(opts: BuilderOptions) { super(opts); - this._values = new BufferBuilder(new Uint8Array(0)); + this._values = new BufferBuilder(Uint8Array); } public get byteLength(): number { let size = this._pendingLength + (this.length * 4); diff --git a/js/src/type.ts b/js/src/type.ts index dea5301aed355..ae3aefa025999 100644 --- a/js/src/type.ts +++ b/js/src/type.ts @@ -79,7 +79,11 @@ export abstract class DataTypeType.NONE; } + declare public readonly typeId: TType; + + constructor(typeId: TType) { + this.typeId = typeId; + } protected static [Symbol.toStringTag] = ((proto: DataType) => { (proto).children = null; @@ -93,8 +97,10 @@ export abstract class DataType { TArray: void; TValue: null } /** @ignore */ export class Null extends DataType { + constructor() { + super(Type.Null); + } public toString() { return `Null`; } - public get typeId() { return Type.Null as Type.Null; } protected static [Symbol.toStringTag] = ((proto: Null) => proto[Symbol.toStringTag] = 'Null')(Null.prototype); } @@ -119,9 +125,8 @@ interface Int_ extends DataType { TArray: IType[T]['TA class Int_ extends DataType { constructor(public readonly isSigned: IType[T]['isSigned'], public readonly bitWidth: IType[T]['bitWidth']) { - super(); + super(Type.Int as T); } - public get typeId() { return Type.Int as T; } public get ArrayType() { switch (this.bitWidth) { case 8: return this.isSigned ? 
Int8Array : Uint8Array; @@ -206,9 +211,8 @@ export interface Float extends DataType { TArray: /** @ignore */ export class Float extends DataType { constructor(public readonly precision: Precision) { - super(); + super(Type.Float as T); } - public get typeId() { return Type.Float as T; } public get ArrayType(): TypedArrayConstructor { switch (this.precision) { case Precision.HALF: return Uint16Array; @@ -241,9 +245,8 @@ export interface Binary extends DataType { TArray: Uint8Array; TOff /** @ignore */ export class Binary extends DataType { constructor() { - super(); + super(Type.Binary); } - public get typeId() { return Type.Binary as Type.Binary; } public toString() { return `Binary`; } protected static [Symbol.toStringTag] = ((proto: Binary) => { (proto).ArrayType = Uint8Array; @@ -256,9 +259,8 @@ export interface LargeBinary extends DataType { TArray: Uint8A /** @ignore */ export class LargeBinary extends DataType { constructor() { - super(); + super(Type.LargeBinary); } - public get typeId() { return Type.LargeBinary as Type.LargeBinary; } public toString() { return `LargeBinary`; } protected static [Symbol.toStringTag] = ((proto: LargeBinary) => { (proto).ArrayType = Uint8Array; @@ -272,9 +274,8 @@ export interface Utf8 extends DataType { TArray: Uint8Array; TOffsetA /** @ignore */ export class Utf8 extends DataType { constructor() { - super(); + super(Type.Utf8); } - public get typeId() { return Type.Utf8 as Type.Utf8; } public toString() { return `Utf8`; } protected static [Symbol.toStringTag] = ((proto: Utf8) => { (proto).ArrayType = Uint8Array; @@ -287,9 +288,8 @@ export interface LargeUtf8 extends DataType { TArray: Uint8Array /** @ignore */ export class LargeUtf8 extends DataType { constructor() { - super(); + super(Type.LargeUtf8); } - public get typeId() { return Type.LargeUtf8 as Type.LargeUtf8; } public toString() { return `LargeUtf8`; } protected static [Symbol.toStringTag] = ((proto: LargeUtf8) => { (proto).ArrayType = Uint8Array; @@ -303,9 +303,8 @@ export interface Bool extends DataType { TArray: Uint8Array; TValue: /** @ignore */ export class Bool extends DataType { constructor() { - super(); + super(Type.Bool); } - public get typeId() { return Type.Bool as Type.Bool; } public toString() { return `Bool`; } protected static [Symbol.toStringTag] = ((proto: Bool) => { (proto).ArrayType = Uint8Array; @@ -320,9 +319,8 @@ export class Decimal extends DataType { constructor(public readonly scale: number, public readonly precision: number, public readonly bitWidth: number = 128) { - super(); + super(Type.Decimal); } - public get typeId() { return Type.Decimal as Type.Decimal; } public toString() { return `Decimal[${this.precision}e${this.scale > 0 ? 
`+` : ``}${this.scale}]`; } protected static [Symbol.toStringTag] = ((proto: Decimal) => { (proto).scale = null; @@ -339,9 +337,8 @@ export interface Date_ extends DataType { TArray: In /** @ignore */ export class Date_ extends DataType { constructor(public readonly unit: DateUnit) { - super(); + super(Type.Date as T); } - public get typeId() { return Type.Date as T; } public toString() { return `Date${(this.unit + 1) * 32}<${DateUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Date_) => { (proto).unit = null; @@ -375,9 +372,8 @@ interface Time_ extends DataType { class Time_ extends DataType { constructor(public readonly unit: TimesType[T]['unit'], public readonly bitWidth: TimeBitWidth) { - super(); + super(Type.Time as T); } - public get typeId() { return Type.Time as T; } public toString() { return `Time${this.bitWidth}<${TimeUnit[this.unit]}>`; } public get ArrayType() { switch (this.bitWidth) { @@ -418,9 +414,8 @@ interface Timestamp_ extends DataType { class Timestamp_ extends DataType { constructor(public readonly unit: TimeUnit, public readonly timezone?: string | null) { - super(); + super(Type.Timestamp as T); } - public get typeId() { return Type.Timestamp as T; } public toString() { return `Timestamp<${TimeUnit[this.unit]}${this.timezone ? `, ${this.timezone}` : ``}>`; } protected static [Symbol.toStringTag] = ((proto: Timestamp_) => { (proto).unit = null; @@ -453,9 +448,8 @@ interface Interval_ extends DataType { /** @ignore */ class Interval_ extends DataType { constructor(public readonly unit: IntervalUnit) { - super(); + super(Type.Interval as T); } - public get typeId() { return Type.Interval as T; } public toString() { return `Interval<${IntervalUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Interval_) => { (proto).unit = null; @@ -483,9 +477,8 @@ export interface Duration extends DataType { /** @ignore */ export class Duration extends DataType { constructor(public readonly unit: TimeUnit) { - super(); + super(Type.Duration as T); } - public get typeId() { return Type.Duration as T; } public toString() { return `Duration<${TimeUnit[this.unit]}>`; } protected static [Symbol.toStringTag] = ((proto: Duration) => { (proto).unit = null; @@ -513,11 +506,10 @@ export interface List extends DataType extends DataType { constructor(child: Field) { - super(); + super(Type.List); this.children = [child]; } public declare readonly children: Field[]; - public get typeId() { return Type.List as Type.List; } public toString() { return `List<${this.valueType}>`; } public get valueType(): T { return this.children[0].type as T; } public get valueField(): Field { return this.children[0] as Field; } @@ -540,10 +532,9 @@ export class Struct extends DataType { public declare _row: StructRow; public declare readonly children: Field[]; constructor(children: Field[]) { - super(); + super(Type.Struct); this.children = children; } - public get typeId() { return Type.Struct as Type.Struct; } public toString() { return `Struct<{${this.children.map((f) => `${f.name}:${f.type}`).join(`, `)}}>`; } protected static [Symbol.toStringTag] = ((proto: Struct) => { (proto).children = null; @@ -564,13 +555,12 @@ class Union_ extends DataType { constructor(mode: UnionMode, typeIds: number[] | Int32Array, children: Field[]) { - super(); + super(Type.Union as T); this.mode = mode; this.children = children; this.typeIds = typeIds = Int32Array.from(typeIds); this.typeIdToChildIndex = typeIds.reduce((typeIdToChildIndex, typeId, idx) => (typeIdToChildIndex[typeId] = idx) 
&& typeIdToChildIndex || typeIdToChildIndex, Object.create(null) as { [key: number]: number }); } - public get typeId() { return Type.Union as T; } public toString() { return `${this[Symbol.toStringTag]}<${this.children.map((x) => `${x.type}`).join(` | `) }>`; @@ -611,9 +601,8 @@ export interface FixedSizeBinary extends DataType { /** @ignore */ export class FixedSizeBinary extends DataType { constructor(public readonly byteWidth: number) { - super(); + super(Type.FixedSizeBinary); } - public get typeId() { return Type.FixedSizeBinary as Type.FixedSizeBinary; } public toString() { return `FixedSizeBinary[${this.byteWidth}]`; } protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => { (proto).byteWidth = null; @@ -632,10 +621,9 @@ export interface FixedSizeList extends DataType extends DataType { public declare readonly children: Field[]; constructor(public readonly listSize: number, child: Field) { - super(); + super(Type.FixedSizeList); this.children = [child]; } - public get typeId() { return Type.FixedSizeList as Type.FixedSizeList; } public get valueType(): T { return this.children[0].type as T; } public get valueField(): Field { return this.children[0] as Field; } public get ArrayType(): T['ArrayType'] { return this.valueType.ArrayType; } @@ -657,7 +645,7 @@ export interface Map_ extends DataType }> { constructor(entries: Field>, keysSorted = false) { - super(); + super(Type.Map); this.children = [entries]; this.keysSorted = keysSorted; // ARROW-8716 @@ -678,7 +666,6 @@ export class Map_ ex } public declare readonly keysSorted: boolean; public declare readonly children: Field>[]; - public get typeId() { return Type.Map as Type.Map; } public get keyType(): TKey { return this.children[0].type.children[0].type as TKey; } public get valueType(): TValue { return this.children[0].type.children[1].type as TValue; } public get childType() { return this.children[0].type as Struct<{ key: TKey; value: TValue }>; } @@ -709,13 +696,12 @@ export class Dictionary ex public declare readonly dictionary: T; public declare readonly isOrdered: boolean; constructor(dictionary: T, indices: TKey, id?: bigint | number | null, isOrdered?: boolean | null) { - super(); + super(Type.Dictionary); this.indices = indices; this.dictionary = dictionary; this.isOrdered = isOrdered || false; this.id = id == null ? 
getId() : bigIntToNumber(id);
     }
-    public get typeId() { return Type.Dictionary as Type.Dictionary; }
     public get children() { return this.dictionary.children; }
     public get valueType(): T { return this.dictionary as T; }
     public get ArrayType(): T['ArrayType'] { return this.dictionary.ArrayType; }
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 3f810d27271e5..2df1e67b9f4c7 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -332,22 +332,6 @@ if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION)
   find_package(Parquet REQUIRED)
 endif()
 
-if(PYARROW_BUILD_PARQUET_ENCRYPTION)
-  if(PARQUET_REQUIRE_ENCRYPTION)
-    list(APPEND PYARROW_CPP_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
-    if(ARROW_BUILD_SHARED)
-      list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_shared)
-    else()
-      list(APPEND PYARROW_CPP_LINK_LIBS Parquet::parquet_static)
-    endif()
-    message(STATUS "Parquet Encryption Enabled")
-  else()
-    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
-  endif()
-else()
-  message(STATUS "Parquet Encryption is NOT Enabled")
-endif()
-
 if(PYARROW_BUILD_HDFS)
   if(NOT ARROW_HDFS)
     message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON")
@@ -391,6 +375,26 @@ install(TARGETS arrow_python
         LIBRARY DESTINATION .
         RUNTIME DESTINATION .)
 
+set(PYARROW_CPP_ENCRYPTION_SRCS ${PYARROW_CPP_SOURCE_DIR}/parquet_encryption.cc)
+if(NOT PYARROW_BUILD_PARQUET_ENCRYPTION)
+  message(STATUS "Parquet Encryption is NOT Enabled")
+else()
+  if(PARQUET_REQUIRE_ENCRYPTION)
+    add_library(arrow_python_parquet_encryption SHARED ${PYARROW_CPP_ENCRYPTION_SRCS})
+    target_link_libraries(arrow_python_parquet_encryption PUBLIC arrow_python
+                                                                 ${PARQUET_LINK_LIBS})
+    target_compile_definitions(arrow_python_parquet_encryption
+                               PRIVATE ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING)
+    install(TARGETS arrow_python_parquet_encryption
+            ARCHIVE DESTINATION .
+            LIBRARY DESTINATION .
+            RUNTIME DESTINATION .)
+    message(STATUS "Parquet Encryption Enabled")
+  else()
+    message(FATAL_ERROR "You must build Arrow C++ with PARQUET_REQUIRE_ENCRYPTION=ON")
+  endif()
+endif()
+
 set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc)
 if(PYARROW_BUILD_FLIGHT)
   if(NOT ARROW_FLIGHT)
@@ -814,6 +818,6 @@ endif()
 if(PYARROW_BUILD_PARQUET)
   target_link_libraries(_parquet PRIVATE ${PARQUET_LINK_LIBS})
   if(PYARROW_BUILD_PARQUET_ENCRYPTION)
-    target_link_libraries(_parquet_encryption PRIVATE ${PARQUET_LINK_LIBS})
+    target_link_libraries(_parquet_encryption PRIVATE arrow_python_parquet_encryption)
   endif()
 endif()
diff --git a/python/benchmarks/parquet.py b/python/benchmarks/parquet.py
index 3aeca425bc8f0..e459ea2c369b4 100644
--- a/python/benchmarks/parquet.py
+++ b/python/benchmarks/parquet.py
@@ -29,35 +29,6 @@
     pq = None
 
 
-class ParquetManifestCreation(object):
-    """Benchmark creating a parquet manifest."""
-
-    size = 10 ** 6
-    tmpdir = None
-
-    param_names = ('num_partitions', 'num_threads')
-    params = [(10, 100, 1000), (1, 8)]
-
-    def setup(self, num_partitions, num_threads):
-        if pq is None:
-            raise NotImplementedError("Parquet support not enabled")
-
-        self.tmpdir = tempfile.mkdtemp('benchmark_parquet')
-        rnd = np.random.RandomState(42)
-        num1 = rnd.randint(0, num_partitions, size=self.size)
-        num2 = rnd.randint(0, 1000, size=self.size)
-        output_df = pd.DataFrame({'num1': num1, 'num2': num2})
-        output_table = pa.Table.from_pandas(output_df)
-        pq.write_to_dataset(output_table, self.tmpdir, ['num1'])
-
-    def teardown(self, num_partitions, num_threads):
-        if self.tmpdir is not None:
-            shutil.rmtree(self.tmpdir)
-
-    def time_manifest_creation(self, num_partitions, num_threads):
-        pq.ParquetManifest(self.tmpdir, metadata_nthreads=num_threads)
-
-
 class ParquetWriteBinary(object):
 
     def setup(self):
diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py
index 852b339211b0d..98a4b2a1138c7 100644
--- a/python/pyarrow/parquet/core.py
+++ b/python/pyarrow/parquet/core.py
@@ -17,22 +17,17 @@
 
 from collections import defaultdict
-from concurrent import futures
 from contextlib import nullcontext
-from functools import partial, reduce
+from functools import reduce
 import inspect
 import json
-from collections.abc import Collection
-import numpy as np
 import os
 import re
 import operator
-import urllib.parse
 import warnings
 
 import pyarrow as pa
-import pyarrow.lib as lib
 
 try:
     import pyarrow._parquet as _parquet
@@ -55,28 +50,6 @@
 from pyarrow import filesystem as legacyfs
 from pyarrow.util import guid, _is_path_like, _stringify_path, _deprecate_api
 
-_URI_STRIP_SCHEMES = ('hdfs',)
-
-
-def _parse_uri(path):
-    path = _stringify_path(path)
-    parsed_uri = urllib.parse.urlparse(path)
-    if parsed_uri.scheme in _URI_STRIP_SCHEMES:
-        return parsed_uri.path
-    else:
-        # ARROW-4073: On Windows returning the path with the scheme
-        # stripped removes the drive letter, if any
-        return path
-
-
-def _get_filesystem_and_path(passed_filesystem, path):
-    if passed_filesystem is None:
-        return legacyfs.resolve_filesystem_and_path(path, passed_filesystem)
-    else:
-        passed_filesystem = legacyfs._ensure_filesystem(passed_filesystem)
-        parsed_path = _parse_uri(path)
-        return passed_filesystem, parsed_path
-
 
 def _check_contains_null(val):
     if isinstance(val, bytes):
@@ -1148,516 +1121,15 @@ def _get_pandas_index_columns(keyvalues):
                                      ['index_columns'])
 
 
-# ----------------------------------------------------------------------
-# Metadata container providing instructions about reading a single Parquet
-# file, possibly part of a
partitioned dataset - - -class ParquetDatasetPiece: - """ - DEPRECATED: A single chunk of a potentially larger Parquet dataset to read. - - The arguments will indicate to read either a single row group or all row - groups, and whether to add partition keys to the resulting pyarrow.Table. - - .. deprecated:: 5.0 - Directly constructing a ``ParquetDatasetPiece`` is deprecated, as well - as accessing the pieces of a ``ParquetDataset`` object. Specify - ``use_legacy_dataset=False`` when constructing the ``ParquetDataset`` - and use the ``ParquetDataset.fragments`` attribute instead. - - Parameters - ---------- - path : str or pathlib.Path - Path to file in the file system where this piece is located. - open_file_func : callable - Function to use for obtaining file handle to dataset piece. - file_options : dict - Options - row_group : int, default None - Row group to load. By default, reads all row groups. - partition_keys : list of tuples - Two-element tuples of ``(column name, ordinal index)``. - """ - - def __init__(self, path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - warnings.warn( - "ParquetDatasetPiece is deprecated as of pyarrow 5.0.0 and will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - - @staticmethod - def _create(path, open_file_func=partial(open, mode='rb'), - file_options=None, row_group=None, partition_keys=None): - self = ParquetDatasetPiece.__new__(ParquetDatasetPiece) - self._init( - path, open_file_func, file_options, row_group, partition_keys) - return self - - def _init(self, path, open_file_func, file_options, row_group, - partition_keys): - self.path = _stringify_path(path) - self.open_file_func = open_file_func - self.row_group = row_group - self.partition_keys = partition_keys or [] - self.file_options = file_options or {} - - def __eq__(self, other): - if not isinstance(other, ParquetDatasetPiece): - return False - return (self.path == other.path and - self.row_group == other.row_group and - self.partition_keys == other.partition_keys) - - def __repr__(self): - return ('{}({!r}, row_group={!r}, partition_keys={!r})' - .format(type(self).__name__, self.path, - self.row_group, - self.partition_keys)) - - def __str__(self): - result = '' - - if len(self.partition_keys) > 0: - partition_str = ', '.join('{}={}'.format(name, index) - for name, index in self.partition_keys) - result += 'partition[{}] '.format(partition_str) - - result += self.path - - if self.row_group is not None: - result += ' | row_group={}'.format(self.row_group) - - return result - - def get_metadata(self): - """ - Return the file's metadata. - - Returns - ------- - metadata : FileMetaData - The file's metadata - """ - with self.open() as parquet: - return parquet.metadata - - def open(self): - """ - Return instance of ParquetFile. - """ - reader = self.open_file_func(self.path) - if not isinstance(reader, ParquetFile): - reader = ParquetFile(reader, **self.file_options) - - # ensure reader knows it's responsible for closing source - # since we opened the source here internally. - reader._close_source = True - return reader - - def read(self, columns=None, use_threads=True, partitions=None, - file=None, use_pandas_metadata=False): - """ - Read this piece as a pyarrow.Table. - - Parameters - ---------- - columns : list of column names, default None - use_threads : bool, default True - Perform multi-threaded column reads. 
- partitions : ParquetPartitions, default None - file : file-like object - Passed to ParquetFile. - use_pandas_metadata : bool - If pandas metadata should be used or not. - - Returns - ------- - table : pyarrow.Table - The piece as a pyarrow.Table. - """ - if self.open_file_func is not None: - reader = self.open() - elif file is not None: - reader = ParquetFile(file, **self.file_options) - else: - # try to read the local path - reader = ParquetFile(self.path, **self.file_options) - - options = dict(columns=columns, - use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - if self.row_group is not None: - table = reader.read_row_group(self.row_group, **options) - else: - table = reader.read(**options) - - if len(self.partition_keys) > 0: - if partitions is None: - raise ValueError('Must pass partition sets') - - # Here, the index is the categorical code of the partition where - # this piece is located. Suppose we had - # - # /foo=a/0.parq - # /foo=b/0.parq - # /foo=c/0.parq - # - # Then we assign a=0, b=1, c=2. And the resulting Table pieces will - # have a DictionaryArray column named foo having the constant index - # value as indicated. The distinct categories of the partition have - # been computed in the ParquetManifest - for i, (name, index) in enumerate(self.partition_keys): - # The partition code is the same for all values in this piece - indices = np.full(len(table), index, dtype='i4') - - # This is set of all partition values, computed as part of the - # manifest, so ['a', 'b', 'c'] as in our example above. - dictionary = partitions.levels[i].dictionary - - arr = pa.DictionaryArray.from_arrays(indices, dictionary) - table = table.append_column(name, arr) - - # To ParquetFile the source looked like it was already open, so won't - # actually close it without overriding. - reader.close(force=True) - return table - - -class PartitionSet: - """ - A data structure for cataloguing the observed Parquet partitions at a - particular level. So if we have - - /foo=a/bar=0 - /foo=a/bar=1 - /foo=a/bar=2 - /foo=b/bar=0 - /foo=b/bar=1 - /foo=b/bar=2 - - Then we have two partition sets, one for foo, another for bar. As we visit - levels of the partition hierarchy, a PartitionSet tracks the distinct - values and assigns categorical codes to use when reading the pieces - - Parameters - ---------- - name : str - Name of the partition set. Under which key to collect all values. - keys : list - All possible values that have been collected for that partition set. - """ - - def __init__(self, name, keys=None): - self.name = name - self.keys = keys or [] - self.key_indices = {k: i for i, k in enumerate(self.keys)} - self._dictionary = None - - def get_index(self, key): - """ - Get the index of the partition value if it is known, otherwise assign - one - - Parameters - ---------- - key : str or int - The value for which we want to known the index. 
- """ - if key in self.key_indices: - return self.key_indices[key] - else: - index = len(self.key_indices) - self.keys.append(key) - self.key_indices[key] = index - return index - - @property - def dictionary(self): - if self._dictionary is not None: - return self._dictionary - - if len(self.keys) == 0: - raise ValueError('No known partition keys') - - # Only integer and string partition types are supported right now - try: - integer_keys = [int(x) for x in self.keys] - dictionary = lib.array(integer_keys) - except ValueError: - dictionary = lib.array(self.keys) - - self._dictionary = dictionary - return dictionary - - @property - def is_sorted(self): - return list(self.keys) == sorted(self.keys) - - -class ParquetPartitions: - - def __init__(self): - self.levels = [] - self.partition_names = set() - - def __len__(self): - return len(self.levels) - - def __getitem__(self, i): - return self.levels[i] - - def equals(self, other): - if not isinstance(other, ParquetPartitions): - raise TypeError('`other` must be an instance of ParquetPartitions') - - return (self.levels == other.levels and - self.partition_names == other.partition_names) - - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented - - def get_index(self, level, name, key): - """ - Record a partition value at a particular level, returning the distinct - code for that value at that level. - - Examples - -------- - - partitions.get_index(1, 'foo', 'a') returns 0 - partitions.get_index(1, 'foo', 'b') returns 1 - partitions.get_index(1, 'foo', 'c') returns 2 - partitions.get_index(1, 'foo', 'a') returns 0 - - Parameters - ---------- - level : int - The nesting level of the partition we are observing - name : str - The partition name - key : str or int - The partition value - """ - if level == len(self.levels): - if name in self.partition_names: - raise ValueError('{} was the name of the partition in ' - 'another level'.format(name)) - - part_set = PartitionSet(name) - self.levels.append(part_set) - self.partition_names.add(name) - - return self.levels[level].get_index(key) - - def filter_accepts_partition(self, part_key, filter, level): - p_column, p_value_index = part_key - f_column, op, f_value = filter - if p_column != f_column: - return True - - f_type = type(f_value) - - if op in {'in', 'not in'}: - if not isinstance(f_value, Collection): - raise TypeError( - "'%s' object is not a collection", f_type.__name__) - if not f_value: - raise ValueError("Cannot use empty collection as filter value") - if len({type(item) for item in f_value}) != 1: - raise ValueError("All elements of the collection '%s' must be" - " of same type", f_value) - f_type = type(next(iter(f_value))) - - elif not isinstance(f_value, str) and isinstance(f_value, Collection): - raise ValueError( - "Op '%s' not supported with a collection value", op) - - p_value = f_type(self.levels[level] - .dictionary[p_value_index].as_py()) - - if op == "=" or op == "==": - return p_value == f_value - elif op == "!=": - return p_value != f_value - elif op == '<': - return p_value < f_value - elif op == '>': - return p_value > f_value - elif op == '<=': - return p_value <= f_value - elif op == '>=': - return p_value >= f_value - elif op == 'in': - return p_value in f_value - elif op == 'not in': - return p_value not in f_value - else: - raise ValueError("'%s' is not a valid operator in predicates.", - filter[1]) - - -class ParquetManifest: - - def __init__(self, dirpath, open_file_func=None, filesystem=None, - pathsep='/', 
partition_scheme='hive', metadata_nthreads=1): - filesystem, dirpath = _get_filesystem_and_path(filesystem, dirpath) - self.filesystem = filesystem - self.open_file_func = open_file_func - self.pathsep = pathsep - self.dirpath = _stringify_path(dirpath) - self.partition_scheme = partition_scheme - self.partitions = ParquetPartitions() - self.pieces = [] - self._metadata_nthreads = metadata_nthreads - self._thread_pool = futures.ThreadPoolExecutor( - max_workers=metadata_nthreads) - - self.common_metadata_path = None - self.metadata_path = None - - self._visit_level(0, self.dirpath, []) - - # Due to concurrency, pieces will potentially by out of order if the - # dataset is partitioned so we sort them to yield stable results - self.pieces.sort(key=lambda piece: piece.path) - - if self.common_metadata_path is None: - # _common_metadata is a subset of _metadata - self.common_metadata_path = self.metadata_path - - self._thread_pool.shutdown() - - def _visit_level(self, level, base_path, part_keys): - fs = self.filesystem - - _, directories, files = next(fs.walk(base_path)) - - filtered_files = [] - for path in files: - full_path = self.pathsep.join((base_path, path)) - if path.endswith('_common_metadata'): - self.common_metadata_path = full_path - elif path.endswith('_metadata'): - self.metadata_path = full_path - elif self._should_silently_exclude(path): - continue - else: - filtered_files.append(full_path) - - # ARROW-1079: Filter out "private" directories starting with underscore - filtered_directories = [self.pathsep.join((base_path, x)) - for x in directories - if not _is_private_directory(x)] - - filtered_files.sort() - filtered_directories.sort() - - if len(filtered_files) > 0 and len(filtered_directories) > 0: - raise ValueError('Found files in an intermediate ' - 'directory: {}'.format(base_path)) - elif len(filtered_directories) > 0: - self._visit_directories(level, filtered_directories, part_keys) - else: - self._push_pieces(filtered_files, part_keys) - - def _should_silently_exclude(self, file_name): - return (file_name.endswith('.crc') or # Checksums - file_name.endswith('_$folder$') or # HDFS directories in S3 - file_name.startswith('.') or # Hidden files starting with . - file_name.startswith('_') or # Hidden files starting with _ - file_name in EXCLUDED_PARQUET_PATHS) - - def _visit_directories(self, level, directories, part_keys): - futures_list = [] - for path in directories: - head, tail = _path_split(path, self.pathsep) - name, key = _parse_hive_partition(tail) - - index = self.partitions.get_index(level, name, key) - dir_part_keys = part_keys + [(name, index)] - # If you have less threads than levels, the wait call will block - # indefinitely due to multiple waits within a thread. 
- if level < self._metadata_nthreads: - future = self._thread_pool.submit(self._visit_level, - level + 1, - path, - dir_part_keys) - futures_list.append(future) - else: - self._visit_level(level + 1, path, dir_part_keys) - if futures_list: - futures.wait(futures_list) - - def _parse_partition(self, dirname): - if self.partition_scheme == 'hive': - return _parse_hive_partition(dirname) - else: - raise NotImplementedError('partition schema: {}' - .format(self.partition_scheme)) - - def _push_pieces(self, files, part_keys): - self.pieces.extend([ - ParquetDatasetPiece._create(path, partition_keys=part_keys, - open_file_func=self.open_file_func) - for path in files - ]) - - -def _parse_hive_partition(value): - if '=' not in value: - raise ValueError('Directory name did not appear to be a ' - 'partition: {}'.format(value)) - return value.split('=', 1) - - -def _is_private_directory(x): - _, tail = os.path.split(x) - return (tail.startswith('_') or tail.startswith('.')) and '=' not in tail - - -def _path_split(path, sep): - i = path.rfind(sep) + 1 - head, tail = path[:i], path[i:] - head = head.rstrip(sep) - return head, tail - - EXCLUDED_PARQUET_PATHS = {'_SUCCESS'} -class _ParquetDatasetMetadata: - __slots__ = ('fs', 'memory_map', 'read_dictionary', 'common_metadata', - 'buffer_size') - - -def _open_dataset_file(dataset, path, meta=None): - if (dataset.fs is not None and - not isinstance(dataset.fs, legacyfs.LocalFileSystem)): - path = dataset.fs.open(path, mode='rb') - return ParquetFile( - path, - metadata=meta, - memory_map=dataset.memory_map, - read_dictionary=dataset.read_dictionary, - common_metadata=dataset.common_metadata, - buffer_size=dataset.buffer_size +def _is_local_file_system(fs): + return isinstance(fs, LocalFileSystem) or isinstance( + fs, legacyfs.LocalFileSystem ) -_DEPR_MSG = ( - "'{}' attribute is deprecated as of pyarrow 5.0.0 and will be removed " - "in a future version.{}" -) - - _read_docstring_common = """\ read_dictionary : list, default None List of names or column paths (for nested types) to read directly @@ -1680,6 +1152,7 @@ def _open_dataset_file(dataset, path, meta=None): you need to specify the field names or a full schema. See the ``pyarrow.dataset.partitioning()`` function for more details.""" + _parquet_dataset_example = """\ Generate an example PyArrow Table and write it to a partitioned dataset: @@ -1688,15 +1161,13 @@ def _open_dataset_file(dataset, path, meta=None): ... 'n_legs': [2, 2, 4, 4, 5, 100], ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq ->>> pq.write_to_dataset(table, root_path='dataset_name', -... partition_cols=['year'], -... use_legacy_dataset=False) +>>> pq.write_to_dataset(table, root_path='dataset_v2', +... partition_cols=['year']) create a ParquetDataset object from the dataset source: ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False) +>>> dataset = pq.ParquetDataset('dataset_v2/') and read the data: @@ -1711,7 +1182,7 @@ def _open_dataset_file(dataset, path, meta=None): create a ParquetDataset object with filter: ->>> dataset = pq.ParquetDataset('dataset_name/', use_legacy_dataset=False, +>>> dataset = pq.ParquetDataset('dataset_v2/', ... 
filters=[('n_legs','=',4)]) >>> dataset.read().to_pandas() n_legs animal year @@ -1721,7 +1192,6 @@ def _open_dataset_file(dataset, path, meta=None): class ParquetDataset: - __doc__ = """ Encapsulates details of reading a complete Parquet dataset possibly consisting of multiple files and partitions in subdirectories. @@ -1735,39 +1205,26 @@ class ParquetDataset: Path will try to be found in the local on-disk filesystem otherwise it will be parsed as an URI to determine the filesystem. schema : pyarrow.parquet.Schema - Use schema obtained elsewhere to validate file schemas. Alternative to - metadata parameter. -metadata : pyarrow.parquet.FileMetaData - Use metadata obtained elsewhere to validate file schemas. -split_row_groups : bool, default False - Divide files into pieces for each row group in the file. -validate_schema : bool, default True - Check that individual file schemas are all the same / compatible. + Optionally provide the Schema for the Dataset, in which case it will + not be inferred from the source. filters : pyarrow.compute.Expression or List[Tuple] or List[List[Tuple]], default None Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. + Within-file level filtering and different partitioning schemes are supported. {1} -metadata_nthreads : int, default 1 - How many threads to allow the thread pool which is used to read the - dataset metadata. Increasing this is helpful to read partitioned - datasets. {0} -use_legacy_dataset : bool, default False - Set to False to enable the new code path (using the - new Arrow Dataset API). Among other things, this allows to pass - `filters` for all columns and not only the partition keys, enables - different partitioning schemes, etc. +ignore_prefixes : list, optional + Files matching any of these prefixes will be ignored by the + discovery process. + This is matched to the basename of a path. + By default this is ['.', '_']. + Note that discovery happens only if a directory is passed as source. pre_buffer : bool, default True Coalesce and issue file reads in parallel to improve performance on high-latency filesystems (e.g. S3, GCS). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself + background I/O thread pool. If using a filesystem layer that itself performs readahead (e.g. fsspec's S3FS), disable readahead for best results. Set to False if you want to prioritize minimal memory usage over maximum speed. @@ -1775,6 +1232,10 @@ class ParquetDataset: Cast timestamps that are stored in INT96 format to a particular resolution (e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96 timestamps will be inferred as timestamps in nanoseconds. +decryption_properties : FileDecryptionProperties or None + File-level decryption properties. + The decryption properties can be created using + ``CryptoFactory.file_decryption_properties()``. thrift_string_size_limit : int, default None If not None, override the maximum total string size allocated when decoding Thrift structures. 
The default limit should be @@ -1785,739 +1246,95 @@ class ParquetDataset: sufficient for most Parquet files. page_checksum_verification : bool, default False If True, verify the page checksum for each page read from the file. +use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. Examples -------- {2} """.format(_read_docstring_common, _DNF_filter_doc, _parquet_dataset_example) - def __new__(cls, path_or_paths=None, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, - thrift_container_size_limit=None, - page_checksum_verification=False): - - extra_msg = "" - if use_legacy_dataset is None: - # if an old filesystem is passed -> still use to old implementation - if isinstance(filesystem, legacyfs.FileSystem): - use_legacy_dataset = True - extra_msg = ( - " The legacy behaviour was still chosen because a " - "deprecated 'pyarrow.filesystem' filesystem was specified " - "(use the filesystems from pyarrow.fs instead)." - ) - # otherwise the default is already False - else: - use_legacy_dataset = False - - if not use_legacy_dataset: - return _ParquetDatasetV2( - path_or_paths, filesystem=filesystem, - filters=filters, - partitioning=partitioning, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - # unsupported keywords - schema=schema, metadata=metadata, - split_row_groups=split_row_groups, - validate_schema=validate_schema, - metadata_nthreads=metadata_nthreads, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, - ) - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 11.0.0, and the legacy implementation " - "will be removed in a future version." 
+ extra_msg, - FutureWarning, stacklevel=2) - self = object.__new__(cls) - return self - - def __init__(self, path_or_paths, filesystem=None, schema=None, - metadata=None, split_row_groups=False, validate_schema=True, - filters=None, metadata_nthreads=None, read_dictionary=None, - memory_map=False, buffer_size=0, partitioning="hive", - use_legacy_dataset=None, pre_buffer=True, + def __init__(self, path_or_paths, filesystem=None, schema=None, *, filters=None, + read_dictionary=None, memory_map=False, buffer_size=None, + partitioning="hive", ignore_prefixes=None, pre_buffer=True, coerce_int96_timestamp_unit=None, - thrift_string_size_limit=None, + decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, - page_checksum_verification=False): - if partitioning != "hive": - raise ValueError( - 'Only "hive" for hive-like partitioning is supported when ' - 'using use_legacy_dataset=True') - if metadata_nthreads is not None: - warnings.warn( - "Specifying the 'metadata_nthreads' argument is deprecated as " - "of pyarrow 8.0.0, and the argument will be removed in a " - "future version", - FutureWarning, stacklevel=2, - ) - else: - metadata_nthreads = 1 - - self._ds_metadata = _ParquetDatasetMetadata() - a_path = path_or_paths - if isinstance(a_path, list): - a_path = a_path[0] - - self._ds_metadata.fs, _ = _get_filesystem_and_path(filesystem, a_path) - if isinstance(path_or_paths, list): - self.paths = [_parse_uri(path) for path in path_or_paths] - else: - self.paths = _parse_uri(path_or_paths) - - self._ds_metadata.read_dictionary = read_dictionary - self._ds_metadata.memory_map = memory_map - self._ds_metadata.buffer_size = buffer_size - - (self._pieces, - self._partitions, - self._common_metadata_path, - self._metadata_path) = _make_manifest( - path_or_paths, self._fs, metadata_nthreads=metadata_nthreads, - open_file_func=partial(_open_dataset_file, self._ds_metadata) - ) - - if self._common_metadata_path is not None: - with self._fs.open(self._common_metadata_path) as f: - self._ds_metadata.common_metadata = read_metadata( - f, - memory_map=memory_map - ) - else: - self._ds_metadata.common_metadata = None + page_checksum_verification=False, + use_legacy_dataset=None): - if metadata is not None: + if use_legacy_dataset is not None: warnings.warn( - "Specifying the 'metadata' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0.", + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", FutureWarning, stacklevel=2) - if metadata is None and self._metadata_path is not None: - with self._fs.open(self._metadata_path) as f: - self._metadata = read_metadata(f, memory_map=memory_map) - else: - self._metadata = metadata - - if schema is not None: - warnings.warn( - "Specifying the 'schema' argument with 'use_legacy_dataset=" - "True' is deprecated as of pyarrow 8.0.0. 
You can still " - "specify it in combination with 'use_legacy_dataset=False', " - "but in that case you need to specify a pyarrow.Schema " - "instead of a ParquetSchema.", - FutureWarning, stacklevel=2) - self._schema = schema + import pyarrow.dataset as ds - self.split_row_groups = split_row_groups + # map format arguments + read_options = { + "pre_buffer": pre_buffer, + "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, + "thrift_string_size_limit": thrift_string_size_limit, + "thrift_container_size_limit": thrift_container_size_limit, + "page_checksum_verification": page_checksum_verification, + } + if buffer_size: + read_options.update(use_buffered_stream=True, + buffer_size=buffer_size) + if read_dictionary is not None: + read_options.update(dictionary_columns=read_dictionary) - if split_row_groups: - raise NotImplementedError("split_row_groups not yet implemented") + if decryption_properties is not None: + read_options.update(decryption_properties=decryption_properties) + self._filter_expression = None if filters is not None: - if hasattr(filters, "cast"): - raise TypeError( - "Expressions as filter not supported for legacy dataset") - filters = _check_filters(filters) - self._filter(filters) - - if validate_schema: - self.validate_schemas() - - def __getnewargs_ex__(self): - # when creating a new instance while unpickling, force to use the - # legacy code path to create a ParquetDataset instance - # instead of a _ParquetDatasetV2 instance - return ((), dict(use_legacy_dataset=True)) - - def equals(self, other): - if not isinstance(other, ParquetDataset): - raise TypeError('`other` must be an instance of ParquetDataset') + self._filter_expression = filters_to_expression(filters) - if self._fs.__class__ != other._fs.__class__: - return False - for prop in ('paths', '_pieces', '_partitions', - '_common_metadata_path', '_metadata_path', - '_common_metadata', '_metadata', '_schema', - 'split_row_groups'): - if getattr(self, prop) != getattr(other, prop): - return False - for prop in ('memory_map', 'buffer_size'): - if ( - getattr(self._ds_metadata, prop) != - getattr(other._ds_metadata, prop) - ): - return False - - return True + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem( + filesystem, use_mmap=memory_map) + elif filesystem is None and memory_map: + # if memory_map is specified, assume local file system (string + # path can in principle be URI for any filesystem) + filesystem = LocalFileSystem(use_mmap=memory_map) - def __eq__(self, other): - try: - return self.equals(other) - except TypeError: - return NotImplemented + # This needs to be checked after _ensure_filesystem, because that + # handles the case of an fsspec LocalFileSystem + if ( + hasattr(path_or_paths, "__fspath__") and + filesystem is not None and + not _is_local_file_system(filesystem) + ): + raise TypeError( + "Path-like objects with __fspath__ must only be used with " + f"local file systems, not {type(filesystem)}" + ) - def validate_schemas(self): - if self._metadata is None and self._schema is None: - if self._common_metadata is not None: - self._schema = self._common_metadata.schema + # check for single fragment dataset + single_file = None + self._base_dir = None + if not isinstance(path_or_paths, list): + if _is_path_like(path_or_paths): + path_or_paths = _stringify_path(path_or_paths) + if filesystem is None: + # path might be a URI describing the FileSystem as well + try: + filesystem, path_or_paths = FileSystem.from_uri( + path_or_paths) + except 
ValueError: + filesystem = LocalFileSystem(use_mmap=memory_map) + finfo = filesystem.get_file_info(path_or_paths) + if finfo.is_file: + single_file = path_or_paths + if finfo.type == FileType.Directory: + self._base_dir = path_or_paths else: - self._schema = self._pieces[0].get_metadata().schema - elif self._schema is None: - self._schema = self._metadata.schema - - # Verify schemas are all compatible - dataset_schema = self._schema.to_arrow_schema() - # Exclude the partition columns from the schema, they are provided - # by the path, not the DatasetPiece - if self._partitions is not None: - for partition_name in self._partitions.partition_names: - if dataset_schema.get_field_index(partition_name) != -1: - field_idx = dataset_schema.get_field_index(partition_name) - dataset_schema = dataset_schema.remove(field_idx) - - for piece in self._pieces: - file_metadata = piece.get_metadata() - file_schema = file_metadata.schema.to_arrow_schema() - if not dataset_schema.equals(file_schema, check_metadata=False): - raise ValueError('Schema in {!s} was different. \n' - '{!s}\n\nvs\n\n{!s}' - .format(piece, file_schema, - dataset_schema)) + single_file = path_or_paths - def read(self, columns=None, use_threads=True, use_pandas_metadata=False): - """ - Read multiple Parquet files as a single pyarrow.Table. - - Parameters - ---------- - columns : List[str] - Names of columns to read from the file. - use_threads : bool, default True - Perform multi-threaded column reads - use_pandas_metadata : bool, default False - Passed through to each dataset piece. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_read/', - ... use_legacy_dataset=False) - - Read multiple Parquet files as a single pyarrow.Table: - - >>> dataset.read(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[5],[2],[4,100],[2,4]] - """ - tables = [] - for piece in self._pieces: - table = piece.read(columns=columns, - use_threads=use_threads, - partitions=self._partitions, - use_pandas_metadata=use_pandas_metadata) - tables.append(table) - - all_data = lib.concat_tables(tables) - - if use_pandas_metadata: - # We need to ensure that this metadata is set in the Table's schema - # so that Table.to_pandas will construct pandas.DataFrame with the - # right index - common_metadata = self._get_common_pandas_metadata() - current_metadata = all_data.schema.metadata or {} - - if common_metadata and b'pandas' not in current_metadata: - all_data = all_data.replace_schema_metadata({ - b'pandas': common_metadata}) - - return all_data - - def read_pandas(self, **kwargs): - """ - Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. - - Parameters - ---------- - **kwargs : optional - All additional options to pass to the reader. - - Returns - ------- - pyarrow.Table - Content of the file as a table (of columns). 
- - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned - dataset: - - >>> import pyarrow as pa - >>> import pandas as pd - >>> df = pd.DataFrame({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> table = pa.Table.from_pandas(df) - >>> import pyarrow.parquet as pq - >>> pq.write_table(table, 'table.parquet') - >>> dataset = pq.ParquetDataset('table.parquet', - ... use_legacy_dataset=False) - - Read dataset including pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]) - pyarrow.Table - n_legs: int64 - ---- - n_legs: [[2,2,4,4,5,100]] - - Select pandas metadata: - - >>> dataset.read_pandas(columns=["n_legs"]).schema.pandas_metadata - {'index_columns': [{'kind': 'range', 'name': None, 'start': 0, ...} - """ - return self.read(use_pandas_metadata=True, **kwargs) - - def _get_common_pandas_metadata(self): - if self._common_metadata is None: - return None - - keyvalues = self._common_metadata.metadata - return keyvalues.get(b'pandas', None) - - def _filter(self, filters): - accepts_filter = self._partitions.filter_accepts_partition - - def one_filter_accepts(piece, filter): - return all(accepts_filter(part_key, filter, level) - for level, part_key in enumerate(piece.partition_keys)) - - def all_filters_accept(piece): - return any(all(one_filter_accepts(piece, f) for f in conjunction) - for conjunction in filters) - - self._pieces = [p for p in self._pieces if all_filters_accept(p)] - - @property - def pieces(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.pieces", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.fragments' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._pieces - - @property - def partitions(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.partitions", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.partitioning' attribute " - "instead."), - FutureWarning, stacklevel=2) - return self._partitions - - @property - def schema(self): - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.schema", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.schema' attribute " - "instead (which will return an Arrow schema instead of a " - "Parquet schema)."), - FutureWarning, stacklevel=2) - return self._schema - - @property - def memory_map(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.memory_map", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.memory_map - - @property - def read_dictionary(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.read_dictionary", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.read_dictionary - - @property - def buffer_size(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.buffer_size", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.buffer_size - - _fs = property( - operator.attrgetter('_ds_metadata.fs') - ) - - @property - def fs(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format( - "ParquetDataset.fs", - " Specify 'use_legacy_dataset=False' while constructing the " - "ParquetDataset, and then use the '.filesystem' attribute " - "instead."), - FutureWarning, 
stacklevel=2) - return self._ds_metadata.fs - - @property - def metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata", ""), - FutureWarning, stacklevel=2) - return self._metadata - - @property - def metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.metadata_path", ""), - FutureWarning, stacklevel=2) - return self._metadata_path - - @property - def common_metadata_path(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.common_metadata_path", ""), - FutureWarning, stacklevel=2) - return self._common_metadata_path - - _common_metadata = property( - operator.attrgetter('_ds_metadata.common_metadata') - ) - - @property - def common_metadata(self): - """ - DEPRECATED - """ - warnings.warn( - _DEPR_MSG.format("ParquetDataset.common_metadata", ""), - FutureWarning, stacklevel=2) - return self._ds_metadata.common_metadata - - @property - def fragments(self): - """ - A list of the Dataset source fragments or pieces with absolute - file paths. To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - - Examples - -------- - Generate an example dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_fragments/', - ... use_legacy_dataset=False) - - List the fragments: - - >>> dataset.fragments - [>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_name_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_name_files/', - ... use_legacy_dataset=False) - - List the files: - - >>> dataset.files - ['dataset_name_files/year=2019/...-0.parquet', ... - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def filesystem(self): - """ - The filesystem type of the Dataset source. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. - """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - @property - def partitioning(self): - """ - The partitioning of the Dataset source, if discovered. - To use this property set 'use_legacy_dataset=False' - while constructing ParquetDataset object. 
- """ - raise NotImplementedError( - "To use this property set 'use_legacy_dataset=False' while " - "constructing the ParquetDataset") - - -def _make_manifest(path_or_paths, fs, pathsep='/', metadata_nthreads=1, - open_file_func=None): - partitions = None - common_metadata_path = None - metadata_path = None - - if isinstance(path_or_paths, list) and len(path_or_paths) == 1: - # Dask passes a directory as a list of length 1 - path_or_paths = path_or_paths[0] - - if _is_path_like(path_or_paths) and fs.isdir(path_or_paths): - manifest = ParquetManifest(path_or_paths, filesystem=fs, - open_file_func=open_file_func, - pathsep=getattr(fs, "pathsep", "/"), - metadata_nthreads=metadata_nthreads) - common_metadata_path = manifest.common_metadata_path - metadata_path = manifest.metadata_path - pieces = manifest.pieces - partitions = manifest.partitions - else: - if not isinstance(path_or_paths, list): - path_or_paths = [path_or_paths] - - # List of paths - if len(path_or_paths) == 0: - raise ValueError('Must pass at least one file path') - - pieces = [] - for path in path_or_paths: - if not fs.isfile(path): - raise OSError('Passed non-file path: {}' - .format(path)) - piece = ParquetDatasetPiece._create( - path, open_file_func=open_file_func) - pieces.append(piece) - - return pieces, partitions, common_metadata_path, metadata_path - - -def _is_local_file_system(fs): - return isinstance(fs, LocalFileSystem) or isinstance( - fs, legacyfs.LocalFileSystem - ) - - -class _ParquetDatasetV2: - """ - ParquetDataset shim using the Dataset API under the hood. - - Examples - -------- - Generate an example PyArrow Table and write it to a partitioned dataset: - - >>> import pyarrow as pa - >>> table = pa.table({'year': [2020, 2022, 2021, 2022, 2019, 2021], - ... 'n_legs': [2, 2, 4, 4, 5, 100], - ... 'animal': ["Flamingo", "Parrot", "Dog", "Horse", - ... "Brittle stars", "Centipede"]}) - >>> import pyarrow.parquet as pq - >>> pq.write_to_dataset(table, root_path='dataset_v2', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - - create a ParquetDataset object from the dataset source: - - >>> dataset = pq.ParquetDataset('dataset_v2/', use_legacy_dataset=False) - - and read the data: - - >>> dataset.read().to_pandas() - n_legs animal year - 0 5 Brittle stars 2019 - 1 2 Flamingo 2020 - 2 4 Dog 2021 - 3 100 Centipede 2021 - 4 2 Parrot 2022 - 5 4 Horse 2022 - - create a ParquetDataset object with filter: - - >>> dataset = pq.ParquetDataset('dataset_v2/', - ... filters=[('n_legs','=',4)], - ... 
use_legacy_dataset=False) - >>> dataset.read().to_pandas() - n_legs animal year - 0 4 Dog 2021 - 1 4 Horse 2022 - """ - - def __init__(self, path_or_paths, filesystem=None, *, filters=None, - partitioning="hive", read_dictionary=None, buffer_size=None, - memory_map=False, ignore_prefixes=None, pre_buffer=True, - coerce_int96_timestamp_unit=None, schema=None, - decryption_properties=None, thrift_string_size_limit=None, - thrift_container_size_limit=None, - page_checksum_verification=False, - **kwargs): - import pyarrow.dataset as ds - - # Raise error for not supported keywords - for keyword, default in [ - ("metadata", None), ("split_row_groups", False), - ("validate_schema", True), ("metadata_nthreads", None)]: - if keyword in kwargs and kwargs[keyword] is not default: - raise ValueError( - "Keyword '{0}' is not yet supported with the new " - "Dataset API".format(keyword)) - - # map format arguments - read_options = { - "pre_buffer": pre_buffer, - "coerce_int96_timestamp_unit": coerce_int96_timestamp_unit, - "thrift_string_size_limit": thrift_string_size_limit, - "thrift_container_size_limit": thrift_container_size_limit, - "page_checksum_verification": page_checksum_verification, - } - if buffer_size: - read_options.update(use_buffered_stream=True, - buffer_size=buffer_size) - if read_dictionary is not None: - read_options.update(dictionary_columns=read_dictionary) - - if decryption_properties is not None: - read_options.update(decryption_properties=decryption_properties) - - self._filter_expression = None - if filters is not None: - self._filter_expression = filters_to_expression(filters) - - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem( - filesystem, use_mmap=memory_map) - elif filesystem is None and memory_map: - # if memory_map is specified, assume local file system (string - # path can in principle be URI for any filesystem) - filesystem = LocalFileSystem(use_mmap=memory_map) - - # This needs to be checked after _ensure_filesystem, because that - # handles the case of an fsspec LocalFileSystem - if ( - hasattr(path_or_paths, "__fspath__") and - filesystem is not None and - not _is_local_file_system(filesystem) - ): - raise TypeError( - "Path-like objects with __fspath__ must only be used with " - f"local file systems, not {type(filesystem)}" - ) - - # check for single fragment dataset - single_file = None - self._base_dir = None - if not isinstance(path_or_paths, list): - if _is_path_like(path_or_paths): - path_or_paths = _stringify_path(path_or_paths) - if filesystem is None: - # path might be a URI describing the FileSystem as well - try: - filesystem, path_or_paths = FileSystem.from_uri( - path_or_paths) - except ValueError: - filesystem = LocalFileSystem(use_mmap=memory_map) - finfo = filesystem.get_file_info(path_or_paths) - if finfo.is_file: - single_file = path_or_paths - if finfo.type == FileType.Directory: - self._base_dir = path_or_paths - else: - single_file = path_or_paths - - parquet_format = ds.ParquetFileFormat(**read_options) + parquet_format = ds.ParquetFileFormat(**read_options) if single_file is not None: fragment = parquet_format.make_fragment(single_file, filesystem) @@ -2540,12 +1357,7 @@ def __init__(self, path_or_paths, filesystem=None, *, filters=None, ignore_prefixes=ignore_prefixes) def equals(self, other): - if isinstance(other, ParquetDataset): - raise TypeError( - "`other` must be an instance of ParquetDataset constructed " - "with `use_legacy_dataset=False`" - ) - if not isinstance(other, 
_ParquetDatasetV2): + if not isinstance(other, ParquetDataset): raise TypeError('`other` must be an instance of ParquetDataset') return (self.schema == other.schema and @@ -2576,10 +1388,8 @@ def schema(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_schema', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_schema/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_schema/') Read the schema: @@ -2598,8 +1408,7 @@ def read(self, columns=None, use_threads=True, use_pandas_metadata=False): ---------- columns : List[str] Names of columns to read from the dataset. The partition fields - are not automatically included (in contrast to when setting - ``use_legacy_dataset=True``). + are not automatically included. use_threads : bool, default True Perform multi-threaded column reads. use_pandas_metadata : bool, default False @@ -2622,10 +1431,8 @@ def read(self, columns=None, use_threads=True, use_pandas_metadata=False): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_read', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_read/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_read/') Read the dataset: @@ -2694,7 +1501,12 @@ def _get_common_pandas_metadata(self): def read_pandas(self, **kwargs): """ Read dataset including pandas metadata, if any. Other arguments passed - through to ParquetDataset.read, see docstring for further details. + through to :func:`read`, see docstring for further details. + + Parameters + ---------- + **kwargs : optional + Additional options for :func:`read` Examples -------- @@ -2709,8 +1521,7 @@ def read_pandas(self, **kwargs): >>> table = pa.Table.from_pandas(df) >>> import pyarrow.parquet as pq >>> pq.write_table(table, 'table_V2.parquet') - >>> dataset = pq.ParquetDataset('table_V2.parquet', - ... use_legacy_dataset=False) + >>> dataset = pq.ParquetDataset('table_V2.parquet') Read the dataset with pandas metadata: @@ -2725,14 +1536,6 @@ def read_pandas(self, **kwargs): """ return self.read(use_pandas_metadata=True, **kwargs) - @property - def pieces(self): - warnings.warn( - _DEPR_MSG.format("ParquetDataset.pieces", - " Use the '.fragments' attribute instead"), - FutureWarning, stacklevel=2) - return list(self._dataset.get_fragments()) - @property def fragments(self): """ @@ -2750,10 +1553,8 @@ def fragments(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_fragments', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_fragments/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_fragments/') List the fragments: @@ -2778,10 +1579,8 @@ def files(self): ... "Brittle stars", "Centipede"]}) >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_v2_files', - ... partition_cols=['year'], - ... use_legacy_dataset=False) - >>> dataset = pq.ParquetDataset('dataset_v2_files/', - ... use_legacy_dataset=False) + ... partition_cols=['year']) + >>> dataset = pq.ParquetDataset('dataset_v2_files/') List the files: @@ -2822,8 +1621,6 @@ def partitioning(self): no columns. 
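# --- Editor's illustrative sketch; not part of this diff ---
# Minimal usage of the ParquetDataset API that the updated docstrings above
# describe: no 'use_legacy_dataset' keyword, and partition fields are returned
# only when explicitly listed in 'columns'. The path 'example_dataset' is a
# hypothetical scratch directory.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'year': [2020, 2021, 2021],
                  'n_legs': [2, 4, 100],
                  'animal': ['Flamingo', 'Dog', 'Centipede']})
pq.write_to_dataset(table, root_path='example_dataset', partition_cols=['year'])

dataset = pq.ParquetDataset('example_dataset/')
print(dataset.files)                                   # one file per partition
print(dataset.read(columns=['n_legs', 'year']))        # 'year' included only because listed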
use_threads : bool, default True Perform multi-threaded column reads. -metadata : FileMetaData - If separately computed schema : Schema, optional Optionally provide the Schema for the parquet dataset, in which case it will not be inferred from the source. @@ -2836,30 +1633,21 @@ def partitioning(self): Rows which do not match the filter predicate will be removed from scanned data. Partition keys embedded in a nested directory structure will be exploited to avoid loading files at all if they contain no matching rows. - If `use_legacy_dataset` is True, filters can only reference partition - keys and only a hive-style directory structure is supported. When - setting `use_legacy_dataset` to False, also within-file level filtering - and different partitioning schemes are supported. + Within-file level filtering and different partitioning schemes are supported. {3} -use_legacy_dataset : bool, default False - By default, `read_table` uses the new Arrow Datasets API since - pyarrow 1.0.0. Among other things, this allows to pass `filters` - for all columns and not only the partition keys, enables - different partitioning schemes, etc. - Set to True to use the legacy behaviour (this option is deprecated, - and the legacy implementation will be removed in a future version). +use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. ignore_prefixes : list, optional Files matching any of these prefixes will be ignored by the - discovery process if use_legacy_dataset=False. + discovery process. This is matched to the basename of a path. By default this is ['.', '_']. Note that discovery happens only if a directory is passed as source. pre_buffer : bool, default True Coalesce and issue file reads in parallel to improve performance on high-latency filesystems (e.g. S3). If True, Arrow will use a - background I/O thread pool. This option is only supported for - use_legacy_dataset=False. If using a filesystem layer that itself + background I/O thread pool. If using a filesystem layer that itself performs readahead (e.g. fsspec's S3FS), disable readahead for best results. 
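# --- Editor's illustrative sketch; not part of this diff ---
# As the docstring above notes, filters always support within-file, row-level
# filtering now, so they may reference ordinary data columns rather than only
# partition keys. 'example_dataset' is the hypothetical directory from the
# previous sketch.
import pyarrow.parquet as pq

result = pq.read_table(
    'example_dataset',
    columns=['n_legs', 'animal'],
    filters=[('n_legs', '>=', 4)],   # predicate on a non-partition column
    pre_buffer=True,                 # coalesced reads help on high-latency stores
)
print(result)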
coerce_int96_timestamp_unit : str, default None @@ -2968,129 +1756,78 @@ def partitioning(self): """ -def read_table(source, *, columns=None, use_threads=True, metadata=None, +def read_table(source, *, columns=None, use_threads=True, schema=None, use_pandas_metadata=False, read_dictionary=None, memory_map=False, buffer_size=0, partitioning="hive", - filesystem=None, filters=None, use_legacy_dataset=False, + filesystem=None, filters=None, use_legacy_dataset=None, ignore_prefixes=None, pre_buffer=True, coerce_int96_timestamp_unit=None, decryption_properties=None, thrift_string_size_limit=None, thrift_container_size_limit=None, page_checksum_verification=False): - if not use_legacy_dataset: - if metadata is not None: + + if use_legacy_dataset is not None: + warnings.warn( + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", + FutureWarning, stacklevel=2) + + try: + dataset = ParquetDataset( + source, + schema=schema, + filesystem=filesystem, + partitioning=partitioning, + memory_map=memory_map, + read_dictionary=read_dictionary, + buffer_size=buffer_size, + filters=filters, + ignore_prefixes=ignore_prefixes, + pre_buffer=pre_buffer, + coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + page_checksum_verification=page_checksum_verification, + ) + except ImportError: + # fall back on ParquetFile for simple cases when pyarrow.dataset + # module is not available + if filters is not None: raise ValueError( - "The 'metadata' keyword is no longer supported with the new " - "datasets-based implementation. Specify " - "'use_legacy_dataset=True' to temporarily recover the old " - "behaviour." 
- ) - try: - dataset = _ParquetDatasetV2( - source, - schema=schema, - filesystem=filesystem, - partitioning=partitioning, - memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filters=filters, - ignore_prefixes=ignore_prefixes, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, + "the 'filters' keyword is not supported when the " + "pyarrow.dataset module is not available" ) - except ImportError: - # fall back on ParquetFile for simple cases when pyarrow.dataset - # module is not available - if filters is not None: - raise ValueError( - "the 'filters' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if partitioning != "hive": - raise ValueError( - "the 'partitioning' keyword is not supported when the " - "pyarrow.dataset module is not available" - ) - if schema is not None: - raise ValueError( - "the 'schema' argument is not supported when the " - "pyarrow.dataset module is not available" - ) - filesystem, path = _resolve_filesystem_and_path(source, filesystem) - if filesystem is not None: - source = filesystem.open_input_file(path) - # TODO test that source is not a directory or a list - dataset = ParquetFile( - source, metadata=metadata, read_dictionary=read_dictionary, - memory_map=memory_map, buffer_size=buffer_size, - pre_buffer=pre_buffer, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties, - thrift_string_size_limit=thrift_string_size_limit, - thrift_container_size_limit=thrift_container_size_limit, - page_checksum_verification=page_checksum_verification, + if partitioning != "hive": + raise ValueError( + "the 'partitioning' keyword is not supported when the " + "pyarrow.dataset module is not available" ) - - return dataset.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) - - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation will " - "be removed in a future version.", - FutureWarning, stacklevel=2) - - if ignore_prefixes is not None: - raise ValueError( - "The 'ignore_prefixes' keyword is only supported when " - "use_legacy_dataset=False") - - if page_checksum_verification: - raise ValueError( - "The 'page_checksum_verification' keyword is only supported when " - "use_legacy_dataset=False") - - if schema is not None: - raise ValueError( - "The 'schema' argument is only supported when " - "use_legacy_dataset=False") - - if _is_path_like(source): - with warnings.catch_warnings(): - # Suppress second warning from ParquetDataset constructor - warnings.filterwarnings( - "ignore", "Passing 'use_legacy_dataset", FutureWarning) - pf = ParquetDataset( - source, metadata=metadata, memory_map=memory_map, - read_dictionary=read_dictionary, - buffer_size=buffer_size, - filesystem=filesystem, filters=filters, - partitioning=partitioning, - coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - use_legacy_dataset=True, + if schema is not None: + raise ValueError( + "the 'schema' argument is not supported when the " + "pyarrow.dataset module is not available" ) - else: - pf = ParquetFile( - source, metadata=metadata, - read_dictionary=read_dictionary, - memory_map=memory_map, - buffer_size=buffer_size, + filesystem, path = 
_resolve_filesystem_and_path(source, filesystem) + if filesystem is not None: + source = filesystem.open_input_file(path) + # TODO test that source is not a directory or a list + dataset = ParquetFile( + source, read_dictionary=read_dictionary, + memory_map=memory_map, buffer_size=buffer_size, + pre_buffer=pre_buffer, coerce_int96_timestamp_unit=coerce_int96_timestamp_unit, - decryption_properties=decryption_properties + decryption_properties=decryption_properties, + thrift_string_size_limit=thrift_string_size_limit, + thrift_container_size_limit=thrift_container_size_limit, + page_checksum_verification=page_checksum_verification, ) - return pf.read(columns=columns, use_threads=use_threads, - use_pandas_metadata=use_pandas_metadata) + return dataset.read(columns=columns, use_threads=use_threads, + use_pandas_metadata=use_pandas_metadata) -read_table.__doc__ = _read_table_docstring.format( - """Read a Table from Parquet format -Note: starting with pyarrow 1.0, the default for `use_legacy_dataset` is -switched to False.""", +read_table.__doc__ = _read_table_docstring.format( + """Read a Table from Parquet format""", "\n".join(("""use_pandas_metadata : bool, default False If True and file has custom pandas schema metadata, ensure that index columns are also loaded.""", _read_docstring_common)), @@ -3233,23 +1970,13 @@ def write_table(table, where, row_group_size=None, version='2.6', """.format(_parquet_writer_arg_docs, _write_table_example) -def _mkdir_if_not_exists(fs, path): - if fs._isfilestore() and not fs.exists(path): - try: - fs.mkdir(path) - except OSError: - assert fs.exists(path) - - def write_to_dataset(table, root_path, partition_cols=None, - partition_filename_cb=None, filesystem=None, - use_legacy_dataset=None, schema=None, - partitioning=None, basename_template=None, - use_threads=None, file_visitor=None, - existing_data_behavior=None, + filesystem=None, use_legacy_dataset=None, + schema=None, partitioning=None, + basename_template=None, use_threads=None, + file_visitor=None, existing_data_behavior=None, **kwargs): - """Wrapper around dataset.write_dataset (when use_legacy_dataset=False) or - parquet.write_table (when use_legacy_dataset=True) for writing a Table to + """Wrapper around dataset.write_dataset for writing a Table to Parquet format by partitions. For each combination of partition columns and values, a subdirectories are created in the following @@ -3271,45 +1998,31 @@ def write_to_dataset(table, root_path, partition_cols=None, ---------- table : pyarrow.Table root_path : str, pathlib.Path - The root directory of the dataset + The root directory of the dataset. partition_cols : list, Column names by which to partition the dataset. - Columns are partitioned in the order they are given - partition_filename_cb : callable, - A callback function that takes the partition key(s) as an argument - and allow you to override the partition filename. If nothing is - passed, the filename will consist of a uuid. - This option is only supported for use_legacy_dataset=True. - When use_legacy_dataset=None and this option is specified, - use_legacy_dataset will be set to True. + Columns are partitioned in the order they are given. filesystem : FileSystem, default None If nothing passed, will be inferred based on path. Path will try to be found in the local on-disk filesystem otherwise it will be parsed as an URI to determine the filesystem. - use_legacy_dataset : bool - Default is False. 
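# --- Editor's illustrative sketch; not part of this diff ---
# The fallback branch of read_table shown above drops to ParquetFile when the
# pyarrow.dataset module cannot be imported. For a single local file the
# equivalent explicit calls look roughly like this; 'single_file.parquet' is a
# hypothetical path.
import pyarrow as pa
import pyarrow.parquet as pq

pq.write_table(pa.table({'a': [1, 2, 3]}), 'single_file.parquet')

pf = pq.ParquetFile('single_file.parquet', buffer_size=4096, pre_buffer=True)
print(pf.metadata.num_rows, pf.schema_arrow)
print(pf.read(columns=['a'], use_threads=True))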
Set to True to use the legacy behaviour - (this option is deprecated, and the legacy implementation will be - removed in a future version). The legacy implementation still - supports the `partition_filename_cb` keyword but is less efficient - when using partition columns. + use_legacy_dataset : bool, optional + Deprecated and has no effect from PyArrow version 15.0.0. schema : Schema, optional - This option is only supported for use_legacy_dataset=False. + This Schema of the dataset. partitioning : Partitioning or list[str], optional The partitioning scheme specified with the ``pyarrow.dataset.partitioning()`` function or a list of field names. When providing a list of field names, you can use ``partitioning_flavor`` to drive which partitioning type should be used. - This option is only supported for use_legacy_dataset=False. basename_template : str, optional A template string used to generate basenames of written data files. The token '{i}' will be replaced with an automatically incremented integer. If not specified, it defaults to "guid-{i}.parquet". - This option is only supported for use_legacy_dataset=False. use_threads : bool, default True Write files in parallel. If enabled, then maximum parallelism will be used determined by the number of available CPU cores. - This option is only supported for use_legacy_dataset=False. file_visitor : function If set, this function will be called with a WrittenFile instance for each file created during the call. This object will have both @@ -3330,7 +2043,6 @@ def write_to_dataset(table, root_path, partition_cols=None, def file_visitor(written_file): visited_paths.append(written_file.path) - This option is only supported for use_legacy_dataset=False. existing_data_behavior : 'overwrite_or_ignore' | 'error' | \ 'delete_matching' Controls how the dataset will handle data that already exists in @@ -3348,15 +2060,12 @@ def file_visitor(written_file): dataset. The first time each partition directory is encountered the entire directory will be deleted. This allows you to overwrite old partitions completely. - This option is only supported for use_legacy_dataset=False. **kwargs : dict, - When use_legacy_dataset=False, used as additional kwargs for - `dataset.write_dataset` function for matching kwargs, and remainder to - `ParquetFileFormat.make_write_options`. See the docstring - of `write_table` and `dataset.write_dataset` for the available options. - When use_legacy_dataset=True, used as additional kwargs for - `parquet.write_table` function (See docstring for `write_table` - or `ParquetWriter` for more information). + Used as additional kwargs for :func:`pyarrow.dataset.write_dataset` + function for matching kwargs, and remainder to + :func:`pyarrow.dataset.ParquetFileFormat.make_write_options`. + See the docstring of :func:`write_table` and + :func:`pyarrow.dataset.write_dataset` for the available options. Using `metadata_collector` in kwargs allows one to collect the file metadata instances of dataset pieces. The file paths in the ColumnChunkMetaData will be set relative to `root_path`. @@ -3376,194 +2085,79 @@ def file_visitor(written_file): >>> import pyarrow.parquet as pq >>> pq.write_to_dataset(table, root_path='dataset_name_3', ... partition_cols=['year']) - >>> pq.ParquetDataset('dataset_name_3', use_legacy_dataset=False).files + >>> pq.ParquetDataset('dataset_name_3').files ['dataset_name_3/year=2019/...-0.parquet', ... 
Write a single Parquet file into the root folder: >>> pq.write_to_dataset(table, root_path='dataset_name_4') - >>> pq.ParquetDataset('dataset_name_4/', use_legacy_dataset=False).files + >>> pq.ParquetDataset('dataset_name_4/').files ['dataset_name_4/...-0.parquet'] """ - # Choose the implementation - if use_legacy_dataset is None: - # if partition_filename_cb is specified -> - # default to the old implementation - if partition_filename_cb: - use_legacy_dataset = True - # otherwise the default is False - else: - use_legacy_dataset = False + if use_legacy_dataset is not None: + warnings.warn( + "Passing 'use_legacy_dataset' is deprecated as of pyarrow 15.0.0 " + "and will be removed in a future version.", + FutureWarning, stacklevel=2) + + metadata_collector = kwargs.pop('metadata_collector', None) # Check for conflicting keywords - msg_confl_0 = ( - "The '{0}' argument is not supported by use_legacy_dataset={2}. " - "Use only '{1}' instead." - ) - msg_confl_1 = ( - "The '{1}' argument is not supported by use_legacy_dataset={2}. " + msg_confl = ( + "The '{1}' argument is not supported. " "Use only '{0}' instead." ) - msg_confl = msg_confl_0 if use_legacy_dataset else msg_confl_1 - if partition_filename_cb is not None and basename_template is not None: - raise ValueError(msg_confl.format("basename_template", - "partition_filename_cb", - use_legacy_dataset)) - if partition_cols is not None and partitioning is not None: raise ValueError(msg_confl.format("partitioning", - "partition_cols", - use_legacy_dataset)) + "partition_cols")) - metadata_collector = kwargs.pop('metadata_collector', None) if metadata_collector is not None and file_visitor is not None: raise ValueError(msg_confl.format("file_visitor", - "metadata_collector", - use_legacy_dataset)) + "metadata_collector")) - # New dataset implementation - if not use_legacy_dataset: - import pyarrow.dataset as ds + import pyarrow.dataset as ds - # extract write_dataset specific options - # reset assumed to go to make_write_options - write_dataset_kwargs = dict() - for key in inspect.signature(ds.write_dataset).parameters: - if key in kwargs: - write_dataset_kwargs[key] = kwargs.pop(key) - write_dataset_kwargs['max_rows_per_group'] = kwargs.pop( - 'row_group_size', kwargs.pop("chunk_size", None) - ) - # raise for unsupported keywords - msg = ( - "The '{}' argument is not supported with the new dataset " - "implementation." 
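# --- Editor's illustrative sketch; not part of this diff ---
# write_to_dataset now always forwards to pyarrow.dataset.write_dataset, as the
# rewritten body above shows. This sketch pairs partition_cols with a
# metadata_collector; 'collected_dataset' is a hypothetical output directory.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'year': [2020, 2021], 'value': [1.0, 2.0]})
collector = []
pq.write_to_dataset(
    table, 'collected_dataset',
    partition_cols=['year'],
    existing_data_behavior='overwrite_or_ignore',
    metadata_collector=collector,
)
# File paths in the collected metadata are set relative to the root path.
for md in collector:
    print(md.row_group(0).column(0).file_path)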
- ) - if metadata_collector is not None: - def file_visitor(written_file): - metadata_collector.append(written_file.metadata) - if partition_filename_cb is not None: - raise ValueError(msg.format("partition_filename_cb")) + # extract write_dataset specific options + # reset assumed to go to make_write_options + write_dataset_kwargs = dict() + for key in inspect.signature(ds.write_dataset).parameters: + if key in kwargs: + write_dataset_kwargs[key] = kwargs.pop(key) + write_dataset_kwargs['max_rows_per_group'] = kwargs.pop( + 'row_group_size', kwargs.pop("chunk_size", None) + ) - # map format arguments - parquet_format = ds.ParquetFileFormat() - write_options = parquet_format.make_write_options(**kwargs) + if metadata_collector is not None: + def file_visitor(written_file): + metadata_collector.append(written_file.metadata) - # map old filesystems to new one - if filesystem is not None: - filesystem = _ensure_filesystem(filesystem) - - if partition_cols: - part_schema = table.select(partition_cols).schema - partitioning = ds.partitioning(part_schema, flavor="hive") - - if basename_template is None: - basename_template = guid() + '-{i}.parquet' - - if existing_data_behavior is None: - existing_data_behavior = 'overwrite_or_ignore' - - ds.write_dataset( - table, root_path, filesystem=filesystem, - format=parquet_format, file_options=write_options, schema=schema, - partitioning=partitioning, use_threads=use_threads, - file_visitor=file_visitor, - basename_template=basename_template, - existing_data_behavior=existing_data_behavior, - **write_dataset_kwargs) - return - - # warnings and errors when using legacy implementation - if use_legacy_dataset: - warnings.warn( - "Passing 'use_legacy_dataset=True' to get the legacy behaviour is " - "deprecated as of pyarrow 8.0.0, and the legacy implementation " - "will be removed in a future version.", - FutureWarning, stacklevel=2) - msg2 = ( - "The '{}' argument is not supported with the legacy " - "implementation. To use this argument specify " - "'use_legacy_dataset=False' while constructing the " - "ParquetDataset." - ) - if schema is not None: - raise ValueError(msg2.format("schema")) - if partitioning is not None: - raise ValueError(msg2.format("partitioning")) - if use_threads is not None: - raise ValueError(msg2.format("use_threads")) - if file_visitor is not None: - raise ValueError(msg2.format("file_visitor")) - if existing_data_behavior is not None: - raise ValueError(msg2.format("existing_data_behavior")) - if basename_template is not None: - raise ValueError(msg2.format("basename_template")) - if partition_filename_cb is not None: - warnings.warn( - _DEPR_MSG.format("partition_filename_cb", " Specify " - "'use_legacy_dataset=False' while constructing " - "the ParquetDataset, and then use the " - "'basename_template' parameter instead. 
For " - "usage see `pyarrow.dataset.write_dataset`"), - FutureWarning, stacklevel=2) + # map format arguments + parquet_format = ds.ParquetFileFormat() + write_options = parquet_format.make_write_options(**kwargs) - # Legacy implementation - fs, root_path = legacyfs.resolve_filesystem_and_path(root_path, filesystem) - - _mkdir_if_not_exists(fs, root_path) - - if partition_cols is not None and len(partition_cols) > 0: - df = table.to_pandas() - partition_keys = [df[col] for col in partition_cols] - data_df = df.drop(partition_cols, axis='columns') - data_cols = df.columns.drop(partition_cols) - if len(data_cols) == 0: - raise ValueError('No data left to save outside partition columns') - - subschema = table.schema - - # ARROW-2891: Ensure the output_schema is preserved when writing a - # partitioned dataset - for col in table.schema.names: - if col in partition_cols: - subschema = subschema.remove(subschema.get_field_index(col)) - - # ARROW-17829: avoid deprecation warnings for df.groupby - # https://github.com/pandas-dev/pandas/issues/42795 - if len(partition_keys) == 1: - partition_keys = partition_keys[0] - - for keys, subgroup in data_df.groupby(partition_keys, observed=True): - if not isinstance(keys, tuple): - keys = (keys,) - subdir = '/'.join( - ['{colname}={value}'.format(colname=name, value=val) - for name, val in zip(partition_cols, keys)]) - subtable = pa.Table.from_pandas(subgroup, schema=subschema, - safe=False) - _mkdir_if_not_exists(fs, '/'.join([root_path, subdir])) - if partition_filename_cb: - outfile = partition_filename_cb(keys) - else: - outfile = guid() + '.parquet' - relative_path = '/'.join([subdir, outfile]) - full_path = '/'.join([root_path, relative_path]) - with fs.open(full_path, 'wb') as f: - write_table(subtable, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(relative_path) - else: - if partition_filename_cb: - outfile = partition_filename_cb(None) - else: - outfile = guid() + '.parquet' - full_path = '/'.join([root_path, outfile]) - with fs.open(full_path, 'wb') as f: - write_table(table, f, metadata_collector=metadata_collector, - **kwargs) - if metadata_collector is not None: - metadata_collector[-1].set_file_path(outfile) + # map old filesystems to new one + if filesystem is not None: + filesystem = _ensure_filesystem(filesystem) + + if partition_cols: + part_schema = table.select(partition_cols).schema + partitioning = ds.partitioning(part_schema, flavor="hive") + + if basename_template is None: + basename_template = guid() + '-{i}.parquet' + + if existing_data_behavior is None: + existing_data_behavior = 'overwrite_or_ignore' + + ds.write_dataset( + table, root_path, filesystem=filesystem, + format=parquet_format, file_options=write_options, schema=schema, + partitioning=partitioning, use_threads=use_threads, + file_visitor=file_visitor, + basename_template=basename_template, + existing_data_behavior=existing_data_behavior, + **write_dataset_kwargs) + return def write_metadata(schema, where, metadata_collector=None, filesystem=None, @@ -3741,15 +2335,11 @@ def read_schema(where, memory_map=False, decryption_properties=None, "FileEncryptionProperties", "FileMetaData", "ParquetDataset", - "ParquetDatasetPiece", "ParquetFile", "ParquetLogicalType", - "ParquetManifest", - "ParquetPartitions", "ParquetReader", "ParquetSchema", "ParquetWriter", - "PartitionSet", "RowGroupMetaData", "SortingColumn", "Statistics", diff --git a/python/pyarrow/src/arrow/python/parquet_encryption.h 
b/python/pyarrow/src/arrow/python/parquet_encryption.h index 23ee478348ecd..a1aaa30e260f5 100644 --- a/python/pyarrow/src/arrow/python/parquet_encryption.h +++ b/python/pyarrow/src/arrow/python/parquet_encryption.h @@ -26,6 +26,27 @@ #include "parquet/encryption/kms_client.h" #include "parquet/encryption/kms_client_factory.h" +#if defined(_WIN32) || defined(__CYGWIN__) // Windows +#if defined(_MSC_VER) +#pragma warning(disable : 4251) +#else +#pragma GCC diagnostic ignored "-Wattributes" +#endif + +#ifdef ARROW_PYTHON_STATIC +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#elif defined(ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORTING) +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllexport) +#else +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __declspec(dllimport) +#endif + +#else // Not Windows +#ifndef ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT +#define ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT __attribute__((visibility("default"))) +#endif +#endif // Non-Windows + namespace arrow { namespace py { namespace parquet { @@ -33,7 +54,7 @@ namespace encryption { /// \brief A table of function pointers for calling from C++ into /// Python. -class ARROW_PYTHON_EXPORT PyKmsClientVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientVtable { public: std::function @@ -44,7 +65,8 @@ class ARROW_PYTHON_EXPORT PyKmsClientVtable { }; /// \brief A helper for KmsClient implementation in Python. -class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClient + : public ::parquet::encryption::KmsClient { public: PyKmsClient(PyObject* handler, PyKmsClientVtable vtable); ~PyKmsClient() override; @@ -62,7 +84,7 @@ class ARROW_PYTHON_EXPORT PyKmsClient : public ::parquet::encryption::KmsClient /// \brief A table of function pointers for calling from C++ into /// Python. -class ARROW_PYTHON_EXPORT PyKmsClientFactoryVtable { +class ARROW_PYTHON_PARQUET_ENCRYPTION_EXPORT PyKmsClientFactoryVtable { public: std::function> SafeGetFileEncryptionProperties( diff --git a/python/pyarrow/tests/parquet/__init__.py b/python/pyarrow/tests/parquet/__init__.py index 4c4e8240b8736..d08d67d2860f4 100644 --- a/python/pyarrow/tests/parquet/__init__.py +++ b/python/pyarrow/tests/parquet/__init__.py @@ -21,7 +21,4 @@ # Ignore these with pytest ... 
-m 'not parquet' pytestmark = [ pytest.mark.parquet, - pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning" - ), ] diff --git a/python/pyarrow/tests/parquet/common.py b/python/pyarrow/tests/parquet/common.py index 4401d3ca6bb75..8365ed5b28543 100644 --- a/python/pyarrow/tests/parquet/common.py +++ b/python/pyarrow/tests/parquet/common.py @@ -18,31 +18,10 @@ import io import numpy as np -import pytest import pyarrow as pa from pyarrow.tests import util -legacy_filter_mark = pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy:FutureWarning" -) - -parametrize_legacy_dataset = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=legacy_filter_mark), - pytest.param(False, marks=pytest.mark.dataset)] -) -parametrize_legacy_dataset_not_supported = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=legacy_filter_mark), - pytest.param(False, marks=pytest.mark.skip)] -) -parametrize_legacy_dataset_fixed = pytest.mark.parametrize( - "use_legacy_dataset", - [pytest.param(True, marks=[pytest.mark.xfail, legacy_filter_mark]), - pytest.param(False, marks=pytest.mark.dataset)] -) - def _write_table(table, path, **kwargs): # So we see the ImportError somewhere @@ -65,19 +44,18 @@ def _read_table(*args, **kwargs): def _roundtrip_table(table, read_table_kwargs=None, - write_table_kwargs=None, use_legacy_dataset=False): + write_table_kwargs=None): read_table_kwargs = read_table_kwargs or {} write_table_kwargs = write_table_kwargs or {} writer = pa.BufferOutputStream() _write_table(table, writer, **write_table_kwargs) reader = pa.BufferReader(writer.getvalue()) - return _read_table(reader, use_legacy_dataset=use_legacy_dataset, - **read_table_kwargs) + return _read_table(reader, **read_table_kwargs) def _check_roundtrip(table, expected=None, read_table_kwargs=None, - use_legacy_dataset=False, **write_table_kwargs): + **write_table_kwargs): if expected is None: expected = table @@ -85,20 +63,17 @@ def _check_roundtrip(table, expected=None, read_table_kwargs=None, # intentionally check twice result = _roundtrip_table(table, read_table_kwargs=read_table_kwargs, - write_table_kwargs=write_table_kwargs, - use_legacy_dataset=use_legacy_dataset) + write_table_kwargs=write_table_kwargs) assert result.equals(expected) result = _roundtrip_table(result, read_table_kwargs=read_table_kwargs, - write_table_kwargs=write_table_kwargs, - use_legacy_dataset=use_legacy_dataset) + write_table_kwargs=write_table_kwargs) assert result.equals(expected) -def _roundtrip_pandas_dataframe(df, write_kwargs, use_legacy_dataset=False): +def _roundtrip_pandas_dataframe(df, write_kwargs): table = pa.Table.from_pandas(df) result = _roundtrip_table( - table, write_table_kwargs=write_kwargs, - use_legacy_dataset=use_legacy_dataset) + table, write_table_kwargs=write_kwargs) return result.to_pandas() diff --git a/python/pyarrow/tests/parquet/test_basic.py b/python/pyarrow/tests/parquet/test_basic.py index 83e6ebeb7a1fc..3c867776ac052 100644 --- a/python/pyarrow/tests/parquet/test_basic.py +++ b/python/pyarrow/tests/parquet/test_basic.py @@ -28,7 +28,6 @@ from pyarrow.filesystem import LocalFileSystem, FileSystem from pyarrow.tests import util from pyarrow.tests.parquet.common import (_check_roundtrip, _roundtrip_table, - parametrize_legacy_dataset, _test_dataframe) try: @@ -63,21 +62,18 @@ def test_parquet_invalid_version(tempdir): data_page_version="2.2") -@parametrize_legacy_dataset -def test_set_data_page_size(use_legacy_dataset): +def 
test_set_data_page_size(): arr = pa.array([1, 2, 3] * 100000) t = pa.Table.from_arrays([arr], names=['f0']) # 128K, 512K page_sizes = [2 << 16, 2 << 18] for target_page_size in page_sizes: - _check_roundtrip(t, data_page_size=target_page_size, - use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(t, data_page_size=target_page_size) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_set_write_batch_size(use_legacy_dataset): +def test_set_write_batch_size(): df = _test_dataframe(100) table = pa.Table.from_pandas(df, preserve_index=False) @@ -87,8 +83,7 @@ def test_set_write_batch_size(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_set_dictionary_pagesize_limit(use_legacy_dataset): +def test_set_dictionary_pagesize_limit(): df = _test_dataframe(100) table = pa.Table.from_pandas(df, preserve_index=False) @@ -101,8 +96,7 @@ def test_set_dictionary_pagesize_limit(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_chunked_table_write(use_legacy_dataset): +def test_chunked_table_write(): # ARROW-232 tables = [] batch = pa.RecordBatch.from_pandas(alltypes_sample(size=10)) @@ -116,66 +110,56 @@ def test_chunked_table_write(use_legacy_dataset): for table in tables: _check_roundtrip( table, version='2.6', - use_legacy_dataset=use_legacy_dataset, data_page_version=data_page_version, use_dictionary=use_dictionary) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_memory_map(tempdir, use_legacy_dataset): +def test_memory_map(tempdir): df = alltypes_sample(size=10) table = pa.Table.from_pandas(df) _check_roundtrip(table, read_table_kwargs={'memory_map': True}, - version='2.6', use_legacy_dataset=use_legacy_dataset) + version='2.6') filename = str(tempdir / 'tmp_file') with open(filename, 'wb') as f: _write_table(table, f, version='2.6') - table_read = pq.read_pandas(filename, memory_map=True, - use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename, memory_map=True) assert table_read.equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_enable_buffered_stream(tempdir, use_legacy_dataset): +def test_enable_buffered_stream(tempdir): df = alltypes_sample(size=10) table = pa.Table.from_pandas(df) _check_roundtrip(table, read_table_kwargs={'buffer_size': 1025}, - version='2.6', use_legacy_dataset=use_legacy_dataset) + version='2.6') filename = str(tempdir / 'tmp_file') with open(filename, 'wb') as f: _write_table(table, f, version='2.6') - table_read = pq.read_pandas(filename, buffer_size=4096, - use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename, buffer_size=4096) assert table_read.equals(table) -@parametrize_legacy_dataset -def test_special_chars_filename(tempdir, use_legacy_dataset): +def test_special_chars_filename(tempdir): table = pa.Table.from_arrays([pa.array([42])], ["ints"]) filename = "foo # bar" path = tempdir / filename assert not path.exists() _write_table(table, str(path)) assert path.exists() - table_read = _read_table(str(path), use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(str(path)) assert table_read.equals(table) -@parametrize_legacy_dataset -def test_invalid_source(use_legacy_dataset): +def test_invalid_source(): # Test that we provide an helpful error message pointing out # that None wasn't expected when trying to open a Parquet None file. - # - # Depending on use_legacy_dataset the message changes slightly - # but in both cases it should point out that None wasn't expected. 
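# --- Editor's illustrative sketch; not part of this diff ---
# The simplified test helpers above reduce to a plain in-memory write/read
# round trip, with no dataset-implementation switch. A standalone equivalent:
import pyarrow as pa
import pyarrow.parquet as pq

def roundtrip(table, read_table_kwargs=None, **write_table_kwargs):
    read_table_kwargs = read_table_kwargs or {}
    sink = pa.BufferOutputStream()
    pq.write_table(table, sink, **write_table_kwargs)
    return pq.read_table(pa.BufferReader(sink.getvalue()), **read_table_kwargs)

t = pa.table({'f0': pa.array([1, 2, 3] * 1000)})
assert roundtrip(t, data_page_size=2 << 16).equals(t)                   # small data pages
assert roundtrip(t, read_table_kwargs={'buffer_size': 1025}).equals(t)  # buffered stream reads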
with pytest.raises(TypeError, match="None"): - pq.read_table(None, use_legacy_dataset=use_legacy_dataset) + pq.read_table(None) with pytest.raises(TypeError, match="None"): pq.ParquetFile(None) @@ -193,8 +177,7 @@ def test_file_with_over_int16_max_row_groups(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_empty_table_roundtrip(use_legacy_dataset): +def test_empty_table_roundtrip(): df = alltypes_sample(size=10) # Create a non-empty table to infer the types correctly, then slice to 0 @@ -206,19 +189,17 @@ def test_empty_table_roundtrip(use_legacy_dataset): assert table.schema.field('null').type == pa.null() assert table.schema.field('null_list').type == pa.list_(pa.null()) _check_roundtrip( - table, version='2.6', use_legacy_dataset=use_legacy_dataset) + table, version='2.6') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_empty_table_no_columns(use_legacy_dataset): +def test_empty_table_no_columns(): df = pd.DataFrame() empty = pa.Table.from_pandas(df, preserve_index=False) - _check_roundtrip(empty, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(empty) -@parametrize_legacy_dataset -def test_write_nested_zero_length_array_chunk_failure(use_legacy_dataset): +def test_write_nested_zero_length_array_chunk_failure(): # Bug report in ARROW-3792 cols = OrderedDict( int32=pa.int32(), @@ -243,17 +224,16 @@ def test_write_nested_zero_length_array_chunk_failure(use_legacy_dataset): my_batches = [pa.RecordBatch.from_arrays(batch, schema=pa.schema(cols)) for batch in my_arrays] tbl = pa.Table.from_batches(my_batches, pa.schema(cols)) - _check_roundtrip(tbl, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(tbl) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multiple_path_types(tempdir, use_legacy_dataset): +def test_multiple_path_types(tempdir): # Test compatibility with PEP 519 path-like objects path = tempdir / 'zzz.parquet' df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)}) _write_table(df, path) - table_read = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(path) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -261,13 +241,12 @@ def test_multiple_path_types(tempdir, use_legacy_dataset): path = str(tempdir) + 'zzz.parquet' df = pd.DataFrame({'x': np.arange(10, dtype=np.int64)}) _write_table(df, path) - table_read = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(path) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) -@parametrize_legacy_dataset -def test_fspath(tempdir, use_legacy_dataset): +def test_fspath(tempdir): # ARROW-12472 support __fspath__ objects without using str() path = tempdir / "test.parquet" table = pa.table({"a": [1, 2, 3]}) @@ -275,9 +254,7 @@ def test_fspath(tempdir, use_legacy_dataset): fs_protocol_obj = util.FSProtocolClass(path) - result = _read_table( - fs_protocol_obj, use_legacy_dataset=use_legacy_dataset - ) + result = _read_table(fs_protocol_obj) assert result.equals(table) # combined with non-local filesystem raises @@ -285,15 +262,11 @@ def test_fspath(tempdir, use_legacy_dataset): _read_table(fs_protocol_obj, filesystem=FileSystem()) -@pytest.mark.dataset -@parametrize_legacy_dataset @pytest.mark.parametrize("filesystem", [ None, fs.LocalFileSystem(), LocalFileSystem._get_instance() ]) @pytest.mark.parametrize("name", ("data.parquet", "例.parquet")) -def test_relative_paths(tempdir, use_legacy_dataset, filesystem, name): - if use_legacy_dataset and isinstance(filesystem, fs.FileSystem): - 
pytest.skip("Passing new filesystem not supported for legacy reader") +def test_relative_paths(tempdir, filesystem, name): # reading and writing from relative paths table = pa.table({"a": [1, 2, 3]}) path = tempdir / name @@ -301,8 +274,7 @@ def test_relative_paths(tempdir, use_legacy_dataset, filesystem, name): # reading pq.write_table(table, str(path)) with util.change_cwd(tempdir): - result = pq.read_table(name, filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(name, filesystem=filesystem) assert result.equals(table) path.unlink() @@ -334,24 +306,21 @@ def seek(self, *args): pq.read_table(BogusFile(b"")) -@parametrize_legacy_dataset -def test_parquet_read_from_buffer(tempdir, use_legacy_dataset): +def test_parquet_read_from_buffer(tempdir): # reading from a buffer from python's open() table = pa.table({"a": [1, 2, 3]}) pq.write_table(table, str(tempdir / "data.parquet")) with open(str(tempdir / "data.parquet"), "rb") as f: - result = pq.read_table(f, use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(f) assert result.equals(table) with open(str(tempdir / "data.parquet"), "rb") as f: - result = pq.read_table(pa.PythonFile(f), - use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(pa.PythonFile(f)) assert result.equals(table) -@parametrize_legacy_dataset -def test_byte_stream_split(use_legacy_dataset): +def test_byte_stream_split(): # This is only a smoke test. arr_float = pa.array(list(map(float, range(100)))) arr_int = pa.array(list(map(int, range(100)))) @@ -385,12 +354,10 @@ def test_byte_stream_split(use_legacy_dataset): table = pa.Table.from_arrays([arr_int], names=['tmp']) with pytest.raises(IOError): _check_roundtrip(table, expected=table, use_byte_stream_split=True, - use_dictionary=False, - use_legacy_dataset=use_legacy_dataset) + use_dictionary=False) -@parametrize_legacy_dataset -def test_column_encoding(use_legacy_dataset): +def test_column_encoding(): arr_float = pa.array(list(map(float, range(100)))) arr_int = pa.array(list(map(int, range(100)))) arr_bin = pa.array([str(x) for x in range(100)], type=pa.binary()) @@ -406,30 +373,26 @@ def test_column_encoding(use_legacy_dataset): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "BYTE_STREAM_SPLIT", 'b': "PLAIN", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Check "PLAIN" for all columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding="PLAIN", - use_legacy_dataset=use_legacy_dataset) + column_encoding="PLAIN") # Check "DELTA_BINARY_PACKED" for integer columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Check "DELTA_LENGTH_BYTE_ARRAY" for byte columns. _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", - 'c': "DELTA_LENGTH_BYTE_ARRAY"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "DELTA_LENGTH_BYTE_ARRAY"}) # Check "DELTA_BYTE_ARRAY" for byte columns. _check_roundtrip(mixed_table, expected=mixed_table, @@ -437,14 +400,12 @@ def test_column_encoding(use_legacy_dataset): column_encoding={'a': "PLAIN", 'b': "DELTA_BINARY_PACKED", 'c': "DELTA_BYTE_ARRAY", - 'd': "DELTA_BYTE_ARRAY"}, - use_legacy_dataset=use_legacy_dataset) + 'd': "DELTA_BYTE_ARRAY"}) # Check "RLE" for boolean columns. 
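# --- Editor's illustrative sketch; not part of this diff ---
# Per-column encodings as exercised by test_column_encoding above; encodings
# are only accepted when dictionary encoding is disabled for those columns.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'a': pa.array([float(i) for i in range(100)]),
                  'b': pa.array(list(range(100))),
                  'c': pa.array([str(i) for i in range(100)], type=pa.binary())})

sink = pa.BufferOutputStream()
pq.write_table(table, sink,
               use_dictionary=False,
               column_encoding={'a': 'BYTE_STREAM_SPLIT',      # float/double columns
                                'b': 'DELTA_BINARY_PACKED',    # integer columns
                                'c': 'DELTA_LENGTH_BYTE_ARRAY'})
assert pq.read_table(pa.BufferReader(sink.getvalue())).equals(table)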
_check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding={'e': "RLE"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'e': "RLE"}) # Try to pass "BYTE_STREAM_SPLIT" column encoding for integer column 'b'. # This should throw an error as it is only supports FLOAT and DOUBLE. @@ -455,8 +416,7 @@ def test_column_encoding(use_legacy_dataset): use_dictionary=False, column_encoding={'a': "PLAIN", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass use "DELTA_BINARY_PACKED" encoding on float column. # This should throw an error as only integers are supported. @@ -465,8 +425,7 @@ def test_column_encoding(use_legacy_dataset): use_dictionary=False, column_encoding={'a': "DELTA_BINARY_PACKED", 'b': "PLAIN", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass "RLE_DICTIONARY". # This should throw an error as dictionary encoding is already used by @@ -474,30 +433,26 @@ def test_column_encoding(use_legacy_dataset): with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding="RLE_DICTIONARY", - use_legacy_dataset=use_legacy_dataset) + column_encoding="RLE_DICTIONARY") # Try to pass unsupported encoding. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding={'a': "MADE_UP_ENCODING"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'a': "MADE_UP_ENCODING"}) # Try to pass column_encoding and use_dictionary. # This should throw an error. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=['b'], - column_encoding={'b': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'b': "PLAIN"}) # Try to pass column_encoding and use_dictionary=True (default value). # This should throw an error. with pytest.raises(ValueError): _check_roundtrip(mixed_table, expected=mixed_table, - column_encoding={'b': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + column_encoding={'b': "PLAIN"}) # Try to pass column_encoding and use_byte_stream_split on same column. # This should throw an error. @@ -507,8 +462,7 @@ def test_column_encoding(use_legacy_dataset): use_byte_stream_split=['a'], column_encoding={'a': "RLE", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass column_encoding and use_byte_stream_split=True. # This should throw an error. @@ -518,54 +472,45 @@ def test_column_encoding(use_legacy_dataset): use_byte_stream_split=True, column_encoding={'a': "RLE", 'b': "BYTE_STREAM_SPLIT", - 'c': "PLAIN"}, - use_legacy_dataset=use_legacy_dataset) + 'c': "PLAIN"}) # Try to pass column_encoding=True. # This should throw an error. with pytest.raises(TypeError): _check_roundtrip(mixed_table, expected=mixed_table, use_dictionary=False, - column_encoding=True, - use_legacy_dataset=use_legacy_dataset) + column_encoding=True) -@parametrize_legacy_dataset -def test_compression_level(use_legacy_dataset): +def test_compression_level(): arr = pa.array(list(map(int, range(1000)))) data = [arr, arr] table = pa.Table.from_arrays(data, names=['a', 'b']) # Check one compression level. 
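# --- Editor's illustrative sketch; not part of this diff ---
# Compression settings mirroring test_compression_level above: a per-column
# codec choice, and an explicit per-column gzip level (assuming a build with
# gzip and snappy enabled, as in these tests).
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'a': list(range(1000)), 'b': list(range(1000))})

sink = pa.BufferOutputStream()
pq.write_table(table, sink, compression={'a': 'gzip', 'b': 'snappy'})
assert pq.read_table(pa.BufferReader(sink.getvalue())).equals(table)

sink = pa.BufferOutputStream()
pq.write_table(table, sink, compression='gzip', compression_level={'a': 2, 'b': 3})
assert pq.read_table(pa.BufferReader(sink.getvalue())).equals(table)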
_check_roundtrip(table, expected=table, compression="gzip", - compression_level=1, - use_legacy_dataset=use_legacy_dataset) + compression_level=1) # Check another one to make sure that compression_level=1 does not # coincide with the default one in Arrow. _check_roundtrip(table, expected=table, compression="gzip", - compression_level=5, - use_legacy_dataset=use_legacy_dataset) + compression_level=5) # Check that the user can provide a compression per column _check_roundtrip(table, expected=table, - compression={'a': "gzip", 'b': "snappy"}, - use_legacy_dataset=use_legacy_dataset) + compression={'a': "gzip", 'b': "snappy"}) # Check that the user can provide a compression level per column _check_roundtrip(table, expected=table, compression="gzip", - compression_level={'a': 2, 'b': 3}, - use_legacy_dataset=use_legacy_dataset) + compression_level={'a': 2, 'b': 3}) # Check if both LZ4 compressors are working # (level < 3 -> fast, level >= 3 -> HC) _check_roundtrip(table, expected=table, compression="lz4", - compression_level=1, - use_legacy_dataset=use_legacy_dataset) + compression_level=1) _check_roundtrip(table, expected=table, compression="lz4", - compression_level=9, - use_legacy_dataset=use_legacy_dataset) + compression_level=9) # Check that specifying a compression level for a codec which does allow # specifying one, results into an error. @@ -594,8 +539,7 @@ def test_sanitized_spark_field_names(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multithreaded_read(use_legacy_dataset): +def test_multithreaded_read(): df = alltypes_sample(size=10000) table = pa.Table.from_pandas(df) @@ -604,19 +548,16 @@ def test_multithreaded_read(use_legacy_dataset): _write_table(table, buf, compression='SNAPPY', version='2.6') buf.seek(0) - table1 = _read_table( - buf, use_threads=True, use_legacy_dataset=use_legacy_dataset) + table1 = _read_table(buf, use_threads=True) buf.seek(0) - table2 = _read_table( - buf, use_threads=False, use_legacy_dataset=use_legacy_dataset) + table2 = _read_table(buf, use_threads=False) assert table1.equals(table2) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_min_chunksize(use_legacy_dataset): +def test_min_chunksize(): data = pd.DataFrame([np.arange(4)], columns=['A', 'B', 'C', 'D']) table = pa.Table.from_pandas(data.reset_index()) @@ -624,7 +565,7 @@ def test_min_chunksize(use_legacy_dataset): _write_table(table, buf, chunk_size=-1) buf.seek(0) - result = _read_table(buf, use_legacy_dataset=use_legacy_dataset) + result = _read_table(buf) assert result.equals(table) @@ -659,57 +600,46 @@ def test_write_error_deletes_incomplete_file(tempdir): assert not filename.exists() -@parametrize_legacy_dataset -def test_read_non_existent_file(tempdir, use_legacy_dataset): +def test_read_non_existent_file(tempdir): path = 'nonexistent-file.parquet' try: - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) except Exception as e: assert path in e.args[0] -@parametrize_legacy_dataset -def test_read_table_doesnt_warn(datadir, use_legacy_dataset): - if use_legacy_dataset: - msg = "Passing 'use_legacy_dataset=True'" - with pytest.warns(FutureWarning, match=msg): - pq.read_table(datadir / 'v0.7.1.parquet', - use_legacy_dataset=use_legacy_dataset) - else: - with warnings.catch_warnings(): - warnings.simplefilter(action="error") - pq.read_table(datadir / 'v0.7.1.parquet', - use_legacy_dataset=use_legacy_dataset) +def test_read_table_doesnt_warn(datadir): + with warnings.catch_warnings(): + warnings.simplefilter(action="error") + 
pq.read_table(datadir / 'v0.7.1.parquet') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_zlib_compression_bug(use_legacy_dataset): +def test_zlib_compression_bug(): # ARROW-3514: "zlib deflate failed, output buffer too small" table = pa.Table.from_arrays([pa.array(['abc', 'def'])], ['some_col']) f = io.BytesIO() pq.write_table(table, f, compression='gzip') f.seek(0) - roundtrip = pq.read_table(f, use_legacy_dataset=use_legacy_dataset) + roundtrip = pq.read_table(f) tm.assert_frame_equal(roundtrip.to_pandas(), table.to_pandas()) -@parametrize_legacy_dataset -def test_parquet_file_too_small(tempdir, use_legacy_dataset): +def test_parquet_file_too_small(tempdir): path = str(tempdir / "test.parquet") # TODO(dataset) with datasets API it raises OSError instead with pytest.raises((pa.ArrowInvalid, OSError), match='size is 0 bytes'): with open(path, 'wb') as f: pass - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) with pytest.raises((pa.ArrowInvalid, OSError), match='size is 4 bytes'): with open(path, 'wb') as f: f.write(b'ffff') - pq.read_table(path, use_legacy_dataset=use_legacy_dataset) + pq.read_table(path) @pytest.mark.pandas @@ -752,17 +682,15 @@ def test_fastparquet_cross_compatibility(tempdir): tm.assert_frame_equal(table_fp.to_pandas(), df) -@parametrize_legacy_dataset @pytest.mark.parametrize('array_factory', [ lambda: pa.array([0, None] * 10), lambda: pa.array([0, None] * 10).dictionary_encode(), lambda: pa.array(["", None] * 10), lambda: pa.array(["", None] * 10).dictionary_encode(), ]) -@pytest.mark.parametrize('use_dictionary', [False, True]) @pytest.mark.parametrize('read_dictionary', [False, True]) def test_buffer_contents( - array_factory, use_dictionary, read_dictionary, use_legacy_dataset + array_factory, read_dictionary ): # Test that null values are deterministically initialized to zero # after a roundtrip through Parquet. 
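# --- Editor's illustrative sketch; not part of this diff ---
# Reading a column back as dictionary-encoded from an in-memory buffer, as in
# test_direct_read_dictionary / test_buffer_contents above.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'col': pa.array(['a', 'b', None] * 10)})
sink = pa.BufferOutputStream()
pq.write_table(table, sink)

result = pq.read_table(pa.BufferReader(sink.getvalue()),
                       read_dictionary=['col'], use_threads=False)
print(result['col'].type)   # typically dictionary<values=string, indices=int32>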
@@ -773,8 +701,7 @@ def test_buffer_contents( bio.seek(0) read_dictionary = ['col'] if read_dictionary else None table = pq.read_table(bio, use_threads=False, - read_dictionary=read_dictionary, - use_legacy_dataset=use_legacy_dataset) + read_dictionary=read_dictionary) for col in table.columns: [chunk] = col.chunks @@ -826,7 +753,6 @@ def test_reads_over_batch(tempdir): assert table == table2 -@pytest.mark.dataset def test_permutation_of_column_order(tempdir): # ARROW-2366 case = tempdir / "dataset_column_order_permutation" @@ -846,18 +772,6 @@ def test_permutation_of_column_order(tempdir): assert table == table2 -def test_read_table_legacy_deprecated(tempdir): - # ARROW-15870 - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - pq.write_table(table, path) - - with pytest.warns( - FutureWarning, match="Passing 'use_legacy_dataset=True'" - ): - pq.read_table(path, use_legacy_dataset=True) - - def test_thrift_size_limits(tempdir): path = tempdir / 'largethrift.parquet' @@ -942,28 +856,9 @@ def test_page_checksum_verification_write_table(tempdir): with pytest.raises(OSError, match="CRC checksum verification"): _ = corrupted_pq_file.read() - # Case 5: Check that enabling page checksum verification in combination - # with legacy dataset raises an exception - with pytest.raises(ValueError, match="page_checksum_verification"): - _ = pq.read_table(corrupted_path, - page_checksum_verification=True, - use_legacy_dataset=True) - @pytest.mark.dataset -@pytest.mark.parametrize( - "use_legacy_dataset", - [ - False, - pytest.param( - True, - marks=pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning" - ), - ), - ], -) -def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): +def test_checksum_write_to_dataset(tempdir): """Check that checksum verification works for datasets created with pq.write_to_dataset""" @@ -973,8 +868,7 @@ def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): original_dir_path = tempdir / 'correct_dir' pq.write_to_dataset(table_orig, original_dir_path, - write_page_checksum=True, - use_legacy_dataset=use_legacy_dataset) + write_page_checksum=True) # Read file and verify that the data is correct original_file_path_list = list(original_dir_path.iterdir()) @@ -1014,3 +908,23 @@ def test_checksum_write_to_dataset(tempdir, use_legacy_dataset): # checksum verification enabled raises an exception with pytest.raises(OSError, match="CRC checksum verification"): _ = pq.read_table(corrupted_file_path, page_checksum_verification=True) + + +@pytest.mark.dataset +def test_deprecated_use_legacy_dataset(tempdir): + # Test that specifying use_legacy_dataset in ParquetDataset, write_to_dataset + # and read_table doesn't raise an error but gives a warning. 
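# --- Editor's illustrative sketch; not part of this diff ---
# Page checksums as exercised by the checksum tests above: opt in at write time
# and verify at read time; a corrupted page would raise an error while reading.
# 'checksummed.parquet' is a hypothetical path.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({'a': list(range(100))})
pq.write_table(table, 'checksummed.parquet', write_page_checksum=True)

result = pq.read_table('checksummed.parquet', page_checksum_verification=True)
assert result.equals(table)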
+ table = pa.table({"a": [1, 2, 3]}) + path = tempdir / "deprecate_legacy" + + msg = "Passing 'use_legacy_dataset'" + with pytest.warns(FutureWarning, match=msg): + pq.write_to_dataset(table, path, use_legacy_dataset=False) + + pq.write_to_dataset(table, path) + + with pytest.warns(FutureWarning, match=msg): + pq.read_table(path, use_legacy_dataset=False) + + with pytest.warns(FutureWarning, match=msg): + pq.ParquetDataset(path, use_legacy_dataset=False) diff --git a/python/pyarrow/tests/parquet/test_compliant_nested_type.py b/python/pyarrow/tests/parquet/test_compliant_nested_type.py index ca1ad7ee32255..2345855a3321b 100644 --- a/python/pyarrow/tests/parquet/test_compliant_nested_type.py +++ b/python/pyarrow/tests/parquet/test_compliant_nested_type.py @@ -18,7 +18,6 @@ import pytest import pyarrow as pa -from pyarrow.tests.parquet.common import parametrize_legacy_dataset try: import pyarrow.parquet as pq @@ -58,16 +57,13 @@ @pytest.mark.pandas -@parametrize_legacy_dataset @parametrize_test_data -def test_write_compliant_nested_type_enable(tempdir, - use_legacy_dataset, test_data): +def test_write_compliant_nested_type_enable(tempdir, test_data): # prepare dataframe for testing df = pd.DataFrame(data=test_data) # verify that we can read/write pandas df with new flag (default behaviour) _roundtrip_pandas_dataframe(df, - write_kwargs={}, - use_legacy_dataset=use_legacy_dataset) + write_kwargs={}) # Write to a parquet file with compliant nested type table = pa.Table.from_pandas(df, preserve_index=False) @@ -83,21 +79,17 @@ def test_write_compliant_nested_type_enable(tempdir, assert new_table.schema.types[0].value_field.name == 'element' # Verify that the new table can be read/written correctly - _check_roundtrip(new_table, - use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(new_table) @pytest.mark.pandas -@parametrize_legacy_dataset @parametrize_test_data -def test_write_compliant_nested_type_disable(tempdir, - use_legacy_dataset, test_data): +def test_write_compliant_nested_type_disable(tempdir, test_data): # prepare dataframe for testing df = pd.DataFrame(data=test_data) # verify that we can read/write with new flag disabled _roundtrip_pandas_dataframe(df, write_kwargs={ - 'use_compliant_nested_type': False}, - use_legacy_dataset=use_legacy_dataset) + 'use_compliant_nested_type': False}) # Write to a parquet file while disabling compliant nested type table = pa.Table.from_pandas(df, preserve_index=False) @@ -114,5 +106,4 @@ def test_write_compliant_nested_type_disable(tempdir, # Verify that the new table can be read/written correctly _check_roundtrip(new_table, - use_legacy_dataset=use_legacy_dataset, use_compliant_nested_type=False) diff --git a/python/pyarrow/tests/parquet/test_data_types.py b/python/pyarrow/tests/parquet/test_data_types.py index 32fe128bbae9b..e6b66b00428fb 100644 --- a/python/pyarrow/tests/parquet/test_data_types.py +++ b/python/pyarrow/tests/parquet/test_data_types.py @@ -23,8 +23,7 @@ import pyarrow as pa from pyarrow.tests import util -from pyarrow.tests.parquet.common import (_check_roundtrip, - parametrize_legacy_dataset) +from pyarrow.tests.parquet.common import _check_roundtrip try: import pyarrow.parquet as pq @@ -54,9 +53,8 @@ @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('chunk_size', [None, 1000]) -def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): +def test_parquet_2_0_roundtrip(tempdir, chunk_size): df = alltypes_sample(size=10000, categorical=True) filename = tempdir / 'pandas_roundtrip.parquet' @@ 
-65,8 +63,7 @@ def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): _write_table(arrow_table, filename, version='2.6', chunk_size=chunk_size) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) assert table_read.schema.pandas_metadata is not None read_metadata = table_read.schema.metadata @@ -77,8 +74,7 @@ def test_parquet_2_0_roundtrip(tempdir, chunk_size, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): +def test_parquet_1_0_roundtrip(tempdir): size = 10000 np.random.seed(0) df = pd.DataFrame({ @@ -100,7 +96,7 @@ def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): filename = tempdir / 'pandas_roundtrip.parquet' arrow_table = pa.Table.from_pandas(df) _write_table(arrow_table, filename, version='1.0') - table_read = _read_table(filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() # We pass uint32_t as int64_t if we write Parquet version 1.0 @@ -113,18 +109,17 @@ def test_parquet_1_0_roundtrip(tempdir, use_legacy_dataset): # ----------------------------------------------------------------------------- -def _simple_table_write_read(table, use_legacy_dataset): +def _simple_table_write_read(table): bio = pa.BufferOutputStream() pq.write_table(table, bio) contents = bio.getvalue() return pq.read_table( - pa.BufferReader(contents), use_legacy_dataset=use_legacy_dataset + pa.BufferReader(contents) ) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_direct_read_dictionary(use_legacy_dataset): +def test_direct_read_dictionary(): # ARROW-3325 repeats = 10 nunique = 5 @@ -140,8 +135,7 @@ def test_direct_read_dictionary(use_legacy_dataset): contents = bio.getvalue() result = pq.read_table(pa.BufferReader(contents), - read_dictionary=['f0'], - use_legacy_dataset=use_legacy_dataset) + read_dictionary=['f0']) # Compute dictionary-encoded subfield expected = pa.table([table[0].dictionary_encode()], names=['f0']) @@ -149,8 +143,7 @@ def test_direct_read_dictionary(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_direct_read_dictionary_subfield(use_legacy_dataset): +def test_direct_read_dictionary_subfield(): repeats = 10 nunique = 5 @@ -163,8 +156,7 @@ def test_direct_read_dictionary_subfield(use_legacy_dataset): pq.write_table(table, bio) contents = bio.getvalue() result = pq.read_table(pa.BufferReader(contents), - read_dictionary=['f0.list.element'], - use_legacy_dataset=use_legacy_dataset) + read_dictionary=['f0.list.element']) arr = pa.array(data[0]) values_as_dict = arr.values.dictionary_encode() @@ -181,8 +173,7 @@ def test_direct_read_dictionary_subfield(use_legacy_dataset): assert result[0].num_chunks == 1 -@parametrize_legacy_dataset -def test_dictionary_array_automatically_read(use_legacy_dataset): +def test_dictionary_array_automatically_read(): # ARROW-3246 # Make a large dictionary, a little over 4MB of data @@ -200,7 +191,7 @@ def test_dictionary_array_automatically_read(use_legacy_dataset): dict_values)) table = pa.table([pa.chunked_array(chunks)], names=['f0']) - result = _simple_table_write_read(table, use_legacy_dataset) + result = _simple_table_write_read(table) assert result.equals(table) @@ -213,8 +204,7 @@ def test_dictionary_array_automatically_read(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_decimal_roundtrip(tempdir, use_legacy_dataset): +def 
test_decimal_roundtrip(tempdir): num_values = 10 columns = {} @@ -234,8 +224,7 @@ def test_decimal_roundtrip(tempdir, use_legacy_dataset): string_filename = str(filename) table = pa.Table.from_pandas(expected) _write_table(table, string_filename) - result_table = _read_table( - string_filename, use_legacy_dataset=use_legacy_dataset) + result_table = _read_table(string_filename) result = result_table.to_pandas() tm.assert_frame_equal(result, expected) @@ -259,14 +248,13 @@ def test_decimal_roundtrip_negative_scale(tempdir): # ----------------------------------------------------------------------------- -@parametrize_legacy_dataset @pytest.mark.parametrize('dtype', [int, float]) -def test_single_pylist_column_roundtrip(tempdir, dtype, use_legacy_dataset): +def test_single_pylist_column_roundtrip(tempdir, dtype,): filename = tempdir / 'single_{}_column.parquet'.format(dtype.__name__) data = [pa.array(list(map(dtype, range(5))))] table = pa.Table.from_arrays(data, names=['a']) _write_table(table, filename) - table_read = _read_table(filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) for i in range(table.num_columns): col_written = table[i] col_read = table_read[i] @@ -277,16 +265,14 @@ def test_single_pylist_column_roundtrip(tempdir, dtype, use_legacy_dataset): assert data_written.equals(data_read) -@parametrize_legacy_dataset -def test_empty_lists_table_roundtrip(use_legacy_dataset): +def test_empty_lists_table_roundtrip(): # ARROW-2744: Shouldn't crash when writing an array of empty lists arr = pa.array([[], []], type=pa.list_(pa.int32())) table = pa.Table.from_arrays([arr], ["A"]) - _check_roundtrip(table, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(table) -@parametrize_legacy_dataset -def test_nested_list_nonnullable_roundtrip_bug(use_legacy_dataset): +def test_nested_list_nonnullable_roundtrip_bug(): # Reproduce failure in ARROW-5630 typ = pa.list_(pa.field("item", pa.float32(), False)) num_rows = 10000 @@ -295,26 +281,22 @@ def test_nested_list_nonnullable_roundtrip_bug(use_legacy_dataset): (num_rows // 10)), type=typ) ], ['a']) _check_roundtrip( - t, data_page_size=4096, use_legacy_dataset=use_legacy_dataset) + t, data_page_size=4096) -@parametrize_legacy_dataset -def test_nested_list_struct_multiple_batches_roundtrip( - tempdir, use_legacy_dataset -): +def test_nested_list_struct_multiple_batches_roundtrip(tempdir): # Reproduce failure in ARROW-11024 data = [[{'x': 'abc', 'y': 'abc'}]]*100 + [[{'x': 'abc', 'y': 'gcb'}]]*100 table = pa.table([pa.array(data)], names=['column']) _check_roundtrip( - table, row_group_size=20, use_legacy_dataset=use_legacy_dataset) + table, row_group_size=20) # Reproduce failure in ARROW-11069 (plain non-nested structs with strings) data = pa.array( [{'a': '1', 'b': '2'}, {'a': '3', 'b': '4'}, {'a': '5', 'b': '6'}]*10 ) table = pa.table({'column': data}) - _check_roundtrip( - table, row_group_size=10, use_legacy_dataset=use_legacy_dataset) + _check_roundtrip(table, row_group_size=10) def test_writing_empty_lists(): @@ -366,8 +348,7 @@ def test_large_list_records(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_nested_convenience(tempdir, use_legacy_dataset): +def test_parquet_nested_convenience(tempdir): # ARROW-1684 df = pd.DataFrame({ 'a': [[1, 2, 3], None, [4, 5], []], @@ -380,11 +361,11 @@ def test_parquet_nested_convenience(tempdir, use_legacy_dataset): _write_table(table, path) read = pq.read_table( - path, columns=['a'], use_legacy_dataset=use_legacy_dataset) + path, columns=['a']) 
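# --- editorial sketch, not part of the patch ---------------------------------
# A minimal illustration of the call pattern these tests converge on once the
# 'use_legacy_dataset' keyword is dropped: a plain write/read round trip with
# column selection and no dataset-related keywords. The file name below is a
# hypothetical stand-in.
import pyarrow as pa
import pyarrow.parquet as pq

sketch = pa.table({"a": [[1, 2, 3], None], "b": [[4.5], []]})
pq.write_table(sketch, "example.parquet")
subset = pq.read_table("example.parquet", columns=["a"])
assert subset.column_names == ["a"]
# ------------------------------------------------------------------------------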
tm.assert_frame_equal(read.to_pandas(), df[['a']]) read = pq.read_table( - path, columns=['a', 'b'], use_legacy_dataset=use_legacy_dataset) + path, columns=['a', 'b']) tm.assert_frame_equal(read.to_pandas(), df) @@ -420,17 +401,16 @@ def test_large_table_int32_overflow(): _write_table(table, f) -def _simple_table_roundtrip(table, use_legacy_dataset=False, **write_kwargs): +def _simple_table_roundtrip(table, **write_kwargs): stream = pa.BufferOutputStream() _write_table(table, stream, **write_kwargs) buf = stream.getvalue() - return _read_table(buf, use_legacy_dataset=use_legacy_dataset) + return _read_table(buf) @pytest.mark.slow @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_byte_array_exactly_2gb(use_legacy_dataset): +def test_byte_array_exactly_2gb(): # Test edge case reported in ARROW-3762 val = b'x' * (1 << 10) @@ -444,15 +424,14 @@ def test_byte_array_exactly_2gb(use_legacy_dataset): values = pa.chunked_array([base, pa.array(case)]) t = pa.table([values], names=['f0']) result = _simple_table_roundtrip( - t, use_legacy_dataset=use_legacy_dataset, use_dictionary=False) + t, use_dictionary=False) assert t.equals(result) @pytest.mark.slow @pytest.mark.pandas @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_binary_array_overflow_to_chunked(use_legacy_dataset): +def test_binary_array_overflow_to_chunked(): # ARROW-3762 # 2^31 + 1 bytes @@ -462,8 +441,7 @@ def test_binary_array_overflow_to_chunked(use_legacy_dataset): df = pd.DataFrame({'byte_col': values}) tbl = pa.Table.from_pandas(df, preserve_index=False) - read_tbl = _simple_table_roundtrip( - tbl, use_legacy_dataset=use_legacy_dataset) + read_tbl = _simple_table_roundtrip(tbl) col0_data = read_tbl[0] assert isinstance(col0_data, pa.ChunkedArray) @@ -477,8 +455,7 @@ def test_binary_array_overflow_to_chunked(use_legacy_dataset): @pytest.mark.slow @pytest.mark.pandas @pytest.mark.large_memory -@parametrize_legacy_dataset -def test_list_of_binary_large_cell(use_legacy_dataset): +def test_list_of_binary_large_cell(): # ARROW-4688 data = [] @@ -491,8 +468,7 @@ def test_list_of_binary_large_cell(use_legacy_dataset): arr = pa.array(data) table = pa.Table.from_arrays([arr], ['chunky_cells']) - read_table = _simple_table_roundtrip( - table, use_legacy_dataset=use_legacy_dataset) + read_table = _simple_table_roundtrip(table) assert table.equals(read_table) diff --git a/python/pyarrow/tests/parquet/test_dataset.py b/python/pyarrow/tests/parquet/test_dataset.py index a9e99d5d65cf9..b6e351bdef9a7 100644 --- a/python/pyarrow/tests/parquet/test_dataset.py +++ b/python/pyarrow/tests/parquet/test_dataset.py @@ -29,9 +29,6 @@ from pyarrow import fs from pyarrow.filesystem import LocalFileSystem from pyarrow.tests import util -from pyarrow.tests.parquet.common import ( - parametrize_legacy_dataset, parametrize_legacy_dataset_fixed, - parametrize_legacy_dataset_not_supported) from pyarrow.util import guid from pyarrow.vendored.version import Version @@ -53,76 +50,10 @@ # Marks all of the tests in this module # Ignore these with pytest ... 
-m 'not parquet' -pytestmark = pytest.mark.parquet +pytestmark = [pytest.mark.parquet, pytest.mark.dataset] -@pytest.mark.pandas -def test_parquet_piece_read(tempdir): - df = _test_dataframe(1000) - table = pa.Table.from_pandas(df) - - path = tempdir / 'parquet_piece_read.parquet' - _write_table(table, path, version='2.6') - - with pytest.warns(FutureWarning): - piece1 = pq.ParquetDatasetPiece(path) - - result = piece1.read() - assert result.equals(table) - - -@pytest.mark.pandas -def test_parquet_piece_open_and_get_metadata(tempdir): - df = _test_dataframe(100) - table = pa.Table.from_pandas(df) - - path = tempdir / 'parquet_piece_read.parquet' - _write_table(table, path, version='2.6') - - with pytest.warns(FutureWarning): - piece = pq.ParquetDatasetPiece(path) - - table1 = piece.read() - assert isinstance(table1, pa.Table) - meta1 = piece.get_metadata() - assert isinstance(meta1, pq.FileMetaData) - - assert table.equals(table1) - - -@pytest.mark.filterwarnings("ignore:ParquetDatasetPiece:FutureWarning") -def test_parquet_piece_basics(): - path = '/baz.parq' - - piece1 = pq.ParquetDatasetPiece(path) - piece2 = pq.ParquetDatasetPiece(path, row_group=1) - piece3 = pq.ParquetDatasetPiece( - path, row_group=1, partition_keys=[('foo', 0), ('bar', 1)]) - - assert str(piece1) == path - assert str(piece2) == '/baz.parq | row_group=1' - assert str(piece3) == 'partition[foo=0, bar=1] /baz.parq | row_group=1' - - assert piece1 == piece1 - assert piece2 == piece2 - assert piece3 == piece3 - assert piece1 != piece3 - - -def test_partition_set_dictionary_type(): - set1 = pq.PartitionSet('key1', ['foo', 'bar', 'baz']) - set2 = pq.PartitionSet('key2', [2007, 2008, 2009]) - - assert isinstance(set1.dictionary, pa.StringArray) - assert isinstance(set2.dictionary, pa.IntegerArray) - - set3 = pq.PartitionSet('key2', [datetime.datetime(2007, 1, 1)]) - with pytest.raises(TypeError): - set3.dictionary - - -@parametrize_legacy_dataset_fixed -def test_filesystem_uri(tempdir, use_legacy_dataset): +def test_filesystem_uri(tempdir): table = pa.table({"a": [1, 2, 3]}) directory = tempdir / "data_dir" @@ -132,72 +63,36 @@ def test_filesystem_uri(tempdir, use_legacy_dataset): # filesystem object result = pq.read_table( - path, filesystem=fs.LocalFileSystem(), - use_legacy_dataset=use_legacy_dataset) + path, filesystem=fs.LocalFileSystem()) assert result.equals(table) # filesystem URI result = pq.read_table( - "data_dir/data.parquet", filesystem=util._filesystem_uri(tempdir), - use_legacy_dataset=use_legacy_dataset) + "data_dir/data.parquet", filesystem=util._filesystem_uri(tempdir)) assert result.equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_partitioned_directory(tempdir, use_legacy_dataset): +def test_read_partitioned_directory(tempdir): fs = LocalFileSystem._get_instance() - _partition_test_for_filesystem(fs, tempdir, use_legacy_dataset) + _partition_test_for_filesystem(fs, tempdir) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas -def test_create_parquet_dataset_multi_threaded(tempdir): - fs = LocalFileSystem._get_instance() - base_path = tempdir - - _partition_test_for_filesystem(fs, base_path) - - manifest = pq.ParquetManifest(base_path, filesystem=fs, - metadata_nthreads=1) - with pytest.warns( - FutureWarning, match="Specifying the 'metadata_nthreads'" - ): - dataset = pq.ParquetDataset( - base_path, filesystem=fs, metadata_nthreads=16, - use_legacy_dataset=True - ) - assert len(dataset.pieces) > 0 - partitions = dataset.partitions - assert 
len(partitions.partition_names) > 0 - assert partitions.partition_names == manifest.partitions.partition_names - assert len(partitions.levels) == len(manifest.partitions.levels) - - -@pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_partitioned_columns_selection(tempdir, use_legacy_dataset): +def test_read_partitioned_columns_selection(tempdir): # ARROW-3861 - do not include partition columns in resulting table when # `columns` keyword was passed without those columns fs = LocalFileSystem._get_instance() base_path = tempdir _partition_test_for_filesystem(fs, base_path) - dataset = pq.ParquetDataset( - base_path, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(base_path) result = dataset.read(columns=["values"]) - if use_legacy_dataset: - # ParquetDataset implementation always includes the partition columns - # automatically, and we can't easily "fix" this since dask relies on - # this behaviour (ARROW-8644) - assert result.column_names == ["values", "foo", "bar"] - else: - assert result.column_names == ["values"] + assert result.column_names == ["values"] @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_equivalency(tempdir, use_legacy_dataset): +def test_filters_equivalency(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -225,7 +120,6 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): base_path, filesystem=fs, filters=[('integer', '=', 1), ('string', '!=', 'b'), ('boolean', '==', 'True')], - use_legacy_dataset=use_legacy_dataset, ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -247,8 +141,7 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): [('integer', '=', 0), ('boolean', '==', 'False')] ] dataset = pq.ParquetDataset( - base_path, filesystem=fs, filters=filters, - use_legacy_dataset=use_legacy_dataset) + base_path, filesystem=fs, filters=filters) table = dataset.read() result_df = table.to_pandas().reset_index(drop=True) @@ -262,30 +155,15 @@ def test_filters_equivalency(tempdir, use_legacy_dataset): assert df_filter_2.sum() > 0 assert result_df.shape[0] == (df_filter_1.sum() + df_filter_2.sum()) - if use_legacy_dataset: - # Check for \0 in predicate values. Until they are correctly - # implemented in ARROW-3391, they would otherwise lead to weird - # results with the current code. 
- with pytest.raises(NotImplementedError): - filters = [[('string', '==', b'1\0a')]] - pq.ParquetDataset(base_path, filesystem=fs, filters=filters, - use_legacy_dataset=True) - with pytest.raises(NotImplementedError): - filters = [[('string', '==', '1\0a')]] - pq.ParquetDataset(base_path, filesystem=fs, filters=filters, - use_legacy_dataset=True) - else: - for filters in [[[('string', '==', b'1\0a')]], - [[('string', '==', '1\0a')]]]: - dataset = pq.ParquetDataset( - base_path, filesystem=fs, filters=filters, - use_legacy_dataset=False) - assert dataset.read().num_rows == 0 + for filters in [[[('string', '==', b'1\0a')]], + [[('string', '==', '1\0a')]]]: + dataset = pq.ParquetDataset( + base_path, filesystem=fs, filters=filters) + assert dataset.read().num_rows == 0 @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): +def test_filters_cutoff_exclusive_integer(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -308,7 +186,6 @@ def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): ('integers', '<', 4), ('integers', '>', 1), ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -319,15 +196,14 @@ def test_filters_cutoff_exclusive_integer(tempdir, use_legacy_dataset): assert result_list == [2, 3] -@pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.xfail( # different error with use_legacy_datasets because result_df is no longer # categorical raises=(TypeError, AssertionError), reason='Loss of type information in creation of categoricals.' ) -def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): +@pytest.mark.pandas +def test_filters_cutoff_exclusive_datetime(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -356,7 +232,6 @@ def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): ('dates', '<', "2018-04-12"), ('dates', '>', "2018-04-10") ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -371,7 +246,6 @@ def test_filters_cutoff_exclusive_datetime(tempdir, use_legacy_dataset): @pytest.mark.pandas -@pytest.mark.dataset def test_filters_inclusive_datetime(tempdir): # ARROW-11480 path = tempdir / 'timestamps.parquet' @@ -389,8 +263,7 @@ def test_filters_inclusive_datetime(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_inclusive_integer(tempdir, use_legacy_dataset): +def test_filters_inclusive_integer(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -413,7 +286,6 @@ def test_filters_inclusive_integer(tempdir, use_legacy_dataset): ('integers', '<=', 3), ('integers', '>=', 2), ], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas() @@ -425,8 +297,7 @@ def test_filters_inclusive_integer(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_inclusive_set(tempdir, use_legacy_dataset): +def test_filters_inclusive_set(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -451,7 +322,6 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): dataset = pq.ParquetDataset( base_path, filesystem=fs, filters=[('string', 'in', 'ab')], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -464,7 +334,6 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): base_path, filesystem=fs, filters=[('integer', 'in', [1]), 
('string', 'in', ('a', 'b')), ('boolean', 'not in', {'False'})], - use_legacy_dataset=use_legacy_dataset ) table = dataset.read() result_df = (table.to_pandas().reset_index(drop=True)) @@ -475,8 +344,7 @@ def test_filters_inclusive_set(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): +def test_filters_invalid_pred_op(tempdir): fs = LocalFileSystem._get_instance() base_path = tempdir @@ -496,49 +364,30 @@ def test_filters_invalid_pred_op(tempdir, use_legacy_dataset): with pytest.raises(TypeError): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('integers', 'in', 3), ], - use_legacy_dataset=use_legacy_dataset) + filters=[('integers', 'in', 3), ]) with pytest.raises(ValueError): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('integers', '=<', 3), ], - use_legacy_dataset=use_legacy_dataset) - - if use_legacy_dataset: - with pytest.raises(ValueError): - pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', 'in', set()), ], - use_legacy_dataset=use_legacy_dataset) - else: - # Dataset API returns empty table instead - dataset = pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', 'in', set()), ], - use_legacy_dataset=use_legacy_dataset) - assert dataset.read().num_rows == 0 + filters=[('integers', '=<', 3), ]) - if use_legacy_dataset: - with pytest.raises(ValueError): - pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', '!=', {3})], - use_legacy_dataset=use_legacy_dataset) - else: - dataset = pq.ParquetDataset(base_path, - filesystem=fs, - filters=[('integers', '!=', {3})], - use_legacy_dataset=use_legacy_dataset) - with pytest.raises(NotImplementedError): - assert dataset.read().num_rows == 0 + # Dataset API returns empty table + dataset = pq.ParquetDataset(base_path, + filesystem=fs, + filters=[('integers', 'in', set()), ]) + assert dataset.read().num_rows == 0 + + dataset = pq.ParquetDataset(base_path, + filesystem=fs, + filters=[('integers', '!=', {3})]) + with pytest.raises(NotImplementedError): + assert dataset.read().num_rows == 0 @pytest.mark.pandas -@parametrize_legacy_dataset_fixed -def test_filters_invalid_column(tempdir, use_legacy_dataset): +def test_filters_invalid_column(tempdir): # ARROW-5572 - raise error on invalid name in filter specification - # works with new dataset / xfail with legacy implementation + # works with new dataset fs = LocalFileSystem._get_instance() base_path = tempdir @@ -556,12 +405,10 @@ def test_filters_invalid_column(tempdir, use_legacy_dataset): msg = r"No match for FieldRef.Name\(non_existent_column\)" with pytest.raises(ValueError, match=msg): pq.ParquetDataset(base_path, filesystem=fs, - filters=[('non_existent_column', '<', 3), ], - use_legacy_dataset=use_legacy_dataset).read() + filters=[('non_existent_column', '<', 3), ]).read() @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize("filters", ([('integers', '<', 3)], [[('integers', '<', 3)]], @@ -569,7 +416,7 @@ def test_filters_invalid_column(tempdir, use_legacy_dataset): pc.field('nested', 'a') < 3, pc.field('nested', 'b').cast(pa.int64()) < 3)) @pytest.mark.parametrize("read_method", ("read_table", "read_pandas")) -def test_filters_read_table(tempdir, use_legacy_dataset, filters, read_method): +def test_filters_read_table(tempdir, filters, read_method): read = getattr(pq, read_method) # test that filters keyword is passed through in read_table fs = LocalFileSystem._get_instance() @@ -589,24 +436,15 @@ def 
test_filters_read_table(tempdir, use_legacy_dataset, filters, read_method): _generate_partition_directories(fs, base_path, partition_spec, df) - kwargs = dict(filesystem=fs, filters=filters, - use_legacy_dataset=use_legacy_dataset) + kwargs = dict(filesystem=fs, filters=filters) - # Using Expression in legacy dataset not supported - if use_legacy_dataset and isinstance(filters, pc.Expression): - msg = "Expressions as filter not supported for legacy dataset" - with pytest.raises(TypeError, match=msg): - read(base_path, **kwargs) - else: - table = read(base_path, **kwargs) - assert table.num_rows == 3 + table = read(base_path, **kwargs) + assert table.num_rows == 3 @pytest.mark.pandas -@parametrize_legacy_dataset_fixed -def test_partition_keys_with_underscores(tempdir, use_legacy_dataset): +def test_partition_keys_with_underscores(tempdir): # ARROW-5666 - partition field values with underscores preserve underscores - # xfail with legacy dataset -> they get interpreted as integers fs = LocalFileSystem._get_instance() base_path = tempdir @@ -623,60 +461,47 @@ def test_partition_keys_with_underscores(tempdir, use_legacy_dataset): _generate_partition_directories(fs, base_path, partition_spec, df) - dataset = pq.ParquetDataset( - base_path, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(base_path) result = dataset.read() assert result.column("year_week").to_pylist() == string_keys @pytest.mark.s3 -@parametrize_legacy_dataset -def test_read_s3fs(s3_example_s3fs, use_legacy_dataset): +def test_read_s3fs(s3_example_s3fs, ): fs, path = s3_example_s3fs path = path + "/test.parquet" table = pa.table({"a": [1, 2, 3]}) _write_table(table, path, filesystem=fs) - result = _read_table( - path, filesystem=fs, use_legacy_dataset=use_legacy_dataset - ) + result = _read_table(path, filesystem=fs) assert result.equals(table) @pytest.mark.s3 -@parametrize_legacy_dataset -def test_read_directory_s3fs(s3_example_s3fs, use_legacy_dataset): +def test_read_directory_s3fs(s3_example_s3fs): fs, directory = s3_example_s3fs path = directory + "/test.parquet" table = pa.table({"a": [1, 2, 3]}) _write_table(table, path, filesystem=fs) - result = _read_table( - directory, filesystem=fs, use_legacy_dataset=use_legacy_dataset - ) + result = _read_table(directory, filesystem=fs) assert result.equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_single_file_list(tempdir, use_legacy_dataset): +def test_read_single_file_list(tempdir): data_path = str(tempdir / 'data.parquet') table = pa.table({"a": [1, 2, 3]}) _write_table(table, data_path) - result = pq.ParquetDataset( - [data_path], use_legacy_dataset=use_legacy_dataset - ).read() + result = pq.ParquetDataset([data_path]).read() assert result.equals(table) @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_read_partitioned_directory_s3fs_wrapper( - s3_example_s3fs, use_legacy_dataset -): +def test_read_partitioned_directory_s3fs_wrapper(s3_example_s3fs): import s3fs from pyarrow.filesystem import S3FSWrapper @@ -690,23 +515,18 @@ def test_read_partitioned_directory_s3fs_wrapper( _partition_test_for_filesystem(wrapper, path) # Check that we can auto-wrap - dataset = pq.ParquetDataset( - path, filesystem=fs, use_legacy_dataset=use_legacy_dataset - ) + dataset = pq.ParquetDataset(path, filesystem=fs) dataset.read() @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_read_partitioned_directory_s3fs(s3_example_s3fs, use_legacy_dataset): +def 
test_read_partitioned_directory_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs - _partition_test_for_filesystem( - fs, path, use_legacy_dataset=use_legacy_dataset - ) + _partition_test_for_filesystem(fs, path) -def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): +def _partition_test_for_filesystem(fs, base_path): foo_keys = [0, 1] bar_keys = ['a', 'b', 'c'] partition_spec = [ @@ -724,8 +544,7 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): _generate_partition_directories(fs, base_path, partition_spec, df) - dataset = pq.ParquetDataset( - base_path, filesystem=fs, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(base_path, filesystem=fs) table = dataset.read() result_df = (table.to_pandas() .sort_values(by='index') @@ -735,15 +554,11 @@ def _partition_test_for_filesystem(fs, base_path, use_legacy_dataset=True): .reset_index(drop=True) .reindex(columns=result_df.columns)) - if use_legacy_dataset or Version(pd.__version__) < Version("2.0.0"): - expected_df['foo'] = pd.Categorical(df['foo'], categories=foo_keys) - expected_df['bar'] = pd.Categorical(df['bar'], categories=bar_keys) - else: - # With pandas 2.0.0 Index can store all numeric dtypes (not just - # int64/uint64/float64). Using astype() to create a categorical - # column preserves original dtype (int32) - expected_df['foo'] = expected_df['foo'].astype("category") - expected_df['bar'] = expected_df['bar'].astype("category") + # With pandas 2.0.0 Index can store all numeric dtypes (not just + # int64/uint64/float64). Using astype() to create a categorical + # column preserves original dtype (int32) + expected_df['foo'] = expected_df['foo'].astype("category") + expected_df['bar'] = expected_df['bar'].astype("category") assert (result_df.columns == ['index', 'values', 'foo', 'bar']).all() @@ -790,83 +605,6 @@ def _visit_level(base_dir, level, part_keys): _visit_level(base_dir, 0, []) -def _test_read_common_metadata_files(fs, base_path): - import pandas as pd - - import pyarrow.parquet as pq - - N = 100 - df = pd.DataFrame({ - 'index': np.arange(N), - 'values': np.random.randn(N) - }, columns=['index', 'values']) - - base_path = str(base_path) - data_path = os.path.join(base_path, 'data.parquet') - - table = pa.Table.from_pandas(df) - - with fs.open(data_path, 'wb') as f: - _write_table(table, f) - - metadata_path = os.path.join(base_path, '_common_metadata') - with fs.open(metadata_path, 'wb') as f: - pq.write_metadata(table.schema, f) - - dataset = pq.ParquetDataset(base_path, filesystem=fs, - use_legacy_dataset=True) - with pytest.warns(FutureWarning): - assert dataset.common_metadata_path == str(metadata_path) - - with fs.open(data_path) as f: - common_schema = pq.read_metadata(f).schema - assert dataset.schema.equals(common_schema) - - # handle list of one directory - dataset2 = pq.ParquetDataset([base_path], filesystem=fs, - use_legacy_dataset=True) - assert dataset2.schema.equals(dataset.schema) - - -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") -def test_read_common_metadata_files(tempdir): - fs = LocalFileSystem._get_instance() - _test_read_common_metadata_files(fs, tempdir) - - -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") -def test_read_metadata_files(tempdir): - fs = LocalFileSystem._get_instance() - - N = 100 - df = pd.DataFrame({ - 'index': np.arange(N), - 'values': np.random.randn(N) - }, columns=['index', 'values']) - - data_path = tempdir / 
'data.parquet' - - table = pa.Table.from_pandas(df) - - with fs.open(data_path, 'wb') as f: - _write_table(table, f) - - metadata_path = tempdir / '_metadata' - with fs.open(metadata_path, 'wb') as f: - pq.write_metadata(table.schema, f) - - dataset = pq.ParquetDataset(tempdir, filesystem=fs, - use_legacy_dataset=True) - with pytest.warns(FutureWarning): - assert dataset.metadata_path == str(metadata_path) - - with fs.open(data_path) as f: - metadata_schema = pq.read_metadata(f).schema - assert dataset.schema.equals(metadata_schema) - - def _filter_partition(df, part_keys): predicate = np.ones(len(df), dtype=bool) @@ -883,9 +621,8 @@ def _filter_partition(df, part_keys): return df[predicate].drop(to_drop, axis=1) -@parametrize_legacy_dataset @pytest.mark.pandas -def test_filter_before_validate_schema(tempdir, use_legacy_dataset): +def test_filter_before_validate_schema(tempdir): # ARROW-4076 apply filter before schema validation # to avoid checking unneeded schemas @@ -902,16 +639,12 @@ def test_filter_before_validate_schema(tempdir, use_legacy_dataset): pq.write_table(table2, dir2 / 'data.parquet') # read single file using filter - table = pq.read_table(tempdir, filters=[[('A', '==', 0)]], - use_legacy_dataset=use_legacy_dataset) + table = pq.read_table(tempdir, filters=[[('A', '==', 0)]]) assert table.column('B').equals(pa.chunked_array([[1, 2, 3]])) @pytest.mark.pandas -@pytest.mark.filterwarnings( - "ignore:Specifying the 'metadata':FutureWarning") -@parametrize_legacy_dataset -def test_read_multiple_files(tempdir, use_legacy_dataset): +def test_read_multiple_files(tempdir): nfiles = 10 size = 5 @@ -938,8 +671,7 @@ def test_read_multiple_files(tempdir, use_legacy_dataset): (dirpath / '_SUCCESS.crc').touch() def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): - dataset = pq.ParquetDataset( - paths, use_legacy_dataset=use_legacy_dataset, **kwargs) + dataset = pq.ParquetDataset(paths, **kwargs) return dataset.read(columns=columns, use_threads=use_threads) result = read_multiple_files(paths) @@ -947,37 +679,18 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): assert result.equals(expected) - # Read with provided metadata - # TODO(dataset) specifying metadata not yet supported - metadata = pq.read_metadata(paths[0]) - if use_legacy_dataset: - result2 = read_multiple_files(paths, metadata=metadata) - assert result2.equals(expected) - - with pytest.warns(FutureWarning, match="Specifying the 'schema'"): - result3 = pq.ParquetDataset(dirpath, schema=metadata.schema, - use_legacy_dataset=True).read() - assert result3.equals(expected) - else: - with pytest.raises(ValueError, match="no longer supported"): - pq.read_table(paths, metadata=metadata, use_legacy_dataset=False) - # Read column subset to_read = [0, 2, 6, result.num_columns - 1] col_names = [result.field(i).name for i in to_read] - out = pq.read_table( - dirpath, columns=col_names, use_legacy_dataset=use_legacy_dataset - ) + out = pq.read_table(dirpath, columns=col_names) expected = pa.Table.from_arrays([result.column(i) for i in to_read], names=col_names, metadata=result.schema.metadata) assert out.equals(expected) # Read with multiple threads - pq.read_table( - dirpath, use_threads=True, use_legacy_dataset=use_legacy_dataset - ) + pq.read_table(dirpath, use_threads=True) # Test failure modes with non-uniform metadata bad_apple = _test_dataframe(size, seed=i).iloc[:, :4] @@ -986,31 +699,24 @@ def read_multiple_files(paths, columns=None, use_threads=True, **kwargs): t = 
pa.Table.from_pandas(bad_apple) _write_table(t, bad_apple_path) - if not use_legacy_dataset: - # TODO(dataset) Dataset API skips bad files - return + # TODO(dataset) Dataset API skips bad files - bad_meta = pq.read_metadata(bad_apple_path) + # bad_meta = pq.read_metadata(bad_apple_path) - with pytest.raises(ValueError): - read_multiple_files(paths + [bad_apple_path]) + # with pytest.raises(ValueError): + # read_multiple_files(paths + [bad_apple_path]) - with pytest.raises(ValueError): - read_multiple_files(paths, metadata=bad_meta) + # with pytest.raises(ValueError): + # read_multiple_files(paths, metadata=bad_meta) - mixed_paths = [bad_apple_path, paths[0]] + # mixed_paths = [bad_apple_path, paths[0]] - with pytest.raises(ValueError): - with pytest.warns(FutureWarning, match="Specifying the 'schema'"): - read_multiple_files(mixed_paths, schema=bad_meta.schema) - - with pytest.raises(ValueError): - read_multiple_files(mixed_paths) + # with pytest.raises(ValueError): + # read_multiple_files(mixed_paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_read_pandas(tempdir, use_legacy_dataset): +def test_dataset_read_pandas(tempdir): nfiles = 5 size = 5 @@ -1033,7 +739,7 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset): frames.append(df) paths.append(path) - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) @@ -1047,10 +753,8 @@ def test_dataset_read_pandas(tempdir, use_legacy_dataset): tm.assert_frame_equal(result.reindex(columns=expected.columns), expected) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_memory_map(tempdir, use_legacy_dataset): +def test_dataset_memory_map(tempdir): # ARROW-2627: Check that we can use ParquetDataset with memory-mapping dirpath = tempdir / guid() dirpath.mkdir() @@ -1061,15 +765,12 @@ def test_dataset_memory_map(tempdir, use_legacy_dataset): _write_table(table, path, version='2.6') dataset = pq.ParquetDataset( - dirpath, memory_map=True, use_legacy_dataset=use_legacy_dataset) + dirpath, memory_map=True) assert dataset.read().equals(table) - if use_legacy_dataset: - assert dataset.pieces[0].read().equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_enable_buffered_stream(tempdir, use_legacy_dataset): +def test_dataset_enable_buffered_stream(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1080,19 +781,16 @@ def test_dataset_enable_buffered_stream(tempdir, use_legacy_dataset): with pytest.raises(ValueError): pq.ParquetDataset( - dirpath, buffer_size=-64, - use_legacy_dataset=use_legacy_dataset) + dirpath, buffer_size=-64) for buffer_size in [128, 1024]: dataset = pq.ParquetDataset( - dirpath, buffer_size=buffer_size, - use_legacy_dataset=use_legacy_dataset) + dirpath, buffer_size=buffer_size) assert dataset.read().equals(table) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_enable_pre_buffer(tempdir, use_legacy_dataset): +def test_dataset_enable_pre_buffer(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1103,11 +801,9 @@ def test_dataset_enable_pre_buffer(tempdir, use_legacy_dataset): for pre_buffer in (True, False): dataset = pq.ParquetDataset( - dirpath, pre_buffer=pre_buffer, - use_legacy_dataset=use_legacy_dataset) + dirpath, pre_buffer=pre_buffer) assert 
dataset.read().equals(table) - actual = pq.read_table(dirpath, pre_buffer=pre_buffer, - use_legacy_dataset=use_legacy_dataset) + actual = pq.read_table(dirpath, pre_buffer=pre_buffer) assert actual.equals(table) @@ -1123,18 +819,14 @@ def _make_example_multifile_dataset(base_path, nfiles=10, file_nrows=5): return paths -def _assert_dataset_paths(dataset, paths, use_legacy_dataset): - if use_legacy_dataset: - assert set(map(str, paths)) == {x.path for x in dataset._pieces} - else: - paths = [str(path.as_posix()) for path in paths] - assert set(paths) == set(dataset._dataset.files) +def _assert_dataset_paths(dataset, paths): + paths = [str(path.as_posix()) for path in paths] + assert set(paths) == set(dataset.files) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('dir_prefix', ['_', '.']) -def test_ignore_private_directories(tempdir, dir_prefix, use_legacy_dataset): +def test_ignore_private_directories(tempdir, dir_prefix): dirpath = tempdir / guid() dirpath.mkdir() @@ -1144,14 +836,13 @@ def test_ignore_private_directories(tempdir, dir_prefix, use_legacy_dataset): # private directory (dirpath / '{}staging'.format(dir_prefix)).mkdir() - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_ignore_hidden_files_dot(tempdir, use_legacy_dataset): +def test_ignore_hidden_files_dot(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1164,14 +855,13 @@ def test_ignore_hidden_files_dot(tempdir, use_legacy_dataset): with (dirpath / '.private').open('wb') as f: f.write(b'gibberish') - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_ignore_hidden_files_underscore(tempdir, use_legacy_dataset): +def test_ignore_hidden_files_underscore(tempdir): dirpath = tempdir / guid() dirpath.mkdir() @@ -1184,17 +874,14 @@ def test_ignore_hidden_files_underscore(tempdir, use_legacy_dataset): with (dirpath / '_started_321').open('wb') as f: f.write(b'abcd') - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + _assert_dataset_paths(dataset, paths) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('dir_prefix', ['_', '.']) -def test_ignore_no_private_directories_in_base_path( - tempdir, dir_prefix, use_legacy_dataset -): +def test_ignore_no_private_directories_in_base_path(tempdir, dir_prefix): # ARROW-8427 - don't ignore explicitly listed files if parent directory # is a private directory dirpath = tempdir / "{0}data".format(dir_prefix) / guid() @@ -1203,17 +890,15 @@ def test_ignore_no_private_directories_in_base_path( paths = _make_example_multifile_dataset(dirpath, nfiles=10, file_nrows=5) - dataset = pq.ParquetDataset(paths, use_legacy_dataset=use_legacy_dataset) - _assert_dataset_paths(dataset, paths, use_legacy_dataset) + dataset = pq.ParquetDataset(paths) + _assert_dataset_paths(dataset, paths) # ARROW-9644 - don't ignore full directory with underscore in base path - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) - _assert_dataset_paths(dataset, paths, 
use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) + _assert_dataset_paths(dataset, paths) -@pytest.mark.pandas -@parametrize_legacy_dataset_fixed -def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): +def test_ignore_custom_prefixes(tempdir): # ARROW-9573 - allow override of default ignore_prefixes part = ["xxx"] * 3 + ["yyy"] * 3 table = pa.table([ @@ -1221,7 +906,6 @@ def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): pa.array(part).dictionary_encode(), ], names=['index', '_part']) - # TODO use_legacy_dataset ARROW-10247 pq.write_to_dataset(table, str(tempdir), partition_cols=['_part']) private_duplicate = tempdir / '_private_duplicate' @@ -1230,29 +914,23 @@ def test_ignore_custom_prefixes(tempdir, use_legacy_dataset): partition_cols=['_part']) read = pq.read_table( - tempdir, use_legacy_dataset=use_legacy_dataset, - ignore_prefixes=['_private']) + tempdir, ignore_prefixes=['_private']) assert read.equals(table) -@parametrize_legacy_dataset_fixed -def test_empty_directory(tempdir, use_legacy_dataset): - # ARROW-5310 - reading empty directory - # fails with legacy implementation +def test_empty_directory(tempdir): + # ARROW-5310 empty_dir = tempdir / 'dataset' empty_dir.mkdir() - dataset = pq.ParquetDataset( - empty_dir, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(empty_dir) result = dataset.read() assert result.num_rows == 0 assert result.num_columns == 0 -@pytest.mark.filterwarnings("ignore:'ParquetDataset.schema:FutureWarning") def _test_write_to_dataset_with_partitions(base_path, - use_legacy_dataset=True, filesystem=None, schema=None, index_name=None): @@ -1275,8 +953,7 @@ def _test_write_to_dataset_with_partitions(base_path, output_table = pa.Table.from_pandas(output_df, schema=schema, safe=False, preserve_index=False) pq.write_to_dataset(output_table, base_path, partition_by, - filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset) + filesystem=filesystem) metadata_path = os.path.join(str(base_path), '_common_metadata') @@ -1286,19 +963,11 @@ def _test_write_to_dataset_with_partitions(base_path, else: pq.write_metadata(output_table.schema, metadata_path) - # ARROW-2891: Ensure the output_schema is preserved when writing a - # partitioned dataset dataset = pq.ParquetDataset(base_path, - filesystem=filesystem, - validate_schema=True, - use_legacy_dataset=use_legacy_dataset) + filesystem=filesystem) # ARROW-2209: Ensure the dataset schema also includes the partition columns - if use_legacy_dataset: - with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): - dataset_cols = set(dataset.schema.to_arrow_schema().names) - else: - # NB schema property is an arrow and not parquet schema - dataset_cols = set(dataset.schema.names) + # NB schema property is an arrow and not parquet schema + dataset_cols = set(dataset.schema.names) assert dataset_cols == set(output_table.schema.names) @@ -1323,7 +992,6 @@ def _test_write_to_dataset_with_partitions(base_path, def _test_write_to_dataset_no_partitions(base_path, - use_legacy_dataset=True, filesystem=None): import pandas as pd @@ -1347,7 +1015,6 @@ def _test_write_to_dataset_no_partitions(base_path, n = 5 for i in range(n): pq.write_to_dataset(output_table, base_path, - use_legacy_dataset=use_legacy_dataset, filesystem=filesystem) output_files = [file for file in filesystem.ls(str(base_path)) if file.endswith(".parquet")] @@ -1356,8 +1023,7 @@ def _test_write_to_dataset_no_partitions(base_path, # Deduplicated incoming DataFrame should match # original outgoing Dataframe 
input_table = pq.ParquetDataset( - base_path, filesystem=filesystem, - use_legacy_dataset=use_legacy_dataset + base_path, filesystem=filesystem ).read() input_df = input_table.to_pandas() input_df = input_df.drop_duplicates() @@ -1366,131 +1032,71 @@ def _test_write_to_dataset_no_partitions(base_path, @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions(tempdir, use_legacy_dataset): - _test_write_to_dataset_with_partitions(str(tempdir), use_legacy_dataset) +def test_write_to_dataset_with_partitions(tempdir): + _test_write_to_dataset_with_partitions(str(tempdir)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_and_schema( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_and_schema(tempdir): schema = pa.schema([pa.field('group1', type=pa.string()), pa.field('group2', type=pa.string()), pa.field('num', type=pa.int64()), pa.field('nan', type=pa.int32()), pa.field('date', type=pa.timestamp(unit='us'))]) _test_write_to_dataset_with_partitions( - str(tempdir), use_legacy_dataset, schema=schema) + str(tempdir), schema=schema) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_and_index_name( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_and_index_name(tempdir): _test_write_to_dataset_with_partitions( - str(tempdir), use_legacy_dataset, index_name='index_name') + str(tempdir), index_name='index_name') @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_no_partitions(tempdir, use_legacy_dataset): - _test_write_to_dataset_no_partitions(str(tempdir), use_legacy_dataset) +def test_write_to_dataset_no_partitions(tempdir): + _test_write_to_dataset_no_partitions(str(tempdir)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pathlib(tempdir, use_legacy_dataset): - _test_write_to_dataset_with_partitions( - tempdir / "test1", use_legacy_dataset) - _test_write_to_dataset_no_partitions( - tempdir / "test2", use_legacy_dataset) +def test_write_to_dataset_pathlib(tempdir): + _test_write_to_dataset_with_partitions(tempdir / "test1") + _test_write_to_dataset_no_partitions(tempdir / "test2") @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_pathlib_nonlocal( - tempdir, s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_pathlib_nonlocal(tempdir, s3_example_s3fs): # pathlib paths are only accepted for local files fs, _ = s3_example_s3fs with pytest.raises(TypeError, match="path-like objects are only allowed"): _test_write_to_dataset_with_partitions( - tempdir / "test1", use_legacy_dataset, filesystem=fs) + tempdir / "test1", filesystem=fs) with pytest.raises(TypeError, match="path-like objects are only allowed"): _test_write_to_dataset_no_partitions( - tempdir / "test2", use_legacy_dataset, filesystem=fs) + tempdir / "test2", filesystem=fs) @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_with_partitions_s3fs( - s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_with_partitions_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs _test_write_to_dataset_with_partitions( - path, use_legacy_dataset, filesystem=fs) + path, filesystem=fs) @pytest.mark.pandas @pytest.mark.s3 -@parametrize_legacy_dataset -def test_write_to_dataset_no_partitions_s3fs( - s3_example_s3fs, use_legacy_dataset -): +def test_write_to_dataset_no_partitions_s3fs(s3_example_s3fs): fs, path = s3_example_s3fs 
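# --- editorial sketch, not part of the patch ---------------------------------
# For orientation on the surrounding tests: after the legacy path is removed,
# partitioned writes and the dataset-backed ParquetDataset keep the same
# 'filesystem' keyword, and the discovered file paths are exposed via '.files'.
# A temporary directory is used only to keep the sketch self-contained; names
# are illustrative.
import tempfile
import pyarrow as pa
import pyarrow.parquet as pq
from pyarrow import fs

root = tempfile.mkdtemp()
local = fs.LocalFileSystem()
sketch = pa.table({"part": ["x", "x", "y"], "value": [1, 2, 3]})
pq.write_to_dataset(sketch, root, partition_cols=["part"], filesystem=local)
dataset = pq.ParquetDataset(root, filesystem=local)
print(dataset.files)                           # fragment paths under <root>/part=x, part=y
assert "part" in dataset.read().column_names   # partition column is included on read
# ------------------------------------------------------------------------------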
_test_write_to_dataset_no_partitions( - path, use_legacy_dataset, filesystem=fs) + path, filesystem=fs) -@pytest.mark.filterwarnings( - "ignore:'ParquetDataset:FutureWarning", - "ignore:'partition_filename_cb':FutureWarning") -@pytest.mark.pandas -@parametrize_legacy_dataset_not_supported -def test_write_to_dataset_with_partitions_and_custom_filenames( - tempdir, use_legacy_dataset -): - output_df = pd.DataFrame({'group1': list('aaabbbbccc'), - 'group2': list('eefeffgeee'), - 'num': list(range(10)), - 'nan': [np.nan] * 10, - 'date': np.arange('2017-01-01', '2017-01-11', - dtype='datetime64[D]')}) - partition_by = ['group1', 'group2'] - output_table = pa.Table.from_pandas(output_df) - path = str(tempdir) - - def partition_filename_callback(keys): - return "{}-{}.parquet".format(*keys) - - pq.write_to_dataset(output_table, path, - partition_by, partition_filename_callback, - use_legacy_dataset=use_legacy_dataset) - - dataset = pq.ParquetDataset(path, use_legacy_dataset=use_legacy_dataset) - - # ARROW-3538: Ensure partition filenames match the given pattern - # defined in the local function partition_filename_callback - expected_basenames = [ - 'a-e.parquet', 'a-f.parquet', - 'b-e.parquet', 'b-f.parquet', - 'b-g.parquet', 'c-e.parquet' - ] - output_basenames = [os.path.basename(p.path) for p in dataset.pieces] - - assert sorted(expected_basenames) == sorted(output_basenames) - - -@pytest.mark.dataset @pytest.mark.pandas def test_write_to_dataset_filesystem(tempdir): df = pd.DataFrame({'A': [1, 2, 3]}) @@ -1502,7 +1108,7 @@ def test_write_to_dataset_filesystem(tempdir): assert result.equals(table) -def _make_dataset_for_pickling(tempdir, use_legacy_dataset=False, N=100): +def _make_dataset_for_pickling(tempdir, N=100): path = tempdir / 'data.parquet' fs = LocalFileSystem._get_instance() @@ -1525,42 +1131,22 @@ def _make_dataset_for_pickling(tempdir, use_legacy_dataset=False, N=100): pq.write_metadata(table.schema, f) dataset = pq.ParquetDataset( - tempdir, filesystem=fs, use_legacy_dataset=use_legacy_dataset) - if use_legacy_dataset: - with pytest.warns(FutureWarning): - assert dataset.metadata_path == str(metadata_path) + tempdir, filesystem=fs) return dataset @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pickle_dataset(tempdir, datadir, use_legacy_dataset, pickle_module): +def test_pickle_dataset(tempdir, pickle_module): def is_pickleable(obj): return obj == pickle_module.loads(pickle_module.dumps(obj)) - dataset = _make_dataset_for_pickling(tempdir, use_legacy_dataset) + dataset = _make_dataset_for_pickling(tempdir) assert is_pickleable(dataset) - if use_legacy_dataset: - with pytest.warns(FutureWarning): - metadata = dataset.metadata - assert is_pickleable(metadata) - assert is_pickleable(metadata.schema) - assert len(metadata.schema) - for column in metadata.schema: - assert is_pickleable(column) - - for piece in dataset._pieces: - assert is_pickleable(piece) - metadata = piece.get_metadata() - assert metadata.num_row_groups - for i in range(metadata.num_row_groups): - assert is_pickleable(metadata.row_group(i)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_partitioned_dataset(tempdir, use_legacy_dataset): +def test_partitioned_dataset(tempdir): # ARROW-3208: Segmentation fault when reading a Parquet partitioned dataset # to a Parquet file path = tempdir / "ARROW-3208" @@ -1571,27 +1157,20 @@ def test_partitioned_dataset(tempdir, use_legacy_dataset): }) table = pa.Table.from_pandas(df) pq.write_to_dataset(table, root_path=str(path), - partition_cols=['one', 
'two'], - use_legacy_dataset=use_legacy_dataset) - table = pq.ParquetDataset( - path, use_legacy_dataset=use_legacy_dataset).read() + partition_cols=['one', 'two']) + table = pq.ParquetDataset(path).read() pq.write_table(table, path / "output.parquet") -@pytest.mark.pandas -@parametrize_legacy_dataset -def test_dataset_read_dictionary(tempdir, use_legacy_dataset): +def test_dataset_read_dictionary(tempdir): path = tempdir / "ARROW-3325-dataset" t1 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0']) t2 = pa.table([[util.rands(10) for i in range(5)] * 10], names=['f0']) - pq.write_to_dataset(t1, root_path=str(path), - use_legacy_dataset=use_legacy_dataset) - pq.write_to_dataset(t2, root_path=str(path), - use_legacy_dataset=use_legacy_dataset) + pq.write_to_dataset(t1, root_path=str(path)) + pq.write_to_dataset(t2, root_path=str(path)) result = pq.ParquetDataset( - path, read_dictionary=['f0'], - use_legacy_dataset=use_legacy_dataset).read() + path, read_dictionary=['f0']).read() # The order of the chunks is non-deterministic ex_chunks = [t1[0].chunk(0).dictionary_encode(), @@ -1606,9 +1185,6 @@ def test_dataset_read_dictionary(tempdir, use_legacy_dataset): assert c1.equals(ex_chunks[0]) -@pytest.mark.dataset -@pytest.mark.pandas -@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def test_read_table_schema(tempdir): # test that schema keyword is passed through in read_table table = pa.table({'a': pa.array([1, 2, 3], pa.int32())}) @@ -1627,42 +1203,24 @@ def test_read_table_schema(tempdir): expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema) assert result.equals(expected) - # don't allow it with the legacy reader - with pytest.raises( - ValueError, match="The 'schema' argument is only supported" - ): - pq.read_table(tempdir / "data.parquet", schema=schema, - use_legacy_dataset=True) - - # using ParquetDataset directory with non-legacy implementation - result = pq.ParquetDataset( - tempdir, schema=schema, use_legacy_dataset=False - ) + result = pq.ParquetDataset(tempdir, schema=schema) expected = pa.table({'a': [1, 2, 3, 1, 2, 3]}, schema=schema) assert result.read().equals(expected) -@pytest.mark.dataset -def test_dataset_unsupported_keywords(): - - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, metadata=pa.schema([])) +def test_read_table_duplicate_column_selection(tempdir): + # test that duplicate column selection gives duplicate columns + table = pa.table({'a': pa.array([1, 2, 3], pa.int32()), + 'b': pa.array([1, 2, 3], pa.uint8())}) + pq.write_table(table, tempdir / "data.parquet") - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, validate_schema=False) + result = pq.read_table(tempdir / "data.parquet", columns=['a', 'a']) + expected_schema = pa.schema([('a', 'int32'), ('a', 'int32')]) - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, split_row_groups=True) + assert result.column_names == ['a', 'a'] + assert result.schema == expected_schema - with pytest.raises(ValueError, match="not yet supported with the new"): - pq.ParquetDataset("", use_legacy_dataset=False, metadata_nthreads=4) - with pytest.raises(ValueError, match="no longer supported"): - pq.read_table("", use_legacy_dataset=False, metadata=pa.schema([])) - - -@pytest.mark.dataset -@pytest.mark.filterwarnings("ignore:Passing 'use_legacy:FutureWarning") def 
test_dataset_partitioning(tempdir): import pyarrow.dataset as ds @@ -1679,42 +1237,25 @@ def test_dataset_partitioning(tempdir): # read_table part = ds.partitioning(field_names=["year", "month", "day"]) result = pq.read_table( - str(root_path), partitioning=part, use_legacy_dataset=False) + str(root_path), partitioning=part) assert result.column_names == ["a", "year", "month", "day"] result = pq.ParquetDataset( - str(root_path), partitioning=part, use_legacy_dataset=False).read() + str(root_path), partitioning=part).read() assert result.column_names == ["a", "year", "month", "day"] - # This raises an error for legacy dataset - with pytest.raises(ValueError): - pq.read_table( - str(root_path), partitioning=part, use_legacy_dataset=True) - - with pytest.raises(ValueError): - pq.ParquetDataset( - str(root_path), partitioning=part, use_legacy_dataset=True) - -@pytest.mark.dataset def test_parquet_dataset_new_filesystem(tempdir): # Ensure we can pass new FileSystem object to ParquetDataset - # (use new implementation automatically without specifying - # use_legacy_dataset=False) table = pa.table({'a': [1, 2, 3]}) pq.write_table(table, tempdir / 'data.parquet') - # don't use simple LocalFileSystem (as that gets mapped to legacy one) filesystem = fs.SubTreeFileSystem(str(tempdir), fs.LocalFileSystem()) dataset = pq.ParquetDataset('.', filesystem=filesystem) result = dataset.read() assert result.equals(table) -@pytest.mark.filterwarnings("ignore:'ParquetDataset:FutureWarning") -@parametrize_legacy_dataset -def test_parquet_dataset_partitions_piece_path_with_fsspec( - tempdir, use_legacy_dataset -): +def test_parquet_dataset_partitions_piece_path_with_fsspec(tempdir): # ARROW-10462 ensure that on Windows we properly use posix-style paths # as used by fsspec fsspec = pytest.importorskip("fsspec") @@ -1725,109 +1266,12 @@ def test_parquet_dataset_partitions_piece_path_with_fsspec( # pass a posix-style path (using "/" also on Windows) path = str(tempdir).replace("\\", "/") dataset = pq.ParquetDataset( - path, filesystem=filesystem, use_legacy_dataset=use_legacy_dataset) + path, filesystem=filesystem) # ensure the piece path is also posix-style expected = path + "/data.parquet" - assert dataset.pieces[0].path == expected - - -@pytest.mark.dataset -def test_parquet_dataset_deprecated_properties(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - pq.write_table(table, path) - dataset = pq.ParquetDataset(path, use_legacy_dataset=True) - - with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): - dataset.pieces - - with pytest.warns(FutureWarning, match="'ParquetDataset.partitions"): - dataset.partitions - - with pytest.warns(FutureWarning, match="'ParquetDataset.memory_map"): - dataset.memory_map - - with pytest.warns(FutureWarning, match="'ParquetDataset.read_dictio"): - dataset.read_dictionary - - with pytest.warns(FutureWarning, match="'ParquetDataset.buffer_size"): - dataset.buffer_size - - with pytest.warns(FutureWarning, match="'ParquetDataset.fs"): - dataset.fs - - with pytest.warns(FutureWarning, match="'ParquetDataset.schema'"): - dataset.schema - - with pytest.warns(FutureWarning, match="'ParquetDataset.common_metadata'"): - dataset.common_metadata - - with pytest.warns(FutureWarning, match="'ParquetDataset.metadata"): - dataset.metadata + assert dataset.fragments[0].path == expected - with pytest.warns(FutureWarning, match="'ParquetDataset.metadata_path"): - dataset.metadata_path - with pytest.warns(FutureWarning, - 
match="'ParquetDataset.common_metadata_path"): - dataset.common_metadata_path - - dataset2 = pq.ParquetDataset(path, use_legacy_dataset=False) - - with pytest.warns(FutureWarning, match="'ParquetDataset.pieces"): - dataset2.pieces - - -@pytest.mark.dataset -def test_parquet_write_to_dataset_deprecated_properties(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.warns(FutureWarning, - match="Passing 'use_legacy_dataset=True'"): - pq.write_to_dataset(table, path, use_legacy_dataset=True) - - # check also that legacy implementation is set when - # partition_filename_cb is specified - with pytest.warns(FutureWarning, - match="Passing 'use_legacy_dataset=True'"): - pq.write_to_dataset(table, path, - partition_filename_cb=lambda x: 'filename.parquet') - - -@pytest.mark.dataset -def test_parquet_write_to_dataset_unsupported_keywords_in_legacy(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.raises(ValueError, match="schema"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - schema=pa.schema([ - ('a', pa.int32()) - ])) - - with pytest.raises(ValueError, match="partitioning"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - partitioning=["a"]) - - with pytest.raises(ValueError, match="use_threads"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - use_threads=False) - - with pytest.raises(ValueError, match="file_visitor"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - file_visitor=lambda x: x) - - with pytest.raises(ValueError, match="existing_data_behavior"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - existing_data_behavior='error') - - with pytest.raises(ValueError, match="basename_template"): - pq.write_to_dataset(table, path, use_legacy_dataset=True, - basename_template='part-{i}.parquet') - - -@pytest.mark.dataset def test_parquet_write_to_dataset_exposed_keywords(tempdir): table = pa.table({'a': [1, 2, 3]}) path = tempdir / 'partitioning' @@ -1841,8 +1285,7 @@ def file_visitor(written_file): pq.write_to_dataset(table, path, partitioning=["a"], file_visitor=file_visitor, - basename_template=basename_template, - use_legacy_dataset=False) + basename_template=basename_template) expected_paths = { path / '1' / 'part-0.parquet', @@ -1853,53 +1296,6 @@ def file_visitor(written_file): assert paths_written_set == expected_paths -@pytest.mark.dataset -def test_write_to_dataset_conflicting_keywords(tempdir): - table = pa.table({'a': [1, 2, 3]}) - path = tempdir / 'data.parquet' - - with pytest.raises(ValueError, match="'basename_template' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - partition_filename_cb=lambda x: 'filename.parquet', - basename_template='file-{i}.parquet') - with pytest.raises(ValueError, match="'partition_filename_cb' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - partition_filename_cb=lambda x: 'filename.parquet', - basename_template='file-{i}.parquet') - - with pytest.raises(ValueError, match="'partitioning' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - partition_cols=["a"], - partitioning=["a"]) - - with pytest.raises(ValueError, match="'partition_cols' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - 
partition_cols=["a"], - partitioning=["a"]) - - with pytest.raises(ValueError, match="'file_visitor' argument " - "is not supported by use_legacy_dataset=True"): - pq.write_to_dataset(table, path, - use_legacy_dataset=True, - metadata_collector=[], - file_visitor=lambda x: x) - with pytest.raises(ValueError, match="'metadata_collector' argument " - "is not supported by use_legacy_dataset=False"): - pq.write_to_dataset(table, path, - use_legacy_dataset=False, - metadata_collector=[], - file_visitor=lambda x: x) - - -@pytest.mark.dataset @pytest.mark.parametrize("write_dataset_kwarg", ( ("create_dir", True), ("create_dir", False), @@ -1926,8 +1322,7 @@ def test_write_to_dataset_kwargs_passed(tempdir, write_dataset_kwarg): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_category_observed(tempdir, use_legacy_dataset): +def test_write_to_dataset_category_observed(tempdir): # if we partition on a categorical variable with "unobserved" categories # (values present in the dictionary, but not in the actual data) # ensure those are not creating empty files/directories @@ -1938,8 +1333,7 @@ def test_write_to_dataset_category_observed(tempdir, use_legacy_dataset): table = pa.table(df) path = tempdir / "dataset" pq.write_to_dataset( - table, tempdir / "dataset", partition_cols=["cat"], - use_legacy_dataset=use_legacy_dataset + table, tempdir / "dataset", partition_cols=["cat"] ) subdirs = [f.name for f in path.iterdir() if f.is_dir()] assert len(subdirs) == 2 diff --git a/python/pyarrow/tests/parquet/test_datetime.py b/python/pyarrow/tests/parquet/test_datetime.py index f97c451df7ad7..6a9cbd4f73d4f 100644 --- a/python/pyarrow/tests/parquet/test_datetime.py +++ b/python/pyarrow/tests/parquet/test_datetime.py @@ -23,8 +23,7 @@ import pytest import pyarrow as pa -from pyarrow.tests.parquet.common import ( - _check_roundtrip, parametrize_legacy_dataset) +from pyarrow.tests.parquet.common import _check_roundtrip try: import pyarrow.parquet as pq @@ -48,8 +47,7 @@ @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_datetime_tz(use_legacy_dataset): +def test_pandas_parquet_datetime_tz(): # Pandas v2 defaults to [ns], but Arrow defaults to [us] time units # so we need to cast the pandas dtype. Pandas v1 will always silently # coerce to [ns] due to lack of non-[ns] support. 
@@ -69,21 +67,19 @@ def test_pandas_parquet_datetime_tz(use_legacy_dataset): _write_table(arrow_table, f) f.seek(0) - table_read = pq.read_pandas(f, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(f) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_datetime_timezone_tzinfo(use_legacy_dataset): +def test_datetime_timezone_tzinfo(): value = datetime.datetime(2018, 1, 1, 1, 23, 45, tzinfo=datetime.timezone.utc) df = pd.DataFrame({'foo': [value]}) - _roundtrip_pandas_dataframe( - df, write_kwargs={}, use_legacy_dataset=use_legacy_dataset) + _roundtrip_pandas_dataframe(df, write_kwargs={}) @pytest.mark.pandas diff --git a/python/pyarrow/tests/parquet/test_pandas.py b/python/pyarrow/tests/parquet/test_pandas.py index 0ed305bff1945..f194d12876968 100644 --- a/python/pyarrow/tests/parquet/test_pandas.py +++ b/python/pyarrow/tests/parquet/test_pandas.py @@ -23,8 +23,6 @@ import pyarrow as pa from pyarrow.fs import LocalFileSystem, SubTreeFileSystem -from pyarrow.tests.parquet.common import ( - parametrize_legacy_dataset, parametrize_legacy_dataset_not_supported) from pyarrow.util import guid from pyarrow.vendored.version import Version @@ -101,8 +99,7 @@ def test_merging_parquet_tables_with_different_pandas_metadata(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_column_multiindex(tempdir, use_legacy_dataset): +def test_pandas_parquet_column_multiindex(tempdir): df = alltypes_sample(size=10) df.columns = pd.MultiIndex.from_tuples( list(zip(df.columns, df.columns[::-1])), @@ -115,17 +112,13 @@ def test_pandas_parquet_column_multiindex(tempdir, use_legacy_dataset): _write_table(arrow_table, filename) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( - tempdir, use_legacy_dataset -): +def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written(tempdir): df = alltypes_sample(size=10000) filename = tempdir / 'pandas_roundtrip.parquet' @@ -137,8 +130,7 @@ def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( assert js['columns'] _write_table(arrow_table, filename) - table_read = pq.read_pandas( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = pq.read_pandas(filename) js = table_read.schema.pandas_metadata assert not js['index_columns'] @@ -150,52 +142,20 @@ def test_pandas_parquet_2_0_roundtrip_read_pandas_no_index_written( tm.assert_frame_equal(df, df_read) -# TODO(dataset) duplicate column selection actually gives duplicate columns now -@pytest.mark.pandas -@parametrize_legacy_dataset_not_supported -def test_pandas_column_selection(tempdir, use_legacy_dataset): - size = 10000 - np.random.seed(0) - df = pd.DataFrame({ - 'uint8': np.arange(size, dtype=np.uint8), - 'uint16': np.arange(size, dtype=np.uint16) - }) - filename = tempdir / 'pandas_roundtrip.parquet' - arrow_table = pa.Table.from_pandas(df) - _write_table(arrow_table, filename) - table_read = _read_table( - filename, columns=['uint8'], use_legacy_dataset=use_legacy_dataset) - df_read = table_read.to_pandas() - - tm.assert_frame_equal(df[['uint8']], df_read) - - # ARROW-4267: Selection of duplicate columns still leads to these columns - # being read uniquely. 
- table_read = _read_table( - filename, columns=['uint8', 'uint8'], - use_legacy_dataset=use_legacy_dataset) - df_read = table_read.to_pandas() - - tm.assert_frame_equal(df[['uint8']], df_read) - - @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_native_file_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_native_file_roundtrip(): df = _test_dataframe(10000) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() _write_table(arrow_table, imos, version='2.6') buf = imos.getvalue() reader = pa.BufferReader(buf) - df_read = _read_table( - reader, use_legacy_dataset=use_legacy_dataset).to_pandas() + df_read = _read_table(reader).to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_read_pandas_column_subset(tempdir, use_legacy_dataset): +def test_read_pandas_column_subset(): df = _test_dataframe(10000) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() @@ -204,27 +164,24 @@ def test_read_pandas_column_subset(tempdir, use_legacy_dataset): reader = pa.BufferReader(buf) df_read = pq.read_pandas( reader, columns=['strings', 'uint8'], - use_legacy_dataset=use_legacy_dataset ).to_pandas() tm.assert_frame_equal(df[['strings', 'uint8']], df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_empty_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_empty_roundtrip(): df = _test_dataframe(0) arrow_table = pa.Table.from_pandas(df) imos = pa.BufferOutputStream() _write_table(arrow_table, imos, version='2.6') buf = imos.getvalue() reader = pa.BufferReader(buf) - df_read = _read_table( - reader, use_legacy_dataset=use_legacy_dataset).to_pandas() + df_read = _read_table(reader).to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -def test_pandas_can_write_nested_data(tempdir): +def test_pandas_can_write_nested_data(): data = { "agg_col": [ {"page_type": 1}, @@ -241,8 +198,7 @@ def test_pandas_can_write_nested_data(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_pyfile_roundtrip(tempdir, use_legacy_dataset): +def test_pandas_parquet_pyfile_roundtrip(tempdir): filename = tempdir / 'pandas_pyfile_roundtrip.parquet' size = 5 df = pd.DataFrame({ @@ -260,14 +216,13 @@ def test_pandas_parquet_pyfile_roundtrip(tempdir, use_legacy_dataset): data = io.BytesIO(filename.read_bytes()) - table_read = _read_table(data, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(data) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): +def test_pandas_parquet_configuration_options(tempdir): size = 10000 np.random.seed(0) df = pd.DataFrame({ @@ -289,16 +244,14 @@ def test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): for use_dictionary in [True, False]: _write_table(arrow_table, filename, version='2.6', use_dictionary=use_dictionary) - table_read = _read_table( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) for write_statistics in [True, False]: _write_table(arrow_table, filename, version='2.6', write_statistics=write_statistics) - table_read = _read_table(filename, - use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -308,8 +261,7 @@ def 
test_pandas_parquet_configuration_options(tempdir, use_legacy_dataset): continue _write_table(arrow_table, filename, version='2.6', compression=compression) - table_read = _read_table( - filename, use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(filename) df_read = table_read.to_pandas() tm.assert_frame_equal(df, df_read) @@ -327,8 +279,7 @@ def test_spark_flavor_preserves_pandas_metadata(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_index_column_name_duplicate(tempdir, use_legacy_dataset): +def test_index_column_name_duplicate(tempdir): data = { 'close': { pd.Timestamp('2017-06-30 01:31:00'): 154.99958999999998, @@ -352,14 +303,13 @@ def test_index_column_name_duplicate(tempdir, use_legacy_dataset): tdfx = pa.Table.from_pandas(dfx) _write_table(tdfx, path) - arrow_table = _read_table(path, use_legacy_dataset=use_legacy_dataset) + arrow_table = _read_table(path) result_df = arrow_table.to_pandas() tm.assert_frame_equal(result_df, dfx) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): +def test_multiindex_duplicate_values(tempdir): num_rows = 3 numbers = list(range(num_rows)) index = pd.MultiIndex.from_arrays( @@ -373,7 +323,7 @@ def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): filename = tempdir / 'dup_multi_index_levels.parquet' _write_table(table, filename) - result_table = _read_table(filename, use_legacy_dataset=use_legacy_dataset) + result_table = _read_table(filename) assert table.equals(result_table) result_df = result_table.to_pandas() @@ -381,8 +331,7 @@ def test_multiindex_duplicate_values(tempdir, use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_naming(datadir, use_legacy_dataset): +def test_backwards_compatible_index_naming(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -397,17 +346,13 @@ def test_backwards_compatible_index_naming(datadir, use_legacy_dataset): 0.23 Very Good H VS1 59.4 61.0 338 4.00 4.05 2.39""" expected = pd.read_csv(io.BytesIO(expected_string), sep=r'\s{2,}', index_col=None, header=0, engine='python') - table = _read_table( - datadir / 'v0.7.1.parquet', use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_multi_level_named( - datadir, use_legacy_dataset -): +def test_backwards_compatible_index_multi_level_named(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -426,17 +371,13 @@ def test_backwards_compatible_index_multi_level_named( header=0, engine='python' ).sort_index() - table = _read_table(datadir / 'v0.7.1.all-named-index.parquet', - use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.all-named-index.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_index_multi_level_some_named( - datadir, use_legacy_dataset -): +def test_backwards_compatible_index_multi_level_some_named(datadir): expected_string = b"""\ carat cut color clarity depth table price x y z 0.23 Ideal E SI2 61.5 55.0 326 3.95 3.98 2.43 @@ -456,17 +397,13 @@ def test_backwards_compatible_index_multi_level_some_named( ).sort_index() 
expected.index = expected.index.set_names(['cut', None, 'clarity']) - table = _read_table(datadir / 'v0.7.1.some-named-index.parquet', - use_legacy_dataset=use_legacy_dataset) + table = _read_table(datadir / 'v0.7.1.some-named-index.parquet') result = table.to_pandas() tm.assert_frame_equal(result, expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_backwards_compatible_column_metadata_handling( - datadir, use_legacy_dataset -): +def test_backwards_compatible_column_metadata_handling(datadir): expected = pd.DataFrame( {'a': [1, 2, 3], 'b': [.1, .2, .3], 'c': pd.date_range("2017-01-01", periods=3, tz='Europe/Brussels')}) @@ -476,19 +413,18 @@ def test_backwards_compatible_column_metadata_handling( names=['index', None]) path = datadir / 'v0.7.1.column-metadata-handling.parquet' - table = _read_table(path, use_legacy_dataset=use_legacy_dataset) + table = _read_table(path) result = table.to_pandas() tm.assert_frame_equal(result, expected) table = _read_table( - path, columns=['a'], use_legacy_dataset=use_legacy_dataset) + path, columns=['a']) result = table.to_pandas() tm.assert_frame_equal(result, expected[['a']].reset_index(drop=True)) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_categorical_index_survives_roundtrip(use_legacy_dataset): +def test_categorical_index_survives_roundtrip(): # ARROW-3652, addressed by ARROW-3246 df = pd.DataFrame([['a', 'b'], ['c', 'd']], columns=['c1', 'c2']) df['c1'] = df['c1'].astype('category') @@ -497,15 +433,13 @@ def test_categorical_index_survives_roundtrip(use_legacy_dataset): table = pa.Table.from_pandas(df) bos = pa.BufferOutputStream() pq.write_table(table, bos) - ref_df = pq.read_pandas( - bos.getvalue(), use_legacy_dataset=use_legacy_dataset).to_pandas() + ref_df = pq.read_pandas(bos.getvalue()).to_pandas() assert isinstance(ref_df.index, pd.CategoricalIndex) assert ref_df.index.equals(df.index) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_categorical_order_survives_roundtrip(use_legacy_dataset): +def test_categorical_order_survives_roundtrip(): # ARROW-6302 df = pd.DataFrame({"a": pd.Categorical( ["a", "b", "c", "a"], categories=["b", "c", "d"], ordered=True)}) @@ -515,15 +449,13 @@ def test_categorical_order_survives_roundtrip(use_legacy_dataset): pq.write_table(table, bos) contents = bos.getvalue() - result = pq.read_pandas( - contents, use_legacy_dataset=use_legacy_dataset).to_pandas() + result = pq.read_pandas(contents).to_pandas() tm.assert_frame_equal(result, df) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): +def test_pandas_categorical_na_type_row_groups(): # ARROW-5085 df = pd.DataFrame({"col": [None] * 100, "int": [1.0] * 100}) df_category = df.astype({"col": "category", "int": "category"}) @@ -533,8 +465,7 @@ def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): # it works pq.write_table(table_cat, buf, version='2.6', chunk_size=10) - result = pq.read_table( - buf.getvalue(), use_legacy_dataset=use_legacy_dataset) + result = pq.read_table(buf.getvalue()) # Result is non-categorical assert result[0].equals(table[0]) @@ -542,8 +473,7 @@ def test_pandas_categorical_na_type_row_groups(use_legacy_dataset): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_pandas_categorical_roundtrip(use_legacy_dataset): +def test_pandas_categorical_roundtrip(): # ARROW-5480, this was enabled by ARROW-3246 # Have one of the categories unobserved and include a null (-1) @@ -555,8 +485,7 @@ def 
test_pandas_categorical_roundtrip(use_legacy_dataset): buf = pa.BufferOutputStream() pq.write_table(pa.table(df), buf) - result = pq.read_table( - buf.getvalue(), use_legacy_dataset=use_legacy_dataset).to_pandas() + result = pq.read_table(buf.getvalue()).to_pandas() assert result.x.dtype == 'category' assert (result.x.cat.categories == categories).all() tm.assert_frame_equal(result, df) @@ -587,41 +516,28 @@ def test_categories_with_string_pyarrow_dtype(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pandas_preserve_extensiondtypes( - tempdir, use_legacy_dataset -): +def test_write_to_dataset_pandas_preserve_extensiondtypes(tempdir): df = pd.DataFrame({'part': 'a', "col": [1, 2, 3]}) df['col'] = df['col'].astype("Int64") table = pa.table(df) pq.write_to_dataset( table, str(tempdir / "case1"), partition_cols=['part'], - use_legacy_dataset=use_legacy_dataset ) - result = pq.read_table( - str(tempdir / "case1"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "case1")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) - pq.write_to_dataset( - table, str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ) - result = pq.read_table( - str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + pq.write_to_dataset(table, str(tempdir / "case2")) + result = pq.read_table(str(tempdir / "case2")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) pq.write_table(table, str(tempdir / "data.parquet")) - result = pq.read_table( - str(tempdir / "data.parquet"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() tm.assert_frame_equal(result[["col"]], df[["col"]]) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_write_to_dataset_pandas_preserve_index(tempdir, use_legacy_dataset): +def test_write_to_dataset_pandas_preserve_index(tempdir): # ARROW-8251 - preserve pandas index in roundtrip df = pd.DataFrame({'part': ['a', 'a', 'b'], "col": [1, 2, 3]}) @@ -632,34 +548,24 @@ def test_write_to_dataset_pandas_preserve_index(tempdir, use_legacy_dataset): pq.write_to_dataset( table, str(tempdir / "case1"), partition_cols=['part'], - use_legacy_dataset=use_legacy_dataset ) - result = pq.read_table( - str(tempdir / "case1"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "case1")).to_pandas() tm.assert_frame_equal(result, df_cat) - pq.write_to_dataset( - table, str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ) - result = pq.read_table( - str(tempdir / "case2"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + pq.write_to_dataset(table, str(tempdir / "case2")) + result = pq.read_table(str(tempdir / "case2")).to_pandas() tm.assert_frame_equal(result, df) pq.write_table(table, str(tempdir / "data.parquet")) - result = pq.read_table( - str(tempdir / "data.parquet"), use_legacy_dataset=use_legacy_dataset - ).to_pandas() + result = pq.read_table(str(tempdir / "data.parquet")).to_pandas() tm.assert_frame_equal(result, df) @pytest.mark.pandas -@parametrize_legacy_dataset @pytest.mark.parametrize('preserve_index', [True, False, None]) @pytest.mark.parametrize('metadata_fname', ["_metadata", "_common_metadata"]) def test_dataset_read_pandas_common_metadata( - tempdir, use_legacy_dataset, preserve_index, metadata_fname + tempdir, preserve_index, metadata_fname ): # ARROW-1103 nfiles = 5 @@ -696,7 +602,7 @@ def 
test_dataset_read_pandas_common_metadata( ) pq.write_metadata(table_for_metadata.schema, dirpath / metadata_fname) - dataset = pq.ParquetDataset(dirpath, use_legacy_dataset=use_legacy_dataset) + dataset = pq.ParquetDataset(dirpath) columns = ['uint8', 'strings'] result = dataset.read_pandas(columns=columns).to_pandas() expected = pd.concat([x[columns] for x in frames]) diff --git a/python/pyarrow/tests/parquet/test_parquet_file.py b/python/pyarrow/tests/parquet/test_parquet_file.py index 9f920206a107e..93097a1afaac9 100644 --- a/python/pyarrow/tests/parquet/test_parquet_file.py +++ b/python/pyarrow/tests/parquet/test_parquet_file.py @@ -18,7 +18,6 @@ import io import os import sys -from unittest import mock import pytest @@ -296,28 +295,6 @@ def test_parquet_file_explicitly_closed(tempdir): table = pa.table({'col1': [0, 1], 'col2': [0, 1]}) pq.write_table(table, fn) - # read_table (legacy) with opened file (will leave open) - with open(fn, 'rb') as f: - pq.read_table(f, use_legacy_dataset=True) - assert not f.closed # Didn't close it internally after read_table - - # read_table (legacy) with unopened file (will close) - with mock.patch.object(pq.ParquetFile, "close") as mock_close: - pq.read_table(fn, use_legacy_dataset=True) - mock_close.assert_called() - - # ParquetDataset test (legacy) with unopened file (will close) - with mock.patch.object(pq.ParquetFile, "close") as mock_close: - pq.ParquetDataset(fn, use_legacy_dataset=True).read() - mock_close.assert_called() - - # ParquetDataset test (legacy) with opened file (will leave open) - with open(fn, 'rb') as f: - # ARROW-8075: support ParquetDataset from file-like, not just path-like - with pytest.raises(TypeError, match='not a path-like object'): - pq.ParquetDataset(f, use_legacy_dataset=True).read() - assert not f.closed - # ParquetFile with opened file (will leave open) with open(fn, 'rb') as f: with pq.ParquetFile(f) as p: @@ -338,7 +315,7 @@ def test_parquet_file_explicitly_closed(tempdir): @pytest.mark.s3 @pytest.mark.parametrize("use_uri", (True, False)) -def test_parquet_file_with_filesystem(tempdir, s3_example_fs, use_uri): +def test_parquet_file_with_filesystem(s3_example_fs, use_uri): s3_fs, s3_uri, s3_path = s3_example_fs args = (s3_uri if use_uri else s3_path,) diff --git a/python/pyarrow/tests/parquet/test_parquet_writer.py b/python/pyarrow/tests/parquet/test_parquet_writer.py index b902541015aa2..16584684f5c7f 100644 --- a/python/pyarrow/tests/parquet/test_parquet_writer.py +++ b/python/pyarrow/tests/parquet/test_parquet_writer.py @@ -20,7 +20,6 @@ import pyarrow as pa from pyarrow import fs from pyarrow.filesystem import FileSystem, LocalFileSystem -from pyarrow.tests.parquet.common import parametrize_legacy_dataset try: import pyarrow.parquet as pq @@ -44,8 +43,7 @@ @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_incremental_file_build(tempdir, use_legacy_dataset): +def test_parquet_incremental_file_build(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -65,8 +63,7 @@ def test_parquet_incremental_file_build(tempdir, use_legacy_dataset): writer.close() buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @@ -105,8 +102,7 @@ def test_parquet_invalid_writer(tempdir): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_context_obj(tempdir, use_legacy_dataset): +def 
test_parquet_writer_context_obj(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -124,18 +120,14 @@ def test_parquet_writer_context_obj(tempdir, use_legacy_dataset): frames.append(df.copy()) buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_context_obj_with_exception( - tempdir, use_legacy_dataset -): +def test_parquet_writer_context_obj_with_exception(tempdir): df = _test_dataframe(100) df['unique_id'] = 0 @@ -160,8 +152,7 @@ def test_parquet_writer_context_obj_with_exception( assert str(e) == error_text buf = out.getvalue() - result = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + result = _read_table(pa.BufferReader(buf)) expected = pd.concat(frames, ignore_index=True) tm.assert_frame_equal(result.to_pandas(), expected) @@ -340,8 +331,7 @@ def test_parquet_writer_filesystem_buffer_raises(): @pytest.mark.pandas -@parametrize_legacy_dataset -def test_parquet_writer_with_caller_provided_filesystem(use_legacy_dataset): +def test_parquet_writer_with_caller_provided_filesystem(): out = pa.BufferOutputStream() class CustomFS(FileSystem): @@ -368,8 +358,7 @@ def open(self, path, mode='rb'): assert out.closed buf = out.getvalue() - table_read = _read_table( - pa.BufferReader(buf), use_legacy_dataset=use_legacy_dataset) + table_read = _read_table(pa.BufferReader(buf)) df_read = table_read.to_pandas() tm.assert_frame_equal(df_read, df) diff --git a/python/pyarrow/tests/test_dataset.py b/python/pyarrow/tests/test_dataset.py index a37eb1e426f7a..e2bb4400c8bde 100644 --- a/python/pyarrow/tests/test_dataset.py +++ b/python/pyarrow/tests/test_dataset.py @@ -1148,7 +1148,6 @@ def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None): path = str(tempdir / "test_parquet_dataset") - # write_to_dataset currently requires pandas pq.write_to_dataset(table, path, partition_cols=["part"], chunk_size=chunk_size) dataset = ds.dataset( @@ -1158,10 +1157,7 @@ def _create_dataset_for_fragments(tempdir, chunk_size=None, filesystem=None): return table, dataset -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments(tempdir, dataset_reader): table, dataset = _create_dataset_for_fragments(tempdir) @@ -1208,10 +1204,7 @@ def test_fragments_implicit_cast(tempdir): assert len(list(fragments)) == 1 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_reconstruct(tempdir, dataset_reader, pickle_module): table, dataset = _create_dataset_for_fragments(tempdir) @@ -1272,10 +1265,7 @@ def assert_yields_projected(fragment, row_slice, dataset_reader.to_table(new_fragment, filter=ds.field('part') == 'a') -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups(tempdir, dataset_reader): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1326,8 +1316,6 @@ def test_fragments_parquet_num_row_groups(tempdir): @pytest.mark.pandas @pytest.mark.parquet def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader): - import pandas as pd - df = pd.DataFrame(dict(col1=['a', 
'b'], col2=[1, 2])) df['col1'] = df['col1'].astype("category") @@ -1340,10 +1328,7 @@ def test_fragments_parquet_row_groups_dictionary(tempdir, dataset_reader): assert (df.iloc[0] == result.to_pandas()).all().all() -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_module): fs, assert_opens = open_logging_fs _, dataset = _create_dataset_for_fragments( @@ -1384,7 +1369,6 @@ def test_fragments_parquet_ensure_metadata(tempdir, open_logging_fs, pickle_modu assert row_group.statistics is not None -@pytest.mark.pandas @pytest.mark.parquet def test_fragments_parquet_pickle_no_metadata(tempdir, open_logging_fs, pickle_module): # https://issues.apache.org/jira/browse/ARROW-15796 @@ -1454,16 +1438,13 @@ def _create_dataset_all_types(tempdir, chunk_size=None): path = str(tempdir / "test_parquet_dataset_all_types") # write_to_dataset currently requires pandas - pq.write_to_dataset(table, path, use_legacy_dataset=True, - chunk_size=chunk_size) + pq.write_to_dataset(table, path, chunk_size=chunk_size) return table, ds.dataset(path, format="parquet", partitioning="hive") @pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_parquet_fragment_statistics(tempdir): table, dataset = _create_dataset_all_types(tempdir) @@ -1529,10 +1510,7 @@ def test_parquet_empty_row_group_statistics(tempdir): assert fragments[0].row_groups[0].statistics == {} -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups_predicate(tempdir): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1555,10 +1533,7 @@ def test_fragments_parquet_row_groups_predicate(tempdir): assert len(row_group_fragments) == 0 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader, pickle_module): table, dataset = _create_dataset_for_fragments(tempdir, chunk_size=2) @@ -1600,10 +1575,7 @@ def test_fragments_parquet_row_groups_reconstruct(tempdir, dataset_reader, dataset_reader.to_table(new_fragment) -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_ids(tempdir, open_logging_fs, dataset_reader): fs, assert_opens = open_logging_fs @@ -1631,10 +1603,7 @@ def test_fragments_parquet_subset_ids(tempdir, open_logging_fs, assert result.equals(table[:0]) -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_filter(tempdir, open_logging_fs, dataset_reader): fs, assert_opens = open_logging_fs @@ -1666,10 +1635,7 @@ def test_fragments_parquet_subset_filter(tempdir, open_logging_fs, assert subfrag.num_row_groups == 4 -@pytest.mark.pandas @pytest.mark.parquet -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") def test_fragments_parquet_subset_invalid(tempdir): _, dataset = _create_dataset_for_fragments(tempdir, chunk_size=1) fragment = list(dataset.get_fragments())[0] @@ -3591,10 +3557,7 @@ def test_parquet_dataset_factory_fsspec(tempdir): @pytest.mark.parquet @pytest.mark.pandas # 
write_to_dataset currently requires pandas -@pytest.mark.parametrize('use_legacy_dataset', [False, True]) -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") -def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset): +def test_parquet_dataset_factory_roundtrip(tempdir): # Simple test to ensure we can roundtrip dataset to # _metadata/common_metadata and back. A more complex test # using partitioning will have to wait for ARROW-13269. The @@ -3606,7 +3569,6 @@ def test_parquet_dataset_factory_roundtrip(tempdir, use_legacy_dataset): metadata_collector = [] pq.write_to_dataset( table, str(root_path), metadata_collector=metadata_collector, - use_legacy_dataset=use_legacy_dataset ) metadata_path = str(root_path / '_metadata') # write _metadata file @@ -3820,7 +3782,6 @@ def test_dataset_project_only_partition_columns(tempdir, dataset_reader): @pytest.mark.parquet @pytest.mark.pandas def test_dataset_project_null_column(tempdir, dataset_reader): - import pandas as pd df = pd.DataFrame({"col": np.array([None, None, None], dtype='object')}) f = tempdir / "test_dataset_project_null_column.parquet" @@ -3930,8 +3891,7 @@ def test_write_to_dataset_given_null_just_works(tempdir): 'col': list(range(4))}, schema=schema) path = str(tempdir / 'test_dataset') - pq.write_to_dataset(table, path, partition_cols=[ - 'part'], use_legacy_dataset=False) + pq.write_to_dataset(table, path, partition_cols=['part']) actual_table = pq.read_table(tempdir / 'test_dataset') # column.equals can handle the difference in chunking but not the fact @@ -3941,28 +3901,6 @@ def test_write_to_dataset_given_null_just_works(tempdir): assert actual_table.column('col').equals(table.column('col')) -@pytest.mark.parquet -@pytest.mark.pandas -@pytest.mark.filterwarnings( - "ignore:Passing 'use_legacy_dataset=True':FutureWarning") -def test_legacy_write_to_dataset_drops_null(tempdir): - schema = pa.schema([ - pa.field('col', pa.int64()), - pa.field('part', pa.dictionary(pa.int32(), pa.string())) - ]) - table = pa.table({'part': ['a', 'a', None, None], - 'col': list(range(4))}, schema=schema) - expected = pa.table( - {'part': ['a', 'a'], 'col': list(range(2))}, schema=schema) - - path = str(tempdir / 'test_dataset') - pq.write_to_dataset(table, path, partition_cols=[ - 'part'], use_legacy_dataset=True) - - actual = pq.read_table(tempdir / 'test_dataset') - assert actual == expected - - def _sort_table(tab, sort_col): import pyarrow.compute as pc sorted_indices = pc.sort_indices( diff --git a/python/pyarrow/tests/test_hdfs.py b/python/pyarrow/tests/test_hdfs.py index 511dbf9a1c4e1..5b94c200f35de 100644 --- a/python/pyarrow/tests/test_hdfs.py +++ b/python/pyarrow/tests/test_hdfs.py @@ -27,7 +27,7 @@ from pyarrow.tests import util from pyarrow.tests.parquet.common import _test_dataframe from pyarrow.tests.parquet.test_dataset import ( - _test_read_common_metadata_files, _test_write_to_dataset_with_partitions, + _test_write_to_dataset_with_partitions, _test_write_to_dataset_no_partitions ) from pyarrow.util import guid @@ -309,6 +309,9 @@ def _write_multiple_hdfs_pq_files(self, tmpdir): expected = pa.concat_tables(test_data) return expected + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.pandas @pytest.mark.parquet def test_read_multiple_parquet_files(self): @@ -343,6 +346,9 @@ def test_read_multiple_parquet_files_with_uri(self): expected.to_pandas() ) + 
@pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.pandas @pytest.mark.parquet def test_read_write_parquet_files_with_uri(self): @@ -360,19 +366,13 @@ def test_read_write_parquet_files_with_uri(self): pq.write_table(table, path, filesystem=self.hdfs) - result = pq.read_table( - path, filesystem=self.hdfs, use_legacy_dataset=True - ).to_pandas() + result = pq.read_table(path, filesystem=self.hdfs).to_pandas() assert_frame_equal(result, df) - @pytest.mark.parquet - @pytest.mark.pandas - def test_read_common_metadata_files(self): - tmpdir = pjoin(self.tmp_path, 'common-metadata-' + guid()) - self.hdfs.mkdir(tmpdir) - _test_read_common_metadata_files(self.hdfs, tmpdir) - + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.parquet @pytest.mark.pandas def test_write_to_dataset_with_partitions(self): @@ -381,6 +381,9 @@ def test_write_to_dataset_with_partitions(self): _test_write_to_dataset_with_partitions( tmpdir, filesystem=self.hdfs) + @pytest.mark.xfail(reason="legacy.FileSystem not supported with ParquetDataset " + "due to legacy path being removed in PyArrow 15.0.0.", + raises=TypeError) @pytest.mark.parquet @pytest.mark.pandas def test_write_to_dataset_no_partitions(self): diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 9745393d01bbc..bdaac0a9ce5d2 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -275,7 +275,8 @@ struct AltrepVectorPrimitive : public AltrepVectorBase(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; return array->IsNull(j) ? 
cpp11::na() @@ -466,10 +467,10 @@ struct AltrepFactor : public AltrepVectorBase { std::unique_ptr unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); BufferVector arrays_transpose(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *internal::checked_cast(*chunked_array->chunk(i)) .dictionary(); @@ -559,17 +560,14 @@ struct AltrepFactor : public AltrepVectorBase { return dup; } - // The value at position i - static int Elt(SEXP alt, R_xlen_t i) { - if (Base::IsMaterialized(alt)) { - return INTEGER_ELT(Representation(alt), i); - } - + // The value at position i as an int64_t (to make bounds checking less verbose) + static int64_t Elt64(SEXP alt, R_xlen_t i) { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; if (!array->IsNull(j)) { @@ -578,7 +576,7 @@ struct AltrepFactor : public AltrepVectorBase { if (WasUnified(alt)) { const auto* transpose_data = reinterpret_cast( - GetArrayTransposed(alt, resolve.chunk_index)->data()); + GetArrayTransposed(alt, static_cast(resolve.chunk_index))->data()); switch (indices->type_id()) { case Type::UINT8: @@ -617,7 +615,7 @@ struct AltrepFactor : public AltrepVectorBase { case Type::INT64: return indices->data()->GetValues(1)[j] + 1; case Type::UINT64: - return indices->data()->GetValues(1)[j] + 1; + return static_cast(indices->data()->GetValues(1)[j] + 1); default: break; } @@ -628,6 +626,18 @@ struct AltrepFactor : public AltrepVectorBase { return NA_INTEGER; } + // The value at position i as an int (which R needs because this is a factor) + static int Elt(SEXP alt, R_xlen_t i) { + if (Base::IsMaterialized(alt)) { + return INTEGER_ELT(Representation(alt), i); + } + + int64_t elt64 = Elt64(alt, i); + ARROW_R_DCHECK(elt64 == NA_INTEGER || elt64 >= 1); + ARROW_R_DCHECK(elt64 <= std::numeric_limits::max()); + return static_cast(elt64); + } + static R_xlen_t Get_region(SEXP alt, R_xlen_t start, R_xlen_t n, int* buf) { // If we have data2, we can just copy the region into buf // using the standard Get_region for this R type @@ -667,7 +677,7 @@ struct AltrepFactor : public AltrepVectorBase { // using the transpose data for this chunk const auto* transpose_data = reinterpret_cast(GetArrayTransposed(alt, j)->data()); - auto transpose = [transpose_data](int x) { return transpose_data[x]; }; + auto transpose = [transpose_data](int64_t x) { return transpose_data[x]; }; GetRegionDispatch(array, indices, transpose, out); @@ -677,7 +687,7 @@ struct AltrepFactor : public AltrepVectorBase { } else { // simpler case, identity transpose - auto transpose = [](int x) { return x; }; + auto transpose = [](int64_t x) { return static_cast(x); }; int* out = buf; for (const auto& array : slice->chunks()) { @@ -718,7 +728,13 @@ struct AltrepFactor : public AltrepVectorBase { VisitArraySpanInline( *array->data(), - /*valid_func=*/[&](index_type index) { *out++ = transpose(index) + 1; }, + /*valid_func=*/ + [&](index_type index) { + int64_t transposed = transpose(index) + 1; + ARROW_R_DCHECK(transposed >= 1); + ARROW_R_DCHECK(transposed <= std::numeric_limits::max()); + *out++ = static_cast(transposed); + }, /*null_func=*/[&]() { *out++ = 
cpp11::na(); }); } @@ -765,7 +781,8 @@ struct AltrepVectorString : public AltrepVectorBase> { bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end(); if (no_nul) { - return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8); + ARROW_R_DCHECK(view_.size() <= std::numeric_limits::max()); + return Rf_mkCharLenCE(view_.data(), static_cast(view_.size()), CE_UTF8); } else if (strip_out_nuls_) { return ConvertStripNul(); } else { @@ -802,7 +819,9 @@ struct AltrepVectorString : public AltrepVectorBase> { } nul_was_stripped_ = true; - return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8); + ARROW_R_DCHECK(stripped_len <= std::numeric_limits::max()); + return Rf_mkCharLenCE(stripped_string_.data(), static_cast(stripped_len), + CE_UTF8); } bool nul_was_stripped() const { return nul_was_stripped_; } @@ -847,7 +866,8 @@ struct AltrepVectorString : public AltrepVectorBase> { auto altrep_data = reinterpret_cast(R_ExternalPtrAddr(R_altrep_data1(alt))); auto resolve = altrep_data->locate(i); - const auto& array = altrep_data->chunked_array()->chunk(resolve.chunk_index); + const auto& array = + altrep_data->chunked_array()->chunk(static_cast(resolve.chunk_index)); auto j = resolve.index_in_chunk; SEXP s = NA_STRING; diff --git a/r/src/array.cpp b/r/src/array.cpp index ae76c01a94910..38406e494d67b 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -92,7 +92,7 @@ std::shared_ptr Array__Slice2(const std::shared_ptr& return array->Slice(offset, length); } -void arrow::r::validate_index(int i, int len) { +void arrow::r::validate_index(int64_t i, int64_t len) { if (i == NA_INTEGER) { cpp11::stop("'i' cannot be NA"); } @@ -119,10 +119,14 @@ r_vec_size Array__length(const std::shared_ptr& x) { } // [[arrow::export]] -int Array__offset(const std::shared_ptr& x) { return x->offset(); } +r_vec_size Array__offset(const std::shared_ptr& x) { + return r_vec_size(x->offset()); +} // [[arrow::export]] -int Array__null_count(const std::shared_ptr& x) { return x->null_count(); } +r_vec_size Array__null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count()); +} // [[arrow::export]] std::shared_ptr Array__type(const std::shared_ptr& x) { @@ -263,9 +267,9 @@ r_vec_size LargeListArray__value_length( } // [[arrow::export]] -r_vec_size FixedSizeListArray__value_length( +int FixedSizeListArray__value_length( const std::shared_ptr& array, int64_t i) { - return r_vec_size(array->value_length(i)); + return array->value_length(i); } // [[arrow::export]] @@ -294,10 +298,10 @@ cpp11::writable::integers ListArray__raw_value_offsets( } // [[arrow::export]] -cpp11::writable::integers LargeListArray__raw_value_offsets( +cpp11::writable::doubles LargeListArray__raw_value_offsets( const std::shared_ptr& array) { auto offsets = array->raw_value_offsets(); - return cpp11::writable::integers(offsets, offsets + array->length()); + return cpp11::writable::doubles(offsets, offsets + array->length()); } // [[arrow::export]] diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index bf026d2723a1a..2f0508eb7a47a 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -375,7 +375,7 @@ struct Converter_String : public Converter { private: static SEXP r_string_from_view(std::string_view view) { - return Rf_mkCharLenCE(view.data(), view.size(), CE_UTF8); + return Rf_mkCharLenCE(view.data(), static_cast(view.size()), CE_UTF8); } static SEXP r_string_from_view_strip_nul(std::string_view view, @@ -576,10 +576,10 @@ class Converter_Dictionary : public Converter { const auto& 
arr_type = checked_cast(*chunked_array->type()); unifier_ = ValueOrStop(DictionaryUnifier::Make(arr_type.value_type())); - size_t n_arrays = chunked_array->num_chunks(); + int n_arrays = chunked_array->num_chunks(); arrays_transpose_.resize(n_arrays); - for (size_t i = 0; i < n_arrays; i++) { + for (int i = 0; i < n_arrays; i++) { const auto& dict_i = *checked_cast(*chunked_array->chunk(i)).dictionary(); StopIfNotOk(unifier_->Unify(dict_i, &arrays_transpose_[i])); @@ -748,7 +748,7 @@ class Converter_Struct : public Converter { auto colnames = arrow::r::to_r_strings( type->fields(), [](const std::shared_ptr& field) { return field->name(); }); - out.attr(symbols::row_names) = arrow::r::short_row_names(n); + out.attr(symbols::row_names) = arrow::r::short_row_names(static_cast(n)); out.attr(R_NamesSymbol) = colnames; out.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; @@ -756,7 +756,7 @@ class Converter_Struct : public Converter { } Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { - int nf = converters.size(); + int nf = static_cast(converters.size()); for (int i = 0; i < nf; i++) { SEXP data_i = VECTOR_ELT(data, i); @@ -771,7 +771,7 @@ class Converter_Struct : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n, size_t chunk_index) const { auto struct_array = checked_cast(array.get()); - int nf = converters.size(); + int nf = static_cast(converters.size()); // Flatten() deals with merging of nulls auto arrays = ValueOrStop(struct_array->Flatten(gc_memory_pool())); for (int i = 0; i < nf; i++) { @@ -1384,7 +1384,7 @@ cpp11::writable::list to_data_frame(const std::shared_ptr& data, tbl.attr(R_NamesSymbol) = names; tbl.attr(R_ClassSymbol) = arrow::r::data::classes_tbl_df; - tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(nr); + tbl.attr(R_RowNamesSymbol) = arrow::r::short_row_names(static_cast(nr)); return tbl; } diff --git a/r/src/arraydata.cpp b/r/src/arraydata.cpp index cdab38f1147aa..d879e807323af 100644 --- a/r/src/arraydata.cpp +++ b/r/src/arraydata.cpp @@ -26,18 +26,18 @@ std::shared_ptr ArrayData__get_type( } // [[arrow::export]] -int ArrayData__get_length(const std::shared_ptr& x) { - return x->length; +r_vec_size ArrayData__get_length(const std::shared_ptr& x) { + return r_vec_size(x->length); } // [[arrow::export]] -int ArrayData__get_null_count(const std::shared_ptr& x) { - return x->null_count; +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x) { + return r_vec_size(x->null_count); } // [[arrow::export]] -int ArrayData__get_offset(const std::shared_ptr& x) { - return x->offset; +r_vec_size ArrayData__get_offset(const std::shared_ptr& x) { + return r_vec_size(x->offset); } // [[arrow::export]] diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 790207efce1d2..75e0f27b4002e 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -110,7 +110,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__offset(const std::shared_ptr& x); +r_vec_size Array__offset(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -118,7 +118,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -int Array__null_count(const std::shared_ptr& x); +r_vec_size Array__null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_Array__null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -315,7 +315,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -r_vec_size FixedSizeListArray__value_length(const 
std::shared_ptr& array, int64_t i); +int FixedSizeListArray__value_length(const std::shared_ptr& array, int64_t i); extern "C" SEXP _arrow_FixedSizeListArray__value_length(SEXP array_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -359,7 +359,7 @@ BEGIN_CPP11 END_CPP11 } // array.cpp -cpp11::writable::integers LargeListArray__raw_value_offsets(const std::shared_ptr& array); +cpp11::writable::doubles LargeListArray__raw_value_offsets(const std::shared_ptr& array); extern "C" SEXP _arrow_LargeListArray__raw_value_offsets(SEXP array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -467,7 +467,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_length(const std::shared_ptr& x); +r_vec_size ArrayData__get_length(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_length(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -475,7 +475,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_null_count(const std::shared_ptr& x); +r_vec_size ArrayData__get_null_count(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_null_count(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -483,7 +483,7 @@ BEGIN_CPP11 END_CPP11 } // arraydata.cpp -int ArrayData__get_offset(const std::shared_ptr& x); +r_vec_size ArrayData__get_offset(const std::shared_ptr& x); extern "C" SEXP _arrow_ArrayData__get_offset(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -765,7 +765,7 @@ BEGIN_CPP11 END_CPP11 } // chunkedarray.cpp -r_vec_size ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array); extern "C" SEXP _arrow_ChunkedArray__num_chunks(SEXP chunked_array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type chunked_array(chunked_array_sexp); @@ -869,11 +869,11 @@ BEGIN_CPP11 END_CPP11 } // compression.cpp -std::shared_ptr util___Codec__Create(arrow::Compression::type codec, R_xlen_t compression_level); +std::shared_ptr util___Codec__Create(arrow::Compression::type codec, int compression_level); extern "C" SEXP _arrow_util___Codec__Create(SEXP codec_sexp, SEXP compression_level_sexp){ BEGIN_CPP11 arrow::r::Input::type codec(codec_sexp); - arrow::r::Input::type compression_level(compression_level_sexp); + arrow::r::Input::type compression_level(compression_level_sexp); return cpp11::as_sexp(util___Codec__Create(codec, compression_level)); END_CPP11 } @@ -2024,14 +2024,14 @@ extern "C" SEXP _arrow_dataset___JsonFragmentScanOptions__Make(SEXP parse_option // dataset.cpp #if defined(ARROW_R_WITH_DATASET) -std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int64_t thrift_string_size_limit, int64_t thrift_container_size_limit); +std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, int32_t thrift_string_size_limit, int32_t thrift_container_size_limit); extern "C" SEXP _arrow_dataset___ParquetFragmentScanOptions__Make(SEXP use_buffered_stream_sexp, SEXP buffer_size_sexp, SEXP pre_buffer_sexp, SEXP thrift_string_size_limit_sexp, SEXP thrift_container_size_limit_sexp){ BEGIN_CPP11 arrow::r::Input::type use_buffered_stream(use_buffered_stream_sexp); arrow::r::Input::type buffer_size(buffer_size_sexp); arrow::r::Input::type pre_buffer(pre_buffer_sexp); - arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); - arrow::r::Input::type 
thrift_container_size_limit(thrift_container_size_limit_sexp); + arrow::r::Input::type thrift_string_size_limit(thrift_string_size_limit_sexp); + arrow::r::Input::type thrift_container_size_limit(thrift_container_size_limit_sexp); return cpp11::as_sexp(dataset___ParquetFragmentScanOptions__Make(use_buffered_stream, buffer_size, pre_buffer, thrift_string_size_limit, thrift_container_size_limit)); END_CPP11 } @@ -2567,10 +2567,10 @@ BEGIN_CPP11 END_CPP11 } // datatype.cpp -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width); +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width); extern "C" SEXP _arrow_FixedSizeBinary__initialize(SEXP byte_width_sexp){ BEGIN_CPP11 - arrow::r::Input::type byte_width(byte_width_sexp); + arrow::r::Input::type byte_width(byte_width_sexp); return cpp11::as_sexp(FixedSizeBinary__initialize(byte_width)); END_CPP11 } @@ -3976,7 +3976,7 @@ BEGIN_CPP11 END_CPP11 } // message.cpp -r_vec_size ipc___Message__Verify(const std::unique_ptr& message); +bool ipc___Message__Verify(const std::unique_ptr& message); extern "C" SEXP _arrow_ipc___Message__Verify(SEXP message_sexp){ BEGIN_CPP11 arrow::r::Input&>::type message(message_sexp); @@ -4684,7 +4684,7 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x); +int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -4734,11 +4734,11 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, R_xlen_t i); +std::shared_ptr RecordBatch__column(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column(batch, i)); END_CPP11 } @@ -4771,42 +4771,42 @@ BEGIN_CPP11 END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__AddColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__AddColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__AddColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr RecordBatch__SetColumn(const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_RecordBatch__SetColumn(SEXP batch_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(RecordBatch__SetColumn(batch, i, field, column)); END_CPP11 } // recordbatch.cpp -std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, R_xlen_t i); 
+std::shared_ptr RecordBatch__RemoveColumn(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__RemoveColumn(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__RemoveColumn(batch, i)); END_CPP11 } // recordbatch.cpp -std::string RecordBatch__column_name(const std::shared_ptr& batch, R_xlen_t i); +std::string RecordBatch__column_name(const std::shared_ptr& batch, int i); extern "C" SEXP _arrow_RecordBatch__column_name(SEXP batch_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(RecordBatch__column_name(batch, i)); END_CPP11 } @@ -5346,7 +5346,7 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -r_vec_size Table__num_columns(const std::shared_ptr& x); +int Table__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_Table__num_columns(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input&>::type x(x_sexp); @@ -5379,20 +5379,20 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__column(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__column(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__column(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__column(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__field(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__field(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__field(table, i)); END_CPP11 } @@ -5476,31 +5476,31 @@ BEGIN_CPP11 END_CPP11 } // table.cpp -std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, R_xlen_t i); +std::shared_ptr Table__RemoveColumn(const std::shared_ptr& table, int i); extern "C" SEXP _arrow_Table__RemoveColumn(SEXP table_sexp, SEXP i_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); return cpp11::as_sexp(Table__RemoveColumn(table, i)); END_CPP11 } // table.cpp -std::shared_ptr Table__AddColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__AddColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__AddColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__AddColumn(table, i, field, column)); END_CPP11 } // table.cpp -std::shared_ptr Table__SetColumn(const std::shared_ptr& table, R_xlen_t i, const std::shared_ptr& field, const std::shared_ptr& column); +std::shared_ptr Table__SetColumn(const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column); extern "C" SEXP _arrow_Table__SetColumn(SEXP table_sexp, SEXP i_sexp, SEXP field_sexp, SEXP column_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); - 
arrow::r::Input::type i(i_sexp); + arrow::r::Input::type i(i_sexp); arrow::r::Input&>::type field(field_sexp); arrow::r::Input&>::type column(column_sexp); return cpp11::as_sexp(Table__SetColumn(table, i, field, column)); diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index d8c4b719d1d3e..ab60586628164 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -27,6 +27,18 @@ #include "./nameof.h" +// Simple dcheck that doesn't use assert (i.e., won't crash the R session) +// Condition this on our own debug flag to avoid this ending up in any CRAN +// checks. +#if defined(ARROW_R_DEBUG) +#define ARROW_R_DCHECK(EXPR) \ + do { \ + if (!(EXPR)) Rf_error("Failed DCHECK: %s evaluated to false", #EXPR); \ + } while (false) +#else +#define ARROW_R_DCHECK(EXPR) +#endif + // borrowed from enc package // because R does not make these macros available (i.e. from Defn.h) #define UTF8_MASK (1 << 3) @@ -465,7 +477,7 @@ inline SEXP as_sexp(r_vec_size size) { if (x > std::numeric_limits::max()) { return Rf_ScalarReal(x); } else { - return Rf_ScalarInteger(x); + return Rf_ScalarInteger(static_cast(x)); } } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index fadc39c75fc06..05c8f6062dabb 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -189,13 +189,13 @@ void validate_slice_offset(R_xlen_t offset, int64_t len); void validate_slice_length(R_xlen_t length, int64_t available); -void validate_index(int i, int len); +void validate_index(int64_t i, int64_t len); template void TraverseDots(cpp11::list dots, int num_fields, Lambda lambda) { cpp11::strings names(dots.attr(R_NamesSymbol)); - for (R_xlen_t i = 0, j = 0; j < num_fields; i++) { + for (int i = 0, j = 0; j < num_fields; i++) { auto name_i = names[i]; if (name_i.size() == 0) { diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp index 36884bb531b62..258013fc4da57 100644 --- a/r/src/chunkedarray.cpp +++ b/r/src/chunkedarray.cpp @@ -34,9 +34,8 @@ r_vec_size ChunkedArray__null_count( } // [[arrow::export]] -r_vec_size ChunkedArray__num_chunks( - const std::shared_ptr& chunked_array) { - return r_vec_size(chunked_array->num_chunks()); +int ChunkedArray__num_chunks(const std::shared_ptr& chunked_array) { + return chunked_array->num_chunks(); } // [[arrow::export]] diff --git a/r/src/compression.cpp b/r/src/compression.cpp index 148c6e14002f5..bc893afd8d28b 100644 --- a/r/src/compression.cpp +++ b/r/src/compression.cpp @@ -22,7 +22,7 @@ // [[arrow::export]] std::shared_ptr util___Codec__Create(arrow::Compression::type codec, - R_xlen_t compression_level) { + int compression_level) { return ValueOrStop(arrow::util::Codec::Create(codec, compression_level)); } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 87d1326ed3419..bd97e30005ca3 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -241,10 +241,10 @@ std::shared_ptr make_compute_options( interpolation); } if (!Rf_isNull(options["min_count"])) { - out->min_count = cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { - out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); + out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); } return out; } @@ -479,9 +479,9 @@ std::shared_ptr make_compute_options( func_name == "hash_stddev") { using Options = arrow::compute::VarianceOptions; auto out = std::make_shared(); - out->ddof = cpp11::as_cpp(options["ddof"]); + out->ddof = cpp11::as_cpp(options["ddof"]); if (!Rf_isNull(options["min_count"])) { - out->min_count = 
cpp11::as_cpp(options["min_count"]); + out->min_count = cpp11::as_cpp(options["min_count"]); } if (!Rf_isNull(options["skip_nulls"])) { out->skip_nulls = cpp11::as_cpp(options["skip_nulls"]); @@ -683,7 +683,7 @@ arrow::Status CallRScalarUDF(arrow::compute::KernelContext* context, } } - cpp11::sexp batch_length_sexp = cpp11::as_sexp(span.length); + cpp11::sexp batch_length_sexp = cpp11::as_sexp(static_cast(span.length)); std::shared_ptr output_type = result->type()->GetSharedPtr(); cpp11::sexp output_type_sexp = cpp11::to_r6(output_type); @@ -738,8 +738,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { // Compute the Arity from the list of input kernels. We don't currently handle // variable numbers of arguments in a user-defined function. - int64_t n_args = - cpp11::as_cpp>(in_type_r[0])->num_fields(); + int n_args = cpp11::as_cpp>(in_type_r[0])->num_fields(); for (R_xlen_t i = 1; i < n_kernels; i++) { auto in_types = cpp11::as_cpp>(in_type_r[i]); if (in_types->num_fields() != n_args) { @@ -767,7 +766,7 @@ void RegisterScalarUDF(std::string name, cpp11::list func_sexp) { cpp11::sexp out_type_func = out_type_r[i]; std::vector compute_in_types(in_types->num_fields()); - for (int64_t j = 0; j < in_types->num_fields(); j++) { + for (int j = 0; j < in_types->num_fields(); j++) { compute_in_types[j] = arrow::compute::InputType(in_types->field(j)->type()); } diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index 83c430fb634d3..e53fc03bdb413 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -343,8 +343,8 @@ std::shared_ptr dataset___JsonFragmentScanOptions__ std::shared_ptr dataset___ParquetFragmentScanOptions__Make(bool use_buffered_stream, int64_t buffer_size, bool pre_buffer, - int64_t thrift_string_size_limit, - int64_t thrift_container_size_limit) { + int32_t thrift_string_size_limit, + int32_t thrift_container_size_limit) { auto options = std::make_shared(); if (use_buffered_stream) { options->reader_properties->enable_buffered_stream(); diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f19ba92527157..2f2b89d658d91 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -201,7 +201,7 @@ std::shared_ptr DayTimeInterval__initialize() { } // [[arrow::export]] -std::shared_ptr FixedSizeBinary__initialize(R_xlen_t byte_width) { +std::shared_ptr FixedSizeBinary__initialize(int32_t byte_width) { if (byte_width == NA_INTEGER) { cpp11::stop("'byte_width' cannot be NA"); } diff --git a/r/src/io.cpp b/r/src/io.cpp index 321b1b17febc3..4d5ee31794ae8 100644 --- a/r/src/io.cpp +++ b/r/src/io.cpp @@ -253,11 +253,16 @@ class RConnectionFileInterface : public virtual arrow::io::FileInterface { return arrow::Status::IOError("R connection is closed"); } + if (nbytes > std::numeric_limits::max()) { + return arrow::Status::Invalid( + "Can't read more than INT_MAX bytes from an R connection"); + } + return SafeCallIntoR( [&] { cpp11::function read_bin = cpp11::package("base")["readBin"]; cpp11::writable::raws ptype((R_xlen_t)0); - cpp11::integers n = cpp11::as_sexp(nbytes); + cpp11::integers n = cpp11::as_sexp(static_cast(nbytes)); cpp11::sexp result = read_bin(connection_sexp_, ptype, n); @@ -512,8 +517,8 @@ struct ReencodeUTF8TransformFunctionWrapper { // UTF-16, and UTF-32. while (in_bytes_left > 0) { // Make enough place in the output to hopefully consume all of the input. 
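The io.cpp hunk above rejects reads larger than INT_MAX before calling base::readBin(), because the byte count is passed to R as an integer vector. Below is a standalone sketch of that guard-then-narrow pattern; the helper name checked_narrow_to_int is illustrative, and it throws an exception where the real binding returns arrow::Status::Invalid.

// Sketch of the guard added before readBin(): range-check the 64-bit
// request, then narrow explicitly rather than truncating silently.
#include <cstdint>
#include <limits>
#include <stdexcept>

// Hypothetical helper, not part of the Arrow R package.
int checked_narrow_to_int(int64_t nbytes) {
  if (nbytes > std::numeric_limits<int>::max()) {
    throw std::runtime_error(
        "Can't read more than INT_MAX bytes from an R connection");
  }
  return static_cast<int>(nbytes);
}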
- RETURN_NOT_OK( - builder.Reserve(std::max(in_bytes_left * kOversizeFactor, 4))); + RETURN_NOT_OK(builder.Reserve( + std::max(static_cast(in_bytes_left * kOversizeFactor), 4))); out_buf = builder.mutable_data() + builder.length(); out_bytes_left = builder.capacity() - builder.length(); diff --git a/r/src/message.cpp b/r/src/message.cpp index d9832ddc22a74..3f21873fea3b2 100644 --- a/r/src/message.cpp +++ b/r/src/message.cpp @@ -39,8 +39,8 @@ std::shared_ptr ipc___Message__body( } // [[arrow::export]] -r_vec_size ipc___Message__Verify(const std::unique_ptr& message) { - return r_vec_size(message->Verify()); +bool ipc___Message__Verify(const std::unique_ptr& message) { + return message->Verify(); } // [[arrow::export]] diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d9bf848e24292..d2db11e14a787 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -335,7 +335,7 @@ struct RConvert { template static enable_if_integer> Convert(Type*, From from) { - return CIntFromRScalarImpl(from); + return CIntFromRScalarImpl(static_cast(from)); } // ---- convert R integer types to double @@ -461,7 +461,7 @@ class RPrimitiveConverter< if (std::is_same::value) { auto append_value = [this](r_value_type value) { - this->primitive_builder_->UnsafeAppend(value); + this->primitive_builder_->UnsafeAppend(static_cast(value)); return Status::OK(); }; return VisitVector(it, size, append_null, append_value); @@ -595,19 +595,21 @@ class RPrimitiveConverter::value>> return VisitVector(it, size, append_null, append_value); } - static int FromRDate(const Date32Type*, int from) { return from; } + static int FromRDate(const Date32Type*, double from) { return static_cast(from); } - static int64_t FromRDate(const Date64Type*, int from) { + static int64_t FromRDate(const Date64Type*, double from) { constexpr int64_t kMilliSecondsPerDay = 86400000; - return from * kMilliSecondsPerDay; + return static_cast(from * kMilliSecondsPerDay); } static int FromPosixct(const Date32Type*, double from) { constexpr int64_t kSecondsPerDay = 86400; - return from / kSecondsPerDay; + return static_cast(from / kSecondsPerDay); } - static int64_t FromPosixct(const Date64Type*, double from) { return from * 1000; } + static int64_t FromPosixct(const Date64Type*, double from) { + return static_cast(from * 1000); + } }; int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { @@ -1081,7 +1083,7 @@ class RListConverter : public ListConverter { auto append_value = [this](SEXP value) { // TODO: if we decide that this can be run concurrently // we'll have to do vec_size() upfront - int n = arrow::r::vec_size(value); + R_xlen_t n = arrow::r::vec_size(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index aca3a74fd81df..bf88e98ed1026 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -27,8 +27,8 @@ #include // [[arrow::export]] -r_vec_size RecordBatch__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int RecordBatch__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -80,7 +80,7 @@ cpp11::list RecordBatch__columns(const std::shared_ptr& batc // [[arrow::export]] std::shared_ptr RecordBatch__column( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column(i); } @@ -106,7 +106,7 @@ bool RecordBatch__Equals(const std::shared_ptr& 
self, // [[arrow::export]] std::shared_ptr RecordBatch__AddColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->AddColumn(i, field, column)); @@ -114,7 +114,7 @@ std::shared_ptr RecordBatch__AddColumn( // [[arrow::export]] std::shared_ptr RecordBatch__SetColumn( - const std::shared_ptr& batch, R_xlen_t i, + const std::shared_ptr& batch, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(batch->SetColumn(i, field, column)); @@ -122,14 +122,14 @@ std::shared_ptr RecordBatch__SetColumn( // [[arrow::export]] std::shared_ptr RecordBatch__RemoveColumn( - const std::shared_ptr& batch, R_xlen_t i) { + const std::shared_ptr& batch, int i) { arrow::r::validate_index(i, batch->num_columns()); return ValueOrStop(batch->RemoveColumn(i)); } // [[arrow::export]] std::string RecordBatch__column_name(const std::shared_ptr& batch, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, batch->num_columns()); return batch->column_name(i); } diff --git a/r/src/schema.cpp b/r/src/schema.cpp index cf959707305a7..41d3d38d2eda3 100644 --- a/r/src/schema.cpp +++ b/r/src/schema.cpp @@ -29,14 +29,14 @@ std::shared_ptr Schema__from_fields( // [[arrow::export]] std::shared_ptr Schema__from_list(cpp11::list field_list) { - int n = field_list.size(); + R_xlen_t n = field_list.size(); bool nullable = true; cpp11::strings names(field_list.attr(R_NamesSymbol)); std::vector> fields(n); - for (int i = 0; i < n; i++) { + for (R_xlen_t i = 0; i < n; i++) { fields[i] = arrow::field( names[i], cpp11::as_cpp>(field_list[i]), nullable); diff --git a/r/src/table.cpp b/r/src/table.cpp index 04537000f5d48..04a8c7caf24fd 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -23,8 +23,8 @@ #include // [[arrow::export]] -r_vec_size Table__num_columns(const std::shared_ptr& x) { - return r_vec_size(x->num_columns()); +int Table__num_columns(const std::shared_ptr& x) { + return x->num_columns(); } // [[arrow::export]] @@ -49,14 +49,14 @@ std::shared_ptr Table__ReplaceSchemaMetadata( // [[arrow::export]] std::shared_ptr Table__column( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { arrow::r::validate_index(i, table->num_columns()); return table->column(i); } // [[arrow::export]] std::shared_ptr Table__field(const std::shared_ptr& table, - R_xlen_t i) { + int i) { arrow::r::validate_index(i, table->num_columns()); return table->field(i); } @@ -123,13 +123,13 @@ std::shared_ptr Table__GetColumnByName( // [[arrow::export]] std::shared_ptr Table__RemoveColumn( - const std::shared_ptr& table, R_xlen_t i) { + const std::shared_ptr& table, int i) { return ValueOrStop(table->RemoveColumn(i)); } // [[arrow::export]] std::shared_ptr Table__AddColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->AddColumn(i, field, column)); @@ -137,7 +137,7 @@ std::shared_ptr Table__AddColumn( // [[arrow::export]] std::shared_ptr Table__SetColumn( - const std::shared_ptr& table, R_xlen_t i, + const std::shared_ptr& table, int i, const std::shared_ptr& field, const std::shared_ptr& column) { return ValueOrStop(table->SetColumn(i, field, column)); @@ -241,7 +241,7 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, // Remove metadata for ExtensionType columns, because these have their own mechanism for // preserving R type 
information
-  for (R_xlen_t i = 0; i < schema->num_fields(); i++) {
+  for (int i = 0; i < schema->num_fields(); i++) {
     if (schema->field(i)->type()->id() == Type::EXTENSION) {
       metadata_columns[i] = R_NilValue;
     }
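Taken together, these hunks follow one convention: declare loop counters and indices with the same width the Arrow C++ API uses (schema->num_fields() returns int, for example), and narrow 64-bit values only through an explicit static_cast at a point where the range is known to be safe. A compact illustration with made-up names; count_extension_fields is not from the patch.

// Illustrative only: the loop counter matches the `int` width of its bound,
// and narrowing from a 64-bit size happens through one explicit cast.
#include <cstdint>
#include <vector>

// Hypothetical stand-in for iterating schema fields by integer index.
int count_extension_fields(const std::vector<int>& field_type_ids,
                           int extension_type_id) {
  const int num_fields = static_cast<int>(field_type_ids.size());
  int n = 0;
  for (int i = 0; i < num_fields; i++) {
    if (field_type_ids[i] == extension_type_id) {
      ++n;
    }
  }
  return n;
}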