diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index 03d6025b4ef93..d9856eb695652 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -194,7 +194,7 @@ documentation. ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ``to_numpy`` for NumPy nullable and Arrow types will now convert to a -suitable NumPy dtype instead of ``object`` dtype for nullable extension dtypes. +suitable NumPy dtype instead of ``object`` dtype for nullable and PyArrow backed extension dtypes. *Old behavior:* diff --git a/pandas/core/arrays/_utils.py b/pandas/core/arrays/_utils.py new file mode 100644 index 0000000000000..c75ec7f843ed2 --- /dev/null +++ b/pandas/core/arrays/_utils.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Any, +) + +import numpy as np + +from pandas._libs import lib +from pandas.errors import LossySetitemError + +from pandas.core.dtypes.cast import np_can_hold_element +from pandas.core.dtypes.common import is_numeric_dtype + +if TYPE_CHECKING: + from pandas._typing import ( + ArrayLike, + npt, + ) + + +def to_numpy_dtype_inference( + arr: ArrayLike, dtype: npt.DTypeLike | None, na_value, hasna: bool +) -> tuple[npt.DTypeLike, Any]: + if dtype is None and is_numeric_dtype(arr.dtype): + dtype_given = False + if hasna: + if arr.dtype.kind == "b": + dtype = np.dtype(np.object_) + else: + if arr.dtype.kind in "iu": + dtype = np.dtype(np.float64) + else: + dtype = arr.dtype.numpy_dtype # type: ignore[union-attr] + if na_value is lib.no_default: + na_value = np.nan + else: + dtype = arr.dtype.numpy_dtype # type: ignore[union-attr] + elif dtype is not None: + dtype = np.dtype(dtype) + dtype_given = True + else: + dtype_given = True + + if na_value is lib.no_default: + na_value = arr.dtype.na_value + + if not dtype_given and hasna: + try: + np_can_hold_element(dtype, na_value) # type: ignore[arg-type] + except LossySetitemError: + dtype = np.dtype(np.object_) + return dtype, na_value diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 859c4b39571f8..23b5448029dd9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -39,6 +39,7 @@ is_bool_dtype, is_integer, is_list_like, + is_numeric_dtype, is_scalar, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -50,8 +51,10 @@ ops, roperator, ) +from pandas.core.algorithms import map_array from pandas.core.arraylike import OpsMixin from pandas.core.arrays._arrow_string_mixins import ArrowStringArrayMixin +from pandas.core.arrays._utils import to_numpy_dtype_inference from pandas.core.arrays.base import ( ExtensionArray, ExtensionArraySupportsAnyAll, @@ -1317,12 +1320,7 @@ def to_numpy( copy: bool = False, na_value: object = lib.no_default, ) -> np.ndarray: - if dtype is not None: - dtype = np.dtype(dtype) - - if na_value is lib.no_default: - na_value = self.dtype.na_value - + dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, self._hasna) pa_type = self._pa_array.type if not self._hasna or isna(na_value) or pa.types.is_null(pa_type): data = self @@ -1366,6 +1364,12 @@ def to_numpy( result[~mask] = data[~mask]._pa_array.to_numpy() return result + def map(self, mapper, na_action=None): + if is_numeric_dtype(self.dtype): + return map_array(self.to_numpy(), mapper, na_action=None) + else: + return super().map(mapper, na_action) + @doc(ExtensionArray.duplicated) def duplicated( self, keep: Literal["first", "last", False] = "first" diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 00c7276a2216e..03c09c5b2fd18 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -35,15 +35,11 @@ IS64, is_platform_windows, ) -from pandas.errors import ( - AbstractMethodError, - LossySetitemError, -) +from pandas.errors import AbstractMethodError from pandas.util._decorators import doc from pandas.util._validators import validate_fillna_kwargs from pandas.core.dtypes.base import ExtensionDtype -from pandas.core.dtypes.cast import np_can_hold_element from pandas.core.dtypes.common import ( is_bool, is_integer_dtype, @@ -80,6 +76,7 @@ ) from pandas.core.array_algos.quantile import quantile_with_mask from pandas.core.arraylike import OpsMixin +from pandas.core.arrays._utils import to_numpy_dtype_inference from pandas.core.arrays.base import ExtensionArray from pandas.core.construction import ( array as pd_array, @@ -477,32 +474,7 @@ def to_numpy( array([ True, False, False]) """ hasna = self._hasna - - if dtype is None: - dtype_given = False - if hasna: - if self.dtype.kind == "b": - dtype = object - else: - if self.dtype.kind in "iu": - dtype = np.dtype(np.float64) - else: - dtype = self.dtype.numpy_dtype - if na_value is lib.no_default: - na_value = np.nan - else: - dtype = self.dtype.numpy_dtype - else: - dtype = np.dtype(dtype) - dtype_given = True - if na_value is lib.no_default: - na_value = libmissing.NA - - if not dtype_given and hasna: - try: - np_can_hold_element(dtype, na_value) # type: ignore[arg-type] - except LossySetitemError: - dtype = object + dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna) if hasna: if ( diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7d87c366f0566..3b03272f18203 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -278,7 +278,13 @@ def test_map(self, data_missing, na_action): expected = data_missing.to_numpy(dtype=object) tm.assert_numpy_array_equal(result, expected) else: - super().test_map(data_missing, na_action) + result = data_missing.map(lambda x: x, na_action=na_action) + if data_missing.dtype == "float32[pyarrow]": + # map roundtrips through objects, which converts to float64 + expected = data_missing.to_numpy(dtype="float64", na_value=np.nan) + else: + expected = data_missing.to_numpy() + tm.assert_numpy_array_equal(result, expected) def test_astype_str(self, data, request): pa_dtype = data.dtype.pyarrow_dtype @@ -1585,7 +1591,7 @@ def test_to_numpy_with_defaults(data): else: expected = np.array(data._pa_array) - if data._hasna: + if data._hasna and not is_numeric_dtype(data.dtype): expected = expected.astype(object) expected[pd.isna(data)] = pd.NA @@ -1597,8 +1603,8 @@ def test_to_numpy_int_with_na(): data = [1, None] arr = pd.array(data, dtype="int64[pyarrow]") result = arr.to_numpy() - expected = np.array([1, pd.NA], dtype=object) - assert isinstance(result[0], int) + expected = np.array([1, np.nan]) + assert isinstance(result[0], float) tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/test_npfuncs.py b/pandas/tests/series/test_npfuncs.py index 08950db25b282..11a51c4700d5c 100644 --- a/pandas/tests/series/test_npfuncs.py +++ b/pandas/tests/series/test_npfuncs.py @@ -5,6 +5,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import Series import pandas._testing as tm @@ -33,3 +35,12 @@ def test_numpy_argwhere(index): expected = np.array([[3], [4]], dtype=np.int64) tm.assert_numpy_array_equal(result, expected) + + +@td.skip_if_no("pyarrow") +def test_log_arrow_backed_missing_value(): + # GH#56285 + ser = Series([1, 2, None], dtype="float64[pyarrow]") + result = np.log(ser) + expected = np.log(Series([1, 2, None], dtype="float64")) + tm.assert_series_equal(result, expected)