diff --git a/asv_bench/benchmarks/frame_methods.py b/asv_bench/benchmarks/frame_methods.py index a283afd1f0f1e..eead38dffaff4 100644 --- a/asv_bench/benchmarks/frame_methods.py +++ b/asv_bench/benchmarks/frame_methods.py @@ -593,8 +593,6 @@ def setup(self): N = 10000 # this is the worst case, where every column has NaNs. arr = np.random.randn(N, 100) - # NB: we need to set values in array, not in df.values, otherwise - # the benchmark will be misleading for ArrayManager arr[::2] = np.nan self.df = DataFrame(arr) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 4accf8be46b9e..f316f6b44c1b4 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -104,6 +104,8 @@ Removal of prior version deprecations/changes - Removed :meth:`DataFrameGroupby.fillna` and :meth:`SeriesGroupBy.fillna` (:issue:`55719`) - Removed ``axis`` argument from all groupby operations (:issue:`50405`) - Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`) +- Removed the ``ArrayManager`` (:issue:`55043`) +- .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/__init__.py b/pandas/__init__.py index ed524c2bb3619..535522253c415 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -1,6 +1,5 @@ from __future__ import annotations -import os import warnings __docformat__ = "restructuredtext" @@ -193,16 +192,6 @@ __git_version__ = v.get("full-revisionid") del get_versions, v -# GH#55043 - deprecation of the data_manager option -if "PANDAS_DATA_MANAGER" in os.environ: - warnings.warn( - "The env variable PANDAS_DATA_MANAGER is set. The data_manager option is " - "deprecated and will be removed in a future version. Only the BlockManager " - "will be available. Unset this environment variable to silence this warning.", - FutureWarning, - stacklevel=2, - ) - # DeprecationWarning for missing pyarrow from pandas.compat.pyarrow import pa_version_under10p1, pa_not_found @@ -232,7 +221,7 @@ del VERSIONS, pa_msg # Delete all unnecessary imported modules -del pa_version_under10p1, pa_not_found, warnings, os +del pa_version_under10p1, pa_not_found, warnings # module level doc-string __doc__ = """ diff --git a/pandas/_config/__init__.py b/pandas/_config/__init__.py index 97784c924dab4..5b2bac2e8d747 100644 --- a/pandas/_config/__init__.py +++ b/pandas/_config/__init__.py @@ -33,18 +33,12 @@ def using_copy_on_write() -> bool: _mode_options = _global_config["mode"] - return ( - _mode_options["copy_on_write"] is True - and _mode_options["data_manager"] == "block" - ) + return _mode_options["copy_on_write"] is True def warn_copy_on_write() -> bool: _mode_options = _global_config["mode"] - return ( - _mode_options["copy_on_write"] == "warn" - and _mode_options["data_manager"] == "block" - ) + return _mode_options["copy_on_write"] == "warn" def using_nullable_dtypes() -> bool: diff --git a/pandas/_typing.py b/pandas/_typing.py index 0233fbbcf3a12..c704516f74300 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -61,9 +61,7 @@ ) from pandas.core.indexes.base import Index from pandas.core.internals import ( - ArrayManager, BlockManager, - SingleArrayManager, SingleBlockManager, ) from pandas.core.resample import Resampler @@ -382,11 +380,7 @@ def closed(self) -> bool: ] # internals -Manager = Union[ - "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager" -] -SingleManager = Union["SingleArrayManager", "SingleBlockManager"] -Manager2D = Union["ArrayManager", "BlockManager"] +Manager = Union["BlockManager", "SingleBlockManager"] # indexing # PositionalIndexer -> valid 1D positional indexer, e.g. can pass diff --git a/pandas/conftest.py b/pandas/conftest.py index 9979488bb6d5d..26e03ca30d4fb 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -48,8 +48,6 @@ utc, ) -from pandas._config.config import _get_option - import pandas.util._test_decorators as td from pandas.core.dtypes.dtypes import ( @@ -1965,10 +1963,7 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return ( - pd.options.mode.copy_on_write is True - and _get_option("mode.data_manager", silent=True) == "block" - ) + return pd.options.mode.copy_on_write is True @pytest.fixture @@ -1976,10 +1971,7 @@ def warn_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is in warning mode. """ - return ( - pd.options.mode.copy_on_write == "warn" - and _get_option("mode.data_manager", silent=True) == "block" - ) + return pd.options.mode.copy_on_write == "warn" @pytest.fixture diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 7d37c28359684..8734a1303853f 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -1256,7 +1256,7 @@ def series_generator(self) -> Generator[Series, None, None]: ser = self.obj._ixs(0, axis=0) mgr = ser._mgr - is_view = mgr.blocks[0].refs.has_reference() # type: ignore[union-attr] + is_view = mgr.blocks[0].refs.has_reference() if isinstance(ser.dtype, ExtensionDtype): # values will be incorrect for this block @@ -1278,7 +1278,7 @@ def series_generator(self) -> Generator[Series, None, None]: # -> if that happened and `ser` is already a copy, then we reset # the refs here to avoid triggering a unnecessary CoW inside the # applied function (https://github.com/pandas-dev/pandas/pull/56212) - mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0]) # type: ignore[union-attr] + mgr.blocks[0].refs = BlockValuesRefs(mgr.blocks[0]) yield ser @staticmethod diff --git a/pandas/core/arraylike.py b/pandas/core/arraylike.py index 83ee81c05ff6e..dde1b8a35e2f0 100644 --- a/pandas/core/arraylike.py +++ b/pandas/core/arraylike.py @@ -265,10 +265,7 @@ def array_ufunc(self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any) Series, ) from pandas.core.generic import NDFrame - from pandas.core.internals import ( - ArrayManager, - BlockManager, - ) + from pandas.core.internals import BlockManager cls = type(self) @@ -352,7 +349,7 @@ def _reconstruct(result): if method == "outer": raise NotImplementedError return result - if isinstance(result, (BlockManager, ArrayManager)): + if isinstance(result, BlockManager): # we went through BlockManager.apply e.g. np.sqrt result = self._constructor_from_mgr(result, axes=result.axes) else: diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 0a9d5af7cbd42..f9e6b3296eb13 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -436,32 +436,6 @@ def use_inf_as_na_cb(key) -> None: "version. Convert inf values to NaN before operating instead.", ) -data_manager_doc = """ -: string - Internal data manager type; can be "block" or "array". Defaults to "block", - unless overridden by the 'PANDAS_DATA_MANAGER' environment variable (needs - to be set before pandas is imported). -""" - - -with cf.config_prefix("mode"): - cf.register_option( - "data_manager", - # Get the default from an environment variable, if set, otherwise defaults - # to "block". This environment variable can be set for testing. - os.environ.get("PANDAS_DATA_MANAGER", "block"), - data_manager_doc, - validator=is_one_of_factory(["block", "array"]), - ) - -cf.deprecate_option( - # GH#55043 - "mode.data_manager", - "data_manager option is deprecated and will be removed in a future " - "version. Only the BlockManager will be available.", -) - - # TODO better name? copy_on_write_doc = """ : bool diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bf793a9bc677a..b33be6f3f2ac4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -44,7 +44,6 @@ using_copy_on_write, warn_copy_on_write, ) -from pandas._config.config import _get_option from pandas._libs import ( algos as libalgos, @@ -168,15 +167,11 @@ check_bool_indexer, check_dict_or_set_indexers, ) -from pandas.core.internals import ( - ArrayManager, - BlockManager, -) +from pandas.core.internals import BlockManager from pandas.core.internals.construction import ( arrays_to_mgr, dataclasses_to_dicts, dict_to_mgr, - mgr_to_mgr, ndarray_to_mgr, nested_data_to_arrays, rec_array_to_mgr, @@ -650,7 +645,7 @@ class DataFrame(NDFrame, OpsMixin): _HANDLED_TYPES = (Series, Index, ExtensionArray, np.ndarray) _accessors: set[str] = {"sparse"} _hidden_attrs: frozenset[str] = NDFrame._hidden_attrs | frozenset([]) - _mgr: BlockManager | ArrayManager + _mgr: BlockManager # similar to __array_priority__, positions DataFrame before Series, Index, # and ExtensionArray. Should NOT be overridden by subclasses. @@ -704,7 +699,7 @@ def __init__( # to avoid the result sharing the same Manager data = data.copy(deep=False) - if isinstance(data, (BlockManager, ArrayManager)): + if isinstance(data, BlockManager): if not allow_mgr: # GH#52419 warnings.warn( @@ -724,8 +719,6 @@ def __init__( NDFrame.__init__(self, data) return - manager = _get_option("mode.data_manager", silent=True) - is_pandas_object = isinstance(data, (Series, Index, ExtensionArray)) data_dtype = getattr(data, "dtype", None) original_dtype = dtype @@ -740,14 +733,6 @@ def __init__( if isinstance(data, dict): # retain pre-GH#38939 default behavior copy = True - elif ( - manager == "array" - and isinstance(data, (np.ndarray, ExtensionArray)) - and data.ndim == 2 - ): - # INFO(ArrayManager) by default copy the 2D input array to get - # contiguous 1D arrays - copy = True elif using_copy_on_write() and not isinstance( data, (Index, DataFrame, Series) ): @@ -761,14 +746,14 @@ def __init__( dtype = dtype if dtype is not None else pandas_dtype(object) data = [] - if isinstance(data, (BlockManager, ArrayManager)): + if isinstance(data, BlockManager): mgr = self._init_mgr( data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy ) elif isinstance(data, dict): # GH#38939 de facto copy defaults to False only in non-dict cases - mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) + mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy) elif isinstance(data, ma.MaskedArray): from numpy.ma import mrecords @@ -788,7 +773,6 @@ def __init__( columns, dtype=dtype, copy=copy, - typ=manager, ) elif isinstance(data, (np.ndarray, Series, Index, ExtensionArray)): @@ -801,7 +785,6 @@ def __init__( columns, dtype, copy, - typ=manager, ) elif getattr(data, "name", None) is not None: # i.e. Series/Index with non-None name @@ -813,7 +796,6 @@ def __init__( index, columns, dtype=dtype, - typ=manager, copy=_copy, ) else: @@ -823,7 +805,6 @@ def __init__( columns, dtype=dtype, copy=copy, - typ=manager, ) # For data is list-like, or Iterable (will consume into list) @@ -854,7 +835,6 @@ def __init__( columns, index, dtype=dtype, - typ=manager, ) else: mgr = ndarray_to_mgr( @@ -863,7 +843,6 @@ def __init__( columns, dtype=dtype, copy=copy, - typ=manager, ) else: mgr = dict_to_mgr( @@ -871,7 +850,6 @@ def __init__( index, columns if columns is not None else default_index(0), dtype=dtype, - typ=manager, ) # For data is scalar else: @@ -892,7 +870,7 @@ def __init__( construct_1d_arraylike_from_scalar(data, len(index), dtype) for _ in range(len(columns)) ] - mgr = arrays_to_mgr(values, columns, index, dtype=None, typ=manager) + mgr = arrays_to_mgr(values, columns, index, dtype=None) else: arr2d = construct_2d_arraylike_from_scalar( data, @@ -908,12 +886,8 @@ def __init__( columns, dtype=arr2d.dtype, copy=False, - typ=manager, ) - # ensure correct Manager type according to settings - mgr = mgr_to_mgr(mgr, typ=manager) - NDFrame.__init__(self, mgr) if original_dtype is None and is_pandas_object and data_dtype == np.object_: @@ -1094,8 +1068,6 @@ def _can_fast_transpose(self) -> bool: """ Can we transpose this DataFrame without creating any new array objects. """ - if isinstance(self._mgr, ArrayManager): - return False blocks = self._mgr.blocks if len(blocks) != 1: return False @@ -1111,13 +1083,6 @@ def _values(self) -> np.ndarray | DatetimeArray | TimedeltaArray | PeriodArray: """ mgr = self._mgr - if isinstance(mgr, ArrayManager): - if len(mgr.arrays) == 1 and not is_1d_only_ea_dtype(mgr.arrays[0].dtype): - # error: Item "ExtensionArray" of "Union[ndarray, ExtensionArray]" - # has no attribute "reshape" - return mgr.arrays[0].reshape(-1, 1) # type: ignore[union-attr] - return ensure_wrapped_if_datetimelike(self.values) - blocks = mgr.blocks if len(blocks) != 1: return ensure_wrapped_if_datetimelike(self.values) @@ -1546,7 +1511,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: for k, v in zip(self.index, self.values): s = klass(v, index=columns, name=k).__finalize__(self) if using_cow and self._mgr.is_single_block: - s._mgr.add_references(self._mgr) # type: ignore[arg-type] + s._mgr.add_references(self._mgr) yield k, s def itertuples( @@ -2514,9 +2479,7 @@ def maybe_reorder( columns = columns.drop(exclude) - manager = _get_option("mode.data_manager", silent=True) - mgr = arrays_to_mgr(arrays, columns, result_index, typ=manager) - + mgr = arrays_to_mgr(arrays, columns, result_index) return cls._from_mgr(mgr, axes=mgr.axes) def to_records( @@ -2714,7 +2677,6 @@ def _from_arrays( if dtype is not None: dtype = pandas_dtype(dtype) - manager = _get_option("mode.data_manager", silent=True) columns = ensure_index(columns) if len(columns) != len(arrays): raise ValueError("len(columns) must match len(arrays)") @@ -2724,7 +2686,6 @@ def _from_arrays( index, dtype=dtype, verify_integrity=verify_integrity, - typ=manager, ) return cls._from_mgr(mgr, axes=mgr.axes) @@ -3869,7 +3830,7 @@ def transpose(self, *args, copy: bool = False) -> DataFrame: dtype=new_vals.dtype, ) if using_copy_on_write() and len(self) > 0: - result._mgr.add_references(self._mgr) # type: ignore[arg-type] + result._mgr.add_references(self._mgr) elif ( self._is_homogeneous_type @@ -4009,11 +3970,8 @@ def _iter_column_arrays(self) -> Iterator[ArrayLike]: Warning! The returned array is a view but doesn't handle Copy-on-Write, so this should be used with caution (for read-only purposes). """ - if isinstance(self._mgr, ArrayManager): - yield from self._mgr.arrays - else: - for i in range(len(self.columns)): - yield self._get_column_array(i) + for i in range(len(self.columns)): + yield self._get_column_array(i) def _getitem_nocopy(self, key: list): """ @@ -4256,7 +4214,7 @@ def __setitem__(self, key, value) -> None: warn_copy_on_write() or ( not warn_copy_on_write() - and any(b.refs.has_reference() for b in self._mgr.blocks) # type: ignore[union-attr] + and any(b.refs.has_reference() for b in self._mgr.blocks) ) ): warnings.warn( @@ -7999,13 +7957,7 @@ def _dispatch_frame_op( # TODO operate_blockwise expects a manager of the same type bm = self._mgr.operate_blockwise( - # error: Argument 1 to "operate_blockwise" of "ArrayManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "ArrayManager" - # error: Argument 1 to "operate_blockwise" of "BlockManager" has - # incompatible type "Union[ArrayManager, BlockManager]"; expected - # "BlockManager" - right._mgr, # type: ignore[arg-type] + right._mgr, array_op, ) return self._constructor_from_mgr(bm, axes=bm.axes) @@ -11516,9 +11468,7 @@ def func(values: np.ndarray): def blk_func(values, axis: Axis = 1): if isinstance(values, ExtensionArray): - if not is_1d_only_ea_dtype(values.dtype) and not isinstance( - self._mgr, ArrayManager - ): + if not is_1d_only_ea_dtype(values.dtype): return values._reduce(name, axis=1, skipna=skipna, **kwds) has_keepdims = dtype_has_keepdims.get(values.dtype) if has_keepdims is None: @@ -12648,8 +12598,6 @@ def _to_dict_of_blocks(self): Internal ONLY - only works for BlockManager """ mgr = self._mgr - # convert to BlockManager if needed -> this way support ArrayManager as well - mgr = cast(BlockManager, mgr_to_mgr(mgr, "block")) return { k: self._constructor_from_mgr(v, axes=v.axes).__finalize__(self) for k, v in mgr.to_dict().items() diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cb3368b489766..bee8111d7c92e 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -176,15 +176,8 @@ default_index, ensure_index, ) -from pandas.core.internals import ( - ArrayManager, - BlockManager, - SingleArrayManager, -) -from pandas.core.internals.construction import ( - mgr_to_mgr, - ndarray_to_mgr, -) +from pandas.core.internals import BlockManager +from pandas.core.internals.construction import ndarray_to_mgr from pandas.core.methods.describe import describe_ndframe from pandas.core.missing import ( clean_fill_method, @@ -318,29 +311,6 @@ def _init_mgr( mgr = mgr.astype(dtype=dtype) return mgr - @final - def _as_manager(self, typ: str, copy: bool_t = True) -> Self: - """ - Private helper function to create a DataFrame with specific manager. - - Parameters - ---------- - typ : {"block", "array"} - copy : bool, default True - Only controls whether the conversion from Block->ArrayManager - copies the 1D arrays (to ensure proper/contiguous memory layout). - - Returns - ------- - DataFrame - New DataFrame using specified manager type. Is not guaranteed - to be a copy or not. - """ - new_mgr: Manager - new_mgr = mgr_to_mgr(self._mgr, typ=typ, copy=copy) - # fastpath of passing a manager doesn't check the option/manager class - return self._constructor_from_mgr(new_mgr, axes=new_mgr.axes).__finalize__(self) - @classmethod def _from_mgr(cls, mgr: Manager, axes: list[Index]) -> Self: """ @@ -679,9 +649,9 @@ def _is_view_after_cow_rules(self): # Only to be used in cases of chained assignment checks, this is a # simplified check that assumes that either the whole object is a view # or a copy - if len(self._mgr.blocks) == 0: # type: ignore[union-attr] + if len(self._mgr.blocks) == 0: return False - return self._mgr.blocks[0].refs.has_reference() # type: ignore[union-attr] + return self._mgr.blocks[0].refs.has_reference() @property def shape(self) -> tuple[int, ...]: @@ -865,7 +835,6 @@ def swapaxes(self, axis1: Axis, axis2: Axis, copy: bool_t | None = None) -> Self new_axes[1], dtype=None, copy=False, - typ="block", ) assert isinstance(new_mgr, BlockManager) assert isinstance(self._mgr, BlockManager) @@ -6437,8 +6406,6 @@ def _protect_consolidate(self, f): Consolidate _mgr -- if the blocks have changed, then clear the cache """ - if isinstance(self._mgr, (ArrayManager, SingleArrayManager)): - return f() blocks_before = len(self._mgr.blocks) result = f() if len(self._mgr.blocks) != blocks_before: @@ -7108,7 +7075,7 @@ def convert_dtypes( dtype: string """ check_dtype_backend(dtype_backend) - new_mgr = self._mgr.convert_dtypes( # type: ignore[union-attr] + new_mgr = self._mgr.convert_dtypes( infer_objects=infer_objects, convert_string=convert_string, convert_integer=convert_integer, @@ -12845,8 +12812,8 @@ def _inplace_method(self, other, op) -> Self: and not (warn_copy_on_write() and not warn) ): # GH#36498 this inplace op can _actually_ be inplace. - # Item "ArrayManager" of "Union[ArrayManager, SingleArrayManager, - # BlockManager, SingleBlockManager]" has no attribute "setitem_inplace" + # Item "BlockManager" of "Union[BlockManager, SingleBlockManager]" has + # no attribute "setitem_inplace" self._mgr.setitem_inplace( # type: ignore[union-attr] slice(None), result._values, warn=warn ) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e16578aa51bcd..13d58602351dd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -96,11 +96,11 @@ from pandas._typing import ( ArrayLike, AxisInt, + BlockManager, CorrelationMethod, IndexLabel, Manager, - Manager2D, - SingleManager, + SingleBlockManager, TakeIndexer, ) @@ -153,7 +153,7 @@ def _wrap_agged_manager(self, mgr: Manager) -> Series: def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None - ) -> SingleManager: + ) -> SingleBlockManager: ser = self._obj_with_exclusions single = ser._mgr if numeric_only and not is_numeric_dtype(ser.dtype): @@ -1577,7 +1577,7 @@ def _cython_transform( # test_transform_numeric_ret # With self.axis == 1, _get_data_to_aggregate does a transpose # so we always have a single block. - mgr: Manager2D = self._get_data_to_aggregate( + mgr: BlockManager = self._get_data_to_aggregate( numeric_only=numeric_only, name=how ) @@ -1586,10 +1586,7 @@ def arr_func(bvalues: ArrayLike) -> ArrayLike: "transform", bvalues, how, 1, **kwargs ) - # We could use `mgr.apply` here and not have to set_axis, but - # we would have to do shape gymnastics for ArrayManager compat - res_mgr = mgr.grouped_reduce(arr_func) - res_mgr.set_axis(1, mgr.axes[1]) + res_mgr = mgr.apply(arr_func) res_df = self.obj._constructor_from_mgr(res_mgr, axes=res_mgr.axes) res_df = self._maybe_transpose_result(res_df) @@ -1893,7 +1890,7 @@ def _gotitem(self, key, ndim: int, subset=None): def _get_data_to_aggregate( self, *, numeric_only: bool = False, name: str | None = None - ) -> Manager2D: + ) -> BlockManager: obj = self._obj_with_exclusions if self.axis == 1: mgr = obj.T._mgr @@ -1904,7 +1901,7 @@ def _get_data_to_aggregate( mgr = mgr.get_numeric_data() return mgr - def _wrap_agged_manager(self, mgr: Manager2D) -> DataFrame: + def _wrap_agged_manager(self, mgr: BlockManager) -> DataFrame: return self.obj._constructor_from_mgr(mgr, axes=mgr.axes) def _apply_to_column_groupbys(self, func) -> DataFrame: diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e72365a55ced1..74e217e0649ed 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -1007,10 +1007,7 @@ def is_in_obj(gpr) -> bool: except (KeyError, IndexError, InvalidIndexError, OutOfBoundsDatetime): return False if isinstance(gpr, Series) and isinstance(obj_gpr_column, Series): - return gpr._mgr.references_same_values( # type: ignore[union-attr] - obj_gpr_column._mgr, # type: ignore[arg-type] - 0, - ) + return gpr._mgr.references_same_values(obj_gpr_column._mgr, 0) return False try: return gpr is obj[gpr.name] diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index 2eb413440ba9c..da394e783be4b 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -1,8 +1,4 @@ from pandas.core.internals.api import make_block # 2023-09-18 pyarrow uses this -from pandas.core.internals.array_manager import ( - ArrayManager, - SingleArrayManager, -) from pandas.core.internals.base import ( DataManager, SingleDataManager, @@ -19,11 +15,9 @@ "ExtensionBlock", # pylint: disable=undefined-all-variable "make_block", "DataManager", - "ArrayManager", "BlockManager", "SingleDataManager", "SingleBlockManager", - "SingleArrayManager", "concatenate_managers", ] diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py deleted file mode 100644 index ee62441ab8f55..0000000000000 --- a/pandas/core/internals/array_manager.py +++ /dev/null @@ -1,1342 +0,0 @@ -""" -Experimental manager based on storing a collection of 1D arrays -""" -from __future__ import annotations - -import itertools -from typing import ( - TYPE_CHECKING, - Callable, - Literal, -) - -import numpy as np - -from pandas._libs import ( - NaT, - lib, -) - -from pandas.core.dtypes.astype import ( - astype_array, - astype_array_safe, -) -from pandas.core.dtypes.cast import ( - ensure_dtype_can_hold_na, - find_common_type, - infer_dtype_from_scalar, - np_find_common_type, -) -from pandas.core.dtypes.common import ( - ensure_platform_int, - is_datetime64_ns_dtype, - is_integer, - is_numeric_dtype, - is_object_dtype, - is_timedelta64_ns_dtype, -) -from pandas.core.dtypes.dtypes import ExtensionDtype -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCSeries, -) -from pandas.core.dtypes.missing import ( - array_equals, - isna, - na_value_for_dtype, -) - -import pandas.core.algorithms as algos -from pandas.core.array_algos.quantile import quantile_compat -from pandas.core.array_algos.take import take_1d -from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - NumpyExtensionArray, - TimedeltaArray, -) -from pandas.core.construction import ( - ensure_wrapped_if_datetimelike, - extract_array, - sanitize_array, -) -from pandas.core.indexers import ( - maybe_convert_indices, - validate_indices, -) -from pandas.core.indexes.api import ( - Index, - ensure_index, -) -from pandas.core.indexes.base import get_values_for_csv -from pandas.core.internals.base import ( - DataManager, - SingleDataManager, - ensure_np_dtype, - interleaved_dtype, -) -from pandas.core.internals.blocks import ( - BlockPlacement, - ensure_block_shape, - external_values, - extract_pandas_array, - maybe_coerce_values, - new_block, -) -from pandas.core.internals.managers import make_na_array - -if TYPE_CHECKING: - from collections.abc import Hashable - - from pandas._typing import ( - ArrayLike, - AxisInt, - DtypeObj, - QuantileInterpolation, - Self, - npt, - ) - - -class BaseArrayManager(DataManager): - """ - Core internal data structure to implement DataFrame and Series. - - Alternative to the BlockManager, storing a list of 1D arrays instead of - Blocks. - - This is *not* a public API class - - Parameters - ---------- - arrays : Sequence of arrays - axes : Sequence of Index - verify_integrity : bool, default True - - """ - - __slots__ = [ - "_axes", # private attribute, because 'axes' has different order, see below - "arrays", - ] - - arrays: list[np.ndarray | ExtensionArray] - _axes: list[Index] - - def __init__( - self, - arrays: list[np.ndarray | ExtensionArray], - axes: list[Index], - verify_integrity: bool = True, - ) -> None: - raise NotImplementedError - - def make_empty(self, axes=None) -> Self: - """Return an empty ArrayManager with the items axis of len 0 (no columns)""" - if axes is None: - axes = [self.axes[1:], Index([])] - - arrays: list[np.ndarray | ExtensionArray] = [] - return type(self)(arrays, axes) - - @property - def items(self) -> Index: - return self._axes[-1] - - @property - # error: Signature of "axes" incompatible with supertype "DataManager" - def axes(self) -> list[Index]: # type: ignore[override] - # mypy doesn't work to override attribute with property - # see https://github.com/python/mypy/issues/4125 - """Axes is BlockManager-compatible order (columns, rows)""" - return [self._axes[1], self._axes[0]] - - @property - def shape_proper(self) -> tuple[int, ...]: - # this returns (n_rows, n_columns) - return tuple(len(ax) for ax in self._axes) - - @staticmethod - def _normalize_axis(axis: AxisInt) -> int: - # switch axis - axis = 1 if axis == 0 else 0 - return axis - - def set_axis(self, axis: AxisInt, new_labels: Index) -> None: - # Caller is responsible for ensuring we have an Index object. - self._validate_set_axis(axis, new_labels) - axis = self._normalize_axis(axis) - self._axes[axis] = new_labels - - def get_dtypes(self) -> npt.NDArray[np.object_]: - return np.array([arr.dtype for arr in self.arrays], dtype="object") - - def add_references(self, mgr: BaseArrayManager) -> None: - """ - Only implemented on the BlockManager level - """ - return - - def __getstate__(self): - return self.arrays, self._axes - - def __setstate__(self, state) -> None: - self.arrays = state[0] - self._axes = state[1] - - def __repr__(self) -> str: - output = type(self).__name__ - output += f"\nIndex: {self._axes[0]}" - if self.ndim == 2: - output += f"\nColumns: {self._axes[1]}" - output += f"\n{len(self.arrays)} arrays:" - for arr in self.arrays: - output += f"\n{arr.dtype}" - return output - - def apply( - self, - f, - align_keys: list[str] | None = None, - **kwargs, - ) -> Self: - """ - Iterate over the arrays, collect and create a new ArrayManager. - - Parameters - ---------- - f : str or callable - Name of the Array method to apply. - align_keys: List[str] or None, default None - **kwargs - Keywords to pass to `f` - - Returns - ------- - ArrayManager - """ - assert "filter" not in kwargs - - align_keys = align_keys or [] - result_arrays: list[ArrayLike] = [] - # fillna: Series/DataFrame is responsible for making sure value is aligned - - aligned_args = {k: kwargs[k] for k in align_keys} - - if f == "apply": - f = kwargs.pop("func") - - for i, arr in enumerate(self.arrays): - if aligned_args: - for k, obj in aligned_args.items(): - if isinstance(obj, (ABCSeries, ABCDataFrame)): - # The caller is responsible for ensuring that - # obj.axes[-1].equals(self.items) - if obj.ndim == 1: - kwargs[k] = obj.iloc[i] - else: - kwargs[k] = obj.iloc[:, i]._values - else: - # otherwise we have an array-like - kwargs[k] = obj[i] - - if callable(f): - applied = f(arr, **kwargs) - else: - applied = getattr(arr, f)(**kwargs) - - result_arrays.append(applied) - - new_axes = self._axes - return type(self)(result_arrays, new_axes) - - def apply_with_block(self, f, align_keys=None, **kwargs) -> Self: - # switch axis to follow BlockManager logic - swap_axis = True - if f == "interpolate": - swap_axis = False - if swap_axis and "axis" in kwargs and self.ndim == 2: - kwargs["axis"] = 1 if kwargs["axis"] == 0 else 0 - - align_keys = align_keys or [] - aligned_args = {k: kwargs[k] for k in align_keys} - - result_arrays = [] - - for i, arr in enumerate(self.arrays): - if aligned_args: - for k, obj in aligned_args.items(): - if isinstance(obj, (ABCSeries, ABCDataFrame)): - # The caller is responsible for ensuring that - # obj.axes[-1].equals(self.items) - if obj.ndim == 1: - if self.ndim == 2: - kwargs[k] = obj.iloc[slice(i, i + 1)]._values - else: - kwargs[k] = obj.iloc[:]._values - else: - kwargs[k] = obj.iloc[:, [i]]._values - else: - # otherwise we have an ndarray - if obj.ndim == 2: - kwargs[k] = obj[[i]] - - if isinstance(arr.dtype, np.dtype) and not isinstance(arr, np.ndarray): - # i.e. TimedeltaArray, DatetimeArray with tz=None. Need to - # convert for the Block constructors. - arr = np.asarray(arr) - - arr = maybe_coerce_values(arr) - if self.ndim == 2: - arr = ensure_block_shape(arr, 2) - bp = BlockPlacement(slice(0, 1, 1)) - block = new_block(arr, placement=bp, ndim=2) - else: - bp = BlockPlacement(slice(0, len(self), 1)) - block = new_block(arr, placement=bp, ndim=1) - - applied = getattr(block, f)(**kwargs) - if isinstance(applied, list): - applied = applied[0] - arr = applied.values - if self.ndim == 2 and arr.ndim == 2: - # 2D for np.ndarray or DatetimeArray/TimedeltaArray - assert len(arr) == 1 - # error: No overload variant of "__getitem__" of "ExtensionArray" - # matches argument type "Tuple[int, slice]" - arr = arr[0, :] # type: ignore[call-overload] - result_arrays.append(arr) - - return type(self)(result_arrays, self._axes) - - def setitem(self, indexer, value, warn: bool = True) -> Self: - return self.apply_with_block("setitem", indexer=indexer, value=value) - - def diff(self, n: int) -> Self: - assert self.ndim == 2 # caller ensures - return self.apply(algos.diff, n=n) - - def astype(self, dtype, copy: bool | None = False, errors: str = "raise") -> Self: - if copy is None: - copy = True - - return self.apply(astype_array_safe, dtype=dtype, copy=copy, errors=errors) - - def convert(self, copy: bool | None) -> Self: - if copy is None: - copy = True - - def _convert(arr): - if is_object_dtype(arr.dtype): - # extract NumpyExtensionArray for tests that patch - # NumpyExtensionArray._typ - arr = np.asarray(arr) - result = lib.maybe_convert_objects( - arr, - convert_non_numeric=True, - ) - if result is arr and copy: - return arr.copy() - return result - else: - return arr.copy() if copy else arr - - return self.apply(_convert) - - def get_values_for_csv( - self, *, float_format, date_format, decimal, na_rep: str = "nan", quoting=None - ) -> Self: - return self.apply( - get_values_for_csv, - na_rep=na_rep, - quoting=quoting, - float_format=float_format, - date_format=date_format, - decimal=decimal, - ) - - @property - def any_extension_types(self) -> bool: - """Whether any of the blocks in this manager are extension blocks""" - return False # any(block.is_extension for block in self.blocks) - - @property - def is_view(self) -> bool: - """return a boolean if we are a single block and are a view""" - # TODO what is this used for? - return False - - @property - def is_single_block(self) -> bool: - return len(self.arrays) == 1 - - def _get_data_subset(self, predicate: Callable) -> Self: - indices = [i for i, arr in enumerate(self.arrays) if predicate(arr)] - arrays = [self.arrays[i] for i in indices] - # TODO copy? - # Note: using Index.take ensures we can retain e.g. DatetimeIndex.freq, - # see test_describe_datetime_columns - taker = np.array(indices, dtype="intp") - new_cols = self._axes[1].take(taker) - new_axes = [self._axes[0], new_cols] - return type(self)(arrays, new_axes, verify_integrity=False) - - def get_bool_data(self, copy: bool = False) -> Self: - """ - Select columns that are bool-dtype and object-dtype columns that are all-bool. - - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks - """ - return self._get_data_subset(lambda x: x.dtype == np.dtype(bool)) - - def get_numeric_data(self, copy: bool = False) -> Self: - """ - Select columns that have a numeric dtype. - - Parameters - ---------- - copy : bool, default False - Whether to copy the blocks - """ - return self._get_data_subset( - lambda arr: is_numeric_dtype(arr.dtype) - or getattr(arr.dtype, "_is_numeric", False) - ) - - def copy(self, deep: bool | Literal["all"] | None = True) -> Self: - """ - Make deep or shallow copy of ArrayManager - - Parameters - ---------- - deep : bool or string, default True - If False, return shallow copy (do not copy data) - If 'all', copy data and a deep copy of the index - - Returns - ------- - BlockManager - """ - if deep is None: - # ArrayManager does not yet support CoW, so deep=None always means - # deep=True for now - deep = True - - # this preserves the notion of view copying of axes - if deep: - # hit in e.g. tests.io.json.test_pandas - - def copy_func(ax): - return ax.copy(deep=True) if deep == "all" else ax.view() - - new_axes = [copy_func(ax) for ax in self._axes] - else: - new_axes = list(self._axes) - - if deep: - new_arrays = [arr.copy() for arr in self.arrays] - else: - new_arrays = list(self.arrays) - return type(self)(new_arrays, new_axes, verify_integrity=False) - - def reindex_indexer( - self, - new_axis, - indexer, - axis: AxisInt, - fill_value=None, - allow_dups: bool = False, - copy: bool | None = True, - # ignored keywords - only_slice: bool = False, - # ArrayManager specific keywords - use_na_proxy: bool = False, - ) -> Self: - axis = self._normalize_axis(axis) - return self._reindex_indexer( - new_axis, - indexer, - axis, - fill_value, - allow_dups, - copy, - use_na_proxy, - ) - - def _reindex_indexer( - self, - new_axis, - indexer: npt.NDArray[np.intp] | None, - axis: AxisInt, - fill_value=None, - allow_dups: bool = False, - copy: bool | None = True, - use_na_proxy: bool = False, - ) -> Self: - """ - Parameters - ---------- - new_axis : Index - indexer : ndarray[intp] or None - axis : int - fill_value : object, default None - allow_dups : bool, default False - copy : bool, default True - - - pandas-indexer with -1's only. - """ - if copy is None: - # ArrayManager does not yet support CoW, so deep=None always means - # deep=True for now - copy = True - - if indexer is None: - if new_axis is self._axes[axis] and not copy: - return self - - result = self.copy(deep=copy) - result._axes = list(self._axes) - result._axes[axis] = new_axis - return result - - # some axes don't allow reindexing with dups - if not allow_dups: - self._axes[axis]._validate_can_reindex(indexer) - - if axis >= self.ndim: - raise IndexError("Requested axis not found in manager") - - if axis == 1: - new_arrays = [] - for i in indexer: - if i == -1: - arr = self._make_na_array( - fill_value=fill_value, use_na_proxy=use_na_proxy - ) - else: - arr = self.arrays[i] - if copy: - arr = arr.copy() - new_arrays.append(arr) - - else: - validate_indices(indexer, len(self._axes[0])) - indexer = ensure_platform_int(indexer) - mask = indexer == -1 - needs_masking = mask.any() - new_arrays = [ - take_1d( - arr, - indexer, - allow_fill=needs_masking, - fill_value=fill_value, - mask=mask, - # if fill_value is not None else blk.fill_value - ) - for arr in self.arrays - ] - - new_axes = list(self._axes) - new_axes[axis] = new_axis - - return type(self)(new_arrays, new_axes, verify_integrity=False) - - def take( - self, - indexer: npt.NDArray[np.intp], - axis: AxisInt = 1, - verify: bool = True, - ) -> Self: - """ - Take items along any axis. - """ - assert isinstance(indexer, np.ndarray), type(indexer) - assert indexer.dtype == np.intp, indexer.dtype - - axis = self._normalize_axis(axis) - - if not indexer.ndim == 1: - raise ValueError("indexer should be 1-dimensional") - - n = self.shape_proper[axis] - indexer = maybe_convert_indices(indexer, n, verify=verify) - - new_labels = self._axes[axis].take(indexer) - return self._reindex_indexer( - new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True - ) - - def _make_na_array(self, fill_value=None, use_na_proxy: bool = False): - if use_na_proxy: - assert fill_value is None - return NullArrayProxy(self.shape_proper[0]) - - if fill_value is None: - fill_value = np.nan - - dtype, fill_value = infer_dtype_from_scalar(fill_value) - array_values = make_na_array(dtype, self.shape_proper[:1], fill_value) - return array_values - - def _equal_values(self, other) -> bool: - """ - Used in .equals defined in base class. Only check the column values - assuming shape and indexes have already been checked. - """ - for left, right in zip(self.arrays, other.arrays): - if not array_equals(left, right): - return False - return True - - # TODO - # to_dict - - -class ArrayManager(BaseArrayManager): - @property - def ndim(self) -> Literal[2]: - return 2 - - def __init__( - self, - arrays: list[np.ndarray | ExtensionArray], - axes: list[Index], - verify_integrity: bool = True, - ) -> None: - # Note: we are storing the axes in "_axes" in the (row, columns) order - # which contrasts the order how it is stored in BlockManager - self._axes = axes - self.arrays = arrays - - if verify_integrity: - self._axes = [ensure_index(ax) for ax in axes] - arrays = [extract_pandas_array(x, None, 1)[0] for x in arrays] - self.arrays = [maybe_coerce_values(arr) for arr in arrays] - self._verify_integrity() - - def _verify_integrity(self) -> None: - n_rows, n_columns = self.shape_proper - if not len(self.arrays) == n_columns: - raise ValueError( - "Number of passed arrays must equal the size of the column Index: " - f"{len(self.arrays)} arrays vs {n_columns} columns." - ) - for arr in self.arrays: - if not len(arr) == n_rows: - raise ValueError( - "Passed arrays should have the same length as the rows Index: " - f"{len(arr)} vs {n_rows} rows" - ) - if not isinstance(arr, (np.ndarray, ExtensionArray)): - raise ValueError( - "Passed arrays should be np.ndarray or ExtensionArray instances, " - f"got {type(arr)} instead" - ) - if not arr.ndim == 1: - raise ValueError( - "Passed arrays should be 1-dimensional, got array with " - f"{arr.ndim} dimensions instead." - ) - - # -------------------------------------------------------------------- - # Indexing - - def fast_xs(self, loc: int) -> SingleArrayManager: - """ - Return the array corresponding to `frame.iloc[loc]`. - - Parameters - ---------- - loc : int - - Returns - ------- - np.ndarray or ExtensionArray - """ - dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) - - values = [arr[loc] for arr in self.arrays] - if isinstance(dtype, ExtensionDtype): - result: np.ndarray | ExtensionArray = ( - dtype.construct_array_type()._from_sequence(values, dtype=dtype) - ) - # for datetime64/timedelta64, the np.ndarray constructor cannot handle pd.NaT - elif is_datetime64_ns_dtype(dtype): - result = DatetimeArray._from_sequence(values, dtype=dtype)._ndarray - elif is_timedelta64_ns_dtype(dtype): - result = TimedeltaArray._from_sequence(values, dtype=dtype)._ndarray - else: - result = np.array(values, dtype=dtype) - return SingleArrayManager([result], [self._axes[1]]) - - def get_slice(self, slobj: slice, axis: AxisInt = 0) -> ArrayManager: - axis = self._normalize_axis(axis) - - if axis == 0: - arrays = [arr[slobj] for arr in self.arrays] - elif axis == 1: - arrays = self.arrays[slobj] - - new_axes = list(self._axes) - new_axes[axis] = new_axes[axis]._getitem_slice(slobj) - - return type(self)(arrays, new_axes, verify_integrity=False) - - def iget(self, i: int) -> SingleArrayManager: - """ - Return the data as a SingleArrayManager. - """ - values = self.arrays[i] - return SingleArrayManager([values], [self._axes[0]]) - - def iget_values(self, i: int) -> ArrayLike: - """ - Return the data for column i as the values (ndarray or ExtensionArray). - """ - return self.arrays[i] - - @property - def column_arrays(self) -> list[ArrayLike]: - """ - Used in the JSON C code to access column arrays. - """ - - return [np.asarray(arr) for arr in self.arrays] - - def iset( - self, - loc: int | slice | np.ndarray, - value: ArrayLike, - inplace: bool = False, - refs=None, - ) -> None: - """ - Set new column(s). - - This changes the ArrayManager in-place, but replaces (an) existing - column(s), not changing column values in-place). - - Parameters - ---------- - loc : integer, slice or boolean mask - Positional location (already bounds checked) - value : np.ndarray or ExtensionArray - inplace : bool, default False - Whether overwrite existing array as opposed to replacing it. - """ - # single column -> single integer index - if lib.is_integer(loc): - # TODO can we avoid needing to unpack this here? That means converting - # DataFrame into 1D array when loc is an integer - if isinstance(value, np.ndarray) and value.ndim == 2: - assert value.shape[1] == 1 - value = value[:, 0] - - # TODO we receive a datetime/timedelta64 ndarray from DataFrame._iset_item - # but we should avoid that and pass directly the proper array - value = maybe_coerce_values(value) - - assert isinstance(value, (np.ndarray, ExtensionArray)) - assert value.ndim == 1 - assert len(value) == len(self._axes[0]) - self.arrays[loc] = value - return - - # multiple columns -> convert slice or array to integer indices - elif isinstance(loc, slice): - indices: range | np.ndarray = range( - loc.start if loc.start is not None else 0, - loc.stop if loc.stop is not None else self.shape_proper[1], - loc.step if loc.step is not None else 1, - ) - else: - assert isinstance(loc, np.ndarray) - assert loc.dtype == "bool" - indices = np.nonzero(loc)[0] - - assert value.ndim == 2 - assert value.shape[0] == len(self._axes[0]) - - for value_idx, mgr_idx in enumerate(indices): - # error: No overload variant of "__getitem__" of "ExtensionArray" matches - # argument type "Tuple[slice, int]" - value_arr = value[:, value_idx] # type: ignore[call-overload] - self.arrays[mgr_idx] = value_arr - return - - def column_setitem( - self, loc: int, idx: int | slice | np.ndarray, value, inplace_only: bool = False - ) -> None: - """ - Set values ("setitem") into a single column (not setting the full column). - - This is a method on the ArrayManager level, to avoid creating an - intermediate Series at the DataFrame level (`s = df[loc]; s[idx] = value`) - """ - if not is_integer(loc): - raise TypeError("The column index should be an integer") - arr = self.arrays[loc] - mgr = SingleArrayManager([arr], [self._axes[0]]) - if inplace_only: - mgr.setitem_inplace(idx, value) - else: - new_mgr = mgr.setitem((idx,), value) - # update existing ArrayManager in-place - self.arrays[loc] = new_mgr.arrays[0] - - def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None: - """ - Insert item at selected position. - - Parameters - ---------- - loc : int - item : hashable - value : np.ndarray or ExtensionArray - """ - # insert to the axis; this could possibly raise a TypeError - new_axis = self.items.insert(loc, item) - - value = extract_array(value, extract_numpy=True) - if value.ndim == 2: - if value.shape[0] == 1: - # error: No overload variant of "__getitem__" of "ExtensionArray" - # matches argument type "Tuple[int, slice]" - value = value[0, :] # type: ignore[call-overload] - else: - raise ValueError( - f"Expected a 1D array, got an array with shape {value.shape}" - ) - value = maybe_coerce_values(value) - - # TODO self.arrays can be empty - # assert len(value) == len(self.arrays[0]) - - # TODO is this copy needed? - arrays = self.arrays.copy() - arrays.insert(loc, value) - - self.arrays = arrays - self._axes[1] = new_axis - - def idelete(self, indexer) -> ArrayManager: - """ - Delete selected locations in-place (new block and array, same BlockManager) - """ - to_keep = np.ones(self.shape[0], dtype=np.bool_) - to_keep[indexer] = False - - self.arrays = [self.arrays[i] for i in np.nonzero(to_keep)[0]] - self._axes = [self._axes[0], self._axes[1][to_keep]] - return self - - # -------------------------------------------------------------------- - # Array-wise Operation - - def grouped_reduce(self, func: Callable) -> Self: - """ - Apply grouped reduction function columnwise, returning a new ArrayManager. - - Parameters - ---------- - func : grouped reduction function - - Returns - ------- - ArrayManager - """ - result_arrays: list[np.ndarray] = [] - result_indices: list[int] = [] - - for i, arr in enumerate(self.arrays): - # grouped_reduce functions all expect 2D arrays - arr = ensure_block_shape(arr, ndim=2) - res = func(arr) - if res.ndim == 2: - # reverse of ensure_block_shape - assert res.shape[0] == 1 - res = res[0] - - result_arrays.append(res) - result_indices.append(i) - - if len(result_arrays) == 0: - nrows = 0 - else: - nrows = result_arrays[0].shape[0] - index = Index(range(nrows)) - - columns = self.items - - # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; - # expected "List[Union[ndarray, ExtensionArray]]" - return type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] - - def reduce(self, func: Callable) -> Self: - """ - Apply reduction function column-wise, returning a single-row ArrayManager. - - Parameters - ---------- - func : reduction function - - Returns - ------- - ArrayManager - """ - result_arrays: list[np.ndarray] = [] - for i, arr in enumerate(self.arrays): - res = func(arr, axis=0) - - # TODO NaT doesn't preserve dtype, so we need to ensure to create - # a timedelta result array if original was timedelta - # what if datetime results in timedelta? (eg std) - dtype = arr.dtype if res is NaT else None - result_arrays.append( - sanitize_array([res], None, dtype=dtype) # type: ignore[arg-type] - ) - - index = Index._simple_new(np.array([None], dtype=object)) # placeholder - columns = self.items - - # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]"; - # expected "List[Union[ndarray, ExtensionArray]]" - new_mgr = type(self)(result_arrays, [index, columns]) # type: ignore[arg-type] - return new_mgr - - def operate_blockwise(self, other: ArrayManager, array_op) -> ArrayManager: - """ - Apply array_op blockwise with another (aligned) BlockManager. - """ - # TODO what if `other` is BlockManager ? - left_arrays = self.arrays - right_arrays = other.arrays - result_arrays = [ - array_op(left, right) for left, right in zip(left_arrays, right_arrays) - ] - return type(self)(result_arrays, self._axes) - - def quantile( - self, - *, - qs: Index, # with dtype float64 - transposed: bool = False, - interpolation: QuantileInterpolation = "linear", - ) -> ArrayManager: - arrs = [ensure_block_shape(x, 2) for x in self.arrays] - new_arrs = [ - quantile_compat(x, np.asarray(qs._values), interpolation) for x in arrs - ] - for i, arr in enumerate(new_arrs): - if arr.ndim == 2: - assert arr.shape[0] == 1, arr.shape - new_arrs[i] = arr[0] - - axes = [qs, self._axes[1]] - return type(self)(new_arrs, axes) - - # ---------------------------------------------------------------- - - def unstack(self, unstacker, fill_value) -> ArrayManager: - """ - Return a BlockManager with all blocks unstacked. - - Parameters - ---------- - unstacker : reshape._Unstacker - fill_value : Any - fill_value for newly introduced missing values. - - Returns - ------- - unstacked : BlockManager - """ - indexer, _ = unstacker._indexer_and_to_sort - if unstacker.mask.all(): - new_indexer = indexer - allow_fill = False - new_mask2D = None - needs_masking = None - else: - new_indexer = np.full(unstacker.mask.shape, -1) - new_indexer[unstacker.mask] = indexer - allow_fill = True - # calculating the full mask once and passing it to take_1d is faster - # than letting take_1d calculate it in each repeated call - new_mask2D = (~unstacker.mask).reshape(*unstacker.full_shape) - needs_masking = new_mask2D.any(axis=0) - new_indexer2D = new_indexer.reshape(*unstacker.full_shape) - new_indexer2D = ensure_platform_int(new_indexer2D) - - new_arrays = [] - for arr in self.arrays: - for i in range(unstacker.full_shape[1]): - if allow_fill: - # error: Value of type "Optional[Any]" is not indexable [index] - new_arr = take_1d( - arr, - new_indexer2D[:, i], - allow_fill=needs_masking[i], # type: ignore[index] - fill_value=fill_value, - mask=new_mask2D[:, i], # type: ignore[index] - ) - else: - new_arr = take_1d(arr, new_indexer2D[:, i], allow_fill=False) - new_arrays.append(new_arr) - - new_index = unstacker.new_index - new_columns = unstacker.get_new_columns(self._axes[1]) - new_axes = [new_index, new_columns] - - return type(self)(new_arrays, new_axes, verify_integrity=False) - - def as_array( - self, - dtype=None, - copy: bool = False, - na_value: object = lib.no_default, - ) -> np.ndarray: - """ - Convert the blockmanager data into an numpy array. - - Parameters - ---------- - dtype : object, default None - Data type of the return array. - copy : bool, default False - If True then guarantee that a copy is returned. A value of - False does not guarantee that the underlying data is not - copied. - na_value : object, default lib.no_default - Value to be used as the missing value sentinel. - - Returns - ------- - arr : ndarray - """ - if len(self.arrays) == 0: - empty_arr = np.empty(self.shape, dtype=float) - return empty_arr.transpose() - - # We want to copy when na_value is provided to avoid - # mutating the original object - copy = copy or na_value is not lib.no_default - - if not dtype: - dtype = interleaved_dtype([arr.dtype for arr in self.arrays]) - - dtype = ensure_np_dtype(dtype) - - result = np.empty(self.shape_proper, dtype=dtype) - - for i, arr in enumerate(self.arrays): - arr = arr.astype(dtype, copy=copy) - result[:, i] = arr - - if na_value is not lib.no_default: - result[isna(result)] = na_value - - return result - - @classmethod - def concat_horizontal(cls, mgrs: list[Self], axes: list[Index]) -> Self: - """ - Concatenate uniformly-indexed ArrayManagers horizontally. - """ - # concatting along the columns -> combine reindexed arrays in a single manager - arrays = list(itertools.chain.from_iterable([mgr.arrays for mgr in mgrs])) - new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False) - return new_mgr - - @classmethod - def concat_vertical(cls, mgrs: list[Self], axes: list[Index]) -> Self: - """ - Concatenate uniformly-indexed ArrayManagers vertically. - """ - # concatting along the rows -> concat the reindexed arrays - # TODO(ArrayManager) doesn't yet preserve the correct dtype - arrays = [ - concat_arrays([mgrs[i].arrays[j] for i in range(len(mgrs))]) - for j in range(len(mgrs[0].arrays)) - ] - new_mgr = cls(arrays, [axes[1], axes[0]], verify_integrity=False) - return new_mgr - - -class SingleArrayManager(BaseArrayManager, SingleDataManager): - __slots__ = [ - "_axes", # private attribute, because 'axes' has different order, see below - "arrays", - ] - - arrays: list[np.ndarray | ExtensionArray] - _axes: list[Index] - - @property - def ndim(self) -> Literal[1]: - return 1 - - def __init__( - self, - arrays: list[np.ndarray | ExtensionArray], - axes: list[Index], - verify_integrity: bool = True, - ) -> None: - self._axes = axes - self.arrays = arrays - - if verify_integrity: - assert len(axes) == 1 - assert len(arrays) == 1 - self._axes = [ensure_index(ax) for ax in self._axes] - arr = arrays[0] - arr = maybe_coerce_values(arr) - arr = extract_pandas_array(arr, None, 1)[0] - self.arrays = [arr] - self._verify_integrity() - - def _verify_integrity(self) -> None: - (n_rows,) = self.shape - assert len(self.arrays) == 1 - arr = self.arrays[0] - assert len(arr) == n_rows - if not arr.ndim == 1: - raise ValueError( - "Passed array should be 1-dimensional, got array with " - f"{arr.ndim} dimensions instead." - ) - - @staticmethod - def _normalize_axis(axis): - return axis - - def make_empty(self, axes=None) -> Self: - """Return an empty ArrayManager with index/array of length 0""" - if axes is None: - axes = [Index([], dtype=object)] - array: np.ndarray = np.array([], dtype=self.dtype) - return type(self)([array], axes) - - @classmethod - def from_array(cls, array, index) -> SingleArrayManager: - return cls([array], [index]) - - # error: Cannot override writeable attribute with read-only property - @property - def axes(self) -> list[Index]: # type: ignore[override] - return self._axes - - @property - def index(self) -> Index: - return self._axes[0] - - @property - def dtype(self): - return self.array.dtype - - def external_values(self): - """The array that Series.values returns""" - return external_values(self.array) - - def internal_values(self): - """The array that Series._values returns""" - return self.array - - def array_values(self): - """The array that Series.array returns""" - arr = self.array - if isinstance(arr, np.ndarray): - arr = NumpyExtensionArray(arr) - return arr - - @property - def _can_hold_na(self) -> bool: - if isinstance(self.array, np.ndarray): - return self.array.dtype.kind not in "iub" - else: - # ExtensionArray - return self.array._can_hold_na - - @property - def is_single_block(self) -> bool: - return True - - def fast_xs(self, loc: int) -> SingleArrayManager: - raise NotImplementedError("Use series._values[loc] instead") - - def get_slice(self, slobj: slice, axis: AxisInt = 0) -> SingleArrayManager: - if axis >= self.ndim: - raise IndexError("Requested axis not found in manager") - - new_array = self.array[slobj] - new_index = self.index._getitem_slice(slobj) - return type(self)([new_array], [new_index], verify_integrity=False) - - def get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> SingleArrayManager: - new_array = self.array[indexer] - new_index = self.index[indexer] - return type(self)([new_array], [new_index]) - - # error: Signature of "apply" incompatible with supertype "BaseArrayManager" - def apply(self, func, **kwargs) -> Self: # type: ignore[override] - if callable(func): - new_array = func(self.array, **kwargs) - else: - new_array = getattr(self.array, func)(**kwargs) - return type(self)([new_array], self._axes) - - def setitem(self, indexer, value, warn: bool = True) -> SingleArrayManager: - """ - Set values with indexer. - - For SingleArrayManager, this backs s[indexer] = value - - See `setitem_inplace` for a version that works inplace and doesn't - return a new Manager. - """ - if isinstance(indexer, np.ndarray) and indexer.ndim > self.ndim: - raise ValueError(f"Cannot set values with ndim > {self.ndim}") - return self.apply_with_block("setitem", indexer=indexer, value=value) - - def idelete(self, indexer) -> SingleArrayManager: - """ - Delete selected locations in-place (new array, same ArrayManager) - """ - to_keep = np.ones(self.shape[0], dtype=np.bool_) - to_keep[indexer] = False - - self.arrays = [self.arrays[0][to_keep]] - self._axes = [self._axes[0][to_keep]] - return self - - def _get_data_subset(self, predicate: Callable) -> SingleArrayManager: - # used in get_numeric_data / get_bool_data - if predicate(self.array): - return type(self)(self.arrays, self._axes, verify_integrity=False) - else: - return self.make_empty() - - def set_values(self, values: ArrayLike) -> None: - """ - Set (replace) the values of the SingleArrayManager in place. - - Use at your own risk! This does not check if the passed values are - valid for the current SingleArrayManager (length, dtype, etc). - """ - self.arrays[0] = values - - def to_2d_mgr(self, columns: Index) -> ArrayManager: - """ - Manager analogue of Series.to_frame - """ - arrays = [self.arrays[0]] - axes = [self.axes[0], columns] - - return ArrayManager(arrays, axes, verify_integrity=False) - - -class NullArrayProxy: - """ - Proxy object for an all-NA array. - - Only stores the length of the array, and not the dtype. The dtype - will only be known when actually concatenating (after determining the - common dtype, for which this proxy is ignored). - Using this object avoids that the internals/concat.py needs to determine - the proper dtype and array type. - """ - - ndim = 1 - - def __init__(self, n: int) -> None: - self.n = n - - @property - def shape(self) -> tuple[int]: - return (self.n,) - - def to_array(self, dtype: DtypeObj) -> ArrayLike: - """ - Helper function to create the actual all-NA array from the NullArrayProxy - object. - - Parameters - ---------- - arr : NullArrayProxy - dtype : the dtype for the resulting array - - Returns - ------- - np.ndarray or ExtensionArray - """ - if isinstance(dtype, ExtensionDtype): - empty = dtype.construct_array_type()._from_sequence([], dtype=dtype) - indexer = -np.ones(self.n, dtype=np.intp) - return empty.take(indexer, allow_fill=True) - else: - # when introducing missing values, int becomes float, bool becomes object - dtype = ensure_dtype_can_hold_na(dtype) - fill_value = na_value_for_dtype(dtype) - arr = np.empty(self.n, dtype=dtype) - arr.fill(fill_value) - return ensure_wrapped_if_datetimelike(arr) - - -def concat_arrays(to_concat: list) -> ArrayLike: - """ - Alternative for concat_compat but specialized for use in the ArrayManager. - - Differences: only deals with 1D arrays (no axis keyword), assumes - ensure_wrapped_if_datetimelike and does not skip empty arrays to determine - the dtype. - In addition ensures that all NullArrayProxies get replaced with actual - arrays. - - Parameters - ---------- - to_concat : list of arrays - - Returns - ------- - np.ndarray or ExtensionArray - """ - # ignore the all-NA proxies to determine the resulting dtype - to_concat_no_proxy = [x for x in to_concat if not isinstance(x, NullArrayProxy)] - - dtypes = {x.dtype for x in to_concat_no_proxy} - single_dtype = len(dtypes) == 1 - - if single_dtype: - target_dtype = to_concat_no_proxy[0].dtype - elif all(lib.is_np_dtype(x, "iub") for x in dtypes): - # GH#42092 - target_dtype = np_find_common_type(*dtypes) - else: - target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy]) - - to_concat = [ - arr.to_array(target_dtype) - if isinstance(arr, NullArrayProxy) - else astype_array(arr, target_dtype, copy=False) - for arr in to_concat - ] - - if isinstance(to_concat[0], ExtensionArray): - cls = type(to_concat[0]) - return cls._concat_same_type(to_concat) - - result = np.concatenate(to_concat) - - # TODO decide on exact behaviour (we shouldn't do this only for empty result) - # see https://github.com/pandas-dev/pandas/issues/39817 - if len(result) == 0: - # all empties -> check for bool to not coerce to float - kinds = {obj.dtype.kind for obj in to_concat_no_proxy} - if len(kinds) != 1: - if "b" in kinds: - result = result.astype(object) - return result diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 8f16a6623c8cb..d6d588d5e2492 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -1,6 +1,5 @@ """ -Base class for the internal managers. Both BlockManager and ArrayManager -inherit from this class. +Base class for the internal managers. BlockManager inherits from this class. """ from __future__ import annotations @@ -183,7 +182,7 @@ def fillna(self, value, limit: int | None, inplace: bool, downcast) -> Self: # Do this validation even if we go through one of the no-op paths limit = libalgos.validate_limit(None, limit=limit) - return self.apply_with_block( + return self.apply( "fillna", value=value, limit=limit, @@ -201,7 +200,7 @@ def where(self, other, cond, align: bool) -> Self: align_keys = ["cond"] other = extract_array(other, extract_numpy=True) - return self.apply_with_block( + return self.apply( "where", align_keys=align_keys, other=other, @@ -223,7 +222,7 @@ def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self: if not warn: already_warned.warned_already = True - return self.apply_with_block( + return self.apply( "putmask", align_keys=align_keys, mask=mask, @@ -234,7 +233,7 @@ def putmask(self, mask, new, align: bool = True, warn: bool = True) -> Self: @final def round(self, decimals: int, using_cow: bool = False) -> Self: - return self.apply_with_block( + return self.apply( "round", decimals=decimals, using_cow=using_cow, @@ -246,7 +245,7 @@ def replace(self, to_replace, value, inplace: bool) -> Self: # NDFrame.replace ensures the not-is_list_likes here assert not lib.is_list_like(to_replace) assert not lib.is_list_like(value) - return self.apply_with_block( + return self.apply( "replace", to_replace=to_replace, value=value, @@ -257,7 +256,7 @@ def replace(self, to_replace, value, inplace: bool) -> Self: @final def replace_regex(self, **kwargs) -> Self: - return self.apply_with_block( + return self.apply( "_replace_regex", **kwargs, using_cow=using_copy_on_write(), @@ -275,7 +274,7 @@ def replace_list( """do a list replace""" inplace = validate_bool_kwarg(inplace, "inplace") - bm = self.apply_with_block( + bm = self.apply( "replace_list", src_list=src_list, dest_list=dest_list, @@ -288,7 +287,7 @@ def replace_list( return bm def interpolate(self, inplace: bool, **kwargs) -> Self: - return self.apply_with_block( + return self.apply( "interpolate", inplace=inplace, **kwargs, @@ -297,7 +296,7 @@ def interpolate(self, inplace: bool, **kwargs) -> Self: ) def pad_or_backfill(self, inplace: bool, **kwargs) -> Self: - return self.apply_with_block( + return self.apply( "pad_or_backfill", inplace=inplace, **kwargs, @@ -309,7 +308,7 @@ def shift(self, periods: int, fill_value) -> Self: if fill_value is lib.no_default: fill_value = None - return self.apply_with_block("shift", periods=periods, fill_value=fill_value) + return self.apply("shift", periods=periods, fill_value=fill_value) # -------------------------------------------------------------------- # Consolidation: No-ops for all but BlockManager @@ -333,7 +332,7 @@ def ndim(self) -> Literal[1]: @property def array(self) -> ArrayLike: """ - Quick access to the backing array of the Block or SingleArrayManager. + Quick access to the backing array of the Block. """ # error: "SingleDataManager" has no attribute "arrays"; maybe "array" return self.arrays[0] # type: ignore[attr-defined] @@ -342,7 +341,7 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None: """ Set values with indexer. - For Single[Block/Array]Manager, this backs s[indexer] = value + For SingleBlockManager, this backs s[indexer] = value This is an inplace version of `setitem()`, mutating the manager/values in place, not returning a new Manager (and Block), and thus never changing diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 010ac34b2966a..af16a4175319d 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -39,7 +39,6 @@ ) from pandas.core.construction import ensure_wrapped_if_datetimelike -from pandas.core.internals.array_manager import ArrayManager from pandas.core.internals.blocks import ( ensure_block_shape, new_block_2d, @@ -56,7 +55,6 @@ ArrayLike, AxisInt, DtypeObj, - Manager2D, Shape, ) @@ -67,33 +65,9 @@ ) -def _concatenate_array_managers( - mgrs: list[ArrayManager], axes: list[Index], concat_axis: AxisInt -) -> Manager2D: - """ - Concatenate array managers into one. - - Parameters - ---------- - mgrs_indexers : list of (ArrayManager, {axis: indexer,...}) tuples - axes : list of Index - concat_axis : int - - Returns - ------- - ArrayManager - """ - if concat_axis == 1: - return mgrs[0].concat_vertical(mgrs, axes) - else: - # concatting along the columns -> combine reindexed arrays in a single manager - assert concat_axis == 0 - return mgrs[0].concat_horizontal(mgrs, axes) - - def concatenate_managers( mgrs_indexers, axes: list[Index], concat_axis: AxisInt, copy: bool -) -> Manager2D: +) -> BlockManager: """ Concatenate block managers into one. @@ -111,18 +85,6 @@ def concatenate_managers( needs_copy = copy and concat_axis == 0 - # TODO(ArrayManager) this assumes that all managers are of the same type - if isinstance(mgrs_indexers[0][0], ArrayManager): - mgrs = _maybe_reindex_columns_na_proxy(axes, mgrs_indexers, needs_copy) - # error: Argument 1 to "_concatenate_array_managers" has incompatible - # type "List[BlockManager]"; expected "List[Union[ArrayManager, - # SingleArrayManager, BlockManager, SingleBlockManager]]" - return _concatenate_array_managers( - mgrs, # type: ignore[arg-type] - axes, - concat_axis, - ) - # Assertions disabled for performance # for tup in mgrs_indexers: # # caller is responsible for ensuring this diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 609d2c9a7a285..ff5e8e35f92ab 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -46,7 +46,6 @@ from pandas.core.arrays.string_ import StringDtype from pandas.core.construction import ( array as pd_array, - ensure_wrapped_if_datetimelike, extract_array, range_to_ndarray, sanitize_array, @@ -60,10 +59,6 @@ get_objs_combined_axis, union_indexes, ) -from pandas.core.internals.array_manager import ( - ArrayManager, - SingleArrayManager, -) from pandas.core.internals.blocks import ( BlockPlacement, ensure_block_shape, @@ -71,8 +66,6 @@ new_block_2d, ) from pandas.core.internals.managers import ( - BlockManager, - SingleBlockManager, create_block_manager_from_blocks, create_block_manager_from_column_arrays, ) @@ -100,7 +93,6 @@ def arrays_to_mgr( *, dtype: DtypeObj | None = None, verify_integrity: bool = True, - typ: str | None = None, consolidate: bool = True, ) -> Manager: """ @@ -148,14 +140,9 @@ def arrays_to_mgr( # from BlockManager perspective axes = [columns, index] - if typ == "block": - return create_block_manager_from_column_arrays( - arrays, axes, consolidate=consolidate, refs=refs - ) - elif typ == "array": - return ArrayManager(arrays, [index, columns]) - else: - raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") + return create_block_manager_from_column_arrays( + arrays, axes, consolidate=consolidate, refs=refs + ) def rec_array_to_mgr( @@ -164,7 +151,6 @@ def rec_array_to_mgr( columns, dtype: DtypeObj | None, copy: bool, - typ: str, ) -> Manager: """ Extract from a masked rec array and create the manager. @@ -186,56 +172,19 @@ def rec_array_to_mgr( if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ) + mgr = arrays_to_mgr(arrays, columns, index, dtype=dtype) if copy: mgr = mgr.copy() return mgr -def mgr_to_mgr(mgr, typ: str, copy: bool = True) -> Manager: - """ - Convert to specific type of Manager. Does not copy if the type is already - correct. Does not guarantee a copy otherwise. `copy` keyword only controls - whether conversion from Block->ArrayManager copies the 1D arrays. - """ - new_mgr: Manager - - if typ == "block": - if isinstance(mgr, BlockManager): - new_mgr = mgr - else: - if mgr.ndim == 2: - new_mgr = arrays_to_mgr( - mgr.arrays, mgr.axes[0], mgr.axes[1], typ="block" - ) - else: - new_mgr = SingleBlockManager.from_array(mgr.arrays[0], mgr.index) - elif typ == "array": - if isinstance(mgr, ArrayManager): - new_mgr = mgr - else: - if mgr.ndim == 2: - arrays = [mgr.iget_values(i) for i in range(len(mgr.axes[0]))] - if copy: - arrays = [arr.copy() for arr in arrays] - new_mgr = ArrayManager(arrays, [mgr.axes[1], mgr.axes[0]]) - else: - array = mgr.internal_values() - if copy: - array = array.copy() - new_mgr = SingleArrayManager([array], [mgr.index]) - else: - raise ValueError(f"'typ' needs to be one of {{'block', 'array'}}, got '{typ}'") - return new_mgr - - # --------------------------------------------------------------------- # DataFrame Constructor Interface def ndarray_to_mgr( - values, index, columns, dtype: DtypeObj | None, copy: bool, typ: str + values, index, columns, dtype: DtypeObj | None, copy: bool ) -> Manager: # used in DataFrame.__init__ # input must be a ndarray, list, Series, Index, ExtensionArray @@ -253,10 +202,6 @@ def ndarray_to_mgr( if not len(values) and columns is not None and len(columns): values = np.empty((0, 1), dtype=object) - # if the array preparation does a copy -> avoid this for ArrayManager, - # since the copy is done on conversion to 1D arrays - copy_on_sanitize = False if typ == "array" else copy - vdtype = getattr(values, "dtype", None) refs = None if is_1d_only_ea_dtype(vdtype) or is_1d_only_ea_dtype(dtype): @@ -279,7 +224,7 @@ def ndarray_to_mgr( else: columns = ensure_index(columns) - return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) + return arrays_to_mgr(values, columns, index, dtype=dtype) elif isinstance(vdtype, ExtensionDtype): # i.e. Datetime64TZ, PeriodDtype; cases with is_1d_only_ea_dtype(vdtype) @@ -291,12 +236,10 @@ def ndarray_to_mgr( values = values.reshape(-1, 1) elif isinstance(values, (ABCSeries, Index)): - if not copy_on_sanitize and ( - dtype is None or astype_is_view(values.dtype, dtype) - ): + if not copy and (dtype is None or astype_is_view(values.dtype, dtype)): refs = values._references - if copy_on_sanitize: + if copy: values = values._values.copy() else: values = values._values @@ -306,9 +249,7 @@ def ndarray_to_mgr( elif isinstance(values, (np.ndarray, ExtensionArray)): # drop subclass info _copy = ( - copy_on_sanitize - if (dtype is None or astype_is_view(values.dtype, dtype)) - else False + copy if (dtype is None or astype_is_view(values.dtype, dtype)) else False ) values = np.array(values, copy=_copy) values = _ensure_2d(values) @@ -316,7 +257,7 @@ def ndarray_to_mgr( else: # by definition an array here # the dtypes will be coerced to a single dtype - values = _prep_ndarraylike(values, copy=copy_on_sanitize) + values = _prep_ndarraylike(values, copy=copy) if dtype is not None and values.dtype != dtype: # GH#40110 see similar check inside sanitize_array @@ -324,7 +265,7 @@ def ndarray_to_mgr( values, None, dtype=dtype, - copy=copy_on_sanitize, + copy=copy, allow_2d=True, ) @@ -335,27 +276,6 @@ def ndarray_to_mgr( _check_values_indices_shape_match(values, index, columns) - if typ == "array": - if issubclass(values.dtype.type, str): - values = np.array(values, dtype=object) - - if dtype is None and is_object_dtype(values.dtype): - arrays = [ - ensure_wrapped_if_datetimelike( - maybe_infer_to_datetimelike(values[:, i]) - ) - for i in range(values.shape[1]) - ] - else: - if lib.is_np_dtype(values.dtype, "mM"): - values = ensure_wrapped_if_datetimelike(values) - arrays = [values[:, i] for i in range(values.shape[1])] - - if copy: - arrays = [arr.copy() for arr in arrays] - - return ArrayManager(arrays, [index, columns], verify_integrity=False) - values = values.T # if we don't have a dtype specified, then try to convert objects @@ -426,7 +346,6 @@ def dict_to_mgr( columns, *, dtype: DtypeObj | None = None, - typ: str = "block", copy: bool = True, ) -> Manager: """ @@ -481,26 +400,22 @@ def dict_to_mgr( arrays = [com.maybe_iterable_to_list(data[k]) for k in keys] if copy: - if typ == "block": - # We only need to copy arrays that will not get consolidated, i.e. - # only EA arrays - arrays = [ - x.copy() - if isinstance(x, ExtensionArray) - else x.copy(deep=True) - if ( - isinstance(x, Index) - or isinstance(x, ABCSeries) - and is_1d_only_ea_dtype(x.dtype) - ) - else x - for x in arrays - ] - else: - # dtype check to exclude e.g. range objects, scalars - arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays] + # We only need to copy arrays that will not get consolidated, i.e. + # only EA arrays + arrays = [ + x.copy() + if isinstance(x, ExtensionArray) + else x.copy(deep=True) + if ( + isinstance(x, Index) + or isinstance(x, ABCSeries) + and is_1d_only_ea_dtype(x.dtype) + ) + else x + for x in arrays + ] - return arrays_to_mgr(arrays, columns, index, dtype=dtype, typ=typ, consolidate=copy) + return arrays_to_mgr(arrays, columns, index, dtype=dtype, consolidate=copy) def nested_data_to_arrays( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d08dee3663395..c6a8b61e0c51e 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -367,9 +367,6 @@ def apply( out = type(self).from_blocks(result_blocks, self.axes) return out - # Alias so we can share code with ArrayManager - apply_with_block = apply - def setitem(self, indexer, value, warn: bool = True) -> Self: """ Set values with indexer. @@ -2025,7 +2022,7 @@ def setitem_inplace(self, indexer, value, warn: bool = True) -> None: """ Set values with indexer. - For Single[Block/Array]Manager, this backs s[indexer] = value + For SingleBlockManager, this backs s[indexer] = value This is an inplace version of `setitem()`, mutating the manager/values in place, not returning a new Manager (and Block), and thus never changing diff --git a/pandas/core/series.py b/pandas/core/series.py index 3fb9b06bf905a..71b888448618c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -30,7 +30,6 @@ using_copy_on_write, warn_copy_on_write, ) -from pandas._config.config import _get_option from pandas._libs import ( lib, @@ -150,10 +149,7 @@ check_bool_indexer, check_dict_or_set_indexers, ) -from pandas.core.internals import ( - SingleArrayManager, - SingleBlockManager, -) +from pandas.core.internals import SingleBlockManager from pandas.core.methods import selectn from pandas.core.shared_docs import _shared_docs from pandas.core.sorting import ( @@ -200,7 +196,6 @@ Renamer, Scalar, Self, - SingleManager, SortKind, StorageOptions, Suffixes, @@ -385,7 +380,7 @@ class Series(base.IndexOpsMixin, NDFrame): # type: ignore[misc] base.IndexOpsMixin.hasnans.fget, # type: ignore[attr-defined] doc=base.IndexOpsMixin.hasnans.__doc__, ) - _mgr: SingleManager + _mgr: SingleBlockManager # ---------------------------------------------------------------------- # Constructors @@ -411,7 +406,7 @@ def __init__( allow_mgr = False if ( - isinstance(data, (SingleBlockManager, SingleArrayManager)) + isinstance(data, SingleBlockManager) and index is None and dtype is None and (copy is False or copy is None) @@ -450,12 +445,8 @@ def __init__( # we are called internally, so short-circuit if fastpath: # data is a ndarray, index is defined - if not isinstance(data, (SingleBlockManager, SingleArrayManager)): - manager = _get_option("mode.data_manager", silent=True) - if manager == "block": - data = SingleBlockManager.from_array(data, index) - elif manager == "array": - data = SingleArrayManager.from_array(data, index) + if not isinstance(data, SingleBlockManager): + data = SingleBlockManager.from_array(data, index) allow_mgr = True elif using_copy_on_write() and not copy: data = data.copy(deep=False) @@ -541,7 +532,7 @@ def __init__( data, index = self._init_dict(data, index, dtype) dtype = None copy = False - elif isinstance(data, (SingleBlockManager, SingleArrayManager)): + elif isinstance(data, SingleBlockManager): if index is None: index = data.index elif not data.index.equals(index) or copy: @@ -579,19 +570,14 @@ def __init__( com.require_length_match(data, index) # create/copy the manager - if isinstance(data, (SingleBlockManager, SingleArrayManager)): + if isinstance(data, SingleBlockManager): if dtype is not None: data = data.astype(dtype=dtype, errors="ignore", copy=copy) elif copy: data = data.copy() else: data = sanitize_array(data, index, dtype, copy) - - manager = _get_option("mode.data_manager", silent=True) - if manager == "block": - data = SingleBlockManager.from_array(data, index, refs=refs) - elif manager == "array": - data = SingleArrayManager.from_array(data, index) + data = SingleBlockManager.from_array(data, index, refs=refs) NDFrame.__init__(self, data) self.name = name @@ -863,9 +849,7 @@ def _values(self): return self._mgr.internal_values() @property - def _references(self) -> BlockValuesRefs | None: - if isinstance(self._mgr, SingleArrayManager): - return None + def _references(self) -> BlockValuesRefs: return self._mgr._block.refs # error: Decorated property not supported @@ -969,7 +953,6 @@ def view(self, dtype: Dtype | None = None) -> Series: res_ser = self._constructor(res_values, index=self.index, copy=False) if isinstance(res_ser._mgr, SingleBlockManager): blk = res_ser._mgr._block - blk.refs = cast("BlockValuesRefs", self._references) blk.refs.add_reference(blk) return res_ser.__finalize__(self, method="view") @@ -1205,7 +1188,7 @@ def _get_values_tuple(self, key: tuple): indexer, new_index = self.index.get_loc_level(key) new_ser = self._constructor(self._values[indexer], index=new_index, copy=False) if isinstance(indexer, slice): - new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] + new_ser._mgr.add_references(self._mgr) return new_ser.__finalize__(self) def _get_rows_with_mask(self, indexer: npt.NDArray[np.bool_]) -> Series: @@ -1247,7 +1230,7 @@ def _get_value(self, label, takeable: bool = False): new_values, index=new_index, name=self.name, copy=False ) if isinstance(loc, slice): - new_ser._mgr.add_references(self._mgr) # type: ignore[arg-type] + new_ser._mgr.add_references(self._mgr) return new_ser.__finalize__(self) else: @@ -1270,7 +1253,7 @@ def __setitem__(self, key, value) -> None: warn_copy_on_write() or ( not warn_copy_on_write() - and self._mgr.blocks[0].refs.has_reference() # type: ignore[union-attr] + and self._mgr.blocks[0].refs.has_reference() ) ): warn = False diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 6c679f7daa746..263128834783d 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -13,7 +13,6 @@ from warnings import catch_warnings from pandas._config import using_pyarrow_string_dtype -from pandas._config.config import _get_option from pandas._libs import lib from pandas.compat._optional import import_optional_dependency @@ -260,10 +259,6 @@ def read( elif using_pyarrow_string_dtype(): to_pandas_kwargs["types_mapper"] = arrow_string_types_mapper() - manager = _get_option("mode.data_manager", silent=True) - if manager == "array": - to_pandas_kwargs["split_blocks"] = True # type: ignore[assignment] - path_or_handle, handles, filesystem = _get_path_or_handle( path, filesystem, @@ -280,9 +275,6 @@ def read( ) result = pa_table.to_pandas(**to_pandas_kwargs) - if manager == "array": - result = result._as_manager("array", copy=False) - if pa_table.schema.metadata: if b"PANDAS_ATTRS" in pa_table.schema.metadata: df_metadata = pa_table.schema.metadata[b"PANDAS_ATTRS"] diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index c558a31bc90ee..1e11a9783f0e1 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -92,10 +92,6 @@ ) from pandas.core.construction import extract_array from pandas.core.indexes.api import ensure_index -from pandas.core.internals import ( - ArrayManager, - BlockManager, -) from pandas.io.common import stringify_path from pandas.io.formats.printing import ( @@ -3311,10 +3307,6 @@ def read( def write(self, obj, **kwargs) -> None: super().write(obj, **kwargs) - # TODO(ArrayManager) HDFStore relies on accessing the blocks - if isinstance(obj._mgr, ArrayManager): - obj = obj._as_manager("block") - data = obj._mgr if not data.is_consolidated(): data = data.consolidate() @@ -4123,16 +4115,10 @@ def _get_blocks_and_items( data_columns, ): # Helper to clarify non-state-altering parts of _create_axes - - # TODO(ArrayManager) HDFStore relies on accessing the blocks - if isinstance(frame._mgr, ArrayManager): - frame = frame._as_manager("block") - def get_blk_items(mgr): return [mgr.items.take(blk.mgr_locs) for blk in mgr.blocks] mgr = frame._mgr - mgr = cast(BlockManager, mgr) blocks: list[Block] = list(mgr.blocks) blk_items: list[Index] = get_blk_items(mgr) @@ -4144,7 +4130,6 @@ def get_blk_items(mgr): axis, axis_labels = new_non_index_axes[0] new_labels = Index(axis_labels).difference(Index(data_columns)) mgr = frame.reindex(new_labels, axis=axis)._mgr - mgr = cast(BlockManager, mgr) blocks = list(mgr.blocks) blk_items = get_blk_items(mgr) @@ -4153,7 +4138,6 @@ def get_blk_items(mgr): # index, so we can infer that (as long as axis==1) we # get a single column back, so a single block. mgr = frame.reindex([c], axis=axis)._mgr - mgr = cast(BlockManager, mgr) blocks.extend(mgr.blocks) blk_items.extend(get_blk_items(mgr)) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index fd1ad7322d6ca..ba303ab48aad4 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1853,10 +1853,6 @@ def _do_convert_missing(self, data: DataFrame, convert_missing: bool) -> DataFra if dtype not in (np.float32, np.float64): dtype = np.float64 replacement = Series(series, dtype=dtype) - if not replacement._values.flags["WRITEABLE"]: - # only relevant for ArrayManager; construction - # path for BlockManager ensures writeability - replacement = replacement.copy() # Note: operating on ._values is much faster than directly # TODO: can we fix that? replacement._values[missing] = np.nan diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 0839f005305a5..66a43f2ba4bcd 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1507,8 +1507,6 @@ def func(row): tm.assert_frame_equal(result, expected) if using_copy_on_write: # INFO(CoW) With copy on write, mutating a viewing row doesn't mutate the parent - # INFO(ArrayManager) With BlockManager, the row is a view and mutated in place, - # with ArrayManager the row is not a view, and thus not mutated in place tm.assert_frame_equal(df, df_orig) else: tm.assert_frame_equal(df, result) diff --git a/pandas/tests/copy_view/test_indexing.py b/pandas/tests/copy_view/test_indexing.py index 72cb410c9068d..c35a0b89585c3 100644 --- a/pandas/tests/copy_view/test_indexing.py +++ b/pandas/tests/copy_view/test_indexing.py @@ -215,7 +215,7 @@ def test_subset_loc_rows_columns( subset = df.loc[row_indexer, column_indexer] # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of ArrayManager or BlockManager with single block) + # slice, and in case of BlockManager with single block) mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) @@ -271,7 +271,7 @@ def test_subset_iloc_rows_columns( subset = df.iloc[row_indexer, column_indexer] # a few corner cases _do_ actually modify the parent (with both row and column - # slice, and in case of ArrayManager or BlockManager with single block) + # slice, and in case of BlockManager with single block) mutate_parent = ( isinstance(row_indexer, slice) and isinstance(column_indexer, slice) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 7ee2c23c5b23a..f8bb0c92cf59e 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -414,7 +414,6 @@ def test_setitem_frame_2d_values(self, data): tm.assert_frame_equal(df, orig) if not using_copy_on_write: # GH#33457 Check that this setting occurred in-place - # FIXME(ArrayManager): this should work there too assert df._mgr.arrays[0] is blk_data df.iloc[:-1] = df.values[:-1] diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index c5b1295ee4a7d..77f1dd2a8e49c 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,8 +2,6 @@ import pytest -from pandas._config.config import _get_option - from pandas import ( Series, options, @@ -224,7 +222,4 @@ def using_copy_on_write() -> bool: """ Fixture to check if Copy-on-Write is enabled. """ - return ( - options.mode.copy_on_write is True - and _get_option("mode.data_manager", silent=True) == "block" - ) + return options.mode.copy_on_write is True diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index e7f2c410bf4ac..84e642f47417b 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -553,27 +553,11 @@ def test_astype_to_incorrect_datetimelike(self, unit): other = f"m8[{unit}]" df = DataFrame(np.array([[1, 2, 3]], dtype=dtype)) - msg = "|".join( - [ - # BlockManager path - rf"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]", - # ArrayManager path - "cannot astype a datetimelike from " - rf"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]", - ] - ) + msg = rf"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]" with pytest.raises(TypeError, match=msg): df.astype(other) - msg = "|".join( - [ - # BlockManager path - rf"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]", - # ArrayManager path - "cannot astype a timedelta from " - rf"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]", - ] - ) + msg = rf"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]" df = DataFrame(np.array([[1, 2, 3]], dtype=other)) with pytest.raises(TypeError, match=msg): df.astype(dtype) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c66b6a0f8b99b..9ff2b52bd35ff 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -311,8 +311,6 @@ def test_constructor_dtype_nocast_view_2d_array( should_be_view.iloc[0, 0] = 97 assert df.values[0, 0] == 97 else: - # INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve - # a view on the array to ensure contiguous 1D arrays df2 = DataFrame(df.values, dtype=df[0].dtype) assert df2._mgr.arrays[0].flags.c_contiguous @@ -2504,8 +2502,6 @@ def get_base(obj): raise TypeError def check_views(c_only: bool = False): - # written to work for either BlockManager or ArrayManager - # Check that the underlying data behind df["c"] is still `c` # after setting with iloc. Since we don't know which entry in # df._mgr.arrays corresponds to df["c"], we just check that exactly diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index fa233619ad3a3..63c15fab76562 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1180,10 +1180,7 @@ def test_idxmax_axis_2(self, float_frame): def test_idxmax_mixed_dtype(self): # don't cast to object, which would raise in nanops dti = date_range("2016-01-01", periods=3) - - # Copying dti is needed for ArrayManager otherwise when we set - # df.loc[0, 3] = pd.NaT below it edits dti - df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti.copy(deep=True)}) + df = DataFrame({1: [0, 2, 1], 2: range(3)[::-1], 3: dti}) result = df.idxmax() expected = Series([1, 0, 2], index=[1, 2, 3]) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 5eeaa50e2c3b6..5acfb72c4a666 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -51,7 +51,6 @@ def test_slice_consolidate_invalidate_item_cache(self, using_copy_on_write): if not using_copy_on_write: tm.assert_almost_equal(df["bb"][0], 0.17) else: - # with ArrayManager, parent is not mutated with chained assignment tm.assert_almost_equal(df["bb"][0], 2.2) @pytest.mark.parametrize("do_ref", [True, False]) diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 1251a6ae97a1c..ccd7222fb16e1 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -23,7 +23,6 @@ def test_namespace(): "concat", "managers", "construction", - "array_manager", "base", "api", "ops", @@ -31,11 +30,9 @@ def test_namespace(): expected = [ "make_block", "DataManager", - "ArrayManager", "BlockManager", "SingleDataManager", "SingleBlockManager", - "SingleArrayManager", "concatenate_managers", ] diff --git a/pandas/tests/internals/test_managers.py b/pandas/tests/internals/test_managers.py deleted file mode 100644 index f40362c299717..0000000000000 --- a/pandas/tests/internals/test_managers.py +++ /dev/null @@ -1,103 +0,0 @@ -""" -Testing interaction between the different managers (BlockManager, ArrayManager) -""" -import os -import subprocess -import sys - -import pytest - -from pandas.core.dtypes.missing import array_equivalent - -import pandas as pd -import pandas._testing as tm -from pandas.core.internals import ( - ArrayManager, - BlockManager, - SingleArrayManager, - SingleBlockManager, -) - - -def test_dataframe_creation(): - msg = "data_manager option is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - with pd.option_context("mode.data_manager", "block"): - df_block = pd.DataFrame( - {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} - ) - assert isinstance(df_block._mgr, BlockManager) - - with tm.assert_produces_warning(FutureWarning, match=msg): - with pd.option_context("mode.data_manager", "array"): - df_array = pd.DataFrame( - {"a": [1, 2, 3], "b": [0.1, 0.2, 0.3], "c": [4, 5, 6]} - ) - assert isinstance(df_array._mgr, ArrayManager) - - # also ensure both are seen as equal - tm.assert_frame_equal(df_block, df_array) - - # conversion from one manager to the other - result = df_block._as_manager("block") - assert isinstance(result._mgr, BlockManager) - result = df_block._as_manager("array") - assert isinstance(result._mgr, ArrayManager) - tm.assert_frame_equal(result, df_block) - assert all( - array_equivalent(left, right) - for left, right in zip(result._mgr.arrays, df_array._mgr.arrays) - ) - - result = df_array._as_manager("array") - assert isinstance(result._mgr, ArrayManager) - result = df_array._as_manager("block") - assert isinstance(result._mgr, BlockManager) - tm.assert_frame_equal(result, df_array) - assert len(result._mgr.blocks) == 2 - - -def test_series_creation(): - msg = "data_manager option is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - with pd.option_context("mode.data_manager", "block"): - s_block = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) - assert isinstance(s_block._mgr, SingleBlockManager) - - with tm.assert_produces_warning(FutureWarning, match=msg): - with pd.option_context("mode.data_manager", "array"): - s_array = pd.Series([1, 2, 3], name="A", index=["a", "b", "c"]) - assert isinstance(s_array._mgr, SingleArrayManager) - - # also ensure both are seen as equal - tm.assert_series_equal(s_block, s_array) - - # conversion from one manager to the other - result = s_block._as_manager("block") - assert isinstance(result._mgr, SingleBlockManager) - result = s_block._as_manager("array") - assert isinstance(result._mgr, SingleArrayManager) - tm.assert_series_equal(result, s_block) - - result = s_array._as_manager("array") - assert isinstance(result._mgr, SingleArrayManager) - result = s_array._as_manager("block") - assert isinstance(result._mgr, SingleBlockManager) - tm.assert_series_equal(result, s_array) - - -@pytest.mark.single_cpu -@pytest.mark.parametrize("manager", ["block", "array"]) -def test_array_manager_depr_env_var(manager): - # GH#55043 - test_env = os.environ.copy() - test_env["PANDAS_DATA_MANAGER"] = manager - response = subprocess.run( - [sys.executable, "-c", "import pandas"], - capture_output=True, - env=test_env, - check=True, - ) - msg = "FutureWarning: The env variable PANDAS_DATA_MANAGER is set" - stderr_msg = response.stderr.decode("utf-8") - assert msg in stderr_msg, stderr_msg diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 83a962ec26a7e..bba53f7ff50a8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -9,7 +9,6 @@ import pytest from pandas._config import using_copy_on_write -from pandas._config.config import _get_option from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( @@ -45,8 +44,6 @@ _HAVE_FASTPARQUET = False -# TODO(ArrayManager) fastparquet relies on BlockManager internals - pytestmark = [ pytest.mark.filterwarnings("ignore:DataFrame._data is deprecated:FutureWarning"), pytest.mark.filterwarnings( @@ -61,9 +58,8 @@ pytest.param( "fastparquet", marks=pytest.mark.skipif( - not _HAVE_FASTPARQUET - or _get_option("mode.data_manager", silent=True) == "array", - reason="fastparquet is not installed or ArrayManager is used", + not _HAVE_FASTPARQUET, + reason="fastparquet is not installed", ), ), pytest.param( @@ -89,8 +85,6 @@ def pa(): def fp(): if not _HAVE_FASTPARQUET: pytest.skip("fastparquet is not installed") - elif _get_option("mode.data_manager", silent=True) == "array": - pytest.skip("ArrayManager is not supported with fastparquet") return "fastparquet" @@ -997,17 +991,6 @@ def test_filter_row_groups(self, pa): result = read_parquet(path, pa, filters=[("a", "==", 0)]) assert len(result) == 1 - def test_read_parquet_manager(self, pa): - # ensure that read_parquet honors the pandas.options.mode.data_manager option - df = pd.DataFrame( - np.random.default_rng(2).standard_normal((10, 3)), columns=["A", "B", "C"] - ) - - with tm.ensure_clean() as path: - df.to_parquet(path, engine=pa) - result = read_parquet(path, pa) - assert isinstance(result._mgr, pd.core.internals.BlockManager) - def test_read_dtype_backend_pyarrow_config(self, pa, df_full): import pyarrow diff --git a/requirements-dev.txt b/requirements-dev.txt index 13691d114ece4..162e6caebcd8a 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,7 +3,7 @@ pip versioneer[toml] -cython~==3.0.5 +cython~=3.0.5 meson[ninja]==1.2.1 meson-python==0.13.1 pytest>=7.3.2 diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 5fcf09cd073fe..d54d35bc0171f 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -45,7 +45,7 @@ def conda_package_to_pip(package: str): - A package requiring a specific version, in conda is defined with a single equal (e.g. ``pandas=1.0``) and in pip with two (e.g. ``pandas==1.0``) """ - package = re.sub("(?<=[^<>])=", "==", package).strip() + package = re.sub("(?<=[^<>~])=", "==", package).strip() for compare in ("<=", ">=", "=="): if compare in package: diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 698268ce382f8..6f36f7f47cb88 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -56,7 +56,7 @@ "_version_meson", # The numba extensions need this to mock the iloc object "_iLocIndexer", - # TODO(3.0): GH#55043 - remove upon removal of ArrayManager + # TODO(4.0): GH#55043 - remove upon removal of CoW option "_get_option", "_fill_limit_area_1d", }