diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 69db055fe87..3cc42dbe982 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -2055,18 +2055,28 @@ def _make_operands_and_index_for_binop( dict[str | None, tuple[ColumnBase, Any, bool, Any]] | NotImplementedType, BaseIndex | None, - bool, + dict[str, Any], ]: lhs, rhs = self._data, other index = self.index fill_requires_key = False left_default: Any = False equal_columns = False - can_use_self_column_name = True + ca_attributes: dict[str, Any] = {} + + def _fill_same_ca_attributes( + attrs: dict[str, Any], ca: ColumnAccessor + ) -> dict[str, Any]: + attrs["rangeindex"] = ca.rangeindex + attrs["multiindex"] = ca.multiindex + attrs["label_dtype"] = ca.label_dtype + attrs["level_names"] = ca.level_names + return attrs if _is_scalar_or_zero_d_array(other): rhs = {name: other for name in self._data} equal_columns = True + ca_attributes = _fill_same_ca_attributes(ca_attributes, self._data) elif isinstance(other, Series): if ( not (self_pd_columns := self._data.to_pandas_index).equals( @@ -2085,9 +2095,12 @@ def _make_operands_and_index_for_binop( # NULL!) and the right value (result is NaN). left_default = as_column(np.nan, length=len(self)) equal_columns = other_pd_index.equals(self_pd_columns) - can_use_self_column_name = ( - equal_columns or other_pd_index.names == self_pd_columns.names - ) + if equal_columns: + ca_attributes = _fill_same_ca_attributes( + ca_attributes, self._data + ) + elif other_pd_index.names == self_pd_columns.names: + ca_attributes["level_names"] = self._data.level_names elif isinstance(other, DataFrame): if ( not can_reindex @@ -2110,17 +2123,19 @@ def _make_operands_and_index_for_binop( # the fill value. left_default = fill_value equal_columns = self._column_names == other._column_names - can_use_self_column_name = ( - equal_columns - or self._data._level_names == other._data._level_names - ) + if self._data.to_pandas_index.equals(other._data.to_pandas_index): + ca_attributes = _fill_same_ca_attributes( + ca_attributes, self._data + ) + elif self._data._level_names == other._data._level_names: + ca_attributes["level_names"] = self._data.level_names elif isinstance(other, (dict, abc.Mapping)): # Need to fail early on host mapping types because we ultimately # convert everything to a dict. - return NotImplemented, None, True + return NotImplemented, None, ca_attributes if not isinstance(rhs, (dict, abc.Mapping)): - return NotImplemented, None, True + return NotImplemented, None, ca_attributes operands = { k: ( @@ -2150,8 +2165,8 @@ def _make_operands_and_index_for_binop( raise ValueError("other must be a DataFrame or Series.") sorted_dict = {key: operands[key] for key in column_names_list} - return sorted_dict, index, can_use_self_column_name - return operands, index, can_use_self_column_name + return sorted_dict, index, ca_attributes + return operands, index, ca_attributes @classmethod @_performance_tracking diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 9d426ad6bf7..8a625dc9225 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -4888,20 +4888,16 @@ def _binaryop( ( operands, out_index, - can_use_self_column_name, + ca_attributes, ) = self._make_operands_and_index_for_binop( other, op, fill_value, reflect, can_reindex ) if operands is NotImplemented: return NotImplemented - - level_names = ( - self._data._level_names if can_use_self_column_name else None - ) return self._from_data( ColumnAccessor( type(self)._colwise_binop(operands, op), - level_names=level_names, + **ca_attributes, ), index=out_index, ) @@ -4917,7 +4913,7 @@ def _make_operands_and_index_for_binop( dict[str | None, tuple[ColumnBase, Any, bool, Any]] | NotImplementedType, cudf.BaseIndex | None, - bool, + dict[str, Any], ]: raise NotImplementedError( f"Binary operations are not supported for {self.__class__}" diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index f6f1b31dc43..d25550553b1 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -1531,7 +1531,7 @@ def _make_operands_and_index_for_binop( dict[str | None, tuple[ColumnBase, Any, bool, Any]] | NotImplementedType, BaseIndex | None, - bool, + dict[str, Any], ]: # Specialize binops to align indices. if isinstance(other, Series): @@ -1547,15 +1547,14 @@ def _make_operands_and_index_for_binop( else: lhs = self - try: - can_use_self_column_name = cudf.utils.utils._is_same_name( - self.name, other.name - ) - except AttributeError: - can_use_self_column_name = False + ca_attributes = {} + if hasattr(other, "name") and cudf.utils.utils._is_same_name( + self.name, other.name + ): + ca_attributes["level_names"] = self._data._level_names operands = lhs._make_operands_for_binop(other, fill_value, reflect) - return operands, lhs.index, can_use_self_column_name + return operands, lhs.index, ca_attributes @copy_docstring(CategoricalAccessor) # type: ignore @property diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index 15c11db5a84..d6bbbf601be 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -11083,6 +11083,21 @@ def test_dataframe_columns_set_preserve_type(klass): pd.testing.assert_index_equal(result, expected) +@pytest.mark.parametrize( + "expected", + [ + pd.RangeIndex(1, 2, name="a"), + pd.Index([1], dtype=np.int8, name="a"), + pd.MultiIndex.from_arrays([[1]], names=["a"]), + ], +) +@pytest.mark.parametrize("binop", [lambda df: df == df, lambda df: df - 1]) +def test_dataframe_binop_preserves_column_metadata(expected, binop): + df = cudf.DataFrame([1], columns=expected) + result = binop(df).columns + pd.testing.assert_index_equal(result, expected, exact=True) + + @pytest.mark.parametrize( "scalar", [