Skip to content

Commit

Permalink
Backport PR #60943: BUG(string dtype): Resolve pytables xfail when reading with condition (#60967)
Browse files Browse the repository at this point in the history

* ENH: Improved error message and raise new error for small-string NaN edge case in HDFStore.append (#60829)

* Add clearer error messages for datatype mismatch in HDFStore.append. Raise ValueError when nan_rep is too large for the pytables column. Add and modify applicable test code.

* Fix missed tests and correct mistake in error message.

* Remove excess comments. Reverse error type change to avoid api changes. Move nan_rep tests into separate function.

(cherry picked from commit 57340ec)

* TST(string dtype): Resolve xfails in pytables (#60795)

(cherry picked from commit 4511251)

* BUG(string dtype): Resolve pytables xfail when reading with condition (#60943)

(cherry picked from commit 0ec5f26)

---------

Co-authored-by: Jake Thomas Trevallion <136272202+JakeTT404@users.noreply.github.com>
  • Loading branch information
rhshadrach and JakeTT404 authored Feb 20, 2025
1 parent 3143f44 commit b8624cb
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 15 deletions.
18 changes: 16 additions & 2 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4093,6 +4093,8 @@ def _create_axes(
ordered = data_converted.ordered
meta = "category"
metadata = np.asarray(data_converted.categories).ravel()
elif isinstance(blk.dtype, StringDtype):
meta = str(blk.dtype)

data, dtype_name = _get_data_and_dtype_name(data_converted)

Expand Down Expand Up @@ -4360,7 +4362,9 @@ def read_column(
encoding=self.encoding,
errors=self.errors,
)
return Series(_set_tz(col_values[1], a.tz), name=column, copy=False)
cvs = _set_tz(col_values[1], a.tz)
dtype = getattr(self.table.attrs, f"{column}_meta", None)
return Series(cvs, name=column, copy=False, dtype=dtype)

raise KeyError(f"column [{column}] not found in the table")

Expand Down Expand Up @@ -4708,8 +4712,18 @@ def read(
df = DataFrame._from_arrays([values], columns=cols_, index=index_)
if not (using_string_dtype() and values.dtype.kind == "O"):
assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype)

# If str / string dtype is stored in meta, use that.
converted = False
for column in cols_:
dtype = getattr(self.table.attrs, f"{column}_meta", None)
if dtype in ["str", "string"]:
df[column] = df[column].astype(dtype)
converted = True
# Otherwise try inference.
if (
using_string_dtype()
not converted
and using_string_dtype()
and isinstance(values, np.ndarray)
and is_string_array(
values,
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs import Timestamp
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -507,7 +505,6 @@ def test_append_with_empty_string(setup_path):
tm.assert_frame_equal(store.select("df"), df)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_append_with_data_columns(setup_path):
with ensure_clean_store(setup_path) as store:
df = DataFrame(
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_categorical.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
Categorical,
DataFrame,
Expand Down Expand Up @@ -140,7 +138,6 @@ def test_categorical(setup_path):
store.select("df3/meta/s/meta")


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_categorical_conversion(tmp_path, setup_path):
# GH13322
# Check that read_hdf with categorical columns doesn't return rows if
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_read.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

Expand Down Expand Up @@ -74,7 +72,6 @@ def test_read_missing_key_opened_store(tmp_path, setup_path):
read_hdf(store, "k1")


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_read_column(setup_path):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/io/pytables/test_select.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs import Timestamp

import pandas as pd
Expand Down Expand Up @@ -651,7 +649,6 @@ def test_frame_select(setup_path):
# store.select('frame', [crit1, crit2])


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_frame_select_complex(setup_path):
# select via complex criteria

Expand Down Expand Up @@ -965,7 +962,6 @@ def test_query_long_float_literal(setup_path):
tm.assert_frame_equal(expected, result)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_query_compare_column_type(setup_path):
# GH 15492
df = DataFrame(
Expand Down

0 comments on commit b8624cb

Please sign in to comment.