diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index d93a3f26934a0..65f95dab7b42f 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4093,6 +4093,8 @@ def _create_axes( ordered = data_converted.ordered meta = "category" metadata = np.asarray(data_converted.categories).ravel() + elif isinstance(blk.dtype, StringDtype): + meta = str(blk.dtype) data, dtype_name = _get_data_and_dtype_name(data_converted) @@ -4360,7 +4362,9 @@ def read_column( encoding=self.encoding, errors=self.errors, ) - return Series(_set_tz(col_values[1], a.tz), name=column, copy=False) + cvs = _set_tz(col_values[1], a.tz) + dtype = getattr(self.table.attrs, f"{column}_meta", None) + return Series(cvs, name=column, copy=False, dtype=dtype) raise KeyError(f"column [{column}] not found in the table") @@ -4708,8 +4712,18 @@ def read( df = DataFrame._from_arrays([values], columns=cols_, index=index_) if not (using_string_dtype() and values.dtype.kind == "O"): assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) + + # If str / string dtype is stored in meta, use that. + converted = False + for column in cols_: + dtype = getattr(self.table.attrs, f"{column}_meta", None) + if dtype in ["str", "string"]: + df[column] = df[column].astype(dtype) + converted = True + # Otherwise try inference. if ( - using_string_dtype() + not converted + and using_string_dtype() and isinstance(values, np.ndarray) and is_string_array( values, diff --git a/pandas/tests/io/pytables/test_append.py b/pandas/tests/io/pytables/test_append.py index 39c203c558a5b..d0246c8f58d6a 100644 --- a/pandas/tests/io/pytables/test_append.py +++ b/pandas/tests/io/pytables/test_append.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs import Timestamp import pandas.util._test_decorators as td @@ -507,7 +505,6 @@ def test_append_with_empty_string(setup_path): tm.assert_frame_equal(store.select("df"), df) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_append_with_data_columns(setup_path): with ensure_clean_store(setup_path) as store: df = DataFrame( diff --git a/pandas/tests/io/pytables/test_categorical.py b/pandas/tests/io/pytables/test_categorical.py index a875e19ea7f0e..449bc5cf1fc57 100644 --- a/pandas/tests/io/pytables/test_categorical.py +++ b/pandas/tests/io/pytables/test_categorical.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( Categorical, DataFrame, @@ -140,7 +138,6 @@ def test_categorical(setup_path): store.select("df3/meta/s/meta") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_categorical_conversion(tmp_path, setup_path): # GH13322 # Check that read_hdf with categorical columns doesn't return rows if diff --git a/pandas/tests/io/pytables/test_read.py b/pandas/tests/io/pytables/test_read.py index bfebf18c0e0ab..5bec673ad3c70 100644 --- a/pandas/tests/io/pytables/test_read.py +++ b/pandas/tests/io/pytables/test_read.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs import Timestamp from pandas.compat import is_platform_windows @@ -74,7 +72,6 @@ def test_read_missing_key_opened_store(tmp_path, setup_path): read_hdf(store, "k1") -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_read_column(setup_path): df = DataFrame( np.random.default_rng(2).standard_normal((10, 4)), diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index f781b6756fec9..e76934745f004 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas._libs.tslibs import Timestamp import pandas as pd @@ -651,7 +649,6 @@ def test_frame_select(setup_path): # store.select('frame', [crit1, crit2]) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_frame_select_complex(setup_path): # select via complex criteria @@ -965,7 +962,6 @@ def test_query_long_float_literal(setup_path): tm.assert_frame_equal(expected, result) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") def test_query_compare_column_type(setup_path): # GH 15492 df = DataFrame(