|
30 | 30 | from pandas._config import (
|
31 | 31 | config,
|
32 | 32 | get_option,
|
| 33 | + using_pyarrow_string_dtype, |
33 | 34 | )
|
34 | 35 |
|
35 | 36 | from pandas._libs import (
|
36 | 37 | lib,
|
37 | 38 | writers as libwriters,
|
38 | 39 | )
|
| 40 | +from pandas._libs.lib import is_string_array |
39 | 41 | from pandas._libs.tslibs import timezones
|
40 | 42 | from pandas.compat._optional import import_optional_dependency
|
41 | 43 | from pandas.compat.pickle_compat import patch_pickle
|
|
66 | 68 | )
|
67 | 69 | from pandas.core.dtypes.missing import array_equivalent
|
68 | 70 |
|
| 71 | +import pandas as pd |
69 | 72 | from pandas import (
|
70 | 73 | DataFrame,
|
71 | 74 | DatetimeIndex,
|
@@ -3219,7 +3222,12 @@ def read(
|
3219 | 3222 | self.validate_read(columns, where)
|
3220 | 3223 | index = self.read_index("index", start=start, stop=stop)
|
3221 | 3224 | values = self.read_array("values", start=start, stop=stop)
|
3222 |
| - return Series(values, index=index, name=self.name, copy=False) |
| 3225 | + result = Series(values, index=index, name=self.name, copy=False) |
| 3226 | + if using_pyarrow_string_dtype() and is_string_array(values, skipna=True): |
| 3227 | + import pyarrow as pa |
| 3228 | + |
| 3229 | + result = result.astype(pd.ArrowDtype(pa.string())) |
| 3230 | + return result |
3223 | 3231 |
|
3224 | 3232 | # error: Signature of "write" incompatible with supertype "Fixed"
|
3225 | 3233 | def write(self, obj, **kwargs) -> None: # type: ignore[override]
|
@@ -3287,6 +3295,10 @@ def read(
|
3287 | 3295 |
|
3288 | 3296 | columns = items[items.get_indexer(blk_items)]
|
3289 | 3297 | df = DataFrame(values.T, columns=columns, index=axes[1], copy=False)
|
| 3298 | + if using_pyarrow_string_dtype() and is_string_array(values, skipna=True): |
| 3299 | + import pyarrow as pa |
| 3300 | + |
| 3301 | + df = df.astype(pd.ArrowDtype(pa.string())) |
3290 | 3302 | dfs.append(df)
|
3291 | 3303 |
|
3292 | 3304 | if len(dfs) > 0:
|
@@ -4668,7 +4680,15 @@ def read(
|
4668 | 4680 | else:
|
4669 | 4681 | # Categorical
|
4670 | 4682 | df = DataFrame._from_arrays([values], columns=cols_, index=index_)
|
4671 |
| - assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) |
| 4683 | + if not (using_pyarrow_string_dtype() and values.dtype.kind == "O"): |
| 4684 | + assert (df.dtypes == values.dtype).all(), (df.dtypes, values.dtype) |
| 4685 | + if using_pyarrow_string_dtype() and is_string_array( |
| 4686 | + values, # type: ignore[arg-type] |
| 4687 | + skipna=True, |
| 4688 | + ): |
| 4689 | + import pyarrow as pa |
| 4690 | + |
| 4691 | + df = df.astype(pd.ArrowDtype(pa.string())) |
4672 | 4692 | frames.append(df)
|
4673 | 4693 |
|
4674 | 4694 | if len(frames) == 1:
|
|
0 commit comments