Skip to content

Commit 6f8e79d

Browse files
phofl
authored and
im-vinicius
committed
BUG: convert_dtypes ignoring convert keywords for pyarrow backend (pandas-dev#52872)
1 parent 0cf70eb commit 6f8e79d

File tree

3 files changed

+38
-14
lines changed

3 files changed

+38
-14
lines changed

doc/source/whatsnew/v2.0.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ Bug fixes
2222
~~~~~~~~~
2323
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
2424
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
25+
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` with ``dtype_backend="pyarrow"`` (:issue:`52872`)
2526
- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
2627
-
2728

pandas/core/dtypes/cast.py

+23-14
Original file line numberDiff line numberDiff line change
@@ -1106,20 +1106,29 @@ def convert_dtypes(
11061106
from pandas.core.arrays.arrow.dtype import ArrowDtype
11071107
from pandas.core.arrays.string_ import StringDtype
11081108

1109-
if isinstance(inferred_dtype, PandasExtensionDtype):
1110-
base_dtype = inferred_dtype.base
1111-
elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
1112-
base_dtype = inferred_dtype.numpy_dtype
1113-
elif isinstance(inferred_dtype, StringDtype):
1114-
base_dtype = np.dtype(str)
1115-
else:
1116-
# error: Incompatible types in assignment (expression has type
1117-
# "Union[str, Any, dtype[Any], ExtensionDtype]",
1118-
# variable has type "Union[dtype[Any], ExtensionDtype, None]")
1119-
base_dtype = inferred_dtype # type: ignore[assignment]
1120-
pa_type = to_pyarrow_type(base_dtype)
1121-
if pa_type is not None:
1122-
inferred_dtype = ArrowDtype(pa_type)
1109+
assert not isinstance(inferred_dtype, str)
1110+
1111+
if (
1112+
(convert_integer and inferred_dtype.kind in "iu")
1113+
or (convert_floating and inferred_dtype.kind in "fc")
1114+
or (convert_boolean and inferred_dtype.kind == "b")
1115+
or (convert_string and isinstance(inferred_dtype, StringDtype))
1116+
or (
1117+
inferred_dtype.kind not in "iufcb"
1118+
and not isinstance(inferred_dtype, StringDtype)
1119+
)
1120+
):
1121+
if isinstance(inferred_dtype, PandasExtensionDtype):
1122+
base_dtype = inferred_dtype.base
1123+
elif isinstance(inferred_dtype, (BaseMaskedDtype, ArrowDtype)):
1124+
base_dtype = inferred_dtype.numpy_dtype
1125+
elif isinstance(inferred_dtype, StringDtype):
1126+
base_dtype = np.dtype(str)
1127+
else:
1128+
base_dtype = inferred_dtype
1129+
pa_type = to_pyarrow_type(base_dtype)
1130+
if pa_type is not None:
1131+
inferred_dtype = ArrowDtype(pa_type)
11231132

11241133
# error: Incompatible return value type (got "Union[str, Union[dtype[Any],
11251134
# ExtensionDtype]]", expected "Union[dtype[Any], ExtensionDtype]")

pandas/tests/frame/methods/test_convert_dtypes.py

+14
Original file line numberDiff line numberDiff line change
@@ -134,3 +134,17 @@ def test_pyarrow_engine_lines_false(self):
134134
)
135135
with pytest.raises(ValueError, match=msg):
136136
df.convert_dtypes(dtype_backend="numpy")
137+
138+
def test_pyarrow_backend_no_convesion(self):
139+
# GH#52872
140+
pytest.importorskip("pyarrow")
141+
df = pd.DataFrame({"a": [1, 2], "b": 1.5, "c": True, "d": "x"})
142+
expected = df.copy()
143+
result = df.convert_dtypes(
144+
convert_floating=False,
145+
convert_integer=False,
146+
convert_boolean=False,
147+
convert_string=False,
148+
dtype_backend="pyarrow",
149+
)
150+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)