From 9ea24b8e031845a6b6e40f5771abee30c6f33e52 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Mon, 14 Dec 2020 09:11:02 -0500 Subject: [PATCH] Backport PR #38427: REGR: Assigning label with registered EA dtype raises --- doc/source/whatsnew/v1.2.0.rst | 2 +- pandas/core/dtypes/common.py | 2 +- pandas/io/parsers.py | 24 ++++++++++----------- pandas/tests/dtypes/test_common.py | 2 ++ pandas/tests/frame/indexing/test_setitem.py | 20 +++++++++++++++++ 5 files changed, 35 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 4906288cc07d9..e2521cedb64cc 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -858,7 +858,7 @@ Other - Bug in :meth:`Index.drop` raising ``InvalidIndexError`` when index has duplicates (:issue:`38051`) - Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) - Fixed bug in :func:`assert_series_equal` when comparing a datetime-like array with an equivalent non extension dtype array (:issue:`37609`) - +- Bug in :func:`.is_bool_dtype` would raise when passed a valid string such as ``"boolean"`` (:issue:`38386`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b4f6d587c6642..d8b0ad739b056 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1397,7 +1397,7 @@ def is_bool_dtype(arr_or_dtype) -> bool: # guess this return arr_or_dtype.is_object and arr_or_dtype.inferred_type == "boolean" elif is_extension_array_dtype(arr_or_dtype): - return getattr(arr_or_dtype, "dtype", arr_or_dtype)._is_boolean + return getattr(dtype, "_is_boolean", False) return issubclass(dtype.type, np.bool_) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 44d3c8de0ae23..fcbf7ec3897fc 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1689,9 +1689,8 @@ def _convert_to_ndarrays( values, set(col_na_values) | col_na_fvalues, try_num_bool=False ) else: - is_str_or_ea_dtype = is_string_dtype( - cast_type - ) or is_extension_array_dtype(cast_type) + is_ea = is_extension_array_dtype(cast_type) + is_str_or_ea_dtype = is_ea or is_string_dtype(cast_type) # skip inference if specified dtype is object # or casting to an EA try_num_bool = not (cast_type and is_str_or_ea_dtype) @@ -1706,16 +1705,15 @@ def _convert_to_ndarrays( not is_dtype_equal(cvals, cast_type) or is_extension_array_dtype(cast_type) ): - try: - if ( - is_bool_dtype(cast_type) - and not is_categorical_dtype(cast_type) - and na_count > 0 - ): - raise ValueError(f"Bool column has NA values in column {c}") - except (AttributeError, TypeError): - # invalid input to is_bool_dtype - pass + if not is_ea and na_count > 0: + try: + if is_bool_dtype(cast_type): + raise ValueError( + f"Bool column has NA values in column {c}" + ) + except (AttributeError, TypeError): + # invalid input to is_bool_dtype + pass cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index ce6737db44195..19d80b714a674 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -545,6 +545,7 @@ def test_is_bool_dtype(): assert not com.is_bool_dtype(pd.Series([1, 2])) assert not com.is_bool_dtype(np.array(["a", "b"])) assert not com.is_bool_dtype(pd.Index(["a", "b"])) + assert not com.is_bool_dtype("Int64") assert com.is_bool_dtype(bool) assert com.is_bool_dtype(np.bool_) @@ -553,6 +554,7 @@ def test_is_bool_dtype(): assert com.is_bool_dtype(pd.BooleanDtype()) assert com.is_bool_dtype(pd.array([True, False, None], dtype="boolean")) + assert com.is_bool_dtype("boolean") @pytest.mark.filterwarnings("ignore:'is_extension_type' is deprecated:FutureWarning") diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 884cb6c20b77e..19d2f8301037a 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas.core.dtypes.base import registry as ea_registry from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype from pandas import ( @@ -197,6 +198,25 @@ def test_setitem_extension_types(self, obj, dtype): tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize( + "ea_name", + [ + dtype.name + for dtype in ea_registry.dtypes + # property would require instantiation + if not isinstance(dtype.name, property) + ] + # mypy doesn't allow adding lists of different types + # https://github.com/python/mypy/issues/5492 + + ["datetime64[ns, UTC]", "period[D]"], # type: ignore[list-item] + ) + def test_setitem_with_ea_name(self, ea_name): + # GH 38386 + result = DataFrame([0]) + result[ea_name] = [1] + expected = DataFrame({0: [0], ea_name: [1]}) + tm.assert_frame_equal(result, expected) + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): # GH#7492 data_ns = np.array([1, "nat"], dtype="datetime64[ns]")