From cc3d78beca55cb8ecd83a6c3bda5f77aaf75a498 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Oct 2021 19:04:13 -0700 Subject: [PATCH 1/3] BUG: setitem into td64/dt64 series/frame with Categorical[strings] --- pandas/core/arrays/datetimelike.py | 15 ++++++++++++++- pandas/tests/indexing/test_indexing.py | 4 ++-- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 72c00dfe7c65a..cfe9d4b0f2fe8 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -136,6 +136,19 @@ DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. + """ + dtype = value.dtype + + if isinstance(dtype, np.dtype): + return dtype == object and lib.infer_dtype(value, skipna=False) == "string" + elif is_categorical_dtype(dtype): + return dtype.categories.inferred_type == "string" + return dtype == "string" + + class InvalidComparison(Exception): """ Raised by _validate_comparison_value to indicate to caller it should @@ -720,7 +733,7 @@ def _validate_listlike(self, value, allow_object: bool = False): value = pd_array(value) value = extract_array(value, extract_numpy=True) - if is_dtype_equal(value.dtype, "string"): + if is_all_strings(value): # We got a StringArray try: # TODO: Could use from_sequence_of_strings if implemented diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 4604fad019eca..d6402e027be98 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -871,7 +871,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli): else: assert ser._values is values - @pytest.mark.parametrize("box", [list, np.array, pd.array]) + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, True, False])] ) @@ -911,7 +911,7 @@ def test_setitem_td64_scalar(self, indexer_sli, scalar): indexer_sli(ser)[0] = scalar assert ser._values._data is values._data - @pytest.mark.parametrize("box", [list, np.array, pd.array]) + @pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index]) @pytest.mark.parametrize( "key", [[0, 1], slice(0, 2), np.array([True, True, False])] ) From 0412d71e6776062e57632e8ce636ae2931688afa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 29 Oct 2021 21:46:49 -0700 Subject: [PATCH 2/3] mypy fixup --- pandas/core/arrays/datetimelike.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index cfe9d4b0f2fe8..8e53a749b0282 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -85,6 +85,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( + CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PeriodDtype, @@ -143,8 +144,11 @@ def is_all_strings(value: ArrayLike) -> bool: dtype = value.dtype if isinstance(dtype, np.dtype): - return dtype == object and lib.infer_dtype(value, skipna=False) == "string" - elif is_categorical_dtype(dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): return dtype.categories.inferred_type == "string" return dtype == "string" From e0825f04aa07ffbbfbeca3ad216c3c6fac39f5b5 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 30 Oct 2021 22:26:46 -0700 Subject: [PATCH 3/3] whatsnew, move func to dtypes.common --- doc/source/whatsnew/v1.4.0.rst | 2 +- pandas/core/arrays/datetimelike.py | 18 +----------------- pandas/core/dtypes/common.py | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 18 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 699d8a81243db..30f8da25c06dc 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -531,7 +531,7 @@ Indexing - Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`) - Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`) - Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`) -- +- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray` or :class:`Series` or :class:`DataFrame` column backed by :class:`DatetimeArray` failing to parse these strings (:issue:`44236`) Missing diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 8e53a749b0282..f8aa1656c8c30 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -68,6 +68,7 @@ from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import ( + is_all_strings, is_categorical_dtype, is_datetime64_any_dtype, is_datetime64_dtype, @@ -85,7 +86,6 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import ( - CategoricalDtype, DatetimeTZDtype, ExtensionDtype, PeriodDtype, @@ -137,22 +137,6 @@ DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin") -def is_all_strings(value: ArrayLike) -> bool: - """ - Check if this is an array of strings that we should try parsing. - """ - dtype = value.dtype - - if isinstance(dtype, np.dtype): - return ( - dtype == np.dtype("object") - and lib.infer_dtype(value, skipna=False) == "string" - ) - elif isinstance(dtype, CategoricalDtype): - return dtype.categories.inferred_type == "string" - return dtype == "string" - - class InvalidComparison(Exception): """ Raised by _validate_comparison_value to indicate to caller it should diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 0788ecdd8b4b5..815a0a2040ddb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -15,6 +15,7 @@ Interval, Period, algos, + lib, ) from pandas._libs.tslibs import conversion from pandas._typing import ( @@ -1788,3 +1789,23 @@ def pandas_dtype(dtype) -> DtypeObj: raise TypeError(f"dtype '{dtype}' not understood") return npdtype + + +def is_all_strings(value: ArrayLike) -> bool: + """ + Check if this is an array of strings that we should try parsing. + + Includes object-dtype ndarray containing all-strings, StringArray, + and Categorical with all-string categories. + Does not include numpy string dtypes. + """ + dtype = value.dtype + + if isinstance(dtype, np.dtype): + return ( + dtype == np.dtype("object") + and lib.infer_dtype(value, skipna=False) == "string" + ) + elif isinstance(dtype, CategoricalDtype): + return dtype.categories.inferred_type == "string" + return dtype == "string"