Skip to content

Commit cb83977

Browse files
authored
BUG: setitem into td64/dt64 series/frame with Categorical[strings] (#44236)
1 parent 62c238e commit cb83977

File tree

4 files changed

+26
-4
lines changed

4 files changed

+26
-4
lines changed

doc/source/whatsnew/v1.4.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -535,7 +535,7 @@ Indexing
535535
- Bug in :meth:`DataFrame.sort_index` where ``ignore_index=True`` was not being respected when the index was already sorted (:issue:`43591`)
536536
- Bug in :meth:`Index.get_indexer_non_unique` when index contains multiple ``np.datetime64("NaT")`` and ``np.timedelta64("NaT")`` (:issue:`43869`)
537537
- Bug in setting a scalar :class:`Interval` value into a :class:`Series` with ``IntervalDtype`` when the scalar's sides are floats and the values' sides are integers (:issue:`44201`)
538-
-
538+
- Bug when setting string-backed :class:`Categorical` values that can be parsed to datetimes into a :class:`DatetimeArray`, or into a :class:`Series` or :class:`DataFrame` column backed by a :class:`DatetimeArray`, where the strings failed to be parsed (:issue:`44236`)
539539

540540

541541
Missing

pandas/core/arrays/datetimelike.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@
6868
from pandas.util._exceptions import find_stack_level
6969

7070
from pandas.core.dtypes.common import (
71+
is_all_strings,
7172
is_categorical_dtype,
7273
is_datetime64_any_dtype,
7374
is_datetime64_dtype,
@@ -720,7 +721,7 @@ def _validate_listlike(self, value, allow_object: bool = False):
720721
value = pd_array(value)
721722
value = extract_array(value, extract_numpy=True)
722723

723-
if is_dtype_equal(value.dtype, "string"):
724+
if is_all_strings(value):
724725
# We got a StringArray, an object-dtype ndarray of strings, or a Categorical with all-string categories
725726
try:
726727
# TODO: Could use from_sequence_of_strings if implemented

pandas/core/dtypes/common.py

+21
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Interval,
1616
Period,
1717
algos,
18+
lib,
1819
)
1920
from pandas._libs.tslibs import conversion
2021
from pandas._typing import (
@@ -1788,3 +1789,23 @@ def pandas_dtype(dtype) -> DtypeObj:
17881789
raise TypeError(f"dtype '{dtype}' not understood")
17891790

17901791
return npdtype
1792+
1793+
1794+
def is_all_strings(value: ArrayLike) -> bool:
    """
    Report whether ``value`` is an all-strings array worth trying to parse.

    True is returned for:

    - an object-dtype ndarray whose inferred type is "string",
    - an array whose dtype compares equal to "string" (StringArray),
    - a Categorical whose categories have inferred type "string".

    numpy string dtypes are deliberately not treated as all-strings.
    """
    dtype = value.dtype

    if isinstance(dtype, CategoricalDtype):
        # Only the categories are inspected, not the individual values.
        return dtype.categories.inferred_type == "string"

    if not isinstance(dtype, np.dtype):
        # Remaining extension dtypes: match only the "string" dtype.
        return dtype == "string"

    # numpy-backed: anything other than object dtype is rejected up front,
    # which is what excludes numpy's own string dtypes.
    if dtype != np.dtype("object"):
        return False
    return lib.infer_dtype(value, skipna=False) == "string"

pandas/tests/indexing/test_indexing.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -871,7 +871,7 @@ def test_setitem_dt64_string_scalar(self, tz_naive_fixture, indexer_sli):
871871
else:
872872
assert ser._values is values
873873

874-
@pytest.mark.parametrize("box", [list, np.array, pd.array])
874+
@pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index])
875875
@pytest.mark.parametrize(
876876
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
877877
)
@@ -911,7 +911,7 @@ def test_setitem_td64_scalar(self, indexer_sli, scalar):
911911
indexer_sli(ser)[0] = scalar
912912
assert ser._values._data is values._data
913913

914-
@pytest.mark.parametrize("box", [list, np.array, pd.array])
914+
@pytest.mark.parametrize("box", [list, np.array, pd.array, pd.Categorical, Index])
915915
@pytest.mark.parametrize(
916916
"key", [[0, 1], slice(0, 2), np.array([True, True, False])]
917917
)

0 commit comments

Comments
 (0)