Skip to content

Commit e256811

Browse files
MarcoGorelli authored and proost committed
BUG: make tz_localize operate on values rather than categories (pandas-dev#28300)
1 parent 409a3c8 commit e256811

File tree

3 files changed

+42
-4
lines changed

3 files changed

+42
-4
lines changed

doc/source/whatsnew/v1.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,9 @@ Categorical
288288
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`)
289289
- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`)
290290
- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`)
291+
- Using date accessors on a categorical-dtyped :class:`Series` of datetimes was not returning an object of the
292+
same type as if one used the :meth:`.str.` / :meth:`.dt.` on a :class:`Series` of that type. E.g. when accessing :meth:`Series.dt.tz_localize` on a
293+
:class:`Categorical` with duplicate entries, the accessor was skipping duplicates (:issue:`27952`)
291294

292295

293296
Datetimelike

pandas/core/indexes/accessors.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from pandas.core.dtypes.generic import ABCSeries
1717

1818
from pandas.core.accessor import PandasDelegate, delegate_names
19-
from pandas.core.algorithms import take_1d
2019
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
2120
from pandas.core.base import NoNewAttributesMixin, PandasObject
2221
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -75,9 +74,7 @@ def _delegate_property_get(self, name):
7574

7675
result = np.asarray(result)
7776

78-
# blow up if we operate on categories
7977
if self.orig is not None:
80-
result = take_1d(result, self.orig.cat.codes)
8178
index = self.orig.index
8279
else:
8380
index = self._parent.index
@@ -324,7 +321,12 @@ def __new__(cls, data):
324321

325322
orig = data if is_categorical_dtype(data) else None
326323
if orig is not None:
327-
data = Series(orig.values.categories, name=orig.name, copy=False)
324+
data = Series(
325+
orig.array,
326+
name=orig.name,
327+
copy=False,
328+
dtype=orig.values.categories.dtype,
329+
)
328330

329331
if is_datetime64_dtype(data.dtype):
330332
return DatetimeProperties(data, orig)

pandas/tests/series/test_datetime_values.py

+33
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,39 @@ def test_dt_namespace_accessor_categorical(self):
344344
expected = Series([2017, 2017, 2018, 2018], name="foo")
345345
tm.assert_series_equal(result, expected)
346346

347+
def test_dt_tz_localize_categorical(self, tz_aware_fixture):
348+
# GH 27952
349+
tz = tz_aware_fixture
350+
datetimes = pd.Series(
351+
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
352+
)
353+
categorical = datetimes.astype("category")
354+
result = categorical.dt.tz_localize(tz)
355+
expected = datetimes.dt.tz_localize(tz)
356+
tm.assert_series_equal(result, expected)
357+
358+
def test_dt_tz_convert_categorical(self, tz_aware_fixture):
359+
# GH 27952
360+
tz = tz_aware_fixture
361+
datetimes = pd.Series(
362+
["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
363+
)
364+
categorical = datetimes.astype("category")
365+
result = categorical.dt.tz_convert(tz)
366+
expected = datetimes.dt.tz_convert(tz)
367+
tm.assert_series_equal(result, expected)
368+
369+
@pytest.mark.parametrize("accessor", ["year", "month", "day"])
370+
def test_dt_other_accessors_categorical(self, accessor):
371+
# GH 27952
372+
datetimes = pd.Series(
373+
["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
374+
)
375+
categorical = datetimes.astype("category")
376+
result = getattr(categorical.dt, accessor)
377+
expected = getattr(datetimes.dt, accessor)
378+
tm.assert_series_equal(result, expected)
379+
347380
def test_dt_accessor_no_new_attributes(self):
348381
# https://github.com/pandas-dev/pandas/issues/10673
349382
s = Series(date_range("20130101", periods=5, freq="D"))

0 commit comments

Comments
 (0)