
BUG: make tz_localize operate on values rather than categories #28300


Merged
merged 12 commits on Nov 2, 2019
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
@@ -288,6 +288,9 @@ Categorical
- :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` now work on unordered categoricals also (:issue:`21667`)
- Added test to assert roundtripping to parquet with :func:`DataFrame.to_parquet` or :func:`read_parquet` will preserve Categorical dtypes for string types (:issue:`27955`)
- Changed the error message in :meth:`Categorical.remove_categories` to always show the invalid removals as a set (:issue:`28669`)
- Using date accessors on a categorical-dtyped :class:`Series` of datetimes was not returning an object of the
  same type as using the :meth:`.str.` / :meth:`.dt.` accessors on a :class:`Series` of that type. For example, when accessing
  :meth:`Series.dt.tz_localize` on a :class:`Categorical` with duplicate entries, the accessor skipped the duplicates (:issue:`27952`)


Datetimelike
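To make the whatsnew entry above concrete, here is a minimal sketch of the fixed behaviour (the time zone is an arbitrary choice; it mirrors the new tests further down):

import pandas as pd

datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]")
categorical = datetimes.astype("category")

result = categorical.dt.tz_localize("US/Eastern")
expected = datetimes.dt.tz_localize("US/Eastern")

# With this change the categorical path keeps all three rows, including the
# duplicate "2019-01-01"; previously the accessor operated on the deduplicated
# categories and returned fewer rows.
assert result.equals(expected)
assert len(result) == len(categorical)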
10 changes: 6 additions & 4 deletions pandas/core/indexes/accessors.py
@@ -16,7 +16,6 @@
from pandas.core.dtypes.generic import ABCSeries

from pandas.core.accessor import PandasDelegate, delegate_names
-from pandas.core.algorithms import take_1d
from pandas.core.arrays import DatetimeArray, PeriodArray, TimedeltaArray
from pandas.core.base import NoNewAttributesMixin, PandasObject
from pandas.core.indexes.datetimes import DatetimeIndex
@@ -75,9 +74,7 @@ def _delegate_property_get(self, name):

        result = np.asarray(result)

-        # blow up if we operate on categories
        if self.orig is not None:
-            result = take_1d(result, self.orig.cat.codes)
            index = self.orig.index
        else:
            index = self._parent.index
@@ -324,7 +321,12 @@ def __new__(cls, data):

        orig = data if is_categorical_dtype(data) else None
        if orig is not None:
-            data = Series(orig.values.categories, name=orig.name, copy=False)
+            data = Series(
+                orig.array,
+                name=orig.name,
+                copy=False,
+                dtype=orig.values.categories.dtype,
+            )

        if is_datetime64_dtype(data.dtype):
            return DatetimeProperties(data, orig)
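A rough sketch of why the construction in __new__ changed (the variable names below are illustrative, not part of the diff): the old code handed the delegated methods only the unique categories, which is why property results had to be remapped through the codes with take_1d, while the new code hands them every element cast back to the categories' dtype, so no remapping is needed:

import pandas as pd

s = pd.Series(pd.to_datetime(["2019-01-01", "2019-01-01", "2019-01-02"]))
orig = s.astype("category")

# Old approach: only the two unique categories reached the delegated methods,
# so results had to be mapped back through the codes, and methods such as
# tz_localize silently dropped the duplicate row.
categories_only = pd.Series(orig.values.categories)  # length 2

# New approach: all three values, cast from categorical back to datetime64,
# reach the delegated methods directly.
full_values = pd.Series(orig.array, dtype=orig.values.categories.dtype)  # length 3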
33 changes: 33 additions & 0 deletions pandas/tests/series/test_datetime_values.py
@@ -344,6 +344,39 @@ def test_dt_namespace_accessor_categorical(self):
        expected = Series([2017, 2017, 2018, 2018], name="foo")
        tm.assert_series_equal(result, expected)

    def test_dt_tz_localize_categorical(self, tz_aware_fixture):
        # GH 27952
        tz = tz_aware_fixture
        datetimes = pd.Series(
            ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]"
        )
        categorical = datetimes.astype("category")
        result = categorical.dt.tz_localize(tz)
        expected = datetimes.dt.tz_localize(tz)
        tm.assert_series_equal(result, expected)

    def test_dt_tz_convert_categorical(self, tz_aware_fixture):
        # GH 27952
        tz = tz_aware_fixture
        datetimes = pd.Series(
            ["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]"
        )
        categorical = datetimes.astype("category")
        result = categorical.dt.tz_convert(tz)
        expected = datetimes.dt.tz_convert(tz)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("accessor", ["year", "month", "day"])
    def test_dt_other_accessors_categorical(self, accessor):
        # GH 27952
        datetimes = pd.Series(
            ["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]"
        )
        categorical = datetimes.astype("category")
        result = getattr(categorical.dt, accessor)
        expected = getattr(datetimes.dt, accessor)
        tm.assert_series_equal(result, expected)

    def test_dt_accessor_no_new_attributes(self):
        # https://github.com/pandas-dev/pandas/issues/10673
        s = Series(date_range("20130101", periods=5, freq="D"))
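For readers who want to try the new behaviour outside the test suite, a standalone equivalent of the tz_convert test above; the tz_aware_fixture used in the tests supplies a range of time zones, and "US/Pacific" simply stands in for it here:

import pandas as pd

datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]")
categorical = datetimes.astype("category")

# The categorical accessor now produces the same tz-converted values, with the
# duplicate entry preserved, as the plain datetime Series.
assert categorical.dt.tz_convert("US/Pacific").equals(datetimes.dt.tz_convert("US/Pacific"))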