Skip to content

BUG: Index([date]).astype("category").astype(object) roundtrip #38552

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Dec 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,8 @@ Categorical
^^^^^^^^^^^

- Bug in ``CategoricalIndex.reindex`` failing when passed an ``Index`` whose elements are all in the categories (:issue:`28690`)
-
- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)


Datetimelike
^^^^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ def __init__(
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
# sanitize_array coerces np.nan to a string under certain versions
# of numpy
values = maybe_infer_to_datetimelike(values, convert_dates=True)
values = maybe_infer_to_datetimelike(values)
if not isinstance(values, (np.ndarray, ExtensionArray)):
values = com.convert_to_list_like(values)

Expand Down
10 changes: 9 additions & 1 deletion pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import datetime
from datetime import date, datetime

import numpy as np
import pytest
Expand Down Expand Up @@ -346,6 +346,14 @@ def test_constructor_from_index_series_datetimetz(self):
result = Categorical(Series(idx))
tm.assert_index_equal(result.categories, idx)

def test_constructor_date_objects(self):
# we dont cast date objects to timestamps, matching Index constructor
v = date.today()

cat = Categorical([v, v])
assert cat.categories.dtype == object
assert type(cat.categories[0]) is date

def test_constructor_from_index_series_timedelta(self):
idx = timedelta_range("1 days", freq="D", periods=3)
idx = idx._with_freq(None) # freq not preserved in result.categories
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/indexes/categorical/test_astype.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import date

import numpy as np
import pytest

Expand Down Expand Up @@ -64,3 +66,16 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
result = index.astype("category")
expected = index
tm.assert_index_equal(result, expected)

def test_categorical_date_roundtrip(self):
# astype to categorical and back should preserve date objects
v = date.today()

obj = Index([v, v])
assert obj.dtype == object

cat = obj.astype("category")

rtrip = cat.astype(object)
assert rtrip.dtype == object
assert type(rtrip[0]) is date
31 changes: 30 additions & 1 deletion pandas/tests/indexes/multi/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -774,7 +774,7 @@ def test_datetimeindex():

# from datetime combos
# GH 7888
date1 = date.today()
date1 = np.datetime64("today")
date2 = datetime.today()
date3 = Timestamp.today()

Expand All @@ -783,6 +783,12 @@ def test_datetimeindex():
assert isinstance(index.levels[0], pd.DatetimeIndex)
assert isinstance(index.levels[1], pd.DatetimeIndex)

# but NOT date objects, matching Index behavior
date4 = date.today()
index = MultiIndex.from_product([[date4], [date2]])
assert not isinstance(index.levels[0], pd.DatetimeIndex)
assert isinstance(index.levels[1], pd.DatetimeIndex)


def test_constructor_with_tz():

Expand All @@ -804,3 +810,26 @@ def test_constructor_with_tz():
assert result.names == ["dt1", "dt2"]
tm.assert_index_equal(result.levels[0], index)
tm.assert_index_equal(result.levels[1], columns)


def test_multiindex_inference_consistency():
    """Check that MultiIndex level inference matches the base Index class.

    A list of ``datetime.date`` objects gives an object-dtype ``Index``; the
    single level built by each MultiIndex constructor must be object dtype too.
    """
    today = date.today()
    values = [today, today]

    assert Index(values).dtype == object

    # Built lazily so each MultiIndex is constructed immediately before its
    # own assertion, in the order from_arrays, from_product, from_tuples.
    builders = (
        lambda: MultiIndex.from_arrays([values]),
        lambda: MultiIndex.from_product([values]),
        lambda: MultiIndex.from_tuples([(value,) for value in values]),
    )
    for build in builders:
        level = build().levels[0]
        assert level.dtype == object
30 changes: 14 additions & 16 deletions pandas/tests/reshape/concat/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,39 +130,37 @@ def test_concat_datetimeindex_freq(self):

def test_concat_multiindex_datetime_object_index(self):
# https://github.com/pandas-dev/pandas/issues/11058
idx = Index(
[dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)],
dtype="object",
)

s = Series(
["a", "b"],
index=MultiIndex.from_arrays(
[
[1, 2],
Index([dt.date(2013, 1, 1), dt.date(2014, 1, 1)], dtype="object"),
idx[:-1],
],
names=["first", "second"],
),
)
s2 = Series(
["a", "b"],
index=MultiIndex.from_arrays(
[
[1, 2],
Index([dt.date(2013, 1, 1), dt.date(2015, 1, 1)], dtype="object"),
],
[[1, 2], idx[::2]],
names=["first", "second"],
),
)
mi = MultiIndex.from_arrays(
[[1, 2, 2], idx],
names=["first", "second"],
)
assert mi.levels[1].dtype == object

expected = DataFrame(
[["a", "a"], ["b", np.nan], [np.nan, "b"]],
index=MultiIndex.from_arrays(
[
[1, 2, 2],
DatetimeIndex(
["2013-01-01", "2014-01-01", "2015-01-01"],
dtype="datetime64[ns]",
freq=None,
),
],
names=["first", "second"],
),
index=mi,
)
result = concat([s, s2], axis=1)
tm.assert_frame_equal(result, expected)
Expand Down
9 changes: 5 additions & 4 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1811,17 +1811,18 @@ def test_dtype_on_categorical_dates(self):

expected_outer = DataFrame(
[
[pd.Timestamp("2001-01-01"), 1.1, 1.3],
[pd.Timestamp("2001-01-02"), 1.3, np.nan],
[pd.Timestamp("2001-01-03"), np.nan, 1.4],
[pd.Timestamp("2001-01-01").date(), 1.1, 1.3],
[pd.Timestamp("2001-01-02").date(), 1.3, np.nan],
[pd.Timestamp("2001-01-03").date(), np.nan, 1.4],
],
columns=["date", "num2", "num4"],
)
result_outer = pd.merge(df, df2, how="outer", on=["date"])
tm.assert_frame_equal(result_outer, expected_outer)

expected_inner = DataFrame(
[[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"]
[[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]],
columns=["date", "num2", "num4"],
)
result_inner = pd.merge(df, df2, how="inner", on=["date"])
tm.assert_frame_equal(result_inner, expected_inner)
Expand Down