Skip to content

Commit 545a942

Browse files
authored
BUG: Index([date]).astype("category").astype(object) roundtrip (#38552)
1 parent 0230d03 commit 545a942

File tree

7 files changed

+76
-24
lines changed

7 files changed

+76
-24
lines changed

doc/source/whatsnew/v1.3.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ Categorical
172172
^^^^^^^^^^^
173173

174174
- Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`)
175-
-
175+
- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`)
176+
176177

177178
Datetimelike
178179
^^^^^^^^^^^^

pandas/core/arrays/categorical.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -331,7 +331,7 @@ def __init__(
331331
elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)):
332332
# sanitize_array coerces np.nan to a string under certain versions
333333
# of numpy
334-
values = maybe_infer_to_datetimelike(values, convert_dates=True)
334+
values = maybe_infer_to_datetimelike(values)
335335
if not isinstance(values, (np.ndarray, ExtensionArray)):
336336
values = com.convert_to_list_like(values)
337337

pandas/tests/arrays/categorical/test_constructors.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime
1+
from datetime import date, datetime
22

33
import numpy as np
44
import pytest
@@ -346,6 +346,14 @@ def test_constructor_from_index_series_datetimetz(self):
346346
result = Categorical(Series(idx))
347347
tm.assert_index_equal(result.categories, idx)
348348

349+
def test_constructor_date_objects(self):
350+
# we don't cast date objects to timestamps, matching Index constructor
351+
v = date.today()
352+
353+
cat = Categorical([v, v])
354+
assert cat.categories.dtype == object
355+
assert type(cat.categories[0]) is date
356+
349357
def test_constructor_from_index_series_timedelta(self):
350358
idx = timedelta_range("1 days", freq="D", periods=3)
351359
idx = idx._with_freq(None) # freq not preserved in result.categories

pandas/tests/indexes/categorical/test_astype.py

+15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from datetime import date
2+
13
import numpy as np
24
import pytest
35

@@ -64,3 +66,16 @@ def test_astype_category(self, name, dtype_ordered, index_ordered):
6466
result = index.astype("category")
6567
expected = index
6668
tm.assert_index_equal(result, expected)
69+
70+
def test_categorical_date_roundtrip(self):
71+
# astype to categorical and back should preserve date objects
72+
v = date.today()
73+
74+
obj = Index([v, v])
75+
assert obj.dtype == object
76+
77+
cat = obj.astype("category")
78+
79+
rtrip = cat.astype(object)
80+
assert rtrip.dtype == object
81+
assert type(rtrip[0]) is date

pandas/tests/indexes/multi/test_constructors.py

+30-1
Original file line numberDiff line numberDiff line change
@@ -774,7 +774,7 @@ def test_datetimeindex():
774774

775775
# from datetime combos
776776
# GH 7888
777-
date1 = date.today()
777+
date1 = np.datetime64("today")
778778
date2 = datetime.today()
779779
date3 = Timestamp.today()
780780

@@ -783,6 +783,12 @@ def test_datetimeindex():
783783
assert isinstance(index.levels[0], pd.DatetimeIndex)
784784
assert isinstance(index.levels[1], pd.DatetimeIndex)
785785

786+
# but NOT date objects, matching Index behavior
787+
date4 = date.today()
788+
index = MultiIndex.from_product([[date4], [date2]])
789+
assert not isinstance(index.levels[0], pd.DatetimeIndex)
790+
assert isinstance(index.levels[1], pd.DatetimeIndex)
791+
786792

787793
def test_constructor_with_tz():
788794

@@ -804,3 +810,26 @@ def test_constructor_with_tz():
804810
assert result.names == ["dt1", "dt2"]
805811
tm.assert_index_equal(result.levels[0], index)
806812
tm.assert_index_equal(result.levels[1], columns)
813+
814+
815+
def test_multiindex_inference_consistency():
816+
# check that inference behavior matches the base class
817+
818+
v = date.today()
819+
820+
arr = [v, v]
821+
822+
idx = Index(arr)
823+
assert idx.dtype == object
824+
825+
mi = MultiIndex.from_arrays([arr])
826+
lev = mi.levels[0]
827+
assert lev.dtype == object
828+
829+
mi = MultiIndex.from_product([arr])
830+
lev = mi.levels[0]
831+
assert lev.dtype == object
832+
833+
mi = MultiIndex.from_tuples([(x,) for x in arr])
834+
lev = mi.levels[0]
835+
assert lev.dtype == object

pandas/tests/reshape/concat/test_datetimes.py

+14-16
Original file line numberDiff line numberDiff line change
@@ -130,39 +130,37 @@ def test_concat_datetimeindex_freq(self):
130130

131131
def test_concat_multiindex_datetime_object_index(self):
132132
# https://github.com/pandas-dev/pandas/issues/11058
133+
idx = Index(
134+
[dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)],
135+
dtype="object",
136+
)
137+
133138
s = Series(
134139
["a", "b"],
135140
index=MultiIndex.from_arrays(
136141
[
137142
[1, 2],
138-
Index([dt.date(2013, 1, 1), dt.date(2014, 1, 1)], dtype="object"),
143+
idx[:-1],
139144
],
140145
names=["first", "second"],
141146
),
142147
)
143148
s2 = Series(
144149
["a", "b"],
145150
index=MultiIndex.from_arrays(
146-
[
147-
[1, 2],
148-
Index([dt.date(2013, 1, 1), dt.date(2015, 1, 1)], dtype="object"),
149-
],
151+
[[1, 2], idx[::2]],
150152
names=["first", "second"],
151153
),
152154
)
155+
mi = MultiIndex.from_arrays(
156+
[[1, 2, 2], idx],
157+
names=["first", "second"],
158+
)
159+
assert mi.levels[1].dtype == object
160+
153161
expected = DataFrame(
154162
[["a", "a"], ["b", np.nan], [np.nan, "b"]],
155-
index=MultiIndex.from_arrays(
156-
[
157-
[1, 2, 2],
158-
DatetimeIndex(
159-
["2013-01-01", "2014-01-01", "2015-01-01"],
160-
dtype="datetime64[ns]",
161-
freq=None,
162-
),
163-
],
164-
names=["first", "second"],
165-
),
163+
index=mi,
166164
)
167165
result = concat([s, s2], axis=1)
168166
tm.assert_frame_equal(result, expected)

pandas/tests/reshape/merge/test_merge.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1811,17 +1811,18 @@ def test_dtype_on_categorical_dates(self):
18111811

18121812
expected_outer = DataFrame(
18131813
[
1814-
[pd.Timestamp("2001-01-01"), 1.1, 1.3],
1815-
[pd.Timestamp("2001-01-02"), 1.3, np.nan],
1816-
[pd.Timestamp("2001-01-03"), np.nan, 1.4],
1814+
[pd.Timestamp("2001-01-01").date(), 1.1, 1.3],
1815+
[pd.Timestamp("2001-01-02").date(), 1.3, np.nan],
1816+
[pd.Timestamp("2001-01-03").date(), np.nan, 1.4],
18171817
],
18181818
columns=["date", "num2", "num4"],
18191819
)
18201820
result_outer = pd.merge(df, df2, how="outer", on=["date"])
18211821
tm.assert_frame_equal(result_outer, expected_outer)
18221822

18231823
expected_inner = DataFrame(
1824-
[[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"]
1824+
[[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]],
1825+
columns=["date", "num2", "num4"],
18251826
)
18261827
result_inner = pd.merge(df, df2, how="inner", on=["date"])
18271828
tm.assert_frame_equal(result_inner, expected_inner)

0 commit comments

Comments
 (0)