diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index fbd2c2b5345fc..874ce40f36a9f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -172,7 +172,8 @@ Categorical ^^^^^^^^^^^ - Bug in ``CategoricalIndex.reindex`` failed when ``Index`` passed with elements all in category (:issue:`28690`) -- +- Bug where constructing a :class:`Categorical` from an object-dtype array of ``date`` objects did not round-trip correctly with ``astype`` (:issue:`38552`) + Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 24cbbd9ec6ac9..002f36f7949e5 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -331,7 +331,7 @@ def __init__( elif not isinstance(values, (ABCIndex, ABCSeries, ExtensionArray)): # sanitize_array coerces np.nan to a string under certain versions # of numpy - values = maybe_infer_to_datetimelike(values, convert_dates=True) + values = maybe_infer_to_datetimelike(values) if not isinstance(values, (np.ndarray, ExtensionArray)): values = com.convert_to_list_like(values) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 25b5be2ccc918..59d4700874810 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import date, datetime import numpy as np import pytest @@ -346,6 +346,14 @@ def test_constructor_from_index_series_datetimetz(self): result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) + def test_constructor_date_objects(self): + # we dont cast date objects to timestamps, matching Index constructor + v = date.today() + + cat = Categorical([v, v]) + assert cat.categories.dtype == object + assert type(cat.categories[0]) is date + def test_constructor_from_index_series_timedelta(self): idx
= timedelta_range("1 days", freq="D", periods=3) idx = idx._with_freq(None) # freq not preserved in result.categories diff --git a/pandas/tests/indexes/categorical/test_astype.py b/pandas/tests/indexes/categorical/test_astype.py index 44c4bcc951194..48a90652a2c06 100644 --- a/pandas/tests/indexes/categorical/test_astype.py +++ b/pandas/tests/indexes/categorical/test_astype.py @@ -1,3 +1,5 @@ +from datetime import date + import numpy as np import pytest @@ -64,3 +66,16 @@ def test_astype_category(self, name, dtype_ordered, index_ordered): result = index.astype("category") expected = index tm.assert_index_equal(result, expected) + + def test_categorical_date_roundtrip(self): + # astype to categorical and back should preserve date objects + v = date.today() + + obj = Index([v, v]) + assert obj.dtype == object + + cat = obj.astype("category") + + rtrip = cat.astype(object) + assert rtrip.dtype == object + assert type(rtrip[0]) is date diff --git a/pandas/tests/indexes/multi/test_constructors.py b/pandas/tests/indexes/multi/test_constructors.py index ca6387938d747..7666e9670e6a6 100644 --- a/pandas/tests/indexes/multi/test_constructors.py +++ b/pandas/tests/indexes/multi/test_constructors.py @@ -774,7 +774,7 @@ def test_datetimeindex(): # from datetime combos # GH 7888 - date1 = date.today() + date1 = np.datetime64("today") date2 = datetime.today() date3 = Timestamp.today() @@ -783,6 +783,12 @@ def test_datetimeindex(): assert isinstance(index.levels[0], pd.DatetimeIndex) assert isinstance(index.levels[1], pd.DatetimeIndex) + # but NOT date objects, matching Index behavior + date4 = date.today() + index = MultiIndex.from_product([[date4], [date2]]) + assert not isinstance(index.levels[0], pd.DatetimeIndex) + assert isinstance(index.levels[1], pd.DatetimeIndex) + def test_constructor_with_tz(): @@ -804,3 +810,26 @@ def test_constructor_with_tz(): assert result.names == ["dt1", "dt2"] tm.assert_index_equal(result.levels[0], index) tm.assert_index_equal(result.levels[1], 
columns) + + +def test_multiindex_inference_consistency(): + # check that inference behavior matches the base class + + v = date.today() + + arr = [v, v] + + idx = Index(arr) + assert idx.dtype == object + + mi = MultiIndex.from_arrays([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_product([arr]) + lev = mi.levels[0] + assert lev.dtype == object + + mi = MultiIndex.from_tuples([(x,) for x in arr]) + lev = mi.levels[0] + assert lev.dtype == object diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 44a5e7f806309..92181e7dffc50 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -130,12 +130,17 @@ def test_concat_datetimeindex_freq(self): def test_concat_multiindex_datetime_object_index(self): # https://github.com/pandas-dev/pandas/issues/11058 + idx = Index( + [dt.date(2013, 1, 1), dt.date(2014, 1, 1), dt.date(2015, 1, 1)], + dtype="object", + ) + s = Series( ["a", "b"], index=MultiIndex.from_arrays( [ [1, 2], - Index([dt.date(2013, 1, 1), dt.date(2014, 1, 1)], dtype="object"), + idx[:-1], ], names=["first", "second"], ), @@ -143,26 +148,19 @@ def test_concat_multiindex_datetime_object_index(self): s2 = Series( ["a", "b"], index=MultiIndex.from_arrays( - [ - [1, 2], - Index([dt.date(2013, 1, 1), dt.date(2015, 1, 1)], dtype="object"), - ], + [[1, 2], idx[::2]], names=["first", "second"], ), ) + mi = MultiIndex.from_arrays( + [[1, 2, 2], idx], + names=["first", "second"], + ) + assert mi.levels[1].dtype == object + expected = DataFrame( [["a", "a"], ["b", np.nan], [np.nan, "b"]], - index=MultiIndex.from_arrays( - [ - [1, 2, 2], - DatetimeIndex( - ["2013-01-01", "2014-01-01", "2015-01-01"], - dtype="datetime64[ns]", - freq=None, - ), - ], - names=["first", "second"], - ), + index=mi, ) result = concat([s, s2], axis=1) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/merge/test_merge.py 
b/pandas/tests/reshape/merge/test_merge.py index 73d94f4e5a432..d430856776269 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -1811,9 +1811,9 @@ def test_dtype_on_categorical_dates(self): expected_outer = DataFrame( [ - [pd.Timestamp("2001-01-01"), 1.1, 1.3], - [pd.Timestamp("2001-01-02"), 1.3, np.nan], - [pd.Timestamp("2001-01-03"), np.nan, 1.4], + [pd.Timestamp("2001-01-01").date(), 1.1, 1.3], + [pd.Timestamp("2001-01-02").date(), 1.3, np.nan], + [pd.Timestamp("2001-01-03").date(), np.nan, 1.4], ], columns=["date", "num2", "num4"], ) @@ -1821,7 +1821,8 @@ def test_dtype_on_categorical_dates(self): tm.assert_frame_equal(result_outer, expected_outer) expected_inner = DataFrame( - [[pd.Timestamp("2001-01-01"), 1.1, 1.3]], columns=["date", "num2", "num4"] + [[pd.Timestamp("2001-01-01").date(), 1.1, 1.3]], + columns=["date", "num2", "num4"], ) result_inner = pd.merge(df, df2, how="inner", on=["date"]) tm.assert_frame_equal(result_inner, expected_inner)