pandas-dev · jbrockmendel · Apr 28, 2020 · Apr 28, 2020 · Apr 28, 2020 · Apr 28, 2020
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -184,8 +184,16 @@ def _reconstruct_data(values, dtype, original):
     -------
     Index for extension types, otherwise ndarray casted to dtype
     """
+    if isinstance(values, ABCExtensionArray) and values.dtype == dtype:
+        # Catch DatetimeArray/TimedeltaArray
+        return values
+
     if is_extension_array_dtype(dtype):
-        values = dtype.construct_array_type()._from_sequence(values)
+        cls = dtype.construct_array_type()
+        if isinstance(values, cls) and values.dtype == dtype:
+            return values
+
+        values = cls._from_sequence(values)
     elif is_bool_dtype(dtype):
         values = values.astype(dtype, copy=False)
 
@@ -613,9 +621,11 @@ def factorize(
 
     values = _ensure_arraylike(values)
     original = values
+    if not isinstance(values, ABCMultiIndex):
+        values = extract_array(values, extract_numpy=True)
 
-    if is_extension_array_dtype(values.dtype):
-        values = extract_array(values)
+    if isinstance(values, ABCExtensionArray):
+        # Includes DatetimeArray, TimedeltaArray
         codes, uniques = values.factorize(na_sentinel=na_sentinel)
         dtype = original.dtype
     else:

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -437,6 +437,13 @@ def _with_freq(self, freq):
         arr._freq = freq
         return arr
 
+    def factorize(self, na_sentinel=-1):
+        if self.freq is not None:
+            # freq is set, so values are unique: codes are 0..n-1 (freq retained)
+            codes = np.arange(len(self), dtype=np.intp)
+            return codes, self.copy()
+        return ExtensionArray.factorize(self, na_sentinel=na_sentinel)
+
 
 DatetimeLikeArrayT = TypeVar("DatetimeLikeArrayT", bound="DatetimeLikeArrayMixin")
 

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -112,7 +112,7 @@ def f(self):
     return property(f)
 
 
-class DatetimeArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps, dtl.DatelikeOps):
+class DatetimeArray(dtl.TimelikeOps, dtl.DatetimeLikeArrayMixin, dtl.DatelikeOps):
     """
     Pandas ExtensionArray for tz-naive or tz-aware datetime data.
 

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -57,7 +57,7 @@ def f(self):
     return property(f)
 
 
-class TimedeltaArray(dtl.DatetimeLikeArrayMixin, dtl.TimelikeOps):
+class TimedeltaArray(dtl.TimelikeOps, dtl.DatetimeLikeArrayMixin):
     """
     Pandas ExtensionArray for timedelta data.
 

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -602,6 +602,15 @@ def _shallow_copy(self, values=None, name: Label = lib.no_default):
         result._cache = cache
         return result
 
+    def factorize(self, sort=False, na_sentinel=-1):
+        if self.freq is not None and sort is False:
+            # freq is set, so we are unique: short-circuit and preserve freq
+            codes = np.arange(len(self), dtype=np.intp)
+            return codes, self.copy()
+            # TODO: In the sort=True case we could check for monotonic_decreasing
+            #  and operate on self[::-1]
+        return super().factorize(sort=sort, na_sentinel=na_sentinel)
+
     # --------------------------------------------------------------------
     # Set Operation Methods
 

diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py
@@ -328,10 +328,12 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         # tz must be preserved
         idx1 = idx1.tz_localize("Asia/Tokyo")
@@ -340,6 +342,7 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         idx2 = pd.DatetimeIndex(
             ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
@@ -350,49 +353,65 @@ def test_factorize(self):
         arr, idx = idx2.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
         exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
         arr, idx = idx2.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#33836 freq should be preserved
         idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
+
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
-    def test_factorize_tz(self, tz_naive_fixture):
+    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
         tz = tz_naive_fixture
         # GH#13750
         base = pd.date_range("2016-11-05", freq="H", periods=100, tz=tz)
         idx = base.repeat(5)
 
         exp_arr = np.arange(100, dtype=np.intp).repeat(5)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, exp_arr)
-            expected = base._with_freq(None)
-            tm.assert_index_equal(res, expected)
+        obj = index_or_series(idx)
+
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        expected = base._with_freq(None)
+        tm.assert_index_equal(res, expected)
+        assert res.freq == expected.freq
 
-    def test_factorize_dst(self):
+    def test_factorize_dst(self, index_or_series):
         # GH 13750
         idx = pd.date_range("2016-11-06", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
 
         idx = pd.date_range("2016-06-13", freq="H", periods=12, tz="US/Eastern")
+        obj = index_or_series(idx)
 
-        for obj in [idx, pd.Series(idx)]:
-            arr, res = obj.factorize()
-            tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
-            tm.assert_index_equal(res, idx)
+        arr, res = obj.factorize()
+        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
+        tm.assert_index_equal(res, idx)
+        if index_or_series is Index:
+            assert res.freq == idx.freq
 
     @pytest.mark.parametrize(
         "arr, expected",

diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py
@@ -76,17 +76,26 @@ def test_factorize(self):
         arr, idx = idx1.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
         arr, idx = idx1.factorize(sort=True)
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, exp_idx)
+        assert idx.freq == exp_idx.freq
 
-        # freq must be preserved
+    def test_factorize_preserves_freq(self):
+        # GH#33836 freq should be preserved
         idx3 = timedelta_range("1 day", periods=4, freq="s")
         exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)
         arr, idx = idx3.factorize()
         tm.assert_numpy_array_equal(arr, exp_arr)
         tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
+
+        arr, idx = pd.factorize(idx3)
+        tm.assert_numpy_array_equal(arr, exp_arr)
+        tm.assert_index_equal(idx, idx3)
+        assert idx.freq == idx3.freq
 
     def test_sort_values(self):