From f1a61d823c47f623666993472cb1c25a95491786 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 08:24:54 -0800 Subject: [PATCH 1/7] _wrap_joined_index preserve freq --- pandas/core/indexes/base.py | 3 +++ pandas/core/indexes/datetimelike.py | 21 +++++++++++---------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 14ee21ea5614c..e7dccf5f81c34 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3625,6 +3625,8 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how="left", return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers + assert other.dtype == self.dtype, (other.dtype, self.dtype) + left_idx, right_idx = _get_join_indexers( [self._ndarray_values], [other._ndarray_values], how=how, sort=True ) @@ -3785,6 +3787,7 @@ def _get_leaf_sorter(labels): return join_index def _join_monotonic(self, other, how="left", return_indexers=False): + assert other.dtype == self.dtype, (other.dtype, self.dtype) if self.equals(other): ret_index = other if how == "right" else self if return_indexers: diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1b3b6934aa53a..6af579e43d336 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -919,17 +919,18 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool: return True return False - def _wrap_joined_index(self, joined, other): + def _wrap_joined_index(self, joined: np.ndarray, other): + # Expected dtypes for joined: + # DTI -> datetime64[ns] + # TDI -> timedelta64[ns] + # PI -> int64 + assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) - if self._can_fast_union(other): - joined = self._shallow_copy(joined) - joined.name = name - return joined - else: - kwargs = {} - if hasattr(self, "tz"): - kwargs["tz"] = getattr(other, "tz", None) - return type(self)._simple_new(joined, name, **kwargs) + + freq = self.freq if self._can_fast_union(other) else None + new_data = type(self._data)._simple_new(joined, dtype=self.dtype, freq=freq) + + return type(self)._simple_new(new_data, name=name) # -------------------------------------------------------------------- # List-Like Methods From f7bb017507393f18438e6340ee6d3d66c18b67c6 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 15:57:52 -0800 Subject: [PATCH 2/7] tests --- pandas/tests/indexes/datetimes/test_setops.py | 13 +++++++++++++ pandas/tests/indexes/timedeltas/test_timedelta.py | 12 ++++++++++++ 2 files changed, 25 insertions(+) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 78188c54b1d85..6116f2a038b22 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -367,6 +367,19 @@ def test_join_nonunique(self): rs = idx1.join(idx2, how="outer") assert rs.is_monotonic + @pytest.mark.parametrize("tz", [None, "UTC"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = pd.date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) + class TestBusinessDatetimeIndex: def setup_method(self, method): diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 8a91c9d5e09c8..572d5b1810e63 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -91,6 +91,18 @@ def test_factorize(self): tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3) + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected) + def test_join_self(self, join_type): index = timedelta_range("1 day", periods=10) joined = index.join(index, how=join_type) From 37e4cc0e0c10df172fe9f3e22d0bb42b59552b42 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 16:00:32 -0800 Subject: [PATCH 3/7] revert --- pandas/core/indexes/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e7dccf5f81c34..14ee21ea5614c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3625,8 +3625,6 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how="left", return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - assert other.dtype == self.dtype, (other.dtype, self.dtype) - left_idx, right_idx = _get_join_indexers( [self._ndarray_values], [other._ndarray_values], how=how, sort=True ) @@ -3787,7 +3785,6 @@ def _get_leaf_sorter(labels): return join_index def _join_monotonic(self, other, how="left", return_indexers=False): - assert other.dtype == self.dtype, (other.dtype, self.dtype) if self.equals(other): ret_index = other if how == "right" else self if return_indexers: From 0872dd1195e3c66727ac0e16692e7009d60db7d7 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 16:05:28 -0800 Subject: [PATCH 4/7] TST: use non-utc --- pandas/tests/indexes/datetimes/test_setops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 6116f2a038b22..7c80af07385a1 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -367,7 +367,7 @@ def test_join_nonunique(self): rs = idx1.join(idx2, how="outer") assert rs.is_monotonic - @pytest.mark.parametrize("tz", [None, "UTC"]) + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) def test_join_preserves_freq(self, tz): # GH#32157 dti = pd.date_range("2016-01-01", periods=10, tz=tz) From 77ef3907c853f61700ae3e8366a41e5ba3758ce5 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 21 Feb 2020 16:06:54 -0800 Subject: [PATCH 5/7] remove comment --- pandas/core/indexes/datetimelike.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 6af579e43d336..8e446f6d1d797 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -920,10 +920,6 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool: return False def _wrap_joined_index(self, joined: np.ndarray, other): - # Expected dtypes for joined: - # DTI -> datetime64[ns] - # TDI -> timedelta64[ns] - # PI -> int64 assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) From d64b64acc997bd86115456e194205129f27ccca9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 24 Feb 2020 14:10:22 -0800 Subject: [PATCH 6/7] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 705c335acfb48..94504cbc92292 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -57,6 +57,7 @@ Other API changes will now be ``min`` and ``max`` to match with numeric dtypes in :meth:`DataFrame.describe` (:issue:`30164`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) +- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - Backwards incompatible API changes From d3f244de51f76d5e1f06378c868a78233ac6dd30 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 25 Feb 2020 18:37:04 -0800 Subject: [PATCH 7/7] move whatsnew to 1.0.2 --- doc/source/whatsnew/v1.0.2.rst | 1 + doc/source/whatsnew/v1.1.0.rst | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index f491774991090..e91bab0925bf7 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - .. --------------------------------------------------------------------------- diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 44e18dfe92ea8..888b7d23aeb35 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -58,7 +58,6 @@ Other API changes - Added :meth:`DataFrame.value_counts` (:issue:`5377`) - :meth:`Groupby.groups` now returns an abbreviated representation when called on large dataframes (:issue:`1135`) - ``loc`` lookups with an object-dtype :class:`Index` and an integer key will now raise ``KeyError`` instead of ``TypeError`` when key is missing (:issue:`31905`) -- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - Backwards incompatible API changes