From 6ae3670e7bf95b95318528e6a37ddad1e84fbe7b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 26 Feb 2020 04:36:35 -0800 Subject: [PATCH] REGR: preserve freq in DTI/TDI outer join (#32166) --- doc/source/whatsnew/v1.0.2.rst | 1 + pandas/core/indexes/datetimelike.py | 23 ++- pandas/tests/indexes/datetimes/test_join.py | 144 +++++++++++++++++++ pandas/tests/indexes/timedeltas/test_join.py | 49 +++++++ 4 files changed, 203 insertions(+), 14 deletions(-) create mode 100644 pandas/tests/indexes/datetimes/test_join.py create mode 100644 pandas/tests/indexes/timedeltas/test_join.py diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index d3b1442953e41..1b6098e6b6ac1 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -23,6 +23,7 @@ Fixed regressions - Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`). - Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`) - Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`) +- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c98b4f21dbb92..aaea609ec5049 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -857,21 +857,16 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool: return True return False - def _wrap_joined_index(self, joined, other): + def _wrap_joined_index(self, joined: np.ndarray, other): + assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) - if ( - isinstance(other, type(self)) - and self.freq == other.freq - and self._can_fast_union(other) - ): - joined = self._shallow_copy(joined) - joined.name = name - return joined - else: - kwargs = {} - if hasattr(self, "tz"): - kwargs["tz"] = getattr(other, "tz", None) - return self._simple_new(joined, name, **kwargs) + + freq = self.freq if self._can_fast_union(other) else None + new_data = type(self._data)._simple_new( # type: ignore + joined, dtype=self.dtype, freq=freq + ) + + return type(self)._simple_new(new_data, name=name) class DatetimelikeDelegateMixin(PandasDelegate): diff --git a/pandas/tests/indexes/datetimes/test_join.py b/pandas/tests/indexes/datetimes/test_join.py new file mode 100644 index 0000000000000..f2f88fd7dc90c --- /dev/null +++ b/pandas/tests/indexes/datetimes/test_join.py @@ -0,0 +1,144 @@ +from datetime import datetime + +import numpy as np +import pytest + +from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime +import pandas._testing as tm + +from pandas.tseries.offsets import BDay, BMonthEnd + + +class TestJoin: + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="dt", + ) + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_numpy_array_equal(cols.values, joined.values) + + def test_join_self(self, join_type): + index = date_range("1/1/2000", periods=10) + joined = index.join(index, how=join_type) + assert index is joined + + def test_join_with_period_index(self, join_type): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args: np.random.randint(2), + c_idx_type="p", + r_idx_type="dt", + ) + s = df.iloc[:5, 0] + + expected = df.columns.astype("O").join(s.index, how=join_type) + result = df.columns.join(s.index, how=join_type) + tm.assert_index_equal(expected, result) + + def test_join_object_index(self): + rng = date_range("1/1/2000", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.join(idx, how="outer") + assert isinstance(result[0], Timestamp) + + def test_join_utc_convert(self, join_type): + rng = date_range("1/1/2011", periods=100, freq="H", tz="utc") + + left = rng.tz_convert("US/Eastern") + right = rng.tz_convert("Europe/Berlin") + + result = left.join(left[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz == left.tz + + result = left.join(right[:-5], how=join_type) + assert isinstance(result, DatetimeIndex) + assert result.tz.zone == "UTC" + + @pytest.mark.parametrize("sort", [None, False]) + def test_datetimeindex_union_join_empty(self, sort): + dti = date_range(start="1/1/2001", end="2/1/2001", freq="D") + empty = Index([]) + + result = dti.union(empty, sort=sort) + expected = dti.astype("O") + tm.assert_index_equal(result, expected) + + result = dti.join(empty) + assert isinstance(result, DatetimeIndex) + tm.assert_index_equal(result, dti) + + def test_join_nonunique(self): + idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"]) + idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"]) + rs = idx1.join(idx2, how="outer") + assert rs.is_monotonic + + @pytest.mark.parametrize("freq", ["B", "C"]) + def test_outer_join(self, freq): + # should just behave as union + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + rng = date_range(start=start, end=end, freq=freq) + + # overlapping + left = rng[:10] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # non-overlapping, gap in middle + left = rng[:5] + right = rng[10:] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + # non-overlapping, no gap + left = rng[:5] + right = rng[5:10] + + the_join = left.join(right, how="outer") + assert isinstance(the_join, DatetimeIndex) + + # overlapping, but different offset + other = date_range(start, end, freq=BMonthEnd()) + + the_join = rng.join(other, how="outer") + assert isinstance(the_join, DatetimeIndex) + assert the_join.freq is None + + def test_naive_aware_conflicts(self): + start, end = datetime(2009, 1, 1), datetime(2010, 1, 1) + naive = date_range(start, end, freq=BDay(), tz=None) + aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong") + + msg = "tz-naive.*tz-aware" + with pytest.raises(TypeError, match=msg): + naive.join(aware) + + with pytest.raises(TypeError, match=msg): + aware.join(naive) + + @pytest.mark.parametrize("tz", [None, "US/Pacific"]) + def test_join_preserves_freq(self, tz): + # GH#32157 + dti = date_range("2016-01-01", periods=10, tz=tz) + result = dti[:5].join(dti[5:], how="outer") + assert result.freq == dti.freq + tm.assert_index_equal(result, dti) + + result = dti[:5].join(dti[6:], how="outer") + assert result.freq is None + expected = dti.delete(5) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/test_join.py b/pandas/tests/indexes/timedeltas/test_join.py new file mode 100644 index 0000000000000..aaf4ef29e162b --- /dev/null +++ b/pandas/tests/indexes/timedeltas/test_join.py @@ -0,0 +1,49 @@ +import numpy as np + +from pandas import Index, Timedelta, timedelta_range +import pandas._testing as tm + + +class TestJoin: + def test_append_join_nondatetimeindex(self): + rng = timedelta_range("1 days", periods=10) + idx = Index(["a", "b", "c", "d"]) + + result = rng.append(idx) + assert isinstance(result[0], Timedelta) + + # it works + rng.join(idx, how="outer") + + def test_join_self(self, join_type): + index = timedelta_range("1 day", periods=10) + joined = index.join(index, how=join_type) + tm.assert_index_equal(index, joined) + + def test_does_not_convert_mixed_integer(self): + df = tm.makeCustomDataframe( + 10, + 10, + data_gen_f=lambda *args, **kwargs: np.random.randn(), + r_idx_type="i", + c_idx_type="td", + ) + str(df) + + cols = df.columns.join(df.index, how="outer") + joined = cols.join(df.columns) + assert cols.dtype == np.dtype("O") + assert cols.dtype == joined.dtype + tm.assert_index_equal(cols, joined) + + def test_join_preserves_freq(self): + # GH#32157 + tdi = timedelta_range("1 day", periods=10) + result = tdi[:5].join(tdi[5:], how="outer") + assert result.freq == tdi.freq + tm.assert_index_equal(result, tdi) + + result = tdi[:5].join(tdi[6:], how="outer") + assert result.freq is None + expected = tdi.delete(5) + tm.assert_index_equal(result, expected)