Skip to content

Backport PR #32166 on branch 1.0.x #32269

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Fixed regressions
- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
- Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`)
- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`)
-

.. ---------------------------------------------------------------------------
Expand Down
23 changes: 9 additions & 14 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -857,21 +857,16 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool:
return True
return False

def _wrap_joined_index(self, joined, other):
def _wrap_joined_index(self, joined: np.ndarray, other):
assert other.dtype == self.dtype, (other.dtype, self.dtype)
name = get_op_result_name(self, other)
if (
isinstance(other, type(self))
and self.freq == other.freq
and self._can_fast_union(other)
):
joined = self._shallow_copy(joined)
joined.name = name
return joined
else:
kwargs = {}
if hasattr(self, "tz"):
kwargs["tz"] = getattr(other, "tz", None)
return self._simple_new(joined, name, **kwargs)

freq = self.freq if self._can_fast_union(other) else None
new_data = type(self._data)._simple_new( # type: ignore
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a comment explaining why the `# type: ignore` was added?

joined, dtype=self.dtype, freq=freq
)

return type(self)._simple_new(new_data, name=name)


class DatetimelikeDelegateMixin(PandasDelegate):
Expand Down
144 changes: 144 additions & 0 deletions pandas/tests/indexes/datetimes/test_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
from datetime import datetime

import numpy as np
import pytest

from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime
import pandas._testing as tm

from pandas.tseries.offsets import BDay, BMonthEnd


class TestJoin:
def test_does_not_convert_mixed_integer(self):
df = tm.makeCustomDataframe(
10,
10,
data_gen_f=lambda *args, **kwargs: np.random.randn(),
r_idx_type="i",
c_idx_type="dt",
)
cols = df.columns.join(df.index, how="outer")
joined = cols.join(df.columns)
assert cols.dtype == np.dtype("O")
assert cols.dtype == joined.dtype
tm.assert_numpy_array_equal(cols.values, joined.values)

def test_join_self(self, join_type):
index = date_range("1/1/2000", periods=10)
joined = index.join(index, how=join_type)
assert index is joined

def test_join_with_period_index(self, join_type):
df = tm.makeCustomDataframe(
10,
10,
data_gen_f=lambda *args: np.random.randint(2),
c_idx_type="p",
r_idx_type="dt",
)
s = df.iloc[:5, 0]

expected = df.columns.astype("O").join(s.index, how=join_type)
result = df.columns.join(s.index, how=join_type)
tm.assert_index_equal(expected, result)

def test_join_object_index(self):
rng = date_range("1/1/2000", periods=10)
idx = Index(["a", "b", "c", "d"])

result = rng.join(idx, how="outer")
assert isinstance(result[0], Timestamp)

def test_join_utc_convert(self, join_type):
rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")

left = rng.tz_convert("US/Eastern")
right = rng.tz_convert("Europe/Berlin")

result = left.join(left[:-5], how=join_type)
assert isinstance(result, DatetimeIndex)
assert result.tz == left.tz

result = left.join(right[:-5], how=join_type)
assert isinstance(result, DatetimeIndex)
assert result.tz.zone == "UTC"

@pytest.mark.parametrize("sort", [None, False])
def test_datetimeindex_union_join_empty(self, sort):
dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
empty = Index([])

result = dti.union(empty, sort=sort)
expected = dti.astype("O")
tm.assert_index_equal(result, expected)

result = dti.join(empty)
assert isinstance(result, DatetimeIndex)
tm.assert_index_equal(result, dti)

def test_join_nonunique(self):
idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
rs = idx1.join(idx2, how="outer")
assert rs.is_monotonic

@pytest.mark.parametrize("freq", ["B", "C"])
def test_outer_join(self, freq):
# should just behave as union
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
rng = date_range(start=start, end=end, freq=freq)

# overlapping
left = rng[:10]
right = rng[5:10]

the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)

# non-overlapping, gap in middle
left = rng[:5]
right = rng[10:]

the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None

# non-overlapping, no gap
left = rng[:5]
right = rng[5:10]

the_join = left.join(right, how="outer")
assert isinstance(the_join, DatetimeIndex)

# overlapping, but different offset
other = date_range(start, end, freq=BMonthEnd())

the_join = rng.join(other, how="outer")
assert isinstance(the_join, DatetimeIndex)
assert the_join.freq is None

def test_naive_aware_conflicts(self):
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
naive = date_range(start, end, freq=BDay(), tz=None)
aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")

msg = "tz-naive.*tz-aware"
with pytest.raises(TypeError, match=msg):
naive.join(aware)

with pytest.raises(TypeError, match=msg):
aware.join(naive)

@pytest.mark.parametrize("tz", [None, "US/Pacific"])
def test_join_preserves_freq(self, tz):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only this test should be backported.

# GH#32157
dti = date_range("2016-01-01", periods=10, tz=tz)
result = dti[:5].join(dti[5:], how="outer")
assert result.freq == dti.freq
tm.assert_index_equal(result, dti)

result = dti[:5].join(dti[6:], how="outer")
assert result.freq is None
expected = dti.delete(5)
tm.assert_index_equal(result, expected)
49 changes: 49 additions & 0 deletions pandas/tests/indexes/timedeltas/test_join.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import numpy as np

from pandas import Index, Timedelta, timedelta_range
import pandas._testing as tm


class TestJoin:
def test_append_join_nondatetimeindex(self):
rng = timedelta_range("1 days", periods=10)
idx = Index(["a", "b", "c", "d"])

result = rng.append(idx)
assert isinstance(result[0], Timedelta)

# it works
rng.join(idx, how="outer")

def test_join_self(self, join_type):
index = timedelta_range("1 day", periods=10)
joined = index.join(index, how=join_type)
tm.assert_index_equal(index, joined)

def test_does_not_convert_mixed_integer(self):
df = tm.makeCustomDataframe(
10,
10,
data_gen_f=lambda *args, **kwargs: np.random.randn(),
r_idx_type="i",
c_idx_type="td",
)
str(df)

cols = df.columns.join(df.index, how="outer")
joined = cols.join(df.columns)
assert cols.dtype == np.dtype("O")
assert cols.dtype == joined.dtype
tm.assert_index_equal(cols, joined)

def test_join_preserves_freq(self):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only this test should be backported.

# GH#32157
tdi = timedelta_range("1 day", periods=10)
result = tdi[:5].join(tdi[5:], how="outer")
assert result.freq == tdi.freq
tm.assert_index_equal(result, tdi)

result = tdi[:5].join(tdi[6:], how="outer")
assert result.freq is None
expected = tdi.delete(5)
tm.assert_index_equal(result, expected)