Skip to content

Commit 6ae3670

Browse files
committed
REGR: preserve freq in DTI/TDI outer join (pandas-dev#32166)
1 parent e2ea34d commit 6ae3670

File tree

4 files changed: 203 additions (+203) and 14 deletions (-14).

doc/source/whatsnew/v1.0.2.rst

+1
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ Fixed regressions
2323
- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
2424
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
2525
- Fixed regression in :meth:`GroupBy.agg` calling a user-provided function an extra time on an empty input (:issue:`31760`)
26+
- Joining on :class:`DatetimeIndex` or :class:`TimedeltaIndex` will preserve ``freq`` in simple cases (:issue:`32166`)
2627
-
2728

2829
.. ---------------------------------------------------------------------------

pandas/core/indexes/datetimelike.py

+9-14
Original file line number | Diff line number | Diff line change
@@ -857,21 +857,16 @@ def _is_convertible_to_index_for_join(cls, other: Index) -> bool:
857857
return True
858858
return False
859859

860-
def _wrap_joined_index(self, joined, other):
860+
def _wrap_joined_index(self, joined: np.ndarray, other):
861+
assert other.dtype == self.dtype, (other.dtype, self.dtype)
861862
name = get_op_result_name(self, other)
862-
if (
863-
isinstance(other, type(self))
864-
and self.freq == other.freq
865-
and self._can_fast_union(other)
866-
):
867-
joined = self._shallow_copy(joined)
868-
joined.name = name
869-
return joined
870-
else:
871-
kwargs = {}
872-
if hasattr(self, "tz"):
873-
kwargs["tz"] = getattr(other, "tz", None)
874-
return self._simple_new(joined, name, **kwargs)
863+
864+
freq = self.freq if self._can_fast_union(other) else None
865+
new_data = type(self._data)._simple_new( # type: ignore
866+
joined, dtype=self.dtype, freq=freq
867+
)
868+
869+
return type(self)._simple_new(new_data, name=name)
875870

876871

877872
class DatetimelikeDelegateMixin(PandasDelegate):
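[New file — its path was not captured in this extract. From its contents it is the DatetimeIndex join test module, presumably pandas/tests/indexes/datetimes/test_join.py; confirm against the commit.]

+144
Original file line number | Diff line number | Diff line change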
@@ -0,0 +1,144 @@
1+
from datetime import datetime
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DatetimeIndex, Index, Timestamp, date_range, to_datetime
7+
import pandas._testing as tm
8+
9+
from pandas.tseries.offsets import BDay, BMonthEnd
10+
11+
12+
class TestJoin:
13+
def test_does_not_convert_mixed_integer(self):
14+
df = tm.makeCustomDataframe(
15+
10,
16+
10,
17+
data_gen_f=lambda *args, **kwargs: np.random.randn(),
18+
r_idx_type="i",
19+
c_idx_type="dt",
20+
)
21+
cols = df.columns.join(df.index, how="outer")
22+
joined = cols.join(df.columns)
23+
assert cols.dtype == np.dtype("O")
24+
assert cols.dtype == joined.dtype
25+
tm.assert_numpy_array_equal(cols.values, joined.values)
26+
27+
def test_join_self(self, join_type):
28+
index = date_range("1/1/2000", periods=10)
29+
joined = index.join(index, how=join_type)
30+
assert index is joined
31+
32+
def test_join_with_period_index(self, join_type):
33+
df = tm.makeCustomDataframe(
34+
10,
35+
10,
36+
data_gen_f=lambda *args: np.random.randint(2),
37+
c_idx_type="p",
38+
r_idx_type="dt",
39+
)
40+
s = df.iloc[:5, 0]
41+
42+
expected = df.columns.astype("O").join(s.index, how=join_type)
43+
result = df.columns.join(s.index, how=join_type)
44+
tm.assert_index_equal(expected, result)
45+
46+
def test_join_object_index(self):
47+
rng = date_range("1/1/2000", periods=10)
48+
idx = Index(["a", "b", "c", "d"])
49+
50+
result = rng.join(idx, how="outer")
51+
assert isinstance(result[0], Timestamp)
52+
53+
def test_join_utc_convert(self, join_type):
54+
rng = date_range("1/1/2011", periods=100, freq="H", tz="utc")
55+
56+
left = rng.tz_convert("US/Eastern")
57+
right = rng.tz_convert("Europe/Berlin")
58+
59+
result = left.join(left[:-5], how=join_type)
60+
assert isinstance(result, DatetimeIndex)
61+
assert result.tz == left.tz
62+
63+
result = left.join(right[:-5], how=join_type)
64+
assert isinstance(result, DatetimeIndex)
65+
assert result.tz.zone == "UTC"
66+
67+
@pytest.mark.parametrize("sort", [None, False])
68+
def test_datetimeindex_union_join_empty(self, sort):
69+
dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
70+
empty = Index([])
71+
72+
result = dti.union(empty, sort=sort)
73+
expected = dti.astype("O")
74+
tm.assert_index_equal(result, expected)
75+
76+
result = dti.join(empty)
77+
assert isinstance(result, DatetimeIndex)
78+
tm.assert_index_equal(result, dti)
79+
80+
def test_join_nonunique(self):
81+
idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
82+
idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
83+
rs = idx1.join(idx2, how="outer")
84+
assert rs.is_monotonic
85+
86+
@pytest.mark.parametrize("freq", ["B", "C"])
87+
def test_outer_join(self, freq):
88+
# should just behave as union
89+
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
90+
rng = date_range(start=start, end=end, freq=freq)
91+
92+
# overlapping
93+
left = rng[:10]
94+
right = rng[5:10]
95+
96+
the_join = left.join(right, how="outer")
97+
assert isinstance(the_join, DatetimeIndex)
98+
99+
# non-overlapping, gap in middle
100+
left = rng[:5]
101+
right = rng[10:]
102+
103+
the_join = left.join(right, how="outer")
104+
assert isinstance(the_join, DatetimeIndex)
105+
assert the_join.freq is None
106+
107+
# non-overlapping, no gap
108+
left = rng[:5]
109+
right = rng[5:10]
110+
111+
the_join = left.join(right, how="outer")
112+
assert isinstance(the_join, DatetimeIndex)
113+
114+
# overlapping, but different offset
115+
other = date_range(start, end, freq=BMonthEnd())
116+
117+
the_join = rng.join(other, how="outer")
118+
assert isinstance(the_join, DatetimeIndex)
119+
assert the_join.freq is None
120+
121+
def test_naive_aware_conflicts(self):
122+
start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
123+
naive = date_range(start, end, freq=BDay(), tz=None)
124+
aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")
125+
126+
msg = "tz-naive.*tz-aware"
127+
with pytest.raises(TypeError, match=msg):
128+
naive.join(aware)
129+
130+
with pytest.raises(TypeError, match=msg):
131+
aware.join(naive)
132+
133+
@pytest.mark.parametrize("tz", [None, "US/Pacific"])
134+
def test_join_preserves_freq(self, tz):
135+
# GH#32157
136+
dti = date_range("2016-01-01", periods=10, tz=tz)
137+
result = dti[:5].join(dti[5:], how="outer")
138+
assert result.freq == dti.freq
139+
tm.assert_index_equal(result, dti)
140+
141+
result = dti[:5].join(dti[6:], how="outer")
142+
assert result.freq is None
143+
expected = dti.delete(5)
144+
tm.assert_index_equal(result, expected)
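[New file — its path was not captured in this extract. From its contents it is the TimedeltaIndex join test module, presumably pandas/tests/indexes/timedeltas/test_join.py; confirm against the commit.]

+49
Original file line number | Diff line number | Diff line change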
@@ -0,0 +1,49 @@
1+
import numpy as np
2+
3+
from pandas import Index, Timedelta, timedelta_range
4+
import pandas._testing as tm
5+
6+
7+
class TestJoin:
8+
def test_append_join_nondatetimeindex(self):
9+
rng = timedelta_range("1 days", periods=10)
10+
idx = Index(["a", "b", "c", "d"])
11+
12+
result = rng.append(idx)
13+
assert isinstance(result[0], Timedelta)
14+
15+
# it works
16+
rng.join(idx, how="outer")
17+
18+
def test_join_self(self, join_type):
19+
index = timedelta_range("1 day", periods=10)
20+
joined = index.join(index, how=join_type)
21+
tm.assert_index_equal(index, joined)
22+
23+
def test_does_not_convert_mixed_integer(self):
24+
df = tm.makeCustomDataframe(
25+
10,
26+
10,
27+
data_gen_f=lambda *args, **kwargs: np.random.randn(),
28+
r_idx_type="i",
29+
c_idx_type="td",
30+
)
31+
str(df)
32+
33+
cols = df.columns.join(df.index, how="outer")
34+
joined = cols.join(df.columns)
35+
assert cols.dtype == np.dtype("O")
36+
assert cols.dtype == joined.dtype
37+
tm.assert_index_equal(cols, joined)
38+
39+
def test_join_preserves_freq(self):
40+
# GH#32157
41+
tdi = timedelta_range("1 day", periods=10)
42+
result = tdi[:5].join(tdi[5:], how="outer")
43+
assert result.freq == tdi.freq
44+
tm.assert_index_equal(result, tdi)
45+
46+
result = tdi[:5].join(tdi[6:], how="outer")
47+
assert result.freq is None
48+
expected = tdi.delete(5)
49+
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)