Skip to content

Commit 1dbf582

Browse files
committed
BUG: GH14323 Union of differences from DatetimeIndex incorrect
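Sets freq to None on the result of a difference operation on a DatetimeIndex or TimedeltaIndex, rather than retaining the original index's frequency. The retained frequency no longer describes the result once elements have been removed, and it can cause problems in downstream operations such as taking the union of two differences. The frequency of a PeriodIndex is still retained.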
1 parent 96b364a commit 1dbf582

File tree

2 files changed

+75
-1
lines changed

2 files changed

+75
-1
lines changed

pandas/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1973,7 +1973,7 @@ def difference(self, other):
19731973
except TypeError:
19741974
pass
19751975

1976-
return this._shallow_copy(the_diff, name=result_name)
1976+
return this._shallow_copy(the_diff, name=result_name, freq=None)
19771977

19781978
def symmetric_difference(self, other, result_name=None):
19791979
"""

pandas/tests/indexes/test_datetimelike.py

+74
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,31 @@ def test_fillna_datetime64(self):
732732
dtype=object)
733733
self.assert_index_equal(idx.fillna('x'), exp)
734734

735+
def test_difference_of_union(self):
736+
# GH14323: Test taking the union of differences of an Index.
737+
# Difference of DatetimeIndex does not preserve frequency,
738+
# so a differencing operation should not retain the freq field of the
739+
# original index.
740+
i = pd.date_range("20160920", "20160925", freq="D")
741+
742+
a = pd.date_range("20160921", "20160924", freq="D")
743+
expected = pd.DatetimeIndex(["20160920", "20160925"], freq=None)
744+
a_diff = i.difference(a)
745+
tm.assert_index_equal(a_diff, expected)
746+
tm.assert_attr_equal('freq', a_diff, expected)
747+
748+
b = pd.date_range("20160922", "20160925", freq="D")
749+
b_diff = i.difference(b)
750+
expected = pd.DatetimeIndex(["20160920", "20160921"], freq=None)
751+
tm.assert_index_equal(b_diff, expected)
752+
tm.assert_attr_equal('freq', b_diff, expected)
753+
754+
union_of_diff = a_diff.union(b_diff)
755+
expected = pd.DatetimeIndex(["20160920", "20160921", "20160925"],
756+
freq=None)
757+
tm.assert_index_equal(union_of_diff, expected)
758+
tm.assert_attr_equal('freq', union_of_diff, expected)
759+
735760

736761
class TestPeriodIndex(DatetimeLike, tm.TestCase):
737762
_holder = PeriodIndex
@@ -938,6 +963,30 @@ def test_no_millisecond_field(self):
938963
with self.assertRaises(AttributeError):
939964
DatetimeIndex([]).millisecond
940965

966+
def test_difference_of_union(self):
967+
# GH14323: Test taking the union of differences of an Index.
968+
# Difference of Period MUST preserve frequency, but the ability
969+
# to union results must be preserved
970+
i = pd.period_range("20160920", "20160925", freq="D")
971+
972+
a = pd.period_range("20160921", "20160924", freq="D")
973+
expected = pd.PeriodIndex(["20160920", "20160925"], freq='D')
974+
a_diff = i.difference(a)
975+
tm.assert_index_equal(a_diff, expected)
976+
tm.assert_attr_equal('freq', a_diff, expected)
977+
978+
b = pd.period_range("20160922", "20160925", freq="D")
979+
b_diff = i.difference(b)
980+
expected = pd.PeriodIndex(["20160920", "20160921"], freq='D')
981+
tm.assert_index_equal(b_diff, expected)
982+
tm.assert_attr_equal('freq', b_diff, expected)
983+
984+
union_of_diff = a_diff.union(b_diff)
985+
expected = pd.PeriodIndex(["20160920", "20160921", "20160925"],
986+
freq='D')
987+
tm.assert_index_equal(union_of_diff, expected)
988+
tm.assert_attr_equal('freq', union_of_diff, expected)
989+
941990

942991
class TestTimedeltaIndex(DatetimeLike, tm.TestCase):
943992
_holder = TimedeltaIndex
@@ -1149,3 +1198,28 @@ def test_fillna_timedelta(self):
11491198
exp = pd.Index(
11501199
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
11511200
self.assert_index_equal(idx.fillna('x'), exp)
1201+
1202+
def test_difference_of_union(self):
1203+
# GH14323: Test taking the union of differences of an Index.
1204+
# Difference of TimedeltaIndex does not preserve frequency,
1205+
# so a differencing operation should not retain the freq field of the
1206+
# original index.
1207+
i = pd.timedelta_range("0 days", "5 days", freq="D")
1208+
1209+
a = pd.timedelta_range("1 days", "4 days", freq="D")
1210+
expected = pd.TimedeltaIndex(["0 days", "5 days"], freq=None)
1211+
a_diff = i.difference(a)
1212+
tm.assert_index_equal(a_diff, expected)
1213+
tm.assert_attr_equal('freq', a_diff, expected)
1214+
1215+
b = pd.timedelta_range("2 days", "5 days", freq="D")
1216+
b_diff = i.difference(b)
1217+
expected = pd.TimedeltaIndex(["0 days", "1 days"], freq=None)
1218+
tm.assert_index_equal(b_diff, expected)
1219+
tm.assert_attr_equal('freq', b_diff, expected)
1220+
1221+
union_of_difference = a_diff.union(b_diff)
1222+
expected = pd.TimedeltaIndex(["0 days", "1 days", "5 days"],
1223+
freq=None)
1224+
tm.assert_index_equal(union_of_difference, expected)
1225+
tm.assert_attr_equal('freq', union_of_difference, expected)

0 commit comments

Comments
 (0)