Skip to content

Commit 6400cdd

Browse files
Liam3851jorisvandenbossche
authored and committed
[Backport 14346] BUG: GH14323 Union of differences from DatetimeIndex incorrect
Closes pandas-dev#14323. Sets freq to None when doing a difference operation on a DatetimeIndex or TimedeltaIndex, rather than retaining the frequency (which can cause problems with downstream operations). Frequency of PeriodIndex is retained. Author: David Krych <[email protected]>. Closes pandas-dev#14346 from Liam3851/dtind_diff_14323 and squashes the following commits: 1dbf582 [David Krych] BUG: GH14323 Union of differences from DatetimeIndex incorrect (cherry picked from commit bee90a7).
1 parent 9bca038 commit 6400cdd

File tree

3 files changed

+77
-1
lines changed

3 files changed

+77
-1
lines changed

doc/source/whatsnew/v0.19.1.txt

+2
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ Bug Fixes
4444
- Correctly raise ``ValueError`` on empty input to ``pd.eval()`` and ``df.query()`` (:issue:`13139`)
4545

4646
- Bug in ``RangeIndex.intersection`` when result is an empty set (:issue:`14364`).
47+
- Bug in union of differences from a ``DatetimeIndex``; this is a regression in 0.19.0 from 0.18.1 (:issue:`14323`)
48+
4749

4850

4951
- Source installs from PyPI will now work without ``cython`` installed, as in previous versions (:issue:`14204`)

pandas/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2003,7 +2003,7 @@ def difference(self, other):
20032003
except TypeError:
20042004
pass
20052005

2006-
return this._shallow_copy(the_diff, name=result_name)
2006+
return this._shallow_copy(the_diff, name=result_name, freq=None)
20072007

20082008
def symmetric_difference(self, other, result_name=None):
20092009
"""

pandas/tests/indexes/test_datetimelike.py

+74
Original file line numberDiff line numberDiff line change
@@ -732,6 +732,31 @@ def test_fillna_datetime64(self):
732732
dtype=object)
733733
self.assert_index_equal(idx.fillna('x'), exp)
734734

735+
def test_difference_of_union(self):
736+
# GH14323: Test taking the union of differences of an Index.
737+
# Difference of DatetimeIndex does not preserve frequency,
738+
# so a differencing operation should not retain the freq field of the
739+
# original index.
740+
i = pd.date_range("20160920", "20160925", freq="D")
741+
742+
a = pd.date_range("20160921", "20160924", freq="D")
743+
expected = pd.DatetimeIndex(["20160920", "20160925"], freq=None)
744+
a_diff = i.difference(a)
745+
tm.assert_index_equal(a_diff, expected)
746+
tm.assert_attr_equal('freq', a_diff, expected)
747+
748+
b = pd.date_range("20160922", "20160925", freq="D")
749+
b_diff = i.difference(b)
750+
expected = pd.DatetimeIndex(["20160920", "20160921"], freq=None)
751+
tm.assert_index_equal(b_diff, expected)
752+
tm.assert_attr_equal('freq', b_diff, expected)
753+
754+
union_of_diff = a_diff.union(b_diff)
755+
expected = pd.DatetimeIndex(["20160920", "20160921", "20160925"],
756+
freq=None)
757+
tm.assert_index_equal(union_of_diff, expected)
758+
tm.assert_attr_equal('freq', union_of_diff, expected)
759+
735760

736761
class TestPeriodIndex(DatetimeLike, tm.TestCase):
737762
_holder = PeriodIndex
@@ -938,6 +963,30 @@ def test_no_millisecond_field(self):
938963
with self.assertRaises(AttributeError):
939964
DatetimeIndex([]).millisecond
940965

966+
def test_difference_of_union(self):
967+
# GH14323: Test taking the union of differences of an Index.
968+
# Difference of Period MUST preserve frequency, but the ability
969+
# to union results must be preserved
970+
i = pd.period_range("20160920", "20160925", freq="D")
971+
972+
a = pd.period_range("20160921", "20160924", freq="D")
973+
expected = pd.PeriodIndex(["20160920", "20160925"], freq='D')
974+
a_diff = i.difference(a)
975+
tm.assert_index_equal(a_diff, expected)
976+
tm.assert_attr_equal('freq', a_diff, expected)
977+
978+
b = pd.period_range("20160922", "20160925", freq="D")
979+
b_diff = i.difference(b)
980+
expected = pd.PeriodIndex(["20160920", "20160921"], freq='D')
981+
tm.assert_index_equal(b_diff, expected)
982+
tm.assert_attr_equal('freq', b_diff, expected)
983+
984+
union_of_diff = a_diff.union(b_diff)
985+
expected = pd.PeriodIndex(["20160920", "20160921", "20160925"],
986+
freq='D')
987+
tm.assert_index_equal(union_of_diff, expected)
988+
tm.assert_attr_equal('freq', union_of_diff, expected)
989+
941990

942991
class TestTimedeltaIndex(DatetimeLike, tm.TestCase):
943992
_holder = TimedeltaIndex
@@ -1149,3 +1198,28 @@ def test_fillna_timedelta(self):
11491198
exp = pd.Index(
11501199
[pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object)
11511200
self.assert_index_equal(idx.fillna('x'), exp)
1201+
1202+
def test_difference_of_union(self):
1203+
# GH14323: Test taking the union of differences of an Index.
1204+
# Difference of TimedeltaIndex does not preserve frequency,
1205+
# so a differencing operation should not retain the freq field of the
1206+
# original index.
1207+
i = pd.timedelta_range("0 days", "5 days", freq="D")
1208+
1209+
a = pd.timedelta_range("1 days", "4 days", freq="D")
1210+
expected = pd.TimedeltaIndex(["0 days", "5 days"], freq=None)
1211+
a_diff = i.difference(a)
1212+
tm.assert_index_equal(a_diff, expected)
1213+
tm.assert_attr_equal('freq', a_diff, expected)
1214+
1215+
b = pd.timedelta_range("2 days", "5 days", freq="D")
1216+
b_diff = i.difference(b)
1217+
expected = pd.TimedeltaIndex(["0 days", "1 days"], freq=None)
1218+
tm.assert_index_equal(b_diff, expected)
1219+
tm.assert_attr_equal('freq', b_diff, expected)
1220+
1221+
union_of_difference = a_diff.union(b_diff)
1222+
expected = pd.TimedeltaIndex(["0 days", "1 days", "5 days"],
1223+
freq=None)
1224+
tm.assert_index_equal(union_of_difference, expected)
1225+
tm.assert_attr_equal('freq', union_of_difference, expected)

0 commit comments

Comments
 (0)