Skip to content

Commit bf18b3d

Browse files
lukemanley authored and hedeershowk committed
PERF: Index.difference (pandas-dev#55108)
* PERF: Index.difference
* whatsnew
* remove is_monotonic check
1 parent 7d1738a commit bf18b3d

File tree

4 files changed

+10
-16
lines changed

4 files changed

+10
-16
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ Performance improvements
178178
- Performance improvement in :func:`concat` with ``axis=1`` and objects with unaligned indexes (:issue:`55084`)
179179
- Performance improvement in :func:`to_dict` on converting DataFrame to dictionary (:issue:`50990`)
180180
- Performance improvement in :meth:`DataFrame.sort_index` and :meth:`Series.sort_index` when indexed by a :class:`MultiIndex` (:issue:`54835`)
181+
- Performance improvement in :meth:`Index.difference` (:issue:`55108`)
181182
- Performance improvement when indexing with more than 4 keys (:issue:`54550`)
182183
-
183184

pandas/core/indexes/base.py

+3-14
Original file line numberDiff line numberDiff line change
@@ -3615,21 +3615,10 @@ def difference(self, other, sort=None):
36153615

36163616
def _difference(self, other, sort):
36173617
# overridden by RangeIndex
3618-
3619-
this = self.unique()
3620-
3621-
indexer = this.get_indexer_for(other)
3622-
indexer = indexer.take((indexer != -1).nonzero()[0])
3623-
3624-
label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
3625-
3626-
the_diff: MultiIndex | ArrayLike
3627-
if isinstance(this, ABCMultiIndex):
3628-
the_diff = this.take(label_diff)
3629-
else:
3630-
the_diff = this._values.take(label_diff)
3618+
other = other.unique()
3619+
the_diff = self[other.get_indexer_for(self) == -1]
3620+
the_diff = the_diff if self.is_unique else the_diff.unique()
36313621
the_diff = _maybe_try_sort(the_diff, sort)
3632-
36333622
return the_diff
36343623

36353624
def _wrap_difference_result(self, other, result):

pandas/tests/indexes/datetimes/test_setops.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -343,9 +343,11 @@ def test_difference_freq(self, sort):
343343
tm.assert_index_equal(idx_diff, expected)
344344
tm.assert_attr_equal("freq", idx_diff, expected)
345345

346+
# preserve frequency when the difference is a contiguous
347+
# subset of the original range
346348
other = date_range("20160922", "20160925", freq="D")
347349
idx_diff = index.difference(other, sort)
348-
expected = DatetimeIndex(["20160920", "20160921"], freq=None)
350+
expected = DatetimeIndex(["20160920", "20160921"], freq="D")
349351
tm.assert_index_equal(idx_diff, expected)
350352
tm.assert_attr_equal("freq", idx_diff, expected)
351353

pandas/tests/indexes/timedeltas/test_setops.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -219,9 +219,11 @@ def test_difference_freq(self, sort):
219219
tm.assert_index_equal(idx_diff, expected)
220220
tm.assert_attr_equal("freq", idx_diff, expected)
221221

222+
# preserve frequency when the difference is a contiguous
223+
# subset of the original range
222224
other = timedelta_range("2 days", "5 days", freq="D")
223225
idx_diff = index.difference(other, sort)
224-
expected = TimedeltaIndex(["0 days", "1 days"], freq=None)
226+
expected = TimedeltaIndex(["0 days", "1 days"], freq="D")
225227
tm.assert_index_equal(idx_diff, expected)
226228
tm.assert_attr_equal("freq", idx_diff, expected)
227229

0 commit comments

Comments
 (0)