Skip to content

Commit 07a46af

Browse files
committed
API: raise on setops for + and - for Indexes (GH8226)
1 parent 54678dd commit 07a46af

23 files changed

+110
-98
lines changed

doc/source/indexing.rst

+11-6
Original file line numberDiff line numberDiff line change
@@ -1616,28 +1616,33 @@ display:
16161616
df
16171617
df['A']
16181618
1619+
.. _indexing.setops:
16191620
16201621
Set operations on Index objects
16211622
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
16221623
1624+
.. warning::
1625+
1626+
In 0.15.0, the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain
1627+
index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``.
1628+
16231629
.. _indexing.set_ops:
16241630
1625-
The three main operations are ``union (|)``, ``intersection (&)``, and ``diff
1626-
(-)``. These can be directly called as instance methods or used via overloaded
1627-
operators:
1631+
The two main operations are ``union (|)`` and ``intersection (&)``.
1632+
These can be directly called as instance methods or used via overloaded
1633+
operators. Difference is provided via the ``.difference()`` method.
16281634
16291635
.. ipython:: python
16301636
16311637
a = Index(['c', 'b', 'a'])
16321638
b = Index(['c', 'e', 'd'])
1633-
a.union(b)
16341639
a | b
16351640
a & b
1636-
a - b
1641+
a.difference(b)
16371642
16381643
Also available is the ``sym_diff (^)`` operation, which returns elements
16391644
that appear in either ``idx1`` or ``idx2`` but not both. This is
1640-
equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)``,
1645+
equivalent to the Index created by ``(idx1.difference(idx2)).union(idx2.difference(idx1))``,
16411646
with duplicates dropped.
16421647

16431648
.. ipython:: python

doc/source/v0.15.0.txt

+6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ users upgrade to this version.
1919
- Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`
2020
- New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties <whatsnew_0150.dt>`
2121
- dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`)
22+
- API change in using ``Index`` set operations, see :ref:`here <whatsnew_0150.index_set_ops>`
2223

2324
- :ref:`Other Enhancements <whatsnew_0150.enhancements>`
2425

@@ -343,6 +344,11 @@ API changes
343344
- ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of
344345
``DataFrame.to_csv()`` (:issue:`8215`).
345346

347+
348+
.. _whatsnew_0150.index_set_ops:
349+
350+
- The Index set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further, the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()``.
351+
346352
.. _whatsnew_0150.dt:
347353

348354
.dt accessor

pandas/core/base.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -711,8 +711,10 @@ def __add__(self, other):
711711
from pandas.core.index import Index
712712
from pandas.tseries.offsets import DateOffset
713713
if isinstance(other, Index):
714+
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
715+
"use .union()",FutureWarning)
714716
return self.union(other)
715-
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
717+
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
716718
return self._add_delta(other)
717719
elif com.is_integer(other):
718720
return self.shift(other)
@@ -723,8 +725,10 @@ def __sub__(self, other):
723725
from pandas.core.index import Index
724726
from pandas.tseries.offsets import DateOffset
725727
if isinstance(other, Index):
726-
return self.diff(other)
727-
elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
728+
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
729+
"use .difference()",FutureWarning)
730+
return self.difference(other)
731+
if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
728732
return self._add_delta(-other)
729733
elif com.is_integer(other):
730734
return self.shift(-other)

pandas/core/categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def reorder_levels(self, new_levels, ordered=None):
435435
"""
436436
new_levels = self._validate_levels(new_levels)
437437

438-
if len(new_levels) < len(self._levels) or len(self._levels-new_levels):
438+
if len(new_levels) < len(self._levels) or len(self._levels.difference(new_levels)):
439439
raise ValueError('Reordered levels must include all original levels')
440440
values = self.__array__()
441441
self._codes = _get_codes_for_values(values, new_levels)
@@ -887,7 +887,7 @@ def __setitem__(self, key, value):
887887
raise ValueError("cannot set a Categorical with another, without identical levels")
888888

889889
rvalue = value if com.is_list_like(value) else [value]
890-
to_add = Index(rvalue)-self.levels
890+
to_add = Index(rvalue).difference(self.levels)
891891
# no assignments of values not in levels, but it's always ok to set something to np.nan
892892
if len(to_add) and not isnull(to_add).all():
893893
raise ValueError("cannot setitem on a Categorical with a new level,"

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3682,7 +3682,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
36823682
'ignore_index=True')
36833683

36843684
index = None if other.name is None else [other.name]
3685-
combined_columns = self.columns.tolist() + ((self.columns | other.index) - self.columns).tolist()
3685+
combined_columns = self.columns.tolist() + (self.columns | other.index).difference(self.columns).tolist()
36863686
other = other.reindex(combined_columns, copy=False)
36873687
other = DataFrame(other.values.reshape((1, len(other))),
36883688
index=index, columns=combined_columns).convert_objects()

pandas/core/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ def _set_selection_from_grouper(self):
473473
ax = self.obj._info_axis
474474
groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
475475
if len(groupers):
476-
self._group_selection = (ax-Index(groupers)).tolist()
476+
self._group_selection = ax.difference(Index(groupers)).tolist()
477477

478478
def _set_result_index_ordered(self, result):
479479
# set the result index on the passed values object

pandas/core/index.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -1128,9 +1128,10 @@ def argsort(self, *args, **kwargs):
11281128

11291129
def __add__(self, other):
11301130
if isinstance(other, Index):
1131+
warnings.warn("using '+' to provide set union with Indexes is deprecated, "
1132+
"use '|' or .union()",FutureWarning)
11311133
return self.union(other)
1132-
else:
1133-
return Index(np.array(self) + other)
1134+
return Index(np.array(self) + other)
11341135

11351136
__iadd__ = __add__
11361137
__eq__ = _indexOp('__eq__')
@@ -1141,7 +1142,10 @@ def __add__(self, other):
11411142
__ge__ = _indexOp('__ge__')
11421143

11431144
def __sub__(self, other):
1144-
return self.diff(other)
1145+
if isinstance(other, Index):
1146+
warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
1147+
"use .difference()",FutureWarning)
1148+
return self.difference(other)
11451149

11461150
def __and__(self, other):
11471151
return self.intersection(other)
@@ -1273,7 +1277,7 @@ def intersection(self, other):
12731277
taken.name = None
12741278
return taken
12751279

1276-
def diff(self, other):
1280+
def difference(self, other):
12771281
"""
12781282
Compute sorted set difference of two Index objects
12791283
@@ -1289,8 +1293,7 @@ def diff(self, other):
12891293
-----
12901294
One can do either of these and achieve the same result
12911295
1292-
>>> index - index2
1293-
>>> index.diff(index2)
1296+
>>> index.difference(index2)
12941297
"""
12951298

12961299
if not hasattr(other, '__iter__'):
@@ -1308,6 +1311,8 @@ def diff(self, other):
13081311
theDiff = sorted(set(self) - set(other))
13091312
return Index(theDiff, name=result_name)
13101313

1314+
diff = deprecate('diff',difference)
1315+
13111316
def sym_diff(self, other, result_name=None):
13121317
"""
13131318
Compute the sorted symmetric difference of two Index objects.
@@ -1350,7 +1355,7 @@ def sym_diff(self, other, result_name=None):
13501355
other = Index(other)
13511356
result_name = result_name or self.name
13521357

1353-
the_diff = sorted(set((self - other) + (other - self)))
1358+
the_diff = sorted(set((self.difference(other)).union(other.difference(self))))
13541359
return Index(the_diff, name=result_name)
13551360

13561361
def get_loc(self, key):
@@ -4135,6 +4140,8 @@ def union(self, other):
41354140
Returns
41364141
-------
41374142
Index
4143+
4144+
>>> index.union(index2)
41384145
"""
41394146
self._assert_can_do_setop(other)
41404147

@@ -4177,7 +4184,7 @@ def intersection(self, other):
41774184
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
41784185
names=result_names)
41794186

4180-
def diff(self, other):
4187+
def difference(self, other):
41814188
"""
41824189
Compute sorted set difference of two MultiIndex objects
41834190

pandas/core/panel.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -678,9 +678,9 @@ def _combine_frame(self, other, func, axis=0):
678678
self.minor_axis)
679679

680680
def _combine_panel(self, other, func):
681-
items = self.items + other.items
682-
major = self.major_axis + other.major_axis
683-
minor = self.minor_axis + other.minor_axis
681+
items = self.items.union(other.items)
682+
major = self.major_axis.union(other.major_axis)
683+
minor = self.minor_axis.union(other.minor_axis)
684684

685685
# could check that everything's the same size, but forget it
686686
this = self.reindex(items=items, major=major, minor=minor)

pandas/core/panelnd.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ def _combine_with_constructor(self, other, func):
8282
# combine labels to form new axes
8383
new_axes = []
8484
for a in self._AXIS_ORDERS:
85-
new_axes.append(getattr(self, a) + getattr(other, a))
85+
new_axes.append(getattr(self, a).union(getattr(other, a)))
8686

8787
# reindex: could check that everything's the same size, but forget it
8888
d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes)])

pandas/core/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,7 @@ def lreshape(data, groups, dropna=True, label=None):
848848
keys, values = zip(*groups)
849849

850850
all_cols = list(set.union(*[set(x) for x in values]))
851-
id_cols = list(data.columns.diff(all_cols))
851+
id_cols = list(data.columns.difference(all_cols))
852852

853853
K = len(values[0])
854854

pandas/core/series.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1458,7 +1458,7 @@ def combine(self, other, func, fill_value=nan):
14581458
result : Series
14591459
"""
14601460
if isinstance(other, Series):
1461-
new_index = self.index + other.index
1461+
new_index = self.index.union(other.index)
14621462
new_name = _maybe_match_name(self, other)
14631463
new_values = pa.empty(len(new_index), dtype=self.dtype)
14641464
for i, idx in enumerate(new_index):
@@ -1484,7 +1484,7 @@ def combine_first(self, other):
14841484
-------
14851485
y : Series
14861486
"""
1487-
new_index = self.index + other.index
1487+
new_index = self.index.union(other.index)
14881488
this = self.reindex(new_index, copy=False)
14891489
other = other.reindex(new_index, copy=False)
14901490
name = _maybe_match_name(self, other)

pandas/io/pytables.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -970,7 +970,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
970970
remain_values.extend(v)
971971
if remain_key is not None:
972972
ordered = value.axes[axis]
973-
ordd = ordered - Index(remain_values)
973+
ordd = ordered.difference(Index(remain_values))
974974
ordd = sorted(ordered.get_indexer(ordd))
975975
d[remain_key] = ordered.take(ordd)
976976

@@ -3245,7 +3245,7 @@ def get_blk_items(mgr, blocks):
32453245
data_columns, min_itemsize)
32463246
if len(data_columns):
32473247
mgr = block_obj.reindex_axis(
3248-
Index(axis_labels) - Index(data_columns),
3248+
Index(axis_labels).difference(Index(data_columns)),
32493249
axis=axis
32503250
)._data
32513251

@@ -3362,7 +3362,7 @@ def process_filter(field, filt):
33623362
# if we have a multi-index, then need to include
33633363
# the levels
33643364
if self.is_multi_index:
3365-
filt = filt + Index(self.levels)
3365+
filt = filt.union(Index(self.levels))
33663366

33673367
takers = op(axis_values, filt)
33683368
return obj.ix._getitem_axis(takers,

pandas/io/tests/test_pytables.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -2320,7 +2320,7 @@ def test_remove_startstop(self):
23202320
n = store.remove('wp5', start=16, stop=-16)
23212321
self.assertTrue(n == 120-32)
23222322
result = store.select('wp5')
2323-
expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
2323+
expected = wp.reindex(major_axis=wp.major_axis[:16//4].union(wp.major_axis[-16//4:]))
23242324
assert_panel_equal(result, expected)
23252325

23262326
_maybe_remove(store, 'wp6')
@@ -2339,7 +2339,7 @@ def test_remove_startstop(self):
23392339
n = store.remove('wp7', where=[crit], stop=80)
23402340
self.assertTrue(n == 28)
23412341
result = store.select('wp7')
2342-
expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
2342+
expected = wp.reindex(major_axis=wp.major_axis.difference(wp.major_axis[np.arange(0,20,3)]))
23432343
assert_panel_equal(result, expected)
23442344

23452345
def test_remove_crit(self):
@@ -2357,7 +2357,7 @@ def test_remove_crit(self):
23572357
self.assertTrue(n == 36)
23582358

23592359
result = store.select('wp3')
2360-
expected = wp.reindex(major_axis=wp.major_axis - date4)
2360+
expected = wp.reindex(major_axis=wp.major_axis.difference(date4))
23612361
assert_panel_equal(result, expected)
23622362

23632363
# upper half
@@ -2385,23 +2385,23 @@ def test_remove_crit(self):
23852385
crit1 = Term('major_axis=date1')
23862386
store.remove('wp2', where=[crit1])
23872387
result = store.select('wp2')
2388-
expected = wp.reindex(major_axis=wp.major_axis - date1)
2388+
expected = wp.reindex(major_axis=wp.major_axis.difference(date1))
23892389
assert_panel_equal(result, expected)
23902390

23912391
date2 = wp.major_axis[5]
23922392
crit2 = Term('major_axis=date2')
23932393
store.remove('wp2', where=[crit2])
23942394
result = store['wp2']
23952395
expected = wp.reindex(
2396-
major_axis=wp.major_axis - date1 - Index([date2]))
2396+
major_axis=wp.major_axis.difference(date1).difference(Index([date2])))
23972397
assert_panel_equal(result, expected)
23982398

23992399
date3 = [wp.major_axis[7], wp.major_axis[9]]
24002400
crit3 = Term('major_axis=date3')
24012401
store.remove('wp2', where=[crit3])
24022402
result = store['wp2']
24032403
expected = wp.reindex(
2404-
major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
2404+
major_axis=wp.major_axis.difference(date1).difference(Index([date2])).difference(Index(date3)))
24052405
assert_panel_equal(result, expected)
24062406

24072407
# corners

pandas/sparse/panel.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -427,9 +427,9 @@ def _new_like(self, new_frames):
427427
default_kind=self.default_kind)
428428

429429
def _combinePanel(self, other, func):
430-
items = self.items + other.items
431-
major = self.major_axis + other.major_axis
432-
minor = self.minor_axis + other.minor_axis
430+
items = self.items.union(other.items)
431+
major = self.major_axis.union(other.major_axis)
432+
minor = self.minor_axis.union(other.minor_axis)
433433

434434
# could check that everything's the same size, but forget it
435435

0 commit comments

Comments
 (0)