Skip to content

Commit 8d34276

Browse files
committed
BUG: Index.union cannot handle array-likes
1 parent 1a709c3 commit 8d34276

File tree

6 files changed

+174
-38
lines changed

6 files changed

+174
-38
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ Bug Fixes
6464

6565

6666
- Bug where Panel.from_dict does not set dtype when specified (:issue:`10058`)
67+
- Bug where ``Index.union`` raises ``AttributeError`` when passed array-likes (:issue:`10149`)
6768
- Bug where ``Timestamp``'s ``microsecond``, ``quarter``, ``dayofyear``, ``week`` and ``daysinmonth`` properties return ``np.int`` type, not built-in ``int`` (:issue:`10050`)
6869
- Bug where ``NaT`` raises ``AttributeError`` when accessing the ``daysinmonth`` and ``dayofweek`` properties (:issue:`10096`)
6970

@@ -76,3 +77,4 @@ Bug Fixes
7677
- Bug in ``Series.plot(label="LABEL")`` where the label was not set correctly (:issue:`10119`)
7778

7879

80+

pandas/core/index.py

+8-19
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,8 @@ def to_datetime(self, dayfirst=False):
580580
return DatetimeIndex(self.values)
581581

582582
def _assert_can_do_setop(self, other):
583+
if not com.is_list_like(other):
584+
raise TypeError('Input must be Index or array-like')
583585
return True
584586

585587
@property
@@ -1364,16 +1366,14 @@ def union(self, other):
13641366
-------
13651367
union : Index
13661368
"""
1367-
if not hasattr(other, '__iter__'):
1368-
raise TypeError('Input must be iterable.')
1369+
self._assert_can_do_setop(other)
1370+
other = _ensure_index(other)
13691371

13701372
if len(other) == 0 or self.equals(other):
13711373
return self
13721374

13731375
if len(self) == 0:
1374-
return _ensure_index(other)
1375-
1376-
self._assert_can_do_setop(other)
1376+
return other
13771377

13781378
if not is_dtype_equal(self.dtype,other.dtype):
13791379
this = self.astype('O')
@@ -1439,11 +1439,7 @@ def intersection(self, other):
14391439
-------
14401440
intersection : Index
14411441
"""
1442-
if not hasattr(other, '__iter__'):
1443-
raise TypeError('Input must be iterable!')
1444-
14451442
self._assert_can_do_setop(other)
1446-
14471443
other = _ensure_index(other)
14481444

14491445
if self.equals(other):
@@ -1492,9 +1488,7 @@ def difference(self, other):
14921488
14931489
>>> index.difference(index2)
14941490
"""
1495-
1496-
if not hasattr(other, '__iter__'):
1497-
raise TypeError('Input must be iterable!')
1491+
self._assert_can_do_setop(other)
14981492

14991493
if self.equals(other):
15001494
return Index([], name=self.name)
@@ -1517,7 +1511,7 @@ def sym_diff(self, other, result_name=None):
15171511
Parameters
15181512
----------
15191513
1520-
other : array-like
1514+
other : Index or array-like
15211515
result_name : str
15221516
15231517
Returns
@@ -1545,9 +1539,7 @@ def sym_diff(self, other, result_name=None):
15451539
>>> idx1 ^ idx2
15461540
Int64Index([1, 5], dtype='int64')
15471541
"""
1548-
if not hasattr(other, '__iter__'):
1549-
raise TypeError('Input must be iterable!')
1550-
1542+
self._assert_can_do_setop(other)
15511543
if not isinstance(other, Index):
15521544
other = Index(other)
15531545
result_name = result_name or self.name
@@ -5537,9 +5529,6 @@ def difference(self, other):
55375529
return MultiIndex.from_tuples(difference, sortorder=0,
55385530
names=result_names)
55395531

5540-
def _assert_can_do_setop(self, other):
5541-
pass
5542-
55435532
def astype(self, dtype):
55445533
if not is_object_dtype(np.dtype(dtype)):
55455534
raise TypeError('Setting %s dtype to anything other than object '

pandas/tests/test_index.py

+156-16
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,124 @@ def test_take(self):
251251
expected = ind[indexer]
252252
self.assertTrue(result.equals(expected))
253253

254+
def test_setops_errorcases(self):
255+
for name, idx in compat.iteritems(self.indices):
256+
# non-array-like input (a scalar and a string)
257+
cases = [0.5, 'xxx']
258+
methods = [idx.intersection, idx.union, idx.difference, idx.sym_diff]
259+
260+
for method in methods:
261+
for case in cases:
262+
assertRaisesRegexp(TypeError,
263+
"Input must be Index or array-like",
264+
method, case)
265+
266+
def test_intersection_base(self):
267+
for name, idx in compat.iteritems(self.indices):
268+
first = idx[:5]
269+
second = idx[:3]
270+
intersect = first.intersection(second)
271+
272+
if isinstance(idx, CategoricalIndex):
273+
pass
274+
else:
275+
self.assertTrue(tm.equalContents(intersect, second))
276+
277+
# GH 10149
278+
cases = [klass(second.values) for klass in [np.array, Series, list]]
279+
for case in cases:
280+
if isinstance(idx, PeriodIndex):
281+
msg = "can only call with other PeriodIndex-ed objects"
282+
with tm.assertRaisesRegexp(ValueError, msg):
283+
result = first.intersection(case)
284+
elif isinstance(idx, CategoricalIndex):
285+
pass
286+
elif isinstance(idx, MultiIndex):
287+
pass
288+
else:
289+
result = first.intersection(case)
290+
self.assertTrue(tm.equalContents(result, second))
291+
292+
def test_union_base(self):
293+
for name, idx in compat.iteritems(self.indices):
294+
first = idx[3:]
295+
second = idx[:5]
296+
everything = idx
297+
union = first.union(second)
298+
self.assertTrue(tm.equalContents(union, everything))
299+
300+
# GH 10149
301+
cases = [klass(second.values) for klass in [np.array, Series, list]]
302+
for case in cases:
303+
if isinstance(idx, PeriodIndex):
304+
msg = "can only call with other PeriodIndex-ed objects"
305+
with tm.assertRaisesRegexp(ValueError, msg):
306+
result = first.union(case)
307+
elif isinstance(idx, MultiIndex):
308+
pass
309+
elif isinstance(idx, CategoricalIndex):
310+
pass
311+
else:
312+
result = first.union(case)
313+
self.assertTrue(tm.equalContents(result, everything))
314+
315+
def test_difference_base(self):
316+
for name, idx in compat.iteritems(self.indices):
317+
first = idx[2:]
318+
second = idx[:4]
319+
answer = idx[4:]
320+
result = first.difference(second)
321+
322+
if isinstance(idx, CategoricalIndex):
323+
pass
324+
else:
325+
self.assertTrue(tm.equalContents(result, answer))
326+
327+
# GH 10149
328+
cases = [klass(second.values) for klass in [np.array, Series, list]]
329+
for case in cases:
330+
if isinstance(idx, PeriodIndex):
331+
msg = "can only call with other PeriodIndex-ed objects"
332+
with tm.assertRaisesRegexp(ValueError, msg):
333+
result = first.difference(case)
334+
elif isinstance(idx, MultiIndex):
335+
pass
336+
elif isinstance(idx, CategoricalIndex):
337+
pass
338+
elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)):
339+
self.assertEqual(result.__class__, answer.__class__)
340+
self.assert_numpy_array_equal(result.asi8, answer.asi8)
341+
else:
342+
result = first.difference(case)
343+
self.assertTrue(tm.equalContents(result, answer))
344+
345+
def test_symmetric_diff(self):
346+
for name, idx in compat.iteritems(self.indices):
347+
first = idx[1:]
348+
second = idx[:-1]
349+
if isinstance(idx, CategoricalIndex):
350+
pass
351+
else:
352+
answer = idx[[0, -1]]
353+
result = first.sym_diff(second)
354+
self.assertTrue(tm.equalContents(result, answer))
355+
356+
# GH 10149
357+
cases = [klass(second.values) for klass in [np.array, Series, list]]
358+
for case in cases:
359+
if isinstance(idx, PeriodIndex):
360+
msg = "can only call with other PeriodIndex-ed objects"
361+
with tm.assertRaisesRegexp(ValueError, msg):
362+
result = first.sym_diff(case)
363+
elif isinstance(idx, MultiIndex):
364+
pass
365+
elif isinstance(idx, CategoricalIndex):
366+
pass
367+
else:
368+
result = first.sym_diff(case)
369+
self.assertTrue(tm.equalContents(result, answer))
370+
371+
254372
class TestIndex(Base, tm.TestCase):
255373
_holder = Index
256374
_multiprocess_can_split_ = True
@@ -620,16 +738,12 @@ def test_intersection(self):
620738
first = self.strIndex[:20]
621739
second = self.strIndex[:10]
622740
intersect = first.intersection(second)
623-
624741
self.assertTrue(tm.equalContents(intersect, second))
625742

626743
# Corner cases
627744
inter = first.intersection(first)
628745
self.assertIs(inter, first)
629746

630-
# non-iterable input
631-
assertRaisesRegexp(TypeError, "iterable", first.intersection, 0.5)
632-
633747
idx1 = Index([1, 2, 3, 4, 5], name='idx')
634748
# if target has the same name, it is preserved
635749
idx2 = Index([3, 4, 5, 6, 7], name='idx')
@@ -671,6 +785,12 @@ def test_union(self):
671785
union = first.union(second)
672786
self.assertTrue(tm.equalContents(union, everything))
673787

788+
# GH 10149
789+
cases = [klass(second.values) for klass in [np.array, Series, list]]
790+
for case in cases:
791+
result = first.union(case)
792+
self.assertTrue(tm.equalContents(result, everything))
793+
674794
# Corner cases
675795
union = first.union(first)
676796
self.assertIs(union, first)
@@ -681,9 +801,6 @@ def test_union(self):
681801
union = Index([]).union(first)
682802
self.assertIs(union, first)
683803

684-
# non-iterable input
685-
assertRaisesRegexp(TypeError, "iterable", first.union, 0.5)
686-
687804
# preserve names
688805
first.name = 'A'
689806
second.name = 'A'
@@ -792,11 +909,7 @@ def test_difference(self):
792909
self.assertEqual(len(result), 0)
793910
self.assertEqual(result.name, first.name)
794911

795-
# non-iterable input
796-
assertRaisesRegexp(TypeError, "iterable", first.difference, 0.5)
797-
798912
def test_symmetric_diff(self):
799-
800913
# smoke
801914
idx1 = Index([1, 2, 3, 4], name='idx1')
802915
idx2 = Index([2, 3, 4, 5])
@@ -842,10 +955,6 @@ def test_symmetric_diff(self):
842955
self.assertTrue(tm.equalContents(result, expected))
843956
self.assertEqual(result.name, 'new_name')
844957

845-
# other isn't iterable
846-
with tm.assertRaises(TypeError):
847-
Index(idx1,dtype='object').difference(1)
848-
849958
def test_is_numeric(self):
850959
self.assertFalse(self.dateIndex.is_numeric())
851960
self.assertFalse(self.strIndex.is_numeric())
@@ -1786,6 +1895,7 @@ def test_equals(self):
17861895
self.assertFalse(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b',np.nan]).equals(list('aabca')))
17871896
self.assertTrue(CategoricalIndex(list('aabca') + [np.nan],categories=['c','a','b',np.nan]).equals(list('aabca') + [np.nan]))
17881897

1898+
17891899
class Numeric(Base):
17901900

17911901
def test_numeric_compat(self):
@@ -2642,6 +2752,36 @@ def test_time_overflow_for_32bit_machines(self):
26422752
idx2 = pd.date_range(end='2000', periods=periods, freq='S')
26432753
self.assertEqual(len(idx2), periods)
26442754

2755+
def test_intersection(self):
2756+
first = self.index
2757+
second = self.index[5:]
2758+
intersect = first.intersection(second)
2759+
self.assertTrue(tm.equalContents(intersect, second))
2760+
2761+
# GH 10149
2762+
cases = [klass(second.values) for klass in [np.array, Series, list]]
2763+
for case in cases:
2764+
result = first.intersection(case)
2765+
self.assertTrue(tm.equalContents(result, second))
2766+
2767+
third = Index(['a', 'b', 'c'])
2768+
result = first.intersection(third)
2769+
expected = pd.Index([], dtype=object)
2770+
self.assert_index_equal(result, expected)
2771+
2772+
def test_union(self):
2773+
first = self.index[:5]
2774+
second = self.index[5:]
2775+
everything = self.index
2776+
union = first.union(second)
2777+
self.assertTrue(tm.equalContents(union, everything))
2778+
2779+
# GH 10149
2780+
cases = [klass(second.values) for klass in [np.array, Series, list]]
2781+
for case in cases:
2782+
result = first.union(case)
2783+
self.assertTrue(tm.equalContents(result, everything))
2784+
26452785

26462786
class TestPeriodIndex(DatetimeLike, tm.TestCase):
26472787
_holder = PeriodIndex
@@ -2652,7 +2792,7 @@ def setUp(self):
26522792
self.setup_indices()
26532793

26542794
def create_index(self):
2655-
return period_range('20130101',periods=5,freq='D')
2795+
return period_range('20130101', periods=5, freq='D')
26562796

26572797
def test_pickle_compat_construction(self):
26582798
pass

pandas/tseries/index.py

+2
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,7 @@ def union(self, other):
804804
-------
805805
y : Index or DatetimeIndex
806806
"""
807+
self._assert_can_do_setop(other)
807808
if not isinstance(other, DatetimeIndex):
808809
try:
809810
other = DatetimeIndex(other)
@@ -1039,6 +1040,7 @@ def intersection(self, other):
10391040
-------
10401041
y : Index or DatetimeIndex
10411042
"""
1043+
self._assert_can_do_setop(other)
10421044
if not isinstance(other, DatetimeIndex):
10431045
try:
10441046
other = DatetimeIndex(other)

pandas/tseries/period.py

+2
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,8 @@ def join(self, other, how='left', level=None, return_indexers=False):
680680
return self._apply_meta(result)
681681

682682
def _assert_can_do_setop(self, other):
683+
super(PeriodIndex, self)._assert_can_do_setop(other)
684+
683685
if not isinstance(other, PeriodIndex):
684686
raise ValueError('can only call with other PeriodIndex-ed objects')
685687

pandas/tseries/tdi.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -436,12 +436,12 @@ def union(self, other):
436436
-------
437437
y : Index or TimedeltaIndex
438438
"""
439-
if _is_convertible_to_index(other):
439+
self._assert_can_do_setop(other)
440+
if not isinstance(other, TimedeltaIndex):
440441
try:
441442
other = TimedeltaIndex(other)
442-
except TypeError:
443+
except (TypeError, ValueError):
443444
pass
444-
445445
this, other = self, other
446446

447447
if this._can_fast_union(other):
@@ -581,6 +581,7 @@ def intersection(self, other):
581581
-------
582582
y : Index or TimedeltaIndex
583583
"""
584+
self._assert_can_do_setop(other)
584585
if not isinstance(other, TimedeltaIndex):
585586
try:
586587
other = TimedeltaIndex(other)

0 commit comments

Comments
 (0)