Skip to content

Commit eeaec42

Browse files
committed
BUG: Index.union cannot handle array-likes
1 parent d03a22f commit eeaec42

File tree

6 files changed

+87
-33
lines changed

6 files changed

+87
-33
lines changed

doc/source/whatsnew/v0.17.0.txt

+5
Original file line numberDiff line numberDiff line change
@@ -67,3 +67,8 @@ Bug Fixes
6767
- Bug in ``NaT`` raises ``AttributeError`` when accessing the ``daysinmonth`` and ``dayofweek`` properties. (:issue:`10096`)
6868

6969
- Bug in getting timezone data with ``dateutil`` on various platforms (:issue:`9059`, :issue:`8639`, :issue:`9663`, :issue:`10121`)
70+
71+
72+
- Bug in ``Index.union`` raises ``AttributeError`` when passing array-likes. (:issue:`10149`)
73+
74+

pandas/core/index.py

+8-19
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,8 @@ def to_datetime(self, dayfirst=False):
580580
return DatetimeIndex(self.values)
581581

582582
def _assert_can_do_setop(self, other):
583+
if not com.is_list_like(other):
584+
raise TypeError('Input must be Index or array-like')
583585
return True
584586

585587
@property
@@ -1364,16 +1366,14 @@ def union(self, other):
13641366
-------
13651367
union : Index
13661368
"""
1367-
if not hasattr(other, '__iter__'):
1368-
raise TypeError('Input must be iterable.')
1369+
self._assert_can_do_setop(other)
1370+
other = _ensure_index(other)
13691371

13701372
if len(other) == 0 or self.equals(other):
13711373
return self
13721374

13731375
if len(self) == 0:
1374-
return _ensure_index(other)
1375-
1376-
self._assert_can_do_setop(other)
1376+
return other
13771377

13781378
if not is_dtype_equal(self.dtype,other.dtype):
13791379
this = self.astype('O')
@@ -1439,11 +1439,7 @@ def intersection(self, other):
14391439
-------
14401440
intersection : Index
14411441
"""
1442-
if not hasattr(other, '__iter__'):
1443-
raise TypeError('Input must be iterable!')
1444-
14451442
self._assert_can_do_setop(other)
1446-
14471443
other = _ensure_index(other)
14481444

14491445
if self.equals(other):
@@ -1492,9 +1488,7 @@ def difference(self, other):
14921488
14931489
>>> index.difference(index2)
14941490
"""
1495-
1496-
if not hasattr(other, '__iter__'):
1497-
raise TypeError('Input must be iterable!')
1491+
self._assert_can_do_setop(other)
14981492

14991493
if self.equals(other):
15001494
return Index([], name=self.name)
@@ -1517,7 +1511,7 @@ def sym_diff(self, other, result_name=None):
15171511
Parameters
15181512
----------
15191513
1520-
other : array-like
1514+
other : Index or array-like
15211515
result_name : str
15221516
15231517
Returns
@@ -1545,9 +1539,7 @@ def sym_diff(self, other, result_name=None):
15451539
>>> idx1 ^ idx2
15461540
Int64Index([1, 5], dtype='int64')
15471541
"""
1548-
if not hasattr(other, '__iter__'):
1549-
raise TypeError('Input must be iterable!')
1550-
1542+
self._assert_can_do_setop(other)
15511543
if not isinstance(other, Index):
15521544
other = Index(other)
15531545
result_name = result_name or self.name
@@ -5537,9 +5529,6 @@ def difference(self, other):
55375529
return MultiIndex.from_tuples(difference, sortorder=0,
55385530
names=result_names)
55395531

5540-
def _assert_can_do_setop(self, other):
5541-
pass
5542-
55435532
def astype(self, dtype):
55445533
if not is_object_dtype(np.dtype(dtype)):
55455534
raise TypeError('Setting %s dtype to anything other than object '

pandas/tests/test_index.py

+68-14
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,20 @@ def test_take(self):
251251
expected = ind[indexer]
252252
self.assertTrue(result.equals(expected))
253253

254+
def test_setops_errorcases(self):
255+
for name, idx in compat.iteritems(self.indices):
256+
257+
# # non-iterable input
258+
cases = [0.5, 'xxx']
259+
methods = [idx.intersection, idx.union, idx.difference, idx.sym_diff]
260+
261+
for method in methods:
262+
for case in cases:
263+
assertRaisesRegexp(TypeError,
264+
"Input must be Index or array-like",
265+
method, case)
266+
267+
254268
class TestIndex(Base, tm.TestCase):
255269
_holder = Index
256270
_multiprocess_can_split_ = True
@@ -620,16 +634,18 @@ def test_intersection(self):
620634
first = self.strIndex[:20]
621635
second = self.strIndex[:10]
622636
intersect = first.intersection(second)
623-
624637
self.assertTrue(tm.equalContents(intersect, second))
625638

639+
# GH 10149
640+
cases = [klass(second.values) for klass in [np.array, Series, list]]
641+
for case in cases:
642+
result = first.intersection(case)
643+
self.assertTrue(tm.equalContents(result, second))
644+
626645
# Corner cases
627646
inter = first.intersection(first)
628647
self.assertIs(inter, first)
629648

630-
# non-iterable input
631-
assertRaisesRegexp(TypeError, "iterable", first.intersection, 0.5)
632-
633649
idx1 = Index([1, 2, 3, 4, 5], name='idx')
634650
# if target has the same name, it is preserved
635651
idx2 = Index([3, 4, 5, 6, 7], name='idx')
@@ -671,6 +687,12 @@ def test_union(self):
671687
union = first.union(second)
672688
self.assertTrue(tm.equalContents(union, everything))
673689

690+
# GH 10149
691+
cases = [klass(second.values) for klass in [np.array, Series, list]]
692+
for case in cases:
693+
result = first.union(case)
694+
self.assertTrue(tm.equalContents(result, everything))
695+
674696
# Corner cases
675697
union = first.union(first)
676698
self.assertIs(union, first)
@@ -681,9 +703,6 @@ def test_union(self):
681703
union = Index([]).union(first)
682704
self.assertIs(union, first)
683705

684-
# non-iterable input
685-
assertRaisesRegexp(TypeError, "iterable", first.union, 0.5)
686-
687706
# preserve names
688707
first.name = 'A'
689708
second.name = 'A'
@@ -777,6 +796,12 @@ def test_difference(self):
777796
self.assertTrue(tm.equalContents(result, answer))
778797
self.assertEqual(result.name, None)
779798

799+
# GH 10149
800+
cases = [klass(second.values) for klass in [np.array, Series, list]]
801+
for case in cases:
802+
result = first.difference(case)
803+
self.assertTrue(tm.equalContents(result, answer))
804+
780805
# same names
781806
second.name = 'name'
782807
result = first.difference(second)
@@ -792,9 +817,6 @@ def test_difference(self):
792817
self.assertEqual(len(result), 0)
793818
self.assertEqual(result.name, first.name)
794819

795-
# non-iterable input
796-
assertRaisesRegexp(TypeError, "iterable", first.difference, 0.5)
797-
798820
def test_symmetric_diff(self):
799821

800822
# smoke
@@ -810,6 +832,12 @@ def test_symmetric_diff(self):
810832
self.assertTrue(tm.equalContents(result, expected))
811833
self.assertIsNone(result.name)
812834

835+
# GH 10149
836+
cases = [klass(idx2.values) for klass in [np.array, Series, list]]
837+
for case in cases:
838+
result = idx1.sym_diff(case)
839+
self.assertTrue(tm.equalContents(result, expected))
840+
813841
# multiIndex
814842
idx1 = MultiIndex.from_tuples(self.tuples)
815843
idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)])
@@ -842,10 +870,6 @@ def test_symmetric_diff(self):
842870
self.assertTrue(tm.equalContents(result, expected))
843871
self.assertEqual(result.name, 'new_name')
844872

845-
# other isn't iterable
846-
with tm.assertRaises(TypeError):
847-
Index(idx1,dtype='object').difference(1)
848-
849873
def test_is_numeric(self):
850874
self.assertFalse(self.dateIndex.is_numeric())
851875
self.assertFalse(self.strIndex.is_numeric())
@@ -2642,6 +2666,36 @@ def test_time_overflow_for_32bit_machines(self):
26422666
idx2 = pd.date_range(end='2000', periods=periods, freq='S')
26432667
self.assertEqual(len(idx2), periods)
26442668

2669+
def test_intersection(self):
2670+
first = self.index
2671+
second = self.index[5:]
2672+
intersect = first.intersection(second)
2673+
self.assertTrue(tm.equalContents(intersect, second))
2674+
2675+
# GH 10149
2676+
cases = [klass(second.values) for klass in [np.array, Series, list]]
2677+
for case in cases:
2678+
result = first.intersection(case)
2679+
self.assertTrue(tm.equalContents(result, second))
2680+
2681+
third = Index(['a', 'b', 'c'])
2682+
result = first.intersection(third)
2683+
expected = pd.Index([], dtype=object)
2684+
self.assert_index_equal(result, expected)
2685+
2686+
def test_union(self):
2687+
first = self.index[:5]
2688+
second = self.index[5:]
2689+
everything = self.index
2690+
union = first.union(second)
2691+
self.assertTrue(tm.equalContents(union, everything))
2692+
2693+
# GH 10149
2694+
cases = [klass(second.values) for klass in [np.array, Series, list]]
2695+
for case in cases:
2696+
result = first.union(case)
2697+
self.assertTrue(tm.equalContents(result, everything))
2698+
26452699

26462700
class TestPeriodIndex(DatetimeLike, tm.TestCase):
26472701
_holder = PeriodIndex

pandas/tseries/index.py

+2
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,7 @@ def union(self, other):
800800
-------
801801
y : Index or DatetimeIndex
802802
"""
803+
self._assert_can_do_setop(other)
803804
if not isinstance(other, DatetimeIndex):
804805
try:
805806
other = DatetimeIndex(other)
@@ -1035,6 +1036,7 @@ def intersection(self, other):
10351036
-------
10361037
y : Index or DatetimeIndex
10371038
"""
1039+
self._assert_can_do_setop(other)
10381040
if not isinstance(other, DatetimeIndex):
10391041
try:
10401042
other = DatetimeIndex(other)

pandas/tseries/period.py

+2
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,8 @@ def join(self, other, how='left', level=None, return_indexers=False):
680680
return self._apply_meta(result)
681681

682682
def _assert_can_do_setop(self, other):
683+
super(PeriodIndex, self)._assert_can_do_setop(other)
684+
683685
if not isinstance(other, PeriodIndex):
684686
raise ValueError('can only call with other PeriodIndex-ed objects')
685687

pandas/tseries/tdi.py

+2
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ def union(self, other):
433433
-------
434434
y : Index or TimedeltaIndex
435435
"""
436+
self._assert_can_do_setop(other)
436437
if _is_convertible_to_index(other):
437438
try:
438439
other = TimedeltaIndex(other)
@@ -578,6 +579,7 @@ def intersection(self, other):
578579
-------
579580
y : Index or TimedeltaIndex
580581
"""
582+
self._assert_can_do_setop(other)
581583
if not isinstance(other, TimedeltaIndex):
582584
try:
583585
other = TimedeltaIndex(other)

0 commit comments

Comments
 (0)