Skip to content

Commit 725ffe5

Browse files
committed
FIX: intersection and union correct name changing behavior. fixes pandas-dev#9943 partly pandas-dev#9862
1 parent 76571d0 commit 725ffe5

File tree

4 files changed

+194
-11
lines changed

4 files changed

+194
-11
lines changed

doc/source/whatsnew/v0.16.1.txt

+10
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,13 @@ Bug Fixes
244244

245245

246246
- Bug in hiding ticklabels with subplots and shared axes when adding a new plot to an existing grid of axes (:issue:`9158`)
247+
248+
249+
250+
251+
252+
253+
254+
255+
256+
- Bug in ``Index.union`` and ``Index.intersection`` where the index name was dropped when the other operand had no name; the name is now preserved. (:issue:`9943`)

pandas/core/index.py

+20-7
Original file line numberDiff line numberDiff line change
@@ -1219,14 +1219,17 @@ def union(self, other):
12191219
if len(other) == 0 or self.equals(other):
12201220
return self
12211221

1222+
other = _ensure_index(other)
1223+
12221224
if len(self) == 0:
1223-
return _ensure_index(other)
1225+
return other
12241226

12251227
self._assert_can_do_setop(other)
12261228

12271229
if not is_dtype_equal(self.dtype,other.dtype):
12281230
this = self.astype('O')
12291231
other = other.astype('O')
1232+
12301233
return this.union(other)
12311234

12321235
if self.is_monotonic and other.is_monotonic:
@@ -1272,7 +1275,7 @@ def union(self, other):
12721275
return self._wrap_union_result(other, result)
12731276

12741277
def _wrap_union_result(self, other, result):
1275-
name = self.name if self.name == other.name else None
1278+
name = self.name if other.name == self.name or other.name==None else None
12761279
return self.__class__(data=result, name=name)
12771280

12781281
def intersection(self, other):
@@ -1306,6 +1309,7 @@ def intersection(self, other):
13061309
if self.is_monotonic and other.is_monotonic:
13071310
try:
13081311
result = self._inner_indexer(self.values, other.values)[0]
1312+
13091313
return self._wrap_union_result(other, result)
13101314
except TypeError:
13111315
pass
@@ -1319,8 +1323,9 @@ def intersection(self, other):
13191323
indexer = indexer[indexer != -1]
13201324

13211325
taken = self.take(indexer)
1322-
if self.name != other.name:
1326+
if self.name != other.name and not other.name == None:
13231327
taken.name = None
1328+
13241329
return taken
13251330

13261331
def difference(self, other):
@@ -5324,7 +5329,9 @@ def union(self, other):
53245329
if len(other) == 0 or self.equals(other):
53255330
return self
53265331

5327-
result_names = self.names if self.names == other.names else None
5332+
result_names = None
5333+
if self.names == other.names or other.names is None:
5334+
result_names = self.names
53285335

53295336
uniq_tuples = lib.fast_unique_multiple([self.values, other.values])
53305337
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
@@ -5347,7 +5354,9 @@ def intersection(self, other):
53475354
if self.equals(other):
53485355
return self
53495356

5350-
result_names = self.names if self.names == other.names else None
5357+
result_names = None
5358+
if self.names == other.names or other.name is None:
5359+
result_names = self.names
53515360

53525361
self_tuples = self.values
53535362
other_tuples = other.values
@@ -5380,7 +5389,9 @@ def difference(self, other):
53805389
' tuples')
53815390
result_names = self.names
53825391
else:
5383-
result_names = self.names if self.names == other.names else None
5392+
result_names = None
5393+
if self.names == other.names or other.names == None:
5394+
result_names = self.names
53845395

53855396
if self.equals(other):
53865397
return MultiIndex(levels=[[]] * self.nlevels,
@@ -5475,7 +5486,9 @@ def _bounds(self):
54755486
return self.__bounds
54765487

54775488
def _wrap_joined_index(self, joined, other):
5478-
names = self.names if self.names == other.names else None
5489+
names = None
5490+
if self.names == other.names or other.names == None:
5491+
names = self.names
54795492
return MultiIndex.from_tuples(joined, names=names)
54805493

54815494
@Appender(Index.isin.__doc__)

pandas/tests/test_index.py

+159-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex)
1818
from pandas.core.index import InvalidIndexError, NumericIndex
1919
from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
20-
assert_copy)
20+
assert_copy, assert_frame_equal)
2121
from pandas import compat
2222
from pandas.compat import long
2323

@@ -603,6 +603,157 @@ def test_shift(self):
603603
shifted.name = 'shifted'
604604
self.assertEqual(shifted.name, shifted.shift(1, 'D').name)
605605

606+
def test_union_naming_behavior(self):
607+
#9965
608+
idx_name_a = pd.Index([1,2,3], name='a')
609+
idx_name_b = pd.Index([4,5,6], name='b')
610+
idx2_name_a = pd.Index([2,9,8], name='a')
611+
612+
stridx_name_stra = pd.Index(['1','2'], name='stra')
613+
stridx_name_a = pd.Index(['1','2'], name='a')
614+
615+
idx_name_none = pd.Index(['1','2'], name=None)
616+
617+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
618+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
619+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
620+
621+
python_array = [1,2,3]
622+
numpy_array = np.array([1,2,3])
623+
624+
#index union index naming behavior
625+
self.assertEqual(idx_name_a.union(idx_name_b).name, None)
626+
self.assertEqual(idx_name_a.union(idx2_name_a).name, 'a')
627+
628+
#index union array
629+
self.assertEqual(idx_name_a.union(python_array).name, 'a')
630+
self.assertEqual(idx_name_a.union(numpy_array).name, 'a')
631+
632+
#index union index different dtype
633+
self.assertEqual(idx_name_a.union(stridx_name_a).name, 'a')
634+
self.assertEqual(idx_name_a.union(stridx_name_stra).name, None)
635+
636+
#index union index with no name
637+
self.assertEqual(idx_name_a.union(idx_name_none).name, 'a')
638+
639+
#index union dateindex
640+
self.assertEqual(idx_name_a.union(dateindex_name_a).name, 'a')
641+
self.assertEqual(idx_name_a.union(dateindex_name_b).name, None)
642+
self.assertEqual(idx_name_a.union(dateindex_name_None).name, 'a')
643+
644+
#dateindex union
645+
self.assertEqual(dateindex_name_a.union(python_array).name, 'a')
646+
self.assertEqual(dateindex_name_a.union(numpy_array).name, 'a')
647+
648+
self.assertEqual(dateindex_name_a.union(idx_name_none).name, 'a')
649+
self.assertEqual(dateindex_name_a.union(dateindex_name_b).name, None)
650+
self.assertEqual(dateindex_name_a.union(dateindex_name_None).name, 'a')
651+
self.assertEqual(dateindex_name_a.union(idx_name_a).name, 'a')
652+
653+
def test_intersection_naming_behavior(self):
654+
#9965
655+
idx_name_a = pd.Index([1,2,3], name='a')
656+
idx_name_b = pd.Index([4,5,6], name='b')
657+
idx2_name_a = pd.Index([2,9,8], name='a')
658+
659+
stridx_name_stra = pd.Index(['1','2'], name='stra')
660+
stridx_name_a = pd.Index(['1','2'], name='a')
661+
662+
idx_name_none = pd.Index(['1','2'], name=None)
663+
664+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
665+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
666+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
667+
668+
python_array = [1,2,3]
669+
numpy_array = np.array([1,2,3])
670+
671+
#index intersection index naming behavior
672+
self.assertEqual(idx_name_a.intersection(idx_name_b).name, None)
673+
self.assertEqual(idx_name_a.intersection(idx2_name_a).name, 'a')
674+
675+
#index intersection array
676+
self.assertEqual(idx_name_a.intersection(python_array).name, 'a')
677+
self.assertEqual(idx_name_a.intersection(numpy_array).name, 'a')
678+
679+
#index intersection index different dtype
680+
self.assertEqual(idx_name_a.intersection(stridx_name_a).name, 'a')
681+
self.assertEqual(idx_name_a.intersection(stridx_name_stra).name, None)
682+
683+
#index intersection index with no name
684+
self.assertEqual(idx_name_a.intersection(idx_name_none).name, 'a')
685+
686+
#index intersection dateindex
687+
self.assertEqual(idx_name_a.intersection(dateindex_name_a).name, 'a')
688+
self.assertEqual(idx_name_a.intersection(dateindex_name_b).name, None)
689+
self.assertEqual(idx_name_a.intersection(dateindex_name_None).name, 'a')
690+
691+
#dateindex intersection
692+
self.assertEqual(dateindex_name_a.intersection(python_array).name, 'a')
693+
self.assertEqual(dateindex_name_a.intersection(numpy_array).name, 'a')
694+
695+
self.assertEqual(dateindex_name_a.intersection(idx_name_none).name, 'a')
696+
self.assertEqual(dateindex_name_a.intersection(dateindex_name_b).name, None)
697+
self.assertEqual(dateindex_name_a.intersection(dateindex_name_None).name, 'a')
698+
self.assertEqual(dateindex_name_a.intersection(idx_name_a).name, 'a')
699+
700+
def test_append_naming_behavior(self):
701+
#9965
702+
idx_name_a = pd.Index([1,2,3], name='a')
703+
idx_name_b = pd.Index([4,5,6], name='b')
704+
idx2_name_a = pd.Index([2,9,8], name='a')
705+
706+
stridx_name_stra = pd.Index(['1','2'], name='stra')
707+
stridx_name_a = pd.Index(['1','2'], name='a')
708+
709+
idx_name_none = pd.Index(['1','2'], name=None)
710+
711+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
712+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
713+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
714+
715+
python_array = [1,2,3]
716+
python_array_transposed = [[1],[2],[3]]
717+
numpy_array = np.array([1,2,3])
718+
719+
#index append index naming behavior
720+
self.assertEqual(idx_name_a.append(idx_name_b).name, None)
721+
self.assertEqual(idx_name_a.append(idx2_name_a).name, 'a')
722+
723+
#index append array
724+
self.assertEqual(idx_name_a.append(python_array_transposed).name, 'a')
725+
self.assertEqual(idx_name_a.append(numpy_array.T).name, 'a')
726+
727+
#index append index different dtype
728+
self.assertEqual(idx_name_a.append(stridx_name_a).name, 'a')
729+
self.assertEqual(idx_name_a.append(stridx_name_stra).name, None)
730+
731+
#index append index with no name
732+
self.assertEqual(idx_name_a.append(idx_name_none).name, 'a')
733+
734+
#index append dateindex
735+
self.assertEqual(idx_name_a.append(dateindex_name_a).name, 'a')
736+
self.assertEqual(idx_name_a.append(dateindex_name_b).name, None)
737+
self.assertEqual(idx_name_a.append(dateindex_name_None).name, 'a')
738+
739+
#dateindex append
740+
self.assertEqual(dateindex_name_a.append(python_array_transposed).name, 'a')
741+
self.assertEqual(dateindex_name_a.append(numpy_array.T).name, 'a')
742+
743+
self.assertEqual(dateindex_name_a.append(idx_name_none).name, 'a')
744+
self.assertEqual(dateindex_name_a.append(dateindex_name_b).name, None)
745+
self.assertEqual(dateindex_name_a.append(dateindex_name_None).name, 'a')
746+
self.assertEqual(dateindex_name_a.append(idx_name_a).name, 'a')
747+
748+
def test_intersection_preserves_name(self):
749+
#GH 9943
750+
df = pd.DataFrame([np.nan, np.nan], columns = ['tags'], index=pd.Int64Index([4815961, 4815962], dtype='int64', name='id'))
751+
self.assertEqual(str(df), ' tags\nid \n4815961 NaN\n4815962 NaN')
752+
L = [4815962]
753+
self.assertEqual(list(L), list(df.index.intersection(L)))
754+
self.assertEqual( df.ix[L].tags.index.name, df.ix[df.index.intersection(L)].tags.index.name)
755+
assert_frame_equal(df.ix[L], df.ix[df.index.intersection(L)])
756+
606757
def test_intersection(self):
607758
first = self.strIndex[:20]
608759
second = self.strIndex[:10]
@@ -679,7 +830,13 @@ def test_union(self):
679830

680831
second.name = 'B'
681832
union = first.union(second)
682-
self.assertIsNone(union.name)
833+
self.assertEqual(None, union.name)
834+
835+
#union array-like
836+
print first
837+
print first.name
838+
union = first.union([1234])
839+
self.assertEqual('A', union.name)
683840

684841
def test_add(self):
685842

pandas/tseries/index.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,10 @@ def append(self, other):
879879
to_concat.append(other)
880880

881881
for obj in to_concat:
882-
if isinstance(obj, Index) and obj.name != name:
882+
if (isinstance(obj, Index) and
883+
obj.name != name and
884+
obj.name is not None):
885+
883886
name = None
884887
break
885888

@@ -1026,7 +1029,7 @@ def __iter__(self):
10261029
yield v
10271030

10281031
def _wrap_union_result(self, other, result):
1029-
name = self.name if self.name == other.name else None
1032+
name = self.name if self.name == other.name or other.name == None else None
10301033
if self.tz != other.tz:
10311034
raise ValueError('Passed item and index have different timezone')
10321035
return self._simple_new(result, name=name, freq=None, tz=self.tz)

0 commit comments

Comments
 (0)