Skip to content

Commit 91ced29

Browse files
committed
FIX: intersection and union correct name changing behavior. Fixes pandas-dev#9943, partly fixes pandas-dev#9862
1 parent 76571d0 commit 91ced29

File tree

4 files changed

+204
-7
lines changed

4 files changed

+204
-7
lines changed

doc/source/whatsnew/v0.16.1.txt

+10
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,13 @@ Bug Fixes
244244

245245

246246
- Bug in hiding ticklabels with subplots and shared axes when adding a new plot to an existing grid of axes (:issue:`9158`)
247+
248+
249+
250+
251+
252+
253+
254+
255+
256+
- ``union`` and ``intersection`` now won't change the index name. (:issue:`9943`)

pandas/core/index.py

+30-3
Original file line numberDiff line numberDiff line change
@@ -1219,14 +1219,26 @@ def union(self, other):
12191219
if len(other) == 0 or self.equals(other):
12201220
return self
12211221

1222+
keep_name = True
1223+
#GH 9943, 9862 added keep_name
1224+
if hasattr(other, 'name'):
1225+
if other.name != self.name and not other.name is None:
1226+
keep_name = False
1227+
1228+
other = _ensure_index(other)
1229+
12221230
if len(self) == 0:
1223-
return _ensure_index(other)
1231+
return other
12241232

12251233
self._assert_can_do_setop(other)
12261234

12271235
if not is_dtype_equal(self.dtype,other.dtype):
12281236
this = self.astype('O')
12291237
other = other.astype('O')
1238+
1239+
if keep_name:
1240+
other.name=self.name
1241+
12301242
return this.union(other)
12311243

12321244
if self.is_monotonic and other.is_monotonic:
@@ -1268,11 +1280,14 @@ def union(self, other):
12681280
warnings.warn("%s, sort order is undefined for "
12691281
"incomparable objects" % e, RuntimeWarning)
12701282

1283+
if keep_name:
1284+
other.name = self.name
1285+
12711286
# for subclasses
12721287
return self._wrap_union_result(other, result)
12731288

12741289
def _wrap_union_result(self, other, result):
1275-
name = self.name if self.name == other.name else None
1290+
name = self.name if other.name == self.name or other.name==None else None
12761291
return self.__class__(data=result, name=name)
12771292

12781293
def intersection(self, other):
@@ -1293,6 +1308,12 @@ def intersection(self, other):
12931308

12941309
self._assert_can_do_setop(other)
12951310

1311+
keep_name = True
1312+
#GH 9943, 9862 added keep_name
1313+
if hasattr(other, 'name'):
1314+
if other.name != self.name and not other.name is None:
1315+
keep_name = False
1316+
12961317
other = _ensure_index(other)
12971318

12981319
if self.equals(other):
@@ -1306,6 +1327,10 @@ def intersection(self, other):
13061327
if self.is_monotonic and other.is_monotonic:
13071328
try:
13081329
result = self._inner_indexer(self.values, other.values)[0]
1330+
1331+
if keep_name:
1332+
other.name = self.name
1333+
13091334
return self._wrap_union_result(other, result)
13101335
except TypeError:
13111336
pass
@@ -1319,8 +1344,10 @@ def intersection(self, other):
13191344
indexer = indexer[indexer != -1]
13201345

13211346
taken = self.take(indexer)
1322-
if self.name != other.name:
1347+
1348+
if not keep_name:
13231349
taken.name = None
1350+
13241351
return taken
13251352

13261353
def difference(self, other):

pandas/tests/test_index.py

+159-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex)
1818
from pandas.core.index import InvalidIndexError, NumericIndex
1919
from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
20-
assert_copy)
20+
assert_copy, assert_frame_equal)
2121
from pandas import compat
2222
from pandas.compat import long
2323

@@ -603,6 +603,157 @@ def test_shift(self):
603603
shifted.name = 'shifted'
604604
self.assertEqual(shifted.name, shifted.shift(1, 'D').name)
605605

606+
def test_union_naming_behavior(self):
607+
#9965
608+
idx_name_a = pd.Index([1,2,3], name='a')
609+
idx_name_b = pd.Index([4,5,6], name='b')
610+
idx2_name_a = pd.Index([2,9,8], name='a')
611+
612+
stridx_name_stra = pd.Index(['1','2'], name='stra')
613+
stridx_name_a = pd.Index(['1','2'], name='a')
614+
615+
idx_name_none = pd.Index(['1','2'], name=None)
616+
617+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
618+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
619+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
620+
621+
python_array = [1,2,3]
622+
numpy_array = np.array([1,2,3])
623+
624+
#index union index naming behavior
625+
assert idx_name_a.union(idx_name_b).name == None
626+
assert idx_name_a.union(idx2_name_a).name == 'a'
627+
628+
#index union array
629+
assert idx_name_a.union(python_array).name == 'a'
630+
assert idx_name_a.union(numpy_array).name == 'a'
631+
632+
#index union index different dtype
633+
assert idx_name_a.union(stridx_name_a).name == 'a'
634+
assert idx_name_a.union(stridx_name_stra).name == None
635+
636+
#index union index with no name
637+
assert idx_name_a.union(idx_name_none).name == 'a'
638+
639+
#index union dateindex
640+
assert idx_name_a.union(dateindex_name_a).name == 'a'
641+
assert idx_name_a.union(dateindex_name_b).name == None
642+
assert idx_name_a.union(dateindex_name_None).name == 'a'
643+
644+
#dateindex union
645+
assert dateindex_name_a.union(python_array).name == 'a'
646+
assert dateindex_name_a.union(numpy_array).name == 'a'
647+
648+
assert dateindex_name_a.union(idx_name_none).name == 'a'
649+
assert dateindex_name_a.union(dateindex_name_b).name == None
650+
assert dateindex_name_a.union(dateindex_name_None).name == 'a'
651+
assert dateindex_name_a.union(idx_name_a).name == 'a'
652+
653+
def test_intersection_naming_behavior(self):
654+
#9965
655+
idx_name_a = pd.Index([1,2,3], name='a')
656+
idx_name_b = pd.Index([4,5,6], name='b')
657+
idx2_name_a = pd.Index([2,9,8], name='a')
658+
659+
stridx_name_stra = pd.Index(['1','2'], name='stra')
660+
stridx_name_a = pd.Index(['1','2'], name='a')
661+
662+
idx_name_none = pd.Index(['1','2'], name=None)
663+
664+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
665+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
666+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
667+
668+
python_array = [1,2,3]
669+
numpy_array = np.array([1,2,3])
670+
671+
#index intersection index naming behavior
672+
assert idx_name_a.intersection(idx_name_b).name == None
673+
assert idx_name_a.intersection(idx2_name_a).name == 'a'
674+
675+
#index intersection array
676+
assert idx_name_a.intersection(python_array).name == 'a'
677+
assert idx_name_a.intersection(numpy_array).name == 'a'
678+
679+
#index intersection index different dtype
680+
assert idx_name_a.intersection(stridx_name_a).name == 'a'
681+
assert idx_name_a.intersection(stridx_name_stra).name == None
682+
683+
#index intersection index with no name
684+
assert idx_name_a.intersection(idx_name_none).name == 'a'
685+
686+
#index intersection dateindex
687+
assert idx_name_a.intersection(dateindex_name_a).name == 'a'
688+
assert idx_name_a.intersection(dateindex_name_b).name == None
689+
assert idx_name_a.intersection(dateindex_name_None).name == 'a'
690+
691+
#dateindex intersection
692+
assert dateindex_name_a.intersection(python_array).name == 'a'
693+
assert dateindex_name_a.intersection(numpy_array).name == 'a'
694+
695+
assert dateindex_name_a.intersection(idx_name_none).name == 'a'
696+
assert dateindex_name_a.intersection(dateindex_name_b).name == None
697+
assert dateindex_name_a.intersection(dateindex_name_None).name == 'a'
698+
assert dateindex_name_a.intersection(idx_name_a).name == 'a'
699+
700+
def test_append_naming_behavior(self):
701+
#9965
702+
idx_name_a = pd.Index([1,2,3], name='a')
703+
idx_name_b = pd.Index([4,5,6], name='b')
704+
idx2_name_a = pd.Index([2,9,8], name='a')
705+
706+
stridx_name_stra = pd.Index(['1','2'], name='stra')
707+
stridx_name_a = pd.Index(['1','2'], name='a')
708+
709+
idx_name_none = pd.Index(['1','2'], name=None)
710+
711+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
712+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
713+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
714+
715+
python_array = [1,2,3]
716+
python_array_transposed = [[1],[2],[3]]
717+
numpy_array = np.array([1,2,3])
718+
719+
#index append index naming behavior
720+
assert idx_name_a.append(idx_name_b).name == None
721+
assert idx_name_a.append(idx2_name_a).name == 'a'
722+
723+
#index append array
724+
assert idx_name_a.append(python_array_transposed).name == 'a'
725+
assert idx_name_a.append(numpy_array.T).name == 'a'
726+
727+
#index append index different dtype
728+
assert idx_name_a.append(stridx_name_a).name == 'a'
729+
assert idx_name_a.append(stridx_name_stra).name == None
730+
731+
#index append index with no name
732+
assert idx_name_a.append(idx_name_none).name == 'a'
733+
734+
#index append dateindex
735+
assert idx_name_a.append(dateindex_name_a).name == 'a'
736+
assert idx_name_a.append(dateindex_name_b).name == None
737+
assert idx_name_a.append(dateindex_name_None).name == 'a'
738+
739+
#dateindex append
740+
assert dateindex_name_a.append(python_array_transposed).name == 'a'
741+
assert dateindex_name_a.append(numpy_array.T).name == 'a'
742+
743+
assert dateindex_name_a.append(idx_name_none).name == 'a'
744+
assert dateindex_name_a.append(dateindex_name_b).name == None
745+
assert dateindex_name_a.append(dateindex_name_None).name == 'a'
746+
assert dateindex_name_a.append(idx_name_a).name == 'a'
747+
748+
def test_intersection_preserves_name(self):
749+
#GH 9943
750+
df = pd.DataFrame([np.nan, np.nan], columns = ['tags'], index=pd.Int64Index([4815961, 4815962], dtype='int64', name='id'))
751+
assert str(df) == ' tags\nid \n4815961 NaN\n4815962 NaN'
752+
L = [4815962]
753+
assert list(L) == list(df.index.intersection(L))
754+
assert df.ix[L].tags.index.name == df.ix[df.index.intersection(L)].tags.index.name
755+
assert_frame_equal(df.ix[L], df.ix[df.index.intersection(L)])
756+
606757
def test_intersection(self):
607758
first = self.strIndex[:20]
608759
second = self.strIndex[:10]
@@ -679,7 +830,13 @@ def test_union(self):
679830

680831
second.name = 'B'
681832
union = first.union(second)
682-
self.assertIsNone(union.name)
833+
self.assertEqual(None, union.name)
834+
835+
#union array-like
836+
print first
837+
print first.name
838+
union = first.union([1234])
839+
self.assertEqual('A', union.name)
683840

684841
def test_add(self):
685842

pandas/tseries/index.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,10 @@ def append(self, other):
879879
to_concat.append(other)
880880

881881
for obj in to_concat:
882-
if isinstance(obj, Index) and obj.name != name:
882+
if (isinstance(obj, Index) and
883+
obj.name != name and
884+
obj.name is not None):
885+
883886
name = None
884887
break
885888

@@ -1026,7 +1029,7 @@ def __iter__(self):
10261029
yield v
10271030

10281031
def _wrap_union_result(self, other, result):
1029-
name = self.name if self.name == other.name else None
1032+
name = self.name if self.name == other.name or other.name == None else None
10301033
if self.tz != other.tz:
10311034
raise ValueError('Passed item and index have different timezone')
10321035
return self._simple_new(result, name=name, freq=None, tz=self.tz)

0 commit comments

Comments
 (0)