Skip to content

Commit 991a6b6

Browse files
committed
FIX: intersection and union correct name changing behavior. fixes #9943 partly #9862
1 parent 76571d0 commit 991a6b6

File tree

4 files changed

+216
-11
lines changed

4 files changed

+216
-11
lines changed

doc/source/whatsnew/v0.16.1.txt

+10
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,13 @@ Bug Fixes
244244

245245

246246
- Bug in hiding ticklabels with subplots and shared axes when adding a new plot to an existing grid of axes (:issue:`9158`)
247+
248+
249+
250+
251+
252+
253+
254+
255+
256+
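- Bug in ``Index.union`` and ``Index.intersection`` where the index name was dropped when the other operand was array-like or had no name; the name is now only reset when the other index has a different, non-``None`` name (:issue:`9943`)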

pandas/core/index.py

+42-7
Original file line numberDiff line numberDiff line change
@@ -1219,14 +1219,26 @@ def union(self, other):
12191219
if len(other) == 0 or self.equals(other):
12201220
return self
12211221

1222+
keep_name = True
1223+
#GH 9943, 9862 added keep_name
1224+
if hasattr(other, 'name'):
1225+
if other.name != self.name and not other.name is None:
1226+
keep_name = False
1227+
1228+
other = _ensure_index(other)
1229+
12221230
if len(self) == 0:
1223-
return _ensure_index(other)
1231+
return other
12241232

12251233
self._assert_can_do_setop(other)
12261234

12271235
if not is_dtype_equal(self.dtype,other.dtype):
12281236
this = self.astype('O')
12291237
other = other.astype('O')
1238+
1239+
if keep_name:
1240+
other.name=self.name
1241+
12301242
return this.union(other)
12311243

12321244
if self.is_monotonic and other.is_monotonic:
@@ -1268,11 +1280,14 @@ def union(self, other):
12681280
warnings.warn("%s, sort order is undefined for "
12691281
"incomparable objects" % e, RuntimeWarning)
12701282

1283+
if keep_name:
1284+
other.name = self.name
1285+
12711286
# for subclasses
12721287
return self._wrap_union_result(other, result)
12731288

12741289
def _wrap_union_result(self, other, result):
1275-
name = self.name if self.name == other.name else None
1290+
name = self.name if other.name == self.name or other.name==None else None
12761291
return self.__class__(data=result, name=name)
12771292

12781293
def intersection(self, other):
@@ -1293,6 +1308,12 @@ def intersection(self, other):
12931308

12941309
self._assert_can_do_setop(other)
12951310

1311+
keep_name = True
1312+
#GH 9943, 9862 added keep_name
1313+
if hasattr(other, 'name'):
1314+
if not (other.name == self.name or other.name is None):
1315+
keep_name = False
1316+
12961317
other = _ensure_index(other)
12971318

12981319
if self.equals(other):
@@ -1306,6 +1327,10 @@ def intersection(self, other):
13061327
if self.is_monotonic and other.is_monotonic:
13071328
try:
13081329
result = self._inner_indexer(self.values, other.values)[0]
1330+
1331+
if keep_name:
1332+
other.name = self.name
1333+
13091334
return self._wrap_union_result(other, result)
13101335
except TypeError:
13111336
pass
@@ -1319,8 +1344,10 @@ def intersection(self, other):
13191344
indexer = indexer[indexer != -1]
13201345

13211346
taken = self.take(indexer)
1322-
if self.name != other.name:
1347+
1348+
if not keep_name:
13231349
taken.name = None
1350+
13241351
return taken
13251352

13261353
def difference(self, other):
@@ -5324,7 +5351,9 @@ def union(self, other):
53245351
if len(other) == 0 or self.equals(other):
53255352
return self
53265353

5327-
result_names = self.names if self.names == other.names else None
5354+
result_names = None
5355+
if self.names == other.names or other.names is None:
5356+
result_names = self.names
53285357

53295358
uniq_tuples = lib.fast_unique_multiple([self.values, other.values])
53305359
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
@@ -5347,7 +5376,9 @@ def intersection(self, other):
53475376
if self.equals(other):
53485377
return self
53495378

5350-
result_names = self.names if self.names == other.names else None
5379+
result_names = None
5380+
if self.names == other.names or other.name is None:
5381+
result_names = self.names
53515382

53525383
self_tuples = self.values
53535384
other_tuples = other.values
@@ -5380,7 +5411,9 @@ def difference(self, other):
53805411
' tuples')
53815412
result_names = self.names
53825413
else:
5383-
result_names = self.names if self.names == other.names else None
5414+
result_names = None
5415+
if self.names == other.names or other.names == None:
5416+
result_names = self.names
53845417

53855418
if self.equals(other):
53865419
return MultiIndex(levels=[[]] * self.nlevels,
@@ -5475,7 +5508,9 @@ def _bounds(self):
54755508
return self.__bounds
54765509

54775510
def _wrap_joined_index(self, joined, other):
5478-
names = self.names if self.names == other.names else None
5511+
names = None
5512+
if self.names == other.names or other.names == None:
5513+
names = self.names
54795514
return MultiIndex.from_tuples(joined, names=names)
54805515

54815516
@Appender(Index.isin.__doc__)

pandas/tests/test_index.py

+159-2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex)
1818
from pandas.core.index import InvalidIndexError, NumericIndex
1919
from pandas.util.testing import (assert_almost_equal, assertRaisesRegexp,
20-
assert_copy)
20+
assert_copy, assert_frame_equal)
2121
from pandas import compat
2222
from pandas.compat import long
2323

@@ -603,6 +603,157 @@ def test_shift(self):
603603
shifted.name = 'shifted'
604604
self.assertEqual(shifted.name, shifted.shift(1, 'D').name)
605605

606+
def test_union_naming_behavior(self):
607+
#9965
608+
idx_name_a = pd.Index([1,2,3], name='a')
609+
idx_name_b = pd.Index([4,5,6], name='b')
610+
idx2_name_a = pd.Index([2,9,8], name='a')
611+
612+
stridx_name_stra = pd.Index(['1','2'], name='stra')
613+
stridx_name_a = pd.Index(['1','2'], name='a')
614+
615+
idx_name_none = pd.Index(['1','2'], name=None)
616+
617+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
618+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
619+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
620+
621+
python_array = [1,2,3]
622+
numpy_array = np.array([1,2,3])
623+
624+
#index union index naming behavior
625+
self.assertEqual(idx_name_a.union(idx_name_b).name, None)
626+
self.assertEqual(idx_name_a.union(idx2_name_a).name, 'a')
627+
628+
#index union array
629+
self.assertEqual(idx_name_a.union(python_array).name, 'a')
630+
self.assertEqual(idx_name_a.union(numpy_array).name, 'a')
631+
632+
#index union index different dtype
633+
self.assertEqual(idx_name_a.union(stridx_name_a).name, 'a')
634+
self.assertEqual(idx_name_a.union(stridx_name_stra).name, None)
635+
636+
#index union index with no name
637+
self.assertEqual(idx_name_a.union(idx_name_none).name, 'a')
638+
639+
#index union dateindex
640+
self.assertEqual(idx_name_a.union(dateindex_name_a).name, 'a')
641+
self.assertEqual(idx_name_a.union(dateindex_name_b).name, None)
642+
self.assertEqual(idx_name_a.union(dateindex_name_None).name, 'a')
643+
644+
#dateindex union
645+
self.assertEqual(dateindex_name_a.union(python_array).name, 'a')
646+
self.assertEqual(dateindex_name_a.union(numpy_array).name, 'a')
647+
648+
self.assertEqual(dateindex_name_a.union(idx_name_none).name, 'a')
649+
self.assertEqual(dateindex_name_a.union(dateindex_name_b).name, None)
650+
self.assertEqual(dateindex_name_a.union(dateindex_name_None).name, 'a')
651+
self.assertEqual(dateindex_name_a.union(idx_name_a).name, 'a')
652+
653+
def test_intersection_naming_behavior(self):
654+
#9965
655+
idx_name_a = pd.Index([1,2,3], name='a')
656+
idx_name_b = pd.Index([4,5,6], name='b')
657+
idx2_name_a = pd.Index([2,9,8], name='a')
658+
659+
stridx_name_stra = pd.Index(['1','2'], name='stra')
660+
stridx_name_a = pd.Index(['1','2'], name='a')
661+
662+
idx_name_none = pd.Index(['1','2'], name=None)
663+
664+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
665+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
666+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
667+
668+
python_array = [1,2,3]
669+
numpy_array = np.array([1,2,3])
670+
671+
#index intersection index naming behavior
672+
self.assertEqual(idx_name_a.intersection(idx_name_b).name, None)
673+
self.assertEqual(idx_name_a.intersection(idx2_name_a).name, 'a')
674+
675+
#index intersection array
676+
self.assertEqual(idx_name_a.intersection(python_array).name, 'a')
677+
self.assertEqual(idx_name_a.intersection(numpy_array).name, 'a')
678+
679+
#index intersection index different dtype
680+
self.assertEqual(idx_name_a.intersection(stridx_name_a).name, 'a')
681+
self.assertEqual(idx_name_a.intersection(stridx_name_stra).name, None)
682+
683+
#index intersection index with no name
684+
self.assertEqual(idx_name_a.intersection(idx_name_none).name, 'a')
685+
686+
#index intersection dateindex
687+
self.assertEqual(idx_name_a.intersection(dateindex_name_a).name, 'a')
688+
self.assertEqual(idx_name_a.intersection(dateindex_name_b).name, None)
689+
self.assertEqual(idx_name_a.intersection(dateindex_name_None).name, 'a')
690+
691+
#dateindex intersection
692+
self.assertEqual(dateindex_name_a.intersection(python_array).name, 'a')
693+
self.assertEqual(dateindex_name_a.intersection(numpy_array).name, 'a')
694+
695+
self.assertEqual(dateindex_name_a.intersection(idx_name_none).name, 'a')
696+
self.assertEqual(dateindex_name_a.intersection(dateindex_name_b).name, None)
697+
self.assertEqual(dateindex_name_a.intersection(dateindex_name_None).name, 'a')
698+
self.assertEqual(dateindex_name_a.intersection(idx_name_a).name, 'a')
699+
700+
def test_append_naming_behavior(self):
701+
#9965
702+
idx_name_a = pd.Index([1,2,3], name='a')
703+
idx_name_b = pd.Index([4,5,6], name='b')
704+
idx2_name_a = pd.Index([2,9,8], name='a')
705+
706+
stridx_name_stra = pd.Index(['1','2'], name='stra')
707+
stridx_name_a = pd.Index(['1','2'], name='a')
708+
709+
idx_name_none = pd.Index(['1','2'], name=None)
710+
711+
dateindex_name_a = pd.DatetimeIndex([datetime.today()], name='a')
712+
dateindex_name_b = pd.DatetimeIndex([datetime.today()], name='b')
713+
dateindex_name_None = pd.DatetimeIndex([datetime.today()], name=None)
714+
715+
python_array = [1,2,3]
716+
python_array_transposed = [[1],[2],[3]]
717+
numpy_array = np.array([1,2,3])
718+
719+
#index append index naming behavior
720+
self.assertEqual(idx_name_a.append(idx_name_b).name, None)
721+
self.assertEqual(idx_name_a.append(idx2_name_a).name, 'a')
722+
723+
#index append array
724+
self.assertEqual(idx_name_a.append(python_array_transposed).name, 'a')
725+
self.assertEqual(idx_name_a.append(numpy_array.T).name, 'a')
726+
727+
#index append index different dtype
728+
self.assertEqual(idx_name_a.append(stridx_name_a).name, 'a')
729+
self.assertEqual(idx_name_a.append(stridx_name_stra).name, None)
730+
731+
#index append index with no name
732+
self.assertEqual(idx_name_a.append(idx_name_none).name, 'a')
733+
734+
#index append dateindex
735+
self.assertEqual(idx_name_a.append(dateindex_name_a).name, 'a')
736+
self.assertEqual(idx_name_a.append(dateindex_name_b).name, None)
737+
self.assertEqual(idx_name_a.append(dateindex_name_None).name, 'a')
738+
739+
#dateindex append
740+
self.assertEqual(dateindex_name_a.append(python_array_transposed).name, 'a')
741+
self.assertEqual(dateindex_name_a.append(numpy_array.T).name, 'a')
742+
743+
self.assertEqual(dateindex_name_a.append(idx_name_none).name, 'a')
744+
self.assertEqual(dateindex_name_a.append(dateindex_name_b).name, None)
745+
self.assertEqual(dateindex_name_a.append(dateindex_name_None).name, 'a')
746+
self.assertEqual(dateindex_name_a.append(idx_name_a).name, 'a')
747+
748+
def test_intersection_preserves_name(self):
749+
#GH 9943
750+
df = pd.DataFrame([np.nan, np.nan], columns = ['tags'], index=pd.Int64Index([4815961, 4815962], dtype='int64', name='id'))
751+
self.assertEqual(str(df), ' tags\nid \n4815961 NaN\n4815962 NaN')
752+
L = [4815962]
753+
self.assertEqual(list(L), list(df.index.intersection(L)))
754+
self.assertEqual( df.ix[L].tags.index.name, df.ix[df.index.intersection(L)].tags.index.name)
755+
assert_frame_equal(df.ix[L], df.ix[df.index.intersection(L)])
756+
606757
def test_intersection(self):
607758
first = self.strIndex[:20]
608759
second = self.strIndex[:10]
@@ -679,7 +830,13 @@ def test_union(self):
679830

680831
second.name = 'B'
681832
union = first.union(second)
682-
self.assertIsNone(union.name)
833+
self.assertEqual(None, union.name)
834+
835+
#union array-like
836+
print first
837+
print first.name
838+
union = first.union([1234])
839+
self.assertEqual('A', union.name)
683840

684841
def test_add(self):
685842

pandas/tseries/index.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -879,7 +879,10 @@ def append(self, other):
879879
to_concat.append(other)
880880

881881
for obj in to_concat:
882-
if isinstance(obj, Index) and obj.name != name:
882+
if (isinstance(obj, Index) and
883+
obj.name != name and
884+
obj.name is not None):
885+
883886
name = None
884887
break
885888

@@ -1026,7 +1029,7 @@ def __iter__(self):
10261029
yield v
10271030

10281031
def _wrap_union_result(self, other, result):
1029-
name = self.name if self.name == other.name else None
1032+
name = self.name if self.name == other.name or other.name == None else None
10301033
if self.tz != other.tz:
10311034
raise ValueError('Passed item and index have different timezone')
10321035
return self._simple_new(result, name=name, freq=None, tz=self.tz)

0 commit comments

Comments
 (0)