Skip to content

Commit 02161ff

Browse files
committed
Merge pull request #8460 from immerrr/preserve-index-names-when-reindexing
BUG: fix Index.reindex to preserve name when target is list/ndarray
2 parents ba4f146 + c1e3369 commit 02161ff

File tree

7 files changed

+90
-30
lines changed

7 files changed

+90
-30
lines changed

doc/source/v0.15.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1012,3 +1012,4 @@ Bug Fixes
10121012
- Bug in masked series assignment where mismatching types would break alignment (:issue:`8387`)
10131013
- Bug in NDFrame.equals where false negatives were returned with dtype=object (:issue:`8437`)
10141014
- Bug in assignment with indexer where type diversity would break alignment (:issue:`8258`)
1015+
- Bug in ``NDFrame.loc`` indexing where row/column names were lost when the target was a list/ndarray (:issue:`6552`)

pandas/core/frame.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -2260,17 +2260,15 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
22602260
def _reindex_index(self, new_index, method, copy, level, fill_value=NA,
22612261
limit=None):
22622262
new_index, indexer = self.index.reindex(new_index, method, level,
2263-
limit=limit,
2264-
copy_if_needed=True)
2263+
limit=limit)
22652264
return self._reindex_with_indexers({0: [new_index, indexer]},
22662265
copy=copy, fill_value=fill_value,
22672266
allow_dups=False)
22682267

22692268
def _reindex_columns(self, new_columns, copy, level, fill_value=NA,
22702269
limit=None):
22712270
new_columns, indexer = self.columns.reindex(new_columns, level=level,
2272-
limit=limit,
2273-
copy_if_needed=True)
2271+
limit=limit)
22742272
return self._reindex_with_indexers({1: [new_columns, indexer]},
22752273
copy=copy, fill_value=fill_value,
22762274
allow_dups=False)

pandas/core/generic.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1776,8 +1776,8 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
17761776
axis_name = self._get_axis_name(axis)
17771777
axis_values = self._get_axis(axis_name)
17781778
method = com._clean_fill_method(method)
1779-
new_index, indexer = axis_values.reindex(
1780-
labels, method, level, limit=limit, copy_if_needed=True)
1779+
new_index, indexer = axis_values.reindex(labels, method, level,
1780+
limit=limit)
17811781
return self._reindex_with_indexers(
17821782
{axis: [new_index, indexer]}, method=method, fill_value=fill_value,
17831783
limit=limit, copy=copy)

pandas/core/index.py

+30-20
Original file line numberDiff line numberDiff line change
@@ -1578,34 +1578,31 @@ def _get_method(self, method):
15781578
}
15791579
return aliases.get(method, method)
15801580

1581-
def reindex(self, target, method=None, level=None, limit=None,
1582-
copy_if_needed=False):
1581+
def reindex(self, target, method=None, level=None, limit=None):
15831582
"""
1584-
For Index, simply returns the new index and the results of
1585-
get_indexer. Provided here to enable an interface that is amenable for
1586-
subclasses of Index whose internals are different (like MultiIndex)
1583+
Create index with target's values (move/add/delete values as necessary)
15871584
15881585
Returns
15891586
-------
1590-
(new_index, indexer, mask) : tuple
1587+
new_index : pd.Index
1588+
Resulting index
1589+
indexer : np.ndarray or None
1590+
Indices of output values in original index
1591+
15911592
"""
1593+
# GH6552: preserve names when reindexing to non-named target
1594+
# (i.e. neither Index nor Series).
1595+
preserve_names = not hasattr(target, 'name')
1596+
15921597
target = _ensure_index(target)
15931598
if level is not None:
15941599
if method is not None:
15951600
raise TypeError('Fill method not supported if level passed')
15961601
_, indexer, _ = self._join_level(target, level, how='right',
15971602
return_indexers=True)
15981603
else:
1599-
16001604
if self.equals(target):
16011605
indexer = None
1602-
1603-
# to avoid aliasing an existing index
1604-
if (copy_if_needed and target.name != self.name and
1605-
self.name is not None):
1606-
if target.name is None:
1607-
target = self.copy()
1608-
16091606
else:
16101607
if self.is_unique:
16111608
indexer = self.get_indexer(target, method=method,
@@ -1616,6 +1613,10 @@ def reindex(self, target, method=None, level=None, limit=None,
16161613
"with a method or limit")
16171614
indexer, missing = self.get_indexer_non_unique(target)
16181615

1616+
if preserve_names and target.nlevels == 1 and target.name != self.name:
1617+
target = target.copy()
1618+
target.name = self.name
1619+
16191620
return target, indexer
16201621

16211622
def join(self, other, how='left', level=None, return_indexers=False):
@@ -3686,17 +3687,21 @@ def get_indexer(self, target, method=None, limit=None):
36863687

36873688
return com._ensure_platform_int(indexer)
36883689

3689-
def reindex(self, target, method=None, level=None, limit=None,
3690-
copy_if_needed=False):
3690+
def reindex(self, target, method=None, level=None, limit=None):
36913691
"""
3692-
Performs any necessary conversion on the input index and calls
3693-
get_indexer. This method is here so MultiIndex and an Index of
3694-
like-labeled tuples can play nice together
3692+
Create index with target's values (move/add/delete values as necessary)
36953693
36963694
Returns
36973695
-------
3698-
(new_index, indexer, mask) : (MultiIndex, ndarray, ndarray)
3696+
new_index : pd.MultiIndex
3697+
Resulting index
3698+
indexer : np.ndarray or None
3699+
Indices of output values in original index
3700+
36993701
"""
3702+
# GH6552: preserve names when reindexing to non-named target
3703+
# (i.e. neither Index nor Series).
3704+
preserve_names = not hasattr(target, 'names')
37003705

37013706
if level is not None:
37023707
if method is not None:
@@ -3724,6 +3729,11 @@ def reindex(self, target, method=None, level=None, limit=None,
37243729
# hopefully?
37253730
target = MultiIndex.from_tuples(target)
37263731

3732+
if (preserve_names and target.nlevels == self.nlevels and
3733+
target.names != self.names):
3734+
target = target.copy(deep=False)
3735+
target.names = self.names
3736+
37273737
return target, indexer
37283738

37293739
@cache_readonly

pandas/core/internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3088,7 +3088,7 @@ def reindex_axis(self, new_index, axis, method=None, limit=None,
30883088
"""
30893089
new_index = _ensure_index(new_index)
30903090
new_index, indexer = self.axes[axis].reindex(
3091-
new_index, method=method, limit=limit, copy_if_needed=True)
3091+
new_index, method=method, limit=limit)
30923092

30933093
return self.reindex_indexer(new_index, indexer, axis=axis,
30943094
fill_value=fill_value, copy=copy)

pandas/tests/test_index.py

+54
Original file line numberDiff line numberDiff line change
@@ -1019,6 +1019,36 @@ def test_nan_first_take_datetime(self):
10191019
exp = Index([idx[-1], idx[0], idx[1]])
10201020
tm.assert_index_equal(res, exp)
10211021

1022+
def test_reindex_preserves_name_if_target_is_list_or_ndarray(self):
1023+
# GH6552
1024+
idx = pd.Index([0, 1, 2])
1025+
1026+
dt_idx = pd.date_range('20130101', periods=3)
1027+
1028+
idx.name = None
1029+
self.assertEqual(idx.reindex([])[0].name, None)
1030+
self.assertEqual(idx.reindex(np.array([]))[0].name, None)
1031+
self.assertEqual(idx.reindex(idx.tolist())[0].name, None)
1032+
self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, None)
1033+
self.assertEqual(idx.reindex(idx.values)[0].name, None)
1034+
self.assertEqual(idx.reindex(idx.values[:-1])[0].name, None)
1035+
1036+
# Must preserve name even if dtype changes.
1037+
self.assertEqual(idx.reindex(dt_idx.values)[0].name, None)
1038+
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, None)
1039+
1040+
idx.name = 'foobar'
1041+
self.assertEqual(idx.reindex([])[0].name, 'foobar')
1042+
self.assertEqual(idx.reindex(np.array([]))[0].name, 'foobar')
1043+
self.assertEqual(idx.reindex(idx.tolist())[0].name, 'foobar')
1044+
self.assertEqual(idx.reindex(idx.tolist()[:-1])[0].name, 'foobar')
1045+
self.assertEqual(idx.reindex(idx.values)[0].name, 'foobar')
1046+
self.assertEqual(idx.reindex(idx.values[:-1])[0].name, 'foobar')
1047+
1048+
# Must preserve name even if dtype changes.
1049+
self.assertEqual(idx.reindex(dt_idx.values)[0].name, 'foobar')
1050+
self.assertEqual(idx.reindex(dt_idx.tolist())[0].name, 'foobar')
1051+
10221052

10231053
class Numeric(Base):
10241054

@@ -3267,6 +3297,30 @@ def test_isin_level_kwarg(self):
32673297

32683298
self.assertRaises(KeyError, idx.isin, vals_1, level='C')
32693299

3300+
def test_reindex_preserves_names_when_target_is_list_or_ndarray(self):
3301+
# GH6552
3302+
idx = self.index.copy()
3303+
target = idx.copy()
3304+
idx.names = target.names = [None, None]
3305+
3306+
other_dtype = pd.MultiIndex.from_product([[1, 2], [3, 4]])
3307+
3308+
# list & ndarray cases
3309+
self.assertEqual(idx.reindex([])[0].names, [None, None])
3310+
self.assertEqual(idx.reindex(np.array([]))[0].names, [None, None])
3311+
self.assertEqual(idx.reindex(target.tolist())[0].names, [None, None])
3312+
self.assertEqual(idx.reindex(target.values)[0].names, [None, None])
3313+
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, [None, None])
3314+
self.assertEqual(idx.reindex(other_dtype.values)[0].names, [None, None])
3315+
3316+
idx.names = ['foo', 'bar']
3317+
self.assertEqual(idx.reindex([])[0].names, ['foo', 'bar'])
3318+
self.assertEqual(idx.reindex(np.array([]))[0].names, ['foo', 'bar'])
3319+
self.assertEqual(idx.reindex(target.tolist())[0].names, ['foo', 'bar'])
3320+
self.assertEqual(idx.reindex(target.values)[0].names, ['foo', 'bar'])
3321+
self.assertEqual(idx.reindex(other_dtype.tolist())[0].names, ['foo', 'bar'])
3322+
self.assertEqual(idx.reindex(other_dtype.values)[0].names, ['foo', 'bar'])
3323+
32703324

32713325
def test_get_combined_index():
32723326
from pandas.core.index import _get_combined_index

pandas/tests/test_indexing.py

-3
Original file line numberDiff line numberDiff line change
@@ -3836,17 +3836,14 @@ def test_iloc_empty_list_indexer_is_ok(self):
38363836
assert_frame_equal(df.iloc[[],:], df.iloc[:0, :]) # horizontal empty
38373837
assert_frame_equal(df.iloc[[]], df.iloc[:0, :]) # horizontal empty
38383838

3839-
# FIXME: fix loc & xs
38403839
def test_loc_empty_list_indexer_is_ok(self):
3841-
raise nose.SkipTest('loc discards columns names')
38423840
from pandas.util.testing import makeCustomDataframe as mkdf
38433841
df = mkdf(5, 2)
38443842
assert_frame_equal(df.loc[:,[]], df.iloc[:, :0]) # vertical empty
38453843
assert_frame_equal(df.loc[[],:], df.iloc[:0, :]) # horizontal empty
38463844
assert_frame_equal(df.loc[[]], df.iloc[:0, :]) # horizontal empty
38473845

38483846
def test_ix_empty_list_indexer_is_ok(self):
3849-
raise nose.SkipTest('ix discards columns names')
38503847
from pandas.util.testing import makeCustomDataframe as mkdf
38513848
df = mkdf(5, 2)
38523849
assert_frame_equal(df.ix[:,[]], df.iloc[:, :0]) # vertical empty

0 commit comments

Comments
 (0)