Skip to content

Commit 503359c

Browse files
committed
Merge pull request #11400 from rekcahpassyla/multiindex_setitem
BUG: using .ix with a multi-index indexer
2 parents 1135ce3 + 3d84a72 commit 503359c

File tree

3 files changed

+112
-26
lines changed

3 files changed

+112
-26
lines changed

doc/source/whatsnew/v0.17.1.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ Bug Fixes
7171
- Bug in ``HDFStore.append`` with strings whose encoded length exceeded the max unencoded length (:issue:`11234`)
7272
- Bug in merging ``datetime64[ns, tz]`` dtypes (:issue:`11405`)
7373
- Bug in ``HDFStore.select`` when comparing with a numpy scalar in a where clause (:issue:`11283`)
74-
74+
- Bug in using ``DataFrame.ix`` with a multi-index indexer (:issue:`11372`)
7575

7676

7777
- Bug in tz-conversions with an ambiguous time and ``.dt`` accessors (:issue:`11295`)

pandas/core/indexing.py

+27-4
Original file line numberDiff line numberDiff line change
@@ -443,11 +443,14 @@ def can_do_equal_len():
443443
# we have an equal len Frame
444444
if isinstance(value, ABCDataFrame) and value.ndim > 1:
445445
sub_indexer = list(indexer)
446+
multiindex_indexer = isinstance(labels, MultiIndex)
446447

447448
for item in labels:
448449
if item in value:
449450
sub_indexer[info_axis] = item
450-
v = self._align_series(tuple(sub_indexer), value[item])
451+
v = self._align_series(
452+
tuple(sub_indexer), value[item], multiindex_indexer
453+
)
451454
else:
452455
v = np.nan
453456

@@ -516,8 +519,28 @@ def can_do_equal_len():
516519
self.obj._data = self.obj._data.setitem(indexer=indexer, value=value)
517520
self.obj._maybe_update_cacher(clear=True)
518521

519-
def _align_series(self, indexer, ser):
520-
# indexer to assign Series can be tuple, slice, scalar
522+
def _align_series(self, indexer, ser, multiindex_indexer=False):
523+
"""
524+
Parameters
525+
----------
526+
indexer : tuple, slice, scalar
527+
The indexer used to get the locations that will be set to
528+
`ser`
529+
530+
ser : pd.Series
531+
The values to assign to the locations specified by `indexer`
532+
533+
multiindex_indexer : boolean, optional
534+
Defaults to False. Should be set to True if `indexer` was from
535+
a `pd.MultiIndex`, to avoid unnecessary broadcasting.
536+
537+
538+
Returns
539+
-------
540+
`np.array` of `ser` broadcast to the appropriate shape for assignment
541+
to the locations selected by `indexer`
542+
543+
"""
521544
if isinstance(indexer, (slice, np.ndarray, list, Index)):
522545
indexer = tuple([indexer])
523546

@@ -555,7 +578,7 @@ def _align_series(self, indexer, ser):
555578
ser = ser.reindex(obj.axes[0][indexer[0]], copy=True)._values
556579

557580
# single indexer
558-
if len(indexer) > 1:
581+
if len(indexer) > 1 and not multiindex_indexer:
559582
l = len(indexer[1])
560583
ser = np.tile(ser, l).reshape(l, -1).T
561584

pandas/tests/test_indexing.py

+84-21
Original file line numberDiff line numberDiff line change
@@ -762,32 +762,95 @@ def compare(result, expected):
762762
result2 = s.loc[0:3]
763763
assert_series_equal(result1,result2)
764764

765-
def test_loc_setitem_multiindex(self):
765+
def test_setitem_multiindex(self):
766+
for index_fn in ('ix', 'loc'):
767+
def check(target, indexers, value, compare_fn, expected=None):
768+
fn = getattr(target, index_fn)
769+
fn.__setitem__(indexers, value)
770+
result = fn.__getitem__(indexers)
771+
if expected is None:
772+
expected = value
773+
compare_fn(result, expected)
774+
# GH7190
775+
index = pd.MultiIndex.from_product([np.arange(0,100), np.arange(0, 80)], names=['time', 'firm'])
776+
t, n = 0, 2
777+
df = DataFrame(np.nan,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
778+
check(
779+
target=df, indexers=((t,n), 'X'),
780+
value=0, compare_fn=self.assertEqual
781+
)
766782

767-
# GH7190
768-
index = pd.MultiIndex.from_product([np.arange(0,100), np.arange(0, 80)], names=['time', 'firm'])
769-
t, n = 0, 2
783+
df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
784+
check(
785+
target=df, indexers=((t,n), 'X'),
786+
value=1, compare_fn=self.assertEqual
787+
)
770788

771-
df = DataFrame(np.nan,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
772-
df.loc[(t,n),'X'] = 0
773-
result = df.loc[(t,n),'X']
774-
self.assertEqual(result, 0)
789+
df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
790+
check(
791+
target=df, indexers=((t,n), 'X'),
792+
value=2, compare_fn=self.assertEqual
793+
)
775794

776-
df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
777-
df.loc[(t,n),'X'] = 1
778-
result = df.loc[(t,n),'X']
779-
self.assertEqual(result, 1)
795+
# GH 7218, assigning with 0-dim arrays
796+
df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
797+
check(
798+
target=df, indexers=((t,n), 'X'),
799+
value=np.array(3), compare_fn=self.assertEqual,
800+
expected=3,
801+
)
780802

781-
df = DataFrame(columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
782-
df.loc[(t,n),'X'] = 2
783-
result = df.loc[(t,n),'X']
784-
self.assertEqual(result, 2)
803+
# GH5206
804+
df = pd.DataFrame(
805+
np.arange(25).reshape(5, 5), columns='A,B,C,D,E'.split(','),
806+
dtype=float
807+
)
808+
df['F'] = 99
809+
row_selection = df['A'] % 2 == 0
810+
col_selection = ['B', 'C']
811+
df.ix[row_selection, col_selection] = df['F']
812+
output = pd.DataFrame(99., index=[0, 2, 4], columns=['B', 'C'])
813+
assert_frame_equal(df.ix[row_selection, col_selection], output)
814+
check(
815+
target=df, indexers=(row_selection, col_selection),
816+
value=df['F'], compare_fn=assert_frame_equal,
817+
expected=output,
818+
)
785819

786-
# GH 7218, assigning with 0-dim arrays
787-
df = DataFrame(-999,columns=['A', 'w', 'l', 'a', 'x', 'X', 'd', 'profit'], index=index)
788-
df.loc[(t,n), 'X'] = np.array(3)
789-
result = df.loc[(t,n),'X']
790-
self.assertEqual(result,3)
820+
# GH11372
821+
idx = pd.MultiIndex.from_product([
822+
['A', 'B', 'C'],
823+
pd.date_range('2015-01-01', '2015-04-01', freq='MS')
824+
])
825+
cols = pd.MultiIndex.from_product([
826+
['foo', 'bar'],
827+
pd.date_range('2016-01-01', '2016-02-01', freq='MS')
828+
])
829+
df = pd.DataFrame(np.random.random((12, 4)), index=idx, columns=cols)
830+
subidx = pd.MultiIndex.from_tuples(
831+
[('A', pd.Timestamp('2015-01-01')), ('A', pd.Timestamp('2015-02-01'))]
832+
)
833+
subcols = pd.MultiIndex.from_tuples(
834+
[('foo', pd.Timestamp('2016-01-01')), ('foo', pd.Timestamp('2016-02-01'))]
835+
)
836+
vals = pd.DataFrame(np.random.random((2, 2)), index=subidx, columns=subcols)
837+
check(
838+
target=df, indexers=(subidx, subcols),
839+
value=vals, compare_fn=assert_frame_equal,
840+
)
841+
# set all columns
842+
vals = pd.DataFrame(np.random.random((2, 4)), index=subidx, columns=cols)
843+
check(
844+
target=df, indexers=(subidx, slice(None, None, None)),
845+
value=vals, compare_fn=assert_frame_equal,
846+
)
847+
# identity
848+
copy = df.copy()
849+
check(
850+
target=df, indexers=(df.index, df.columns),
851+
value=df, compare_fn=assert_frame_equal,
852+
expected=copy
853+
)
791854

792855
def test_indexing_with_datetime_tz(self):
793856

0 commit comments

Comments
 (0)