Skip to content

Commit 5d6e7c8

Browse files
committed
ENH/BUG: setting multiple columns via hierarchical indexing. close #2295
1 parent 9202540 commit 5d6e7c8

File tree

7 files changed

+46
-21
lines changed

7 files changed

+46
-21
lines changed

RELEASE.rst

+2
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ pandas 0.10.0
5757
- Fix regression in Index console formatting (#2319)
5858
- Box Period data when assigning PeriodIndex to frame column (#2243, #2281)
5959
- Raise exception on calling reset_index on Series with inplace=True (#2277)
60+
- Enable setting multiple columns in DataFrame with hierarchical columns
61+
(#2295)
6062

6163
pandas 0.9.1
6264
============

pandas/core/frame.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -2008,12 +2008,16 @@ def _sanitize_column(self, key, value):
20082008
# Need to make sure new columns (which go into the BlockManager as new
20092009
# blocks) are always copied
20102010
if _is_sequence(value):
2011-
if isinstance(value, Series):
2011+
is_frame = isinstance(value, DataFrame)
2012+
if isinstance(value, Series) or is_frame:
20122013
if value.index.equals(self.index):
20132014
# copy the values
20142015
value = value.values.copy()
20152016
else:
20162017
value = value.reindex(self.index).values
2018+
2019+
if is_frame:
2020+
value = value.T
20172021
else:
20182022
if len(value) != len(self.index):
20192023
raise AssertionError('Length of values does not match '
@@ -2023,6 +2027,8 @@ def _sanitize_column(self, key, value):
20232027
value = com._asarray_tuplesafe(value)
20242028
elif isinstance(value, PeriodIndex):
20252029
value = value.asobject
2030+
elif value.ndim == 2:
2031+
value = value.copy().T
20262032
else:
20272033
value = value.copy()
20282034
else:

pandas/core/generic.py

+1-12
Original file line numberDiff line numberDiff line change
@@ -542,19 +542,8 @@ def _clear_item_cache(self):
542542
self._item_cache.clear()
543543

544544
def _set_item(self, key, value):
545-
if hasattr(self, 'columns') and isinstance(self.columns, MultiIndex):
546-
# Pad the key with empty strings if lower levels of the key
547-
# aren't specified:
548-
if not isinstance(key, tuple):
549-
key = (key,)
550-
if len(key) != self.columns.nlevels:
551-
key += ('',) * (self.columns.nlevels - len(key))
552545
self._data.set(key, value)
553-
554-
try:
555-
del self._item_cache[key]
556-
except KeyError:
557-
pass
546+
self._clear_item_cache()
558547

559548
def __delitem__(self, key):
560549
"""

pandas/core/index.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -2369,9 +2369,12 @@ def insert(self, loc, item):
23692369
-------
23702370
new_index : Index
23712371
"""
2372-
if not isinstance(item, tuple) or len(item) != self.nlevels:
2373-
raise Exception("%s cannot be inserted in this MultiIndex"
2374-
% str(item))
2372+
# Pad the key with empty strings if lower levels of the key
2373+
# aren't specified:
2374+
if not isinstance(item, tuple):
2375+
item = (item,) + ('',) * (self.nlevels - 1)
2376+
elif len(item) != self.nlevels:
2377+
raise ValueError('Passed item incompatible tuple length')
23752378

23762379
new_levels = []
23772380
new_labels = []

pandas/core/internals.py

+16-4
Original file line numberDiff line numberDiff line change
@@ -921,15 +921,27 @@ def set(self, item, value):
921921
if value.shape[1:] != self.shape[1:]:
922922
raise AssertionError('Shape of new values must be compatible '
923923
'with manager shape')
924-
if item in self.items:
924+
925+
def _set_item(item, arr):
925926
i, block = self._find_block(item)
926927
if not block.should_store(value):
927928
# delete from block, create and append new block
928929
self._delete_from_block(i, item)
929-
self._add_new_block(item, value, loc=None)
930+
self._add_new_block(item, arr, loc=None)
930931
else:
931-
block.set(item, value)
932-
else:
932+
block.set(item, arr)
933+
934+
try:
935+
loc = self.items.get_loc(item)
936+
if isinstance(loc, int):
937+
_set_item(self.items[loc], value)
938+
else:
939+
subset = self.items[loc]
940+
if len(value) != len(subset):
941+
raise AssertionError('Number of items to set did not match')
942+
for i, (item, arr) in enumerate(zip(subset, value)):
943+
_set_item(item, arr[None, :])
944+
except KeyError:
933945
# insert at end
934946
self.insert(len(self.items), item, value)
935947

pandas/tests/test_index.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -849,7 +849,7 @@ def test_int_name_format(self):
849849

850850
def test_print_unicode_columns(self):
851851
df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]})
852-
print(df.columns) # should not raise UnicodeDecodeError
852+
repr(df.columns) # should not raise UnicodeDecodeError
853853

854854
def test_repr_summary(self):
855855
r = repr(pd.Index(np.arange(10000)))

pandas/tests/test_multilevel.py

+13
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,19 @@ def test_frame_getitem_setitem_multislice(self):
274274
df.ix[:,:] = 10
275275
assert_frame_equal(df, result)
276276

277+
def test_frame_setitem_multi_column(self):
278+
df = DataFrame(randn(10, 4), columns=[['a', 'a', 'b', 'b'],
279+
[0, 1, 0, 1]])
280+
281+
cp = df.copy()
282+
cp['a'] = cp['b']
283+
assert_frame_equal(cp['a'], cp['b'])
284+
285+
# set with ndarray
286+
cp = df.copy()
287+
cp['a'] = cp['b'].values
288+
assert_frame_equal(cp['a'], cp['b'])
289+
277290
def test_getitem_tuple_plus_slice(self):
278291
# GH #671
279292
df = DataFrame({'a' : range(10),

0 commit comments

Comments
 (0)