Skip to content

CLN: revisit & simplify Block/BlockManager, remove axes #6745

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 25, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,9 +1024,8 @@ def __init__(self, obj, path_or_buf=None, sep=",", na_rep='', float_format=None,

# preallocate data 2d list
self.blocks = self.obj._data.blocks
ncols = sum(len(b.items) for b in self.blocks)
ncols = sum(b.shape[0] for b in self.blocks)
self.data = [None] * ncols
self.column_map = self.obj._data.get_items_map(use_cached=False)

if chunksize is None:
chunksize = (100000 / (len(self.cols) or 1)) or 1
Expand Down Expand Up @@ -1293,10 +1292,9 @@ def _save_chunk(self, start_i, end_i):
float_format=self.float_format,
date_format=self.date_format)

for i, item in enumerate(b.items):

for col_loc, col in zip(b.mgr_locs, d):
# self.data is a preallocated list
self.data[self.column_map[b][i]] = d[i]
self.data[col_loc] = col

ix = data_index.to_native_types(slicer=slicer, na_rep=self.na_rep,
float_format=self.float_format,
Expand Down
45 changes: 24 additions & 21 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,9 +1043,11 @@ def to_panel(self):

new_blocks = []
for block in selfsorted._data.blocks:
newb = block2d_to_blocknd(block.values.T, block.items, shape,
[major_labels, minor_labels],
ref_items=selfsorted.columns)
newb = block2d_to_blocknd(
values=block.values.T,
placement=block.mgr_locs, shape=shape,
labels=[major_labels, minor_labels],
ref_items=selfsorted.columns)
new_blocks.append(newb)

# preserve names, if any
Expand Down Expand Up @@ -1934,7 +1936,9 @@ def _ensure_valid_index(self, value):
raise ValueError('Cannot set a frame with no defined index '
'and a value that cannot be converted to a '
'Series')
self._data.set_axis(1, value.index.copy(), check_axis=False)

self._data = self._data.reindex_axis(value.index.copy(), axis=1,
fill_value=np.nan)

# we are a scalar
# noop
Expand Down Expand Up @@ -2039,7 +2043,11 @@ def _sanitize_column(self, key, value):

@property
def _series(self):
return self._data.get_series_dict()
result = {}
for idx, item in enumerate(self.columns):
result[item] = Series(self._data.iget(idx), index=self.index,
name=item)
return result

def lookup(self, row_labels, col_labels):
"""Label-based "fancy indexing" function for DataFrame.
Expand Down Expand Up @@ -2629,16 +2637,14 @@ def trans(v):
indexer = _nargsort(labels, kind=kind, ascending=ascending,
na_position=na_position)

bm_axis = self._get_block_manager_axis(axis)
new_data = self._data.take(indexer, axis=bm_axis,
convert=False, verify=False)

if inplace:
if axis == 1:
new_data = self._data.reindex_items(
self._data.items[indexer],
copy=False)
elif axis == 0:
new_data = self._data.take(indexer)
self._update_inplace(new_data)
return self._update_inplace(new_data)
else:
return self.take(indexer, axis=axis, convert=False, is_copy=False)
return self._constructor(new_data).__finalize__(self)

def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
"""
Expand Down Expand Up @@ -2673,16 +2679,13 @@ def sortlevel(self, level=0, axis=0, ascending=True, inplace=False):
else:
return self.take(indexer, axis=axis, convert=False)

bm_axis = self._get_block_manager_axis(axis)
new_data = self._data.take(indexer, axis=bm_axis,
convert=False, verify=False)
if inplace:
if axis == 1:
new_data = self._data.reindex_items(
self._data.items[indexer],
copy=False)
elif axis == 0:
new_data = self._data.take(indexer)
self._update_inplace(new_data)
return self._update_inplace(new_data)
else:
return self.take(indexer, axis=axis, convert=False, is_copy=False)
return self._constructor(new_data).__finalize__(self)

def swaplevel(self, i, j, axis=0):
"""
Expand Down
52 changes: 13 additions & 39 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ def f(x):
f = _get_rename_function(v)

baxis = self._get_block_manager_axis(axis)
result._data = result._data.rename(f, axis=baxis, copy=copy)
result._data = result._data.rename_axis(f, axis=baxis, copy=copy)
result._clear_item_cache()

if inplace:
Expand Down Expand Up @@ -1217,21 +1217,9 @@ def take(self, indices, axis=0, convert=True, is_copy=True):
taken : type of caller
"""

# check/convert indicies here
if convert:
axis = self._get_axis_number(axis)
indices = _maybe_convert_indices(
indices, len(self._get_axis(axis)))

baxis = self._get_block_manager_axis(axis)
if baxis == 0:
labels = self._get_axis(axis)
new_items = labels.take(indices)
new_data = self._data.reindex_axis(new_items, indexer=indices,
axis=baxis)
else:
new_data = self._data.take(indices, axis=baxis)

new_data = self._data.take(indices,
axis=self._get_block_manager_axis(axis),
convert=True, verify=True)
result = self._constructor(new_data).__finalize__(self)

# maybe set copy if we didn't actually change the index
Expand Down Expand Up @@ -1701,7 +1689,7 @@ def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True,
labels, method, level, limit=limit, copy_if_needed=True)
return self._reindex_with_indexers(
{axis: [new_index, indexer]}, method=method, fill_value=fill_value,
limit=limit, copy=copy).__finalize__(self)
limit=limit, copy=copy)

def _reindex_with_indexers(self, reindexers, method=None,
fill_value=np.nan, limit=None, copy=False,
Expand All @@ -1716,30 +1704,16 @@ def _reindex_with_indexers(self, reindexers, method=None,

if index is None:
continue
index = _ensure_index(index)

# reindex the axis
if method is not None:
new_data = new_data.reindex_axis(
index, indexer=indexer, method=method, axis=baxis,
fill_value=fill_value, limit=limit, copy=copy)

elif indexer is not None:
# TODO: speed up on homogeneous DataFrame objects
index = _ensure_index(index)
if indexer is not None:
indexer = com._ensure_int64(indexer)
new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
fill_value=fill_value,
allow_dups=allow_dups)

elif (baxis == 0 and index is not None and
index is not new_data.axes[baxis]):
new_data = new_data.reindex_items(index, copy=copy,
fill_value=fill_value)

elif (baxis > 0 and index is not None and
index is not new_data.axes[baxis]):
new_data = new_data.copy(deep=copy)
new_data.set_axis(baxis, index)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is copy handled in BlockManager.reindex_indexer?

I don't think this is well tested, and most of the time it should probably copy unless the indexes are identical.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A rule of thumb I try to follow is, yeah, that reindex should copy, unless there's an "inplace=True" kwarg somewhere.

But point taken, need to double check that.

# TODO: speed up on homogeneous DataFrame objects
new_data = new_data.reindex_indexer(index, indexer, axis=baxis,
fill_value=fill_value,
allow_dups=allow_dups,
copy=copy)

if copy and new_data is self._data:
new_data = new_data.copy()
Expand Down
44 changes: 15 additions & 29 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2196,10 +2196,10 @@ def _iterate_slices(self):
yield val, slicer(val)

def _cython_agg_general(self, how, numeric_only=True):
new_blocks = self._cython_agg_blocks(how, numeric_only=numeric_only)
return self._wrap_agged_blocks(new_blocks)
new_items, new_blocks = self._cython_agg_blocks(how, numeric_only=numeric_only)
return self._wrap_agged_blocks(new_items, new_blocks)

def _wrap_agged_blocks(self, blocks):
def _wrap_agged_blocks(self, items, blocks):
obj = self._obj_with_exclusions

new_axes = list(obj._data.axes)
Expand All @@ -2210,6 +2210,10 @@ def _wrap_agged_blocks(self, blocks):
else:
new_axes[self.axis] = self.grouper.result_index

# Make sure block manager integrity check passes.
assert new_axes[0].equals(items)
new_axes[0] = items

mgr = BlockManager(blocks, new_axes)

new_obj = type(obj)(mgr)
Expand All @@ -2223,14 +2227,14 @@ def _cython_agg_blocks(self, how, numeric_only=True):

new_blocks = []

if numeric_only:
data = data.get_numeric_data(copy=False)

for block in data.blocks:
values = block.values

is_numeric = is_numeric_dtype(values.dtype)

if numeric_only and not is_numeric:
continue

if is_numeric:
values = com.ensure_float(values)

Expand All @@ -2239,13 +2243,13 @@ def _cython_agg_blocks(self, how, numeric_only=True):
# see if we can cast the block back to the original dtype
result = block._try_cast_result(result)

newb = make_block(result, block.items, block.ref_items)
newb = make_block(result, placement=block.mgr_locs)
new_blocks.append(newb)

if len(new_blocks) == 0:
raise DataError('No numeric types to aggregate')

return new_blocks
return data.items, new_blocks

def _get_data_to_aggregate(self):
obj = self._obj_with_exclusions
Expand Down Expand Up @@ -2837,28 +2841,10 @@ def _wrap_aggregated_output(self, output, names=None):

return result.convert_objects()

def _wrap_agged_blocks(self, blocks):
obj = self._obj_with_exclusions

if self.axis == 0:
agg_labels = obj.columns
else:
agg_labels = obj.index

if sum(len(x.items) for x in blocks) == len(agg_labels):
output_keys = agg_labels
else:
all_items = []
for b in blocks:
all_items.extend(b.items)
output_keys = agg_labels[agg_labels.isin(all_items)]

for blk in blocks:
blk.set_ref_items(output_keys, maybe_rename=False)

def _wrap_agged_blocks(self, items, blocks):
if not self.as_index:
index = np.arange(blocks[0].values.shape[1])
mgr = BlockManager(blocks, [output_keys, index])
mgr = BlockManager(blocks, [items, index])
result = DataFrame(mgr)

group_levels = self.grouper.get_group_levels()
Expand All @@ -2869,7 +2855,7 @@ def _wrap_agged_blocks(self, blocks):
result = result.consolidate()
else:
index = self.grouper.result_index
mgr = BlockManager(blocks, [output_keys, index])
mgr = BlockManager(blocks, [items, index])
result = DataFrame(mgr)

if self.axis == 1:
Expand Down
Loading