diff --git a/doc/source/release.rst b/doc/source/release.rst index c0d4c0c73296f..5e0593a2beec4 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -296,6 +296,7 @@ Bug Fixes - Bug in consistency of groupby aggregation when passing a custom function (:issue:`6715`) - Bug in resample when ``how=None`` resample freq is the same as the axis frequency (:issue:`5955`) - Bug in downcasting inference with empty arrays (:issue:`6733`) +- Bug in ``obj.blocks`` on sparse containers dropping all but the last items of the same dtype (:issue:`6748`) pandas 0.13.1 ------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fc7883f789703..38f4ba0a25d07 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2003,7 +2003,7 @@ def ftypes(self): return Series(self._data.get_ftypes(), index=self._info_axis, dtype=np.object_) - def as_blocks(self, columns=None): + def as_blocks(self): """ Convert the frame to a dict of dtype -> Constructor Types that each has a homogeneous dtype. @@ -2025,12 +2025,18 @@ def as_blocks(self, columns=None): """ self._consolidate_inplace() - bd = dict() + bd = {} for b in self._data.blocks: - b = b.reindex_items_from(columns or b.items) - bd[str(b.dtype)] = self._constructor( - BlockManager([b], [b.items, self.index])).__finalize__(self) - return bd + bd.setdefault(str(b.dtype), []).append(b) + + result = {} + for dtype, blocks in bd.items(): + # Must combine even after consolidation, because there may be + # sparse items which are never consolidated into one block. 
+ combined = self._data.combine(blocks, copy=True) + result[dtype] = self._constructor(combined).__finalize__(self) + + return result @property def blocks(self): diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 030fe5fb821c4..7696353dca6f1 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1515,6 +1515,14 @@ def test_sparse_pow_issue(self): self.assertEqual(len(r2.sp_values), len(r1.sp_values)) + def test_as_blocks(self): + df = SparseDataFrame({'A': [1.1, 3.3], 'B': [nan, -3.9]}, + dtype='float64') + + df_blocks = df.blocks + self.assertEqual(list(df_blocks.keys()), ['float64']) + assert_frame_equal(df_blocks['float64'], df) + def _dense_series_compare(s, f): result = f(s)