diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 64e6bc0ab307c..7ed98bd3170c0 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -752,6 +752,7 @@ Bug Fixes - Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`) - Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`) - Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`) +- Bug in ``SparseDataFrame`` where the ``dtype`` and ``fill_value`` of a passed ``SparseArray`` or ``SparseSeries`` were not respected (:issue:`13866`) - Bug when passing a not-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`) - Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`) - Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`) diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 35233d1b6ba94..a0dbb35bffe92 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -9,7 +9,6 @@ import pandas as pd from pandas.core.base import PandasObject -import pandas.core.common as com from pandas import compat, lib from pandas.compat import range @@ -577,11 +576,9 @@ def _maybe_to_dense(obj): def _maybe_to_sparse(array): + """ array must be SparseSeries or SparseArray """ if isinstance(array, ABCSparseSeries): - array = SparseArray(array.values, sparse_index=array.sp_index, - fill_value=array.fill_value, copy=True) - if not isinstance(array, SparseArray): - array = com._values_from_object(array) + array = array.values.copy() return array diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 2ea0536ca4fbb..985899e6c6b79 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ 
-151,7 +151,7 @@ def _init_dict(self, data, index, columns, dtype=None): if not isinstance(v, SparseSeries): v = sp_maker(v.values) elif isinstance(v, SparseArray): - v = sp_maker(v.values) + v = v.copy() else: if isinstance(v, dict): v = [v.get(i, nan) for i in index] diff --git a/pandas/sparse/tests/test_frame.py b/pandas/sparse/tests/test_frame.py index 43d35a4e7f72e..9514f9322f68e 100644 --- a/pandas/sparse/tests/test_frame.py +++ b/pandas/sparse/tests/test_frame.py @@ -192,6 +192,28 @@ def test_constructor_from_series(self): # without sparse value raises error # df2 = SparseDataFrame([x2_sparse, y]) + def test_constructor_preserve_attr(self): + # GH 13866 + arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + + df = pd.SparseDataFrame({'x': arr}) + self.assertEqual(df['x'].dtype, np.int64) + self.assertEqual(df['x'].fill_value, 0) + + s = pd.SparseSeries(arr, name='x') + self.assertEqual(s.dtype, np.int64) + self.assertEqual(s.fill_value, 0) + + df = pd.SparseDataFrame(s) + self.assertEqual(df['x'].dtype, np.int64) + self.assertEqual(df['x'].fill_value, 0) + + df = pd.SparseDataFrame({'x': s}) + self.assertEqual(df['x'].dtype, np.int64) + self.assertEqual(df['x'].fill_value, 0) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.ix[:9998] = np.nan diff --git a/pandas/sparse/tests/test_series.py b/pandas/sparse/tests/test_series.py index 27112319ea915..f9ac7d9d34072 100644 --- a/pandas/sparse/tests/test_series.py +++ b/pandas/sparse/tests/test_series.py @@ -136,6 +136,15 @@ def test_construct_DataFrame_with_sp_series(self): result = df.ftypes tm.assert_series_equal(expected, result) + def test_constructor_preserve_attr(self): + arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0) + self.assertEqual(arr.dtype, np.int64) + self.assertEqual(arr.fill_value, 0) + + s = pd.SparseSeries(arr, name='x') + self.assertEqual(s.dtype, np.int64) + 
self.assertEqual(s.fill_value, 0) + def test_series_density(self): # GH2803 ts = Series(np.random.randn(10)) diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index dabecefaee9d1..be7a0eccf6b7c 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -1,6 +1,8 @@ # coding=utf-8 # pylint: disable-msg=E1101,W0612 +import numpy as np +import pandas as pd import pandas.util.testing as tm @@ -32,6 +34,11 @@ def test_to_frame(self): tm.assert_frame_equal(res, exp) tm.assertIsInstance(res, tm.SubclassedDataFrame) + +class TestSparseSeriesSubclassing(tm.TestCase): + + _multiprocess_can_split_ = True + def test_subclass_sparse_slice(self): s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5]) tm.assert_sp_series_equal(s.loc[1:3], @@ -53,5 +60,15 @@ def test_subclass_sparse_addition(self): def test_subclass_sparse_to_frame(self): s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx') res = s.to_frame() - exp = tm.SubclassedSparseDataFrame({'xxx': [1, 2]}, index=list('abcd')) + + exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind='block') + exp = tm.SubclassedSparseDataFrame({'xxx': exp_arr}, + index=list('abcd')) + tm.assert_sp_frame_equal(res, exp) + + s = tm.SubclassedSparseSeries([1.1, 2.1], index=list('abcd'), + name='xxx') + res = s.to_frame() + exp = tm.SubclassedSparseDataFrame({'xxx': [1.1, 2.1]}, + index=list('abcd')) tm.assert_sp_frame_equal(res, exp)