diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 2e822729873ad..359a038d236b0 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1045,7 +1045,7 @@ Bug Fixes - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`) - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`) - +- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`) - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`) - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`) diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 41f301f263374..455d120cca640 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None): sp_maker = lambda x: SparseArray(x, kind=self._default_kind, fill_value=self._default_fill_value, - copy=True) + copy=True, dtype=dtype) sdict = DataFrame() for k, v in compat.iteritems(data): if isinstance(v, Series): diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py index c0c678c184ee8..ae1a1e35f1859 100644 --- a/pandas/tests/sparse/test_frame.py +++ b/pandas/tests/sparse/test_frame.py @@ -28,7 +28,6 @@ class TestSparseDataFrame(tm.TestCase, SharedWithSparse): - klass = SparseDataFrame def setUp(self): @@ -237,6 +236,18 @@ def test_constructor_nan_dataframe(self): dtype=float) tm.assert_sp_frame_equal(result, expected) + def test_type_coercion_at_construction(self): + # GH 15682 + result = pd.SparseDataFrame( + {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8', + default_fill_value=0) + expected = pd.SparseDataFrame( + {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'), + 'b': pd.SparseSeries([0, 1, 0], dtype='uint8'), + 'c': pd.SparseSeries([0, 0, 1], dtype='uint8')}, + default_fill_value=0) + tm.assert_sp_frame_equal(result, expected) + def test_dtypes(self): df = DataFrame(np.random.randn(10000, 4)) df.loc[:9998] = np.nan @@ -756,9 +767,18 @@ def test_sparse_frame_fillna_limit(self): tm.assert_frame_equal(result, expected) def test_rename(self): - # just check this works - renamed = self.frame.rename(index=str) # noqa - renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa + result = self.frame.rename(index=str) + expected = SparseDataFrame(self.data, index=self.dates.strftime( + "%Y-%m-%d %H:%M:%S")) + tm.assert_sp_frame_equal(result, expected) + + result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) + data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], + 'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], + 'C1': np.arange(10, dtype=np.float64), + 'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} + expected = SparseDataFrame(data, index=self.dates) + tm.assert_sp_frame_equal(result, expected) def test_corr(self): res = self.frame.corr() @@ -967,7 +987,6 @@ def _check(frame, orig): def test_shift(self): def _check(frame, orig): - shifted = frame.shift(0) exp = orig.shift(0) tm.assert_frame_equal(shifted.to_dense(), exp) @@ -1060,7 +1079,7 @@ def test_sparse_pow_issue(self): df = SparseDataFrame({'A': [nan, 0, 1]}) # note that 2 ** df works fine, also df ** 1 - result = 1**df + result = 1 ** df r1 = result.take([0], 1)['A'] r2 = result['A'] @@ -1126,7 +1145,7 @@ def test_isnotnull(self): tm.assert_frame_equal(res.to_dense(), exp) -@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 +@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811 @pytest.mark.parametrize('columns', [None, list('cd')]) @pytest.mark.parametrize('fill_value', [None, 0, np.nan]) @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16]) @@ -1180,7 +1199,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype): tm.assert_equal(sdf.to_coo().dtype, np.object_) -@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 +@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811 def test_from_to_scipy_object(spmatrix, fill_value): # GH 4343 dtype = object @@ -1255,7 +1274,6 @@ def test_comparison_op_scalar(self): class TestSparseDataFrameAnalytics(tm.TestCase): - def setUp(self): self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6], 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],