BUG: SparseDataFrame construction with lists not coercing to dtype (GH 15682)

Carlos Souza · jreback · commit 1e0fbd2b8698 · 2017-03-30T08:03:49.000-04:00
closes #15682 Author: Carlos Souza <carlos@udacity.com> Closes #15834 from ucals/bug-fix-15682 and squashes the following commits: 04fba8d [Carlos Souza] Adding test_rename test cases (were missing) 483bb2c [Carlos Souza] Doing adjustments as per @jreback requests cc4c15b [Carlos Souza] Fixing coercion bug at SparseDataFrame construction faa5c5c [Carlos Souza] Merge remote-tracking branch 'upstream/master' 43456a5 [Carlos Souza] Merge remote-tracking branch 'upstream/master' 8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master' 9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master' e12bca7 [Carlos Souza] Sync fork 676a4e5 [Carlos Souza] Test
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
@@ -1046,7 +1046,7 @@ Bug Fixes
 - Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
 - Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`)
 
-
+- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`)
 
 - Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
 - Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`)
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
@@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None):
 
         sp_maker = lambda x: SparseArray(x, kind=self._default_kind,
                                          fill_value=self._default_fill_value,
-                                         copy=True)
+                                         copy=True, dtype=dtype)
         sdict = DataFrame()
         for k, v in compat.iteritems(data):
             if isinstance(v, Series):
diff --git a/pandas/tests/sparse/test_frame.py b/pandas/tests/sparse/test_frame.py
@@ -28,7 +28,6 @@
 
 
 class TestSparseDataFrame(tm.TestCase, SharedWithSparse):
-
     klass = SparseDataFrame
 
     def setUp(self):
@@ -237,6 +236,18 @@ def test_constructor_nan_dataframe(self):
                                       dtype=float)
         tm.assert_sp_frame_equal(result, expected)
 
+    def test_type_coercion_at_construction(self):
+        # GH 15682
+        result = pd.SparseDataFrame(
+            {'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8',
+            default_fill_value=0)
+        expected = pd.SparseDataFrame(
+            {'a': pd.SparseSeries([1, 0, 0], dtype='uint8'),
+             'b': pd.SparseSeries([0, 1, 0], dtype='uint8'),
+             'c': pd.SparseSeries([0, 0, 1], dtype='uint8')},
+            default_fill_value=0)
+        tm.assert_sp_frame_equal(result, expected)
+
     def test_dtypes(self):
         df = DataFrame(np.random.randn(10000, 4))
         df.loc[:9998] = np.nan
@@ -756,9 +767,18 @@ def test_sparse_frame_fillna_limit(self):
         tm.assert_frame_equal(result, expected)
 
     def test_rename(self):
-        # just check this works
-        renamed = self.frame.rename(index=str)  # noqa
-        renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x)))  # noqa
+        result = self.frame.rename(index=str)
+        expected = SparseDataFrame(self.data, index=self.dates.strftime(
+            "%Y-%m-%d %H:%M:%S"))
+        tm.assert_sp_frame_equal(result, expected)
+
+        result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x)))
+        data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
+                'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
+                'C1': np.arange(10, dtype=np.float64),
+                'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
+        expected = SparseDataFrame(data, index=self.dates)
+        tm.assert_sp_frame_equal(result, expected)
 
     def test_corr(self):
         res = self.frame.corr()
@@ -967,7 +987,6 @@ def _check(frame, orig):
     def test_shift(self):
 
         def _check(frame, orig):
-
             shifted = frame.shift(0)
             exp = orig.shift(0)
             tm.assert_frame_equal(shifted.to_dense(), exp)
@@ -1060,7 +1079,7 @@ def test_sparse_pow_issue(self):
         df = SparseDataFrame({'A': [nan, 0, 1]})
 
         # note that 2 ** df works fine, also df ** 1
-        result = 1**df
+        result = 1 ** df
 
         r1 = result.take([0], 1)['A']
         r2 = result['A']
@@ -1126,7 +1145,7 @@ def test_isnotnull(self):
         tm.assert_frame_equal(res.to_dense(), exp)
 
 
-@pytest.mark.parametrize('index', [None, list('ab')])    # noqa: F811
+@pytest.mark.parametrize('index', [None, list('ab')])  # noqa: F811
 @pytest.mark.parametrize('columns', [None, list('cd')])
 @pytest.mark.parametrize('fill_value', [None, 0, np.nan])
 @pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
@@ -1180,7 +1199,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
     tm.assert_equal(sdf.to_coo().dtype, np.object_)
 
 
-@pytest.mark.parametrize('fill_value', [None, 0, np.nan])    # noqa: F811
+@pytest.mark.parametrize('fill_value', [None, 0, np.nan])  # noqa: F811
 def test_from_to_scipy_object(spmatrix, fill_value):
     # GH 4343
     dtype = object
@@ -1255,7 +1274,6 @@ def test_comparison_op_scalar(self):
 
 
 class TestSparseDataFrameAnalytics(tm.TestCase):
-
     def setUp(self):
         self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
                      'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],