Skip to content

Commit 1e0fbd2

Browse files
Carlos Souza authored and jreback committed
BUG: SparseDataFrame construction with lists not coercing to dtype (GH 15682)
closes #15682

Author: Carlos Souza <[email protected]>

Closes #15834 from ucals/bug-fix-15682 and squashes the following commits:

04fba8d [Carlos Souza] Adding test_rename test cases (were missing)
483bb2c [Carlos Souza] Doing adjustments as per @jreback requests
cc4c15b [Carlos Souza] Fixing coersion bug at SparseDataFrame construction
faa5c5c [Carlos Souza] Merge remote-tracking branch 'upstream/master'
43456a5 [Carlos Souza] Merge remote-tracking branch 'upstream/master'
8b463cb [Carlos Souza] Merge remote-tracking branch 'upstream/master'
9fc617b [Carlos Souza] Merge remote-tracking branch 'upstream/master'
e12bca7 [Carlos Souza] Sync fork
676a4e5 [Carlos Souza] Test
1 parent 48749ce commit 1e0fbd2

File tree

3 files changed

+29
-11
lines changed

3 files changed

+29
-11
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -1046,7 +1046,7 @@ Bug Fixes
10461046
- Bug in ``pd.concat()`` in which concatting with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
10471047
- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`)
10481048

1049-
1049+
- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`)
10501050

10511051
- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
10521052
- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`)

pandas/sparse/frame.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ def _init_dict(self, data, index, columns, dtype=None):
142142

143143
sp_maker = lambda x: SparseArray(x, kind=self._default_kind,
144144
fill_value=self._default_fill_value,
145-
copy=True)
145+
copy=True, dtype=dtype)
146146
sdict = DataFrame()
147147
for k, v in compat.iteritems(data):
148148
if isinstance(v, Series):

pandas/tests/sparse/test_frame.py

+27-9
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828

2929

3030
class TestSparseDataFrame(tm.TestCase, SharedWithSparse):
31-
3231
klass = SparseDataFrame
3332

3433
def setUp(self):
@@ -237,6 +236,18 @@ def test_constructor_nan_dataframe(self):
237236
dtype=float)
238237
tm.assert_sp_frame_equal(result, expected)
239238

239+
def test_type_coercion_at_construction(self):
240+
# GH 15682
241+
result = pd.SparseDataFrame(
242+
{'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8',
243+
default_fill_value=0)
244+
expected = pd.SparseDataFrame(
245+
{'a': pd.SparseSeries([1, 0, 0], dtype='uint8'),
246+
'b': pd.SparseSeries([0, 1, 0], dtype='uint8'),
247+
'c': pd.SparseSeries([0, 0, 1], dtype='uint8')},
248+
default_fill_value=0)
249+
tm.assert_sp_frame_equal(result, expected)
250+
240251
def test_dtypes(self):
241252
df = DataFrame(np.random.randn(10000, 4))
242253
df.loc[:9998] = np.nan
@@ -756,9 +767,18 @@ def test_sparse_frame_fillna_limit(self):
756767
tm.assert_frame_equal(result, expected)
757768

758769
def test_rename(self):
759-
# just check this works
760-
renamed = self.frame.rename(index=str) # noqa
761-
renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa
770+
result = self.frame.rename(index=str)
771+
expected = SparseDataFrame(self.data, index=self.dates.strftime(
772+
"%Y-%m-%d %H:%M:%S"))
773+
tm.assert_sp_frame_equal(result, expected)
774+
775+
result = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x)))
776+
data = {'A1': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
777+
'B1': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
778+
'C1': np.arange(10, dtype=np.float64),
779+
'D1': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}
780+
expected = SparseDataFrame(data, index=self.dates)
781+
tm.assert_sp_frame_equal(result, expected)
762782

763783
def test_corr(self):
764784
res = self.frame.corr()
@@ -967,7 +987,6 @@ def _check(frame, orig):
967987
def test_shift(self):
968988

969989
def _check(frame, orig):
970-
971990
shifted = frame.shift(0)
972991
exp = orig.shift(0)
973992
tm.assert_frame_equal(shifted.to_dense(), exp)
@@ -1060,7 +1079,7 @@ def test_sparse_pow_issue(self):
10601079
df = SparseDataFrame({'A': [nan, 0, 1]})
10611080

10621081
# note that 2 ** df works fine, also df ** 1
1063-
result = 1**df
1082+
result = 1 ** df
10641083

10651084
r1 = result.take([0], 1)['A']
10661085
r2 = result['A']
@@ -1126,7 +1145,7 @@ def test_isnotnull(self):
11261145
tm.assert_frame_equal(res.to_dense(), exp)
11271146

11281147

1129-
@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
1148+
@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
11301149
@pytest.mark.parametrize('columns', [None, list('cd')])
11311150
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
11321151
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
@@ -1180,7 +1199,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
11801199
tm.assert_equal(sdf.to_coo().dtype, np.object_)
11811200

11821201

1183-
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
1202+
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
11841203
def test_from_to_scipy_object(spmatrix, fill_value):
11851204
# GH 4343
11861205
dtype = object
@@ -1255,7 +1274,6 @@ def test_comparison_op_scalar(self):
12551274

12561275

12571276
class TestSparseDataFrameAnalytics(tm.TestCase):
1258-
12591277
def setUp(self):
12601278
self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
12611279
'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],

0 commit comments

Comments
 (0)