Skip to content

Commit cc4c15b

Browse files
author
Carlos Souza
committed
Fixing coercion bug at SparseDataFrame construction
1 parent faa5c5c commit cc4c15b

File tree

3 files changed

+16
-10
lines changed

3 files changed

+16
-10
lines changed

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1045,7 +1045,7 @@ Bug Fixes
10451045
- Bug in ``pd.concat()`` in which concatenating with an empty dataframe with ``join='inner'`` was being improperly handled (:issue:`15328`)
10461046
- Bug in ``groupby.agg()`` incorrectly localizing timezone on ``datetime`` (:issue:`15426`, :issue:`10668`, :issue:`13046`)
10471047

1048-
1048+
- Bug in ``SparseDataFrame`` construction with lists not coercing to dtype (:issue:`15682`)
10491049

10501050
- Bug in ``.read_csv()`` with ``parse_dates`` when multiline headers are specified (:issue:`15376`)
10511051
- Bug in ``groupby.transform()`` that would coerce the resultant dtypes back to the original (:issue:`10972`, :issue:`11444`)

pandas/sparse/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ def _init_dict(self, data, index, columns, dtype=None):
159159
v = [v.get(i, nan) for i in index]
160160

161161
v = sp_maker(v)
162-
sdict[k] = v
162+
sdict[k] = v.astype(np.dtype(dtype)) if dtype is not None else v
163163

164164
# TODO: figure out how to handle this case, all nan's?
165165
# add in any other columns we want to have (completeness)

pandas/tests/sparse/test_frame.py

+14-8
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@
2828

2929

3030
class TestSparseDataFrame(tm.TestCase, SharedWithSparse):
31-
3231
klass = SparseDataFrame
3332

3433
def setUp(self):
@@ -758,7 +757,8 @@ def test_sparse_frame_fillna_limit(self):
758757
def test_rename(self):
759758
# just check this works
760759
renamed = self.frame.rename(index=str) # noqa
761-
renamed = self.frame.rename(columns=lambda x: '%s%d' % (x, len(x))) # noqa
760+
renamed = self.frame.rename(
761+
columns=lambda x: '%s%d' % (x, len(x))) # noqa
762762

763763
def test_corr(self):
764764
res = self.frame.corr()
@@ -967,7 +967,6 @@ def _check(frame, orig):
967967
def test_shift(self):
968968

969969
def _check(frame, orig):
970-
971970
shifted = frame.shift(0)
972971
exp = orig.shift(0)
973972
tm.assert_frame_equal(shifted.to_dense(), exp)
@@ -1060,7 +1059,7 @@ def test_sparse_pow_issue(self):
10601059
df = SparseDataFrame({'A': [nan, 0, 1]})
10611060

10621061
# note that 2 ** df works fine, also df ** 1
1063-
result = 1**df
1062+
result = 1 ** df
10641063

10651064
r1 = result.take([0], 1)['A']
10661065
r2 = result['A']
@@ -1126,7 +1125,7 @@ def test_isnotnull(self):
11261125
tm.assert_frame_equal(res.to_dense(), exp)
11271126

11281127

1129-
@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
1128+
@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
11301129
@pytest.mark.parametrize('columns', [None, list('cd')])
11311130
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
11321131
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
@@ -1180,7 +1179,7 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
11801179
tm.assert_equal(sdf.to_coo().dtype, np.object_)
11811180

11821181

1183-
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
1182+
@pytest.mark.parametrize('fill_value', [None, 0, np.nan]) # noqa: F811
11841183
def test_from_to_scipy_object(spmatrix, fill_value):
11851184
# GH 4343
11861185
dtype = object
@@ -1226,7 +1225,6 @@ def test_from_to_scipy_object(spmatrix, fill_value):
12261225

12271226

12281227
class TestSparseDataFrameArithmetic(tm.TestCase):
1229-
12301228
def test_numeric_op_scalar(self):
12311229
df = pd.DataFrame({'A': [nan, nan, 0, 1, ],
12321230
'B': [0, 1, 2, nan],
@@ -1255,7 +1253,6 @@ def test_comparison_op_scalar(self):
12551253

12561254

12571255
class TestSparseDataFrameAnalytics(tm.TestCase):
1258-
12591256
def setUp(self):
12601257
self.data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
12611258
'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
@@ -1299,3 +1296,12 @@ def test_numpy_func_call(self):
12991296
'std', 'min', 'max']
13001297
for func in funcs:
13011298
getattr(np, func)(self.frame)
1299+
1300+
def test_type_coercion_at_construction(self):
1301+
# GH 15682
1302+
df = pd.SparseDataFrame(
1303+
{'a': [1, 0, 0], 'b': [0, 1, 0], 'c': [0, 0, 1]}, dtype='uint8',
1304+
default_fill_value=0)
1305+
result = df.dtypes[0]
1306+
expected = np.dtype('uint8')
1307+
assert result == expected

0 commit comments

Comments
 (0)