Skip to content

Commit caf69d5

Browse files
sinhrks authored and jreback committed
BUG: SparseDataFrame may not preserve passed dtype (#13866)
1 parent 66c3b46 commit caf69d5

File tree

6 files changed

+53
-7
lines changed

6 files changed

+53
-7
lines changed

doc/source/whatsnew/v0.19.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -752,6 +752,7 @@ Bug Fixes
752752
- Bug in ``SparseSeries`` with ``MultiIndex`` where the result of ``[]`` indexing may have a normal ``Index`` (:issue:`13144`)
753753
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
754754
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation with ``object`` dtype may raise ``TypeError`` (:issue:`11633`)
755+
- Bug in ``SparseDataFrame`` construction where the ``dtype`` and ``fill_value`` of a passed ``SparseArray`` or ``SparseSeries`` were not respected (:issue:`13866`)
755756
- Bug when passing a ``Series`` with a non-default index as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
756757
- Bug in matplotlib ``AutoDataFormatter``; this restores the second scaled formatting and re-adds micro-second scaled formatting (:issue:`13131`)
757758
- Bug in selection from a ``HDFStore`` with a fixed format and ``start`` and/or ``stop`` specified will now return the selected range (:issue:`8287`)

pandas/sparse/array.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99

1010
import pandas as pd
1111
from pandas.core.base import PandasObject
12-
import pandas.core.common as com
1312

1413
from pandas import compat, lib
1514
from pandas.compat import range
@@ -577,11 +576,9 @@ def _maybe_to_dense(obj):
577576

578577

579578
def _maybe_to_sparse(array):
579+
""" array must be SparseSeries or SparseArray """
580580
if isinstance(array, ABCSparseSeries):
581-
array = SparseArray(array.values, sparse_index=array.sp_index,
582-
fill_value=array.fill_value, copy=True)
583-
if not isinstance(array, SparseArray):
584-
array = com._values_from_object(array)
581+
array = array.values.copy()
585582
return array
586583

587584

pandas/sparse/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ def _init_dict(self, data, index, columns, dtype=None):
151151
if not isinstance(v, SparseSeries):
152152
v = sp_maker(v.values)
153153
elif isinstance(v, SparseArray):
154-
v = sp_maker(v.values)
154+
v = v.copy()
155155
else:
156156
if isinstance(v, dict):
157157
v = [v.get(i, nan) for i in index]

pandas/sparse/tests/test_frame.py

+22
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,28 @@ def test_constructor_from_series(self):
192192
# without sparse value raises error
193193
# df2 = SparseDataFrame([x2_sparse, y])
194194

195+
def test_constructor_preserve_attr(self):
196+
# GH 13866
197+
arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
198+
self.assertEqual(arr.dtype, np.int64)
199+
self.assertEqual(arr.fill_value, 0)
200+
201+
df = pd.SparseDataFrame({'x': arr})
202+
self.assertEqual(df['x'].dtype, np.int64)
203+
self.assertEqual(df['x'].fill_value, 0)
204+
205+
s = pd.SparseSeries(arr, name='x')
206+
self.assertEqual(s.dtype, np.int64)
207+
self.assertEqual(s.fill_value, 0)
208+
209+
df = pd.SparseDataFrame(s)
210+
self.assertEqual(df['x'].dtype, np.int64)
211+
self.assertEqual(df['x'].fill_value, 0)
212+
213+
df = pd.SparseDataFrame({'x': s})
214+
self.assertEqual(df['x'].dtype, np.int64)
215+
self.assertEqual(df['x'].fill_value, 0)
216+
195217
def test_dtypes(self):
196218
df = DataFrame(np.random.randn(10000, 4))
197219
df.ix[:9998] = np.nan

pandas/sparse/tests/test_series.py

+9
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,15 @@ def test_construct_DataFrame_with_sp_series(self):
136136
result = df.ftypes
137137
tm.assert_series_equal(expected, result)
138138

139+
def test_constructor_preserve_attr(self):
140+
arr = pd.SparseArray([1, 0, 3, 0], dtype=np.int64, fill_value=0)
141+
self.assertEqual(arr.dtype, np.int64)
142+
self.assertEqual(arr.fill_value, 0)
143+
144+
s = pd.SparseSeries(arr, name='x')
145+
self.assertEqual(s.dtype, np.int64)
146+
self.assertEqual(s.fill_value, 0)
147+
139148
def test_series_density(self):
140149
# GH2803
141150
ts = Series(np.random.randn(10))

pandas/tests/series/test_subclass.py

+18-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# coding=utf-8
22
# pylint: disable-msg=E1101,W0612
33

4+
import numpy as np
5+
import pandas as pd
46
import pandas.util.testing as tm
57

68

@@ -32,6 +34,11 @@ def test_to_frame(self):
3234
tm.assert_frame_equal(res, exp)
3335
tm.assertIsInstance(res, tm.SubclassedDataFrame)
3436

37+
38+
class TestSparseSeriesSubclassing(tm.TestCase):
39+
40+
_multiprocess_can_split_ = True
41+
3542
def test_subclass_sparse_slice(self):
3643
s = tm.SubclassedSparseSeries([1, 2, 3, 4, 5])
3744
tm.assert_sp_series_equal(s.loc[1:3],
@@ -53,5 +60,15 @@ def test_subclass_sparse_addition(self):
5360
def test_subclass_sparse_to_frame(self):
5461
s = tm.SubclassedSparseSeries([1, 2], index=list('abcd'), name='xxx')
5562
res = s.to_frame()
56-
exp = tm.SubclassedSparseDataFrame({'xxx': [1, 2]}, index=list('abcd'))
63+
64+
exp_arr = pd.SparseArray([1, 2], dtype=np.int64, kind='block')
65+
exp = tm.SubclassedSparseDataFrame({'xxx': exp_arr},
66+
index=list('abcd'))
67+
tm.assert_sp_frame_equal(res, exp)
68+
69+
s = tm.SubclassedSparseSeries([1.1, 2.1], index=list('abcd'),
70+
name='xxx')
71+
res = s.to_frame()
72+
exp = tm.SubclassedSparseDataFrame({'xxx': [1.1, 2.1]},
73+
index=list('abcd'))
5774
tm.assert_sp_frame_equal(res, exp)

0 commit comments

Comments
 (0)