Skip to content

Commit 8b5305b

Browse files
committed
BUG: SparseSeries init from dict fixes
1 parent a9421af commit 8b5305b

File tree

3 files changed

+99
-4
lines changed

3 files changed

+99
-4
lines changed

doc/source/whatsnew/v0.21.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ Sparse
179179
^^^^^^
180180

181181

182+
- Bug in instantiating :class:`SparseSeries` from ``dict`` with or without ``index`` (:issue:`16905`)
182183

183184
Reshaping
184185
^^^^^^^^^

pandas/core/sparse/series.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -146,10 +146,8 @@ def __init__(self, data=None, index=None, sparse_index=None, kind='block',
146146
data = data._data
147147

148148
elif isinstance(data, (Series, dict)):
149-
if index is None:
150-
index = data.index.view()
151-
152-
data = Series(data)
149+
data = Series(data, index=index)
150+
index = data.index
153151
res = make_sparse(data, kind=kind, fill_value=fill_value)
154152
data, sparse_index, fill_value = res
155153

pandas/tests/sparse/test_series.py

+96
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# pylint: disable-msg=E1101,W0612
22

33
import operator
4+
from collections import OrderedDict
5+
from datetime import datetime
6+
47
import pytest
58

69
from numpy import nan
@@ -1361,3 +1364,96 @@ def test_numpy_func_call(self):
13611364
for func in funcs:
13621365
for series in ('bseries', 'zbseries'):
13631366
getattr(np, func)(getattr(self, series))
1367+
1368+
1369+
def test_constructor_dict():
    # A dict given without an index must match the same dict given with
    # its sorted keys as the explicit index.
    mapping = {'a': 0., 'b': 1., 'c': 2.}
    tm.assert_sp_series_equal(
        SparseSeries(mapping),
        SparseSeries(mapping, index=sorted(mapping.keys())))

    # With an explicit index the values follow the index order, and a
    # label that is not a key of the dict ('d') is expected to hold NaN.
    labels = ['b', 'c', 'd', 'a']
    tm.assert_sp_series_equal(
        SparseSeries(mapping, index=labels),
        SparseSeries([1, 2, nan, 0], index=labels))
1378+
1379+
1380+
def test_constructor_dict_multiindex():
    # Dict keyed only by tuples: compare against a series built from the
    # sorted items, indexed by a MultiIndex made from the keys.
    d = {('a', 'a'): 0., ('b', 'a'): 1., ('b', 'c'): 2.}
    ordered_items = sorted(d.items())
    keys = [item[0] for item in ordered_items]
    values = [item[1] for item in ordered_items]

    result = SparseSeries(d)
    expected = SparseSeries(values, index=pd.MultiIndex.from_tuples(keys))
    tm.assert_series_equal(result, expected)

    # Add a plain string key next to the tuple keys.  The expected index is
    # a flat Index (tupleize_cols=False) with 'z' placed first, and the
    # result is reindexed to it so that only the contents are compared.
    d['z'] = 111.
    ordered_items.insert(0, ('z', d['z']))
    keys = [item[0] for item in ordered_items]
    values = [item[1] for item in ordered_items]

    result = SparseSeries(d)
    expected = SparseSeries(values,
                            index=pd.Index(keys, tupleize_cols=False))
    result = result.reindex(index=expected.index)
    tm.assert_series_equal(result, expected)
1397+
1398+
1399+
def test_constructor_dict_timedelta_index():
    # GH #12169 (resample category data with timedelta index):
    # constructing from a dict as data together with a TimedeltaIndex as
    # index was reported to yield NaN in the resulting data.
    seconds = [0, 10, 20]
    labels = ['A', 'B', 'C']

    expected = SparseSeries(
        data=labels,
        index=pd.to_timedelta(seconds, unit='s')
    )

    # Same values, but keyed by individual timedeltas in a dict.
    keyed = {pd.to_timedelta(sec, unit='s'): label
             for sec, label in zip(seconds, labels)}
    result = SparseSeries(
        data=keyed,
        index=pd.to_timedelta(seconds, unit='s')
    )
    tm.assert_sp_series_equal(result, expected)
1415+
1416+
1417+
def test_constructor_subclass_dict():
    # A tm.TestSubDict (presumably a dict subclass -- confirm in
    # pandas.util.testing) must construct the same sparse series as a
    # plain dict holding the same items.
    sub_dict = tm.TestSubDict((x, 10.0 * x) for x in range(10))
    from_subclass = SparseSeries(sub_dict)

    plain_dict = dict(compat.iteritems(sub_dict))
    from_plain = SparseSeries(plain_dict)

    tm.assert_sp_series_equal(from_plain, from_subclass)
1422+
1423+
1424+
def test_constructor_dict_datetime64_index():
    # GH 9456
    # Dicts keyed by np.datetime64, datetime.datetime or pd.Timestamp must
    # all produce the same series as one indexed by Timestamps.
    dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15']
    values = [42544017.198965244, 1234565, 40512335.181958228, -1]

    def keyed_by(make_key):
        # Map each date, converted by ``make_key``, to its value.
        return dict(zip([make_key(s) for s in dates_as_str], values))

    key_constructors = (
        np.datetime64,
        lambda x: datetime.strptime(x, '%Y-%m-%d'),
        pd.Timestamp,
    )
    inputs = [keyed_by(make_key) for make_key in key_constructors]

    expected = SparseSeries(values, (pd.Timestamp(x) for x in dates_as_str))

    results = [SparseSeries(data) for data in inputs]
    for result in results:
        tm.assert_sp_series_equal(result, expected)
1445+
1446+
1447+
def test_orderedDict_ctor():
    # GH3283
    # The sparse values must equal the dict's values in the dict's own
    # order, both for OrderedDict itself and for a subclass of it.
    class A(OrderedDict):
        pass

    for dict_type in (OrderedDict, A):
        data = dict_type(
            ('col%s' % i, np.random.random()) for i in range(12))
        s = SparseSeries(data)
        tm.assert_numpy_array_equal(s.values.values,
                                    np.array(list(data.values())))

0 commit comments

Comments
 (0)