Skip to content

Commit 443b47e

Browse files
committed
BUG: Sparse creation with object dtype may raise TypeError
1 parent 4e4a7d9 commit 443b47e

File tree

6 files changed

+126
-8
lines changed

6 files changed

+126
-8
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ Bug Fixes
116116
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing may raise ``IndexError`` (:issue:`13144`)
117117
- Bug in ``SparseSeries`` with ``MultiIndex`` ``[]`` indexing result may have normal ``Index`` (:issue:`13144`)
118118
- Bug in ``SparseDataFrame`` in which ``axis=None`` did not default to ``axis=0`` (:issue:`13048`)
119+
- Bug in ``SparseSeries`` and ``SparseDataFrame`` creation in which ``object`` dtype data may raise ``TypeError`` (:issue:`11633`)
119120
- Bug when passing a non-default-indexed ``Series`` as ``xerr`` or ``yerr`` in ``.plot()`` (:issue:`11858`)
120121

121122

pandas/sparse/array.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -152,9 +152,17 @@ def __new__(cls, data, sparse_index=None, index=None, kind='integer',
152152

153153
# Create array, do *not* copy data by default
154154
if copy:
155-
subarr = np.array(values, dtype=dtype, copy=True)
155+
try:
156+
# ToDo: Can remove this error handling when we actually
157+
# support other dtypes
158+
subarr = np.array(values, dtype=dtype, copy=True)
159+
except ValueError:
160+
subarr = np.array(values, copy=True)
156161
else:
157-
subarr = np.asarray(values, dtype=dtype)
162+
try:
163+
subarr = np.asarray(values, dtype=dtype)
164+
except ValueError:
165+
subarr = np.asarray(values)
158166

159167
# if we have a bool type, make sure that we have a bool fill_value
160168
if ((dtype is not None and issubclass(dtype.type, np.bool_)) or
@@ -437,12 +445,12 @@ def count(self):
437445

438446
@property
439447
def _null_fill_value(self):
440-
return np.isnan(self.fill_value)
448+
return com.isnull(self.fill_value)
441449

442450
@property
443451
def _valid_sp_values(self):
444452
sp_vals = self.sp_values
445-
mask = np.isfinite(sp_vals)
453+
mask = com.notnull(sp_vals)
446454
return sp_vals[mask]
447455

448456
@Appender(_index_shared_docs['fillna'] % _sparray_doc_kwargs)
@@ -616,8 +624,8 @@ def make_sparse(arr, kind='block', fill_value=nan):
616624
if arr.ndim > 1:
617625
raise TypeError("expected dimension <= 1 data")
618626

619-
if np.isnan(fill_value):
620-
mask = ~np.isnan(arr)
627+
if com.isnull(fill_value):
628+
mask = com.notnull(arr)
621629
else:
622630
mask = arr != fill_value
623631

pandas/sparse/tests/test_array.py

+11
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,17 @@ def test_constructor_dtype(self):
4646
self.assertEqual(arr.dtype, np.int64)
4747
self.assertEqual(arr.fill_value, 0)
4848

49+
def test_constructor_object_dtype(self):
50+
# GH 11856
51+
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object)
52+
self.assertEqual(arr.dtype, np.object)
53+
self.assertTrue(np.isnan(arr.fill_value))
54+
55+
arr = SparseArray(['A', 'A', np.nan, 'B'], dtype=np.object,
56+
fill_value='A')
57+
self.assertEqual(arr.dtype, np.object)
58+
self.assertEqual(arr.fill_value, 'A')
59+
4960
def test_constructor_spindex_dtype(self):
5061
arr = SparseArray(data=[1, 2], sparse_index=IntIndex(4, [1, 2]))
5162
tm.assert_sp_array_equal(arr, SparseArray([np.nan, 1, 2, np.nan]))

pandas/sparse/tests/test_groupby.py

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# -*- coding: utf-8 -*-
2+
import numpy as np
3+
import pandas as pd
4+
import pandas.util.testing as tm
5+
6+
7+
class TestSparseGroupBy(tm.TestCase):
8+
9+
_multiprocess_can_split_ = True
10+
11+
def setUp(self):
12+
self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
13+
'foo', 'bar', 'foo', 'foo'],
14+
'B': ['one', 'one', 'two', 'three',
15+
'two', 'two', 'one', 'three'],
16+
'C': np.random.randn(8),
17+
'D': np.random.randn(8),
18+
'E': [np.nan, np.nan, 1, 2,
19+
np.nan, 1, np.nan, np.nan]})
20+
self.sparse = self.dense.to_sparse()
21+
22+
def test_first_last_nth(self):
23+
# tests for first / last / nth
24+
sparse_grouped = self.sparse.groupby('A')
25+
dense_grouped = self.dense.groupby('A')
26+
27+
tm.assert_frame_equal(sparse_grouped.first(),
28+
dense_grouped.first())
29+
tm.assert_frame_equal(sparse_grouped.last(),
30+
dense_grouped.last())
31+
tm.assert_frame_equal(sparse_grouped.nth(1),
32+
dense_grouped.nth(1))
33+
34+
def test_aggfuncs(self):
35+
sparse_grouped = self.sparse.groupby('A')
36+
dense_grouped = self.dense.groupby('A')
37+
38+
tm.assert_frame_equal(sparse_grouped.mean(),
39+
dense_grouped.mean())
40+
41+
# ToDo: sparse sum includes str column
42+
# tm.assert_frame_equal(sparse_grouped.sum(),
43+
# dense_grouped.sum())
44+
45+
tm.assert_frame_equal(sparse_grouped.count(),
46+
dense_grouped.count())

pandas/sparse/tests/test_pivot.py

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import numpy as np
2+
import pandas as pd
3+
import pandas.util.testing as tm
4+
5+
6+
class TestPivotTable(tm.TestCase):
7+
8+
_multiprocess_can_split_ = True
9+
10+
def setUp(self):
11+
self.dense = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
12+
'foo', 'bar', 'foo', 'foo'],
13+
'B': ['one', 'one', 'two', 'three',
14+
'two', 'two', 'one', 'three'],
15+
'C': np.random.randn(8),
16+
'D': np.random.randn(8),
17+
'E': [np.nan, np.nan, 1, 2,
18+
np.nan, 1, np.nan, np.nan]})
19+
self.sparse = self.dense.to_sparse()
20+
21+
def test_pivot_table(self):
22+
res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
23+
values='C')
24+
res_dense = pd.pivot_table(self.dense, index='A', columns='B',
25+
values='C')
26+
tm.assert_frame_equal(res_sparse, res_dense)
27+
28+
res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
29+
values='E')
30+
res_dense = pd.pivot_table(self.dense, index='A', columns='B',
31+
values='E')
32+
tm.assert_frame_equal(res_sparse, res_dense)
33+
34+
res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
35+
values='E', aggfunc='mean')
36+
res_dense = pd.pivot_table(self.dense, index='A', columns='B',
37+
values='E', aggfunc='mean')
38+
tm.assert_frame_equal(res_sparse, res_dense)
39+
40+
# ToDo: sum doesn't handle nan properly
41+
# res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
42+
# values='E', aggfunc='sum')
43+
# res_dense = pd.pivot_table(self.dense, index='A', columns='B',
44+
# values='E', aggfunc='sum')
45+
# tm.assert_frame_equal(res_sparse, res_dense)
46+
47+
def test_pivot_table_multi(self):
48+
res_sparse = pd.pivot_table(self.sparse, index='A', columns='B',
49+
values=['D', 'E'])
50+
res_dense = pd.pivot_table(self.dense, index='A', columns='B',
51+
values=['D', 'E'])
52+
tm.assert_frame_equal(res_sparse, res_dense)

pandas/tests/test_groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -4508,7 +4508,7 @@ def test_groupby_with_empty(self):
45084508
grouped = series.groupby(grouper)
45094509
assert next(iter(grouped), None) is None
45104510

4511-
def test_aaa_groupby_with_small_elem(self):
4511+
def test_groupby_with_small_elem(self):
45124512
# GH 8542
45134513
# length=2
45144514
df = pd.DataFrame({'event': ['start', 'start'],
@@ -5972,7 +5972,7 @@ def test__cython_agg_general(self):
59725972
exc.args += ('operation: %s' % op, )
59735973
raise
59745974

5975-
def test_aa_cython_group_transform_algos(self):
5975+
def test_cython_group_transform_algos(self):
59765976
# GH 4095
59775977
dtypes = [np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint32,
59785978
np.uint64, np.float32, np.float64]

0 commit comments

Comments
 (0)