Skip to content

Commit d24e464

Browse files
committed
Add more tests and fix some existing ones
1 parent b04ac64 commit d24e464

File tree

6 files changed

+81
-38
lines changed

6 files changed

+81
-38
lines changed

pandas/core/internals.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -5523,7 +5523,10 @@ def is_na(self):
55235523
def get_reindexed_values(self, empty_dtype, upcasted_na):
55245524
if upcasted_na is None:
55255525
# No upcasting is necessary
5526-
fill_value = self.block.fill_value
5526+
5527+
# One might expect self.block.fill_value to be the right fill value here.
5528+
# But in practice that fills the result with incorrect values, so use NaN.
5529+
fill_value = np.nan
55275530
values = self.block.get_values()
55285531
else:
55295532
fill_value = upcasted_na

pandas/core/sparse/frame.py

+4
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ def __init__(self, data=None, index=None, columns=None, default_kind=None,
7373
if columns is None:
7474
raise Exception("cannot pass a series w/o a name or columns")
7575
data = {columns[0]: data}
76+
elif isinstance(data, BlockManager):
77+
fill_value_size = len(set(b.fill_value for b in data.blocks))
78+
if default_fill_value is None and fill_value_size == 1:
79+
default_fill_value = data.blocks[0].fill_value
7680

7781
if default_fill_value is None:
7882
default_fill_value = np.nan

pandas/core/sparse/series.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ def to_dense(self, sparse_only=False):
544544
index = self.index.take(int_index.indices)
545545
return Series(self.sp_values, index=index, name=self.name)
546546
else:
547-
return Series(self.values.to_dense(), index=self.index,
547+
return Series(self.get_values(), index=self.index,
548548
name=self.name)
549549

550550
@property

pandas/tests/reshape/merge/test_merge.py

+57-7
Original file line numberDiff line numberDiff line change
@@ -1805,7 +1805,9 @@ def test_merge_on_indexes(self, left_df, right_df, how, sort, expected):
18051805

18061806
class TestMergeSparseDataFrames(object):
18071807
# Cannot seem to get 0 or 1 working with sparse data frame
1808-
@pytest.mark.parametrize('fill_value,how', itertools.product([np.nan],
1808+
@pytest.mark.parametrize('fill_value,how', itertools.product([0, 1,
1809+
None,
1810+
np.nan],
18091811
['left',
18101812
'right',
18111813
'outer',
@@ -1816,6 +1818,57 @@ def test_merge_two_sparse_frames(self, fill_value, how):
18161818
dense_threes = pd.DataFrame({'A': list(range(0, 300, 3)),
18171819
'B': np.random.randint(0, 100, size=100)})
18181820

1821+
sparse_evens = dense_evens.to_sparse(fill_value=fill_value)
1822+
sparse_threes = dense_threes.to_sparse(fill_value=fill_value)
1823+
1824+
to_merge_sparse = [sparse_evens, sparse_threes]
1825+
1826+
to_merge_dense = [dense_evens, dense_threes]
1827+
1828+
for _ in range(2):
1829+
sparse_merge = to_merge_sparse[0].merge(to_merge_sparse[1],
1830+
how=how, on='A')
1831+
1832+
dense_merge = to_merge_dense[0].merge(to_merge_dense[1],
1833+
how=how, on='A')
1834+
1835+
# If you merge two dense frames together it tends to default to
1836+
# float64, not the original dtype
1837+
dense_merge['B_x'] = dense_merge['B_x'].astype(np.int64,
1838+
errors='ignore')
1839+
dense_merge['B_y'] = dense_merge['B_y'].astype(np.int64,
1840+
errors='ignore')
1841+
1842+
if fill_value is None or fill_value is np.nan:
1843+
assert sparse_merge.default_fill_value is np.nan
1844+
else:
1845+
tm.assert_almost_equal(sparse_merge.default_fill_value,
1846+
fill_value)
1847+
1848+
exp = dense_merge.to_sparse(fill_value=fill_value),
1849+
tm.assert_sp_frame_equal(sparse_merge, exp,
1850+
exact_indices=False,
1851+
check_dtype=False)
1852+
1853+
to_merge_sparse = to_merge_sparse[::-1]
1854+
to_merge_dense = to_merge_dense[::-1]
1855+
1856+
@pytest.mark.parametrize('fill_value,how', itertools.product([0, 1,
1857+
None,
1858+
np.nan],
1859+
['left',
1860+
'right',
1861+
'outer',
1862+
'inner']))
1863+
def test_merge_dense_sparse_frames(self, fill_value, how):
1864+
fill_value = np.nan
1865+
1866+
dense_evens = pd.DataFrame({'A': list(range(0, 200, 2)),
1867+
'B': np.random.randint(0, 100, size=100)})
1868+
1869+
dense_threes = pd.DataFrame({'A': list(range(0, 300, 3)),
1870+
'B': np.random.randint(0, 100, size=100)})
1871+
18191872
dense_merge = dense_evens.merge(dense_threes, how=how, on='A')
18201873

18211874
# If you merge two dense frames together it tends to default to
@@ -1826,15 +1879,12 @@ def test_merge_two_sparse_frames(self, fill_value, how):
18261879
errors='ignore')
18271880

18281881
sparse_evens = dense_evens.to_sparse(fill_value=fill_value)
1829-
sparse_threes = dense_threes.to_sparse(fill_value=fill_value)
1882+
# sparse_threes = dense_threes.to_sparse(fill_value=fill_value)
18301883

1831-
sparse_merge = sparse_evens.merge(sparse_threes, how=how, on='A')
1884+
sparse_merge = sparse_evens.merge(dense_threes, how=how, on='A')
18321885

1833-
assert sparse_merge.default_fill_value is fill_value
1886+
tm.assert_almost_equal(sparse_merge.default_fill_value, fill_value)
18341887

18351888
tm.assert_sp_frame_equal(dense_merge.to_sparse(fill_value=fill_value),
18361889
sparse_merge, exact_indices=False,
18371890
check_dtype=False)
1838-
1839-
def test_merge_dense_sparse_frames(self, fill_value=1, how=None):
1840-
"pass"

pandas/tests/sparse/test_combine_concat.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -168,22 +168,18 @@ def test_concat(self):
168168

169169
res = pd.concat([sparse, sparse])
170170
exp = pd.concat([self.dense1, self.dense1]).to_sparse(fill_value=0)
171-
exp._default_fill_value = np.nan
172171
tm.assert_sp_frame_equal(res, exp)
173172

174173
res = pd.concat([sparse2, sparse2])
175174
exp = pd.concat([self.dense2, self.dense2]).to_sparse(fill_value=0)
176-
exp._default_fill_value = np.nan
177175
tm.assert_sp_frame_equal(res, exp)
178176

179177
res = pd.concat([sparse, sparse2])
180178
exp = pd.concat([self.dense1, self.dense2]).to_sparse(fill_value=0)
181-
exp._default_fill_value = np.nan
182179
tm.assert_sp_frame_equal(res, exp)
183180

184181
res = pd.concat([sparse2, sparse])
185182
exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0)
186-
exp._default_fill_value = np.nan
187183
tm.assert_sp_frame_equal(res, exp)
188184

189185
def test_concat_different_fill_value(self):
@@ -197,7 +193,6 @@ def test_concat_different_fill_value(self):
197193

198194
res = pd.concat([sparse2, sparse])
199195
exp = pd.concat([self.dense2, self.dense1]).to_sparse(fill_value=0)
200-
exp._default_fill_value = np.nan
201196
tm.assert_sp_frame_equal(res, exp)
202197

203198
def test_concat_different_columns(self):
@@ -220,12 +215,10 @@ def test_concat_different_columns(self):
220215

221216
res = pd.concat([sparse, sparse3])
222217
exp = pd.concat([self.dense1, self.dense3]).to_sparse(fill_value=0)
223-
exp._default_fill_value = np.nan
224218
tm.assert_sp_frame_equal(res, exp)
225219

226220
res = pd.concat([sparse3, sparse])
227221
exp = pd.concat([self.dense3, self.dense1]).to_sparse(fill_value=0)
228-
exp._default_fill_value = np.nan
229222
tm.assert_sp_frame_equal(res, exp)
230223

231224
# different fill values
@@ -264,13 +257,11 @@ def test_concat_series(self):
264257
res = pd.concat([sparse, sparse2[col]])
265258
exp = pd.concat([self.dense1,
266259
self.dense2[col]]).to_sparse(fill_value=0)
267-
exp._default_fill_value = np.nan
268260
tm.assert_sp_frame_equal(res, exp)
269261

270262
res = pd.concat([sparse2[col], sparse])
271263
exp = pd.concat([self.dense2[col],
272264
self.dense1]).to_sparse(fill_value=0)
273-
exp._default_fill_value = np.nan
274265
tm.assert_sp_frame_equal(res, exp)
275266

276267
def test_concat_axis1(self):
@@ -294,13 +285,13 @@ def test_concat_axis1(self):
294285
res = pd.concat([sparse, sparse3], axis=1)
295286
exp = pd.concat([self.dense1, self.dense3],
296287
axis=1).to_sparse(fill_value=0)
297-
exp._default_fill_value = np.nan
288+
# exp._default_fill_value = np.nan
298289
tm.assert_sp_frame_equal(res, exp)
299290

300291
res = pd.concat([sparse3, sparse], axis=1)
301292
exp = pd.concat([self.dense3, self.dense1],
302293
axis=1).to_sparse(fill_value=0)
303-
exp._default_fill_value = np.nan
294+
# exp._default_fill_value = np.nan
304295
tm.assert_sp_frame_equal(res, exp)
305296

306297
# different fill values

pandas/tests/sparse/test_indexing.py

+13-18
Original file line numberDiff line numberDiff line change
@@ -615,32 +615,29 @@ def test_getitem(self):
615615
tm.assert_sp_frame_equal(sparse.iloc[[1, 2]],
616616
orig.iloc[[1, 2]].to_sparse())
617617

618-
def test_getitem_fill_value(self):
618+
@pytest.mark.parametrize('fill_value', [0, 1, np.nan, None])
619+
def test_getitem_fill_value(self, fill_value):
619620
orig = pd.DataFrame([[1, np.nan, 0],
620621
[2, 3, np.nan],
621622
[0, np.nan, 4],
622623
[0, np.nan, 5]],
623624
columns=list('xyz'))
624-
sparse = orig.to_sparse(fill_value=0)
625+
sparse = orig.to_sparse(fill_value=fill_value)
625626

626627
tm.assert_sp_series_equal(sparse['y'],
627628
orig['y'].to_sparse(fill_value=0))
628629

629-
exp = orig[['x']].to_sparse(fill_value=0)
630-
exp._default_fill_value = np.nan
630+
exp = orig[['x']].to_sparse(fill_value=fill_value)
631631
tm.assert_sp_frame_equal(sparse[['x']], exp)
632632

633-
exp = orig[['z', 'x']].to_sparse(fill_value=0)
634-
exp._default_fill_value = np.nan
633+
exp = orig[['z', 'x']].to_sparse(fill_value=fill_value)
635634
tm.assert_sp_frame_equal(sparse[['z', 'x']], exp)
636635

637636
indexer = [True, False, True, True]
638-
exp = orig[indexer].to_sparse(fill_value=0)
639-
exp._default_fill_value = np.nan
637+
exp = orig[indexer].to_sparse(fill_value=fill_value)
640638
tm.assert_sp_frame_equal(sparse[indexer], exp)
641639

642-
exp = orig.iloc[[1, 2]].to_sparse(fill_value=0)
643-
exp._default_fill_value = np.nan
640+
exp = orig.iloc[[1, 2]].to_sparse(fill_value=fill_value)
644641
tm.assert_sp_frame_equal(sparse.iloc[[1, 2]], exp)
645642

646643
def test_loc(self):
@@ -877,24 +874,22 @@ def test_take(self):
877874
tm.assert_sp_frame_equal(sparse.take([-1, -2]),
878875
orig.take([-1, -2]).to_sparse())
879876

880-
def test_take_fill_value(self):
877+
@pytest.mark.parametrize('fill_value', [0, 1, np.nan, None])
878+
def test_take_fill_value(self, fill_value):
881879
orig = pd.DataFrame([[1, np.nan, 0],
882880
[2, 3, np.nan],
883881
[0, np.nan, 4],
884882
[0, np.nan, 5]],
885883
columns=list('xyz'))
886-
sparse = orig.to_sparse(fill_value=0)
884+
sparse = orig.to_sparse(fill_value=fill_value)
887885

888-
exp = orig.take([0]).to_sparse(fill_value=0)
889-
exp._default_fill_value = np.nan
886+
exp = orig.take([0]).to_sparse(fill_value=fill_value)
890887
tm.assert_sp_frame_equal(sparse.take([0]), exp)
891888

892-
exp = orig.take([0, 1]).to_sparse(fill_value=0)
893-
exp._default_fill_value = np.nan
889+
exp = orig.take([0, 1]).to_sparse(fill_value=fill_value)
894890
tm.assert_sp_frame_equal(sparse.take([0, 1]), exp)
895891

896-
exp = orig.take([-1, -2]).to_sparse(fill_value=0)
897-
exp._default_fill_value = np.nan
892+
exp = orig.take([-1, -2]).to_sparse(fill_value=fill_value)
898893
tm.assert_sp_frame_equal(sparse.take([-1, -2]), exp)
899894

900895
def test_reindex(self):

0 commit comments

Comments
 (0)