Skip to content

TST: Add more Sparse indexing tests #12848

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
234 changes: 234 additions & 0 deletions pandas/sparse/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,14 @@ def test_getitem(self):
exp = orig[orig % 2 == 1].to_sparse()
tm.assert_sp_series_equal(result, exp)

def test_getitem_slice(self):
    # Slicing the sparse series must give the same result as slicing the
    # dense series and converting the slice to sparse afterwards.
    dense = pd.Series([1, np.nan, np.nan, 3, np.nan])
    sp = dense.to_sparse()

    # [:2], [4:2], [::2] and [-5:] spelled as explicit slice objects
    keys = (slice(None, 2),
            slice(4, 2),
            slice(None, None, 2),
            slice(-5, None))
    for key in keys:
        tm.assert_sp_series_equal(sp[key], dense[key].to_sparse())

def test_getitem_fill_value(self):
orig = pd.Series([1, np.nan, 0, 3, 0])
sparse = orig.to_sparse(fill_value=0)
Expand Down Expand Up @@ -63,6 +71,18 @@ def test_getitem_ellipsis(self):
s = pd.SparseSeries([1, np.nan, 2, 0, np.nan], fill_value=0)
tm.assert_sp_series_equal(s[...], s)

def test_getitem_slice_fill_value(self):
    # Same slice checks as the plain variant, but both the sparse input and
    # the expected result are built with fill_value=0.
    dense = pd.Series([1, np.nan, 0, 3, 0])
    sp = dense.to_sparse(fill_value=0)

    # [:2], [4:2], [::2] and [-5:] spelled as explicit slice objects
    keys = (slice(None, 2),
            slice(4, 2),
            slice(None, None, 2),
            slice(-5, None))
    for key in keys:
        expected = dense[key].to_sparse(fill_value=0)
        tm.assert_sp_series_equal(sp[key], expected)

def test_loc(self):
orig = pd.Series([1, np.nan, np.nan, 3, np.nan])
sparse = orig.to_sparse()
Expand Down Expand Up @@ -237,6 +257,25 @@ def test_iat_fill_value(self):
self.assertEqual(sparse.iat[-1], orig.iat[-1])
self.assertEqual(sparse.iat[-5], orig.iat[-5])

def test_get(self):
    # SparseSeries.get: present keys return the value (NaN included),
    # absent keys return None.
    unlabelled = pd.SparseSeries([1, np.nan, np.nan, 3, np.nan])
    self.assertEqual(unlabelled.get(0), 1)
    self.assertTrue(np.isnan(unlabelled.get(1)))
    self.assertIsNone(unlabelled.get(5))

    # Label-based lookups, first with the default fill value and then with
    # an explicit fill_value=0; the expectations are the same in both cases.
    for extra in ({}, {'fill_value': 0}):
        labelled = pd.SparseSeries([1, np.nan, 0, 3, 0],
                                   index=list('ABCDE'), **extra)
        self.assertEqual(labelled.get('A'), 1)
        self.assertTrue(np.isnan(labelled.get('B')))
        self.assertEqual(labelled.get('C'), 0)
        self.assertIsNone(labelled.get('XX'))

def test_take(self):
orig = pd.Series([1, np.nan, np.nan, 3, np.nan],
index=list('ABCDE'))
Expand Down Expand Up @@ -321,6 +360,53 @@ class TestSparseDataFrameIndexing(tm.TestCase):

_multiprocess_can_split_ = True

def test_getitem(self):
    # __getitem__ on a sparse frame must agree with the dense frame for a
    # single label, label lists, a boolean mask and an integer list.
    dense = pd.DataFrame([[1, np.nan, np.nan],
                          [2, 3, np.nan],
                          [np.nan, np.nan, 4],
                          [0, np.nan, 5]],
                         columns=list('xyz'))
    sp = dense.to_sparse()

    # a single column label yields a sparse series
    tm.assert_sp_series_equal(sp['x'], dense['x'].to_sparse())

    # list-like keys yield a sparse frame
    list_keys = (['x'],
                 ['z', 'x'],
                 [True, False, True, True],
                 [1, 2])
    for key in list_keys:
        tm.assert_sp_frame_equal(sp[key], dense[key].to_sparse())

def test_getitem_fill_value(self):
    # __getitem__ on a frame sparsified with fill_value=0.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    # a single column label yields a sparse series
    tm.assert_sp_series_equal(sp['y'], dense['y'].to_sparse(fill_value=0))

    # list-like keys yield a sparse frame
    list_keys = (['x'],
                 ['z', 'x'],
                 [True, False, True, True],
                 [1, 2])
    for key in list_keys:
        expected = dense[key].to_sparse(fill_value=0)
        # The expected frame's default fill value is forced to NaN before
        # comparing. NOTE(review): presumably the indexed sparse frame does
        # not carry over fill_value=0 as its default -- confirm.
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(sp[key], expected)

def test_loc(self):
orig = pd.DataFrame([[1, np.nan, np.nan],
[2, 3, np.nan],
Expand Down Expand Up @@ -477,3 +563,151 @@ def test_iloc_slice(self):
columns=list('xyz'))
sparse = orig.to_sparse()
tm.assert_sp_frame_equal(sparse.iloc[2:], orig.iloc[2:].to_sparse())

def test_at(self):
    # Scalar label access via .at on a sparse frame matches the dense frame.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         index=list('ABCD'), columns=list('xyz'))
    sp = dense.to_sparse()

    # (row label, column label, whether the cell is expected to be NaN)
    cells = (('A', 'x', False),
             ('B', 'z', True),
             ('C', 'y', True),
             ('D', 'x', False))
    for row, col, expect_nan in cells:
        if expect_nan:
            # NaN never compares equal, so test for it explicitly
            self.assertTrue(np.isnan(sp.at[row, col]))
        else:
            self.assertEqual(sp.at[row, col], dense.at[row, col])

def test_at_fill_value(self):
    # Scalar label access via .at on a frame sparsified with fill_value=0.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         index=list('ABCD'), columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    # (row label, column label, whether the cell is expected to be NaN)
    cells = (('A', 'x', False),
             ('B', 'z', True),
             ('C', 'y', True),
             ('D', 'x', False))
    for row, col, expect_nan in cells:
        if expect_nan:
            # NaN never compares equal, so test for it explicitly
            self.assertTrue(np.isnan(sp.at[row, col]))
        else:
            self.assertEqual(sp.at[row, col], dense.at[row, col])

def test_iat(self):
    # Scalar positional access via .iat on a sparse frame, including
    # negative positions, matches the dense frame.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         index=list('ABCD'), columns=list('xyz'))
    sp = dense.to_sparse()

    # (row position, column position, whether the cell is expected NaN)
    cells = ((0, 0, False),
             (1, 2, True),
             (2, 1, True),
             (2, 0, False),
             (-1, -2, True),
             (-1, -1, False))
    for i, j, expect_nan in cells:
        if expect_nan:
            # NaN never compares equal, so test for it explicitly
            self.assertTrue(np.isnan(sp.iat[i, j]))
        else:
            self.assertEqual(sp.iat[i, j], dense.iat[i, j])

def test_iat_fill_value(self):
    # Scalar positional access via .iat on a frame sparsified with
    # fill_value=0, including negative positions.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         index=list('ABCD'), columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    # (row position, column position, whether the cell is expected NaN)
    cells = ((0, 0, False),
             (1, 2, True),
             (2, 1, True),
             (2, 0, False),
             (-1, -2, True),
             (-1, -1, False))
    for i, j, expect_nan in cells:
        if expect_nan:
            # NaN never compares equal, so test for it explicitly
            self.assertTrue(np.isnan(sp.iat[i, j]))
        else:
            self.assertEqual(sp.iat[i, j], dense.iat[i, j])

def test_take(self):
    # take() on a sparse frame must equal take() on the dense frame
    # followed by conversion to sparse.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         columns=list('xyz'))
    sp = dense.to_sparse()

    # one position, two positions, and negative positions
    for positions in ([0], [0, 1], [-1, -2]):
        tm.assert_sp_frame_equal(sp.take(positions),
                                 dense.take(positions).to_sparse())

def test_take_fill_value(self):
    # take() on a frame sparsified with fill_value=0.
    dense = pd.DataFrame([[1, np.nan, 0],
                          [2, 3, np.nan],
                          [0, np.nan, 4],
                          [0, np.nan, 5]],
                         columns=list('xyz'))
    sp = dense.to_sparse(fill_value=0)

    # one position, two positions, and negative positions
    for positions in ([0], [0, 1], [-1, -2]):
        expected = dense.take(positions).to_sparse(fill_value=0)
        # The expected frame's default fill value is forced to NaN before
        # comparing. NOTE(review): presumably take() on the sparse frame
        # does not carry over fill_value=0 as its default -- confirm.
        expected._default_fill_value = np.nan
        tm.assert_sp_frame_equal(sp.take(positions), expected)

def test_reindex(self):
    # reindex() on a sparse frame must equal reindex() on the dense frame
    # followed by conversion to sparse.
    new_index = ['A', 'C', 'B']

    mixed = [[1, np.nan, 0],
             [2, 3, np.nan],
             [0, np.nan, 4],
             [0, np.nan, 5]]
    # all missing
    all_nan = [[np.nan] * 3 for _ in range(4)]

    for data in (mixed, all_nan):
        dense = pd.DataFrame(data, index=list('ABCD'),
                             columns=list('xyz'))
        sp = dense.to_sparse()

        result = sp.reindex(new_index)
        expected = dense.reindex(new_index).to_sparse()
        tm.assert_sp_frame_equal(result, expected)

def test_reindex_fill_value(self):
    # reindex() on frames sparsified with fill_value=0: mixed data, an
    # all-NaN frame, and a frame made up entirely of the fill value.
    new_index = ['A', 'C', 'B']

    mixed = [[1, np.nan, 0],
             [2, 3, np.nan],
             [0, np.nan, 4],
             [0, np.nan, 5]]
    # all missing
    all_nan = [[np.nan] * 3 for _ in range(4)]
    # all fill_value
    all_fill = [[0] * 3 for _ in range(4)]

    for data in (mixed, all_nan, all_fill):
        dense = pd.DataFrame(data, index=list('ABCD'),
                             columns=list('xyz'))
        sp = dense.to_sparse(fill_value=0)

        result = sp.reindex(new_index)
        expected = dense.reindex(new_index).to_sparse(fill_value=0)
        tm.assert_sp_frame_equal(result, expected)
2 changes: 1 addition & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1263,7 +1263,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True,
else:
assert_series_equal(series.to_dense(), right[col].to_dense())

assert_almost_equal(left.default_fill_value, right.default_fill_value)
assert_attr_equal('default_fill_value', left, right, obj=obj)

# do I care?
# assert(left.default_kind == right.default_kind)
Expand Down