Skip to content

BUG: incorrect handling of scipy.sparse.dok formats #16191

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 11, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions doc/source/whatsnew/v0.20.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,7 @@ Groupby/Resample/Rolling
Sparse
^^^^^^



- Bug in construction of SparseDataFrame from ``scipy.sparse.dok_matrix`` (:issue:`16179`)

Reshaping
^^^^^^^^^
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
values = Series(data.data, index=data.row, copy=False)
for col, rowvals in values.groupby(data.col):
# get_blocks expects int32 row indices in sorted order
rowvals = rowvals.sort_index()
rows = rowvals.index.values.astype(np.int32)
rows.sort()
blocs, blens = get_blocks(rows)

sdict[columns[col]] = SparseSeries(
Expand Down
28 changes: 25 additions & 3 deletions pandas/tests/sparse/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1146,8 +1146,8 @@ def test_isnotnull(self):
tm.assert_frame_equal(res.to_dense(), exp)


@pytest.mark.parametrize('index', [None, list('ab')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('cd')])
@pytest.mark.parametrize('index', [None, list('abc')]) # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
Expand All @@ -1156,7 +1156,9 @@ def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):

# Make one ndarray and from it one sparse matrix, both to be used for
# constructing frames and comparing results
arr = np.eye(2, dtype=dtype)
arr = np.eye(3, dtype=dtype)
# GH 16179
arr[0, 1] = dtype(2)
try:
spm = spmatrix(arr)
assert spm.dtype == arr.dtype
Expand Down Expand Up @@ -1245,6 +1247,26 @@ def test_from_to_scipy_object(spmatrix, fill_value):
assert sdf.to_coo().dtype == res_dtype


def test_from_scipy_correct_ordering(spmatrix):
    """Compare a SparseDataFrame built from a scipy sparse matrix with one
    built from the same data as a dense ndarray (GH 16179).

    ``spmatrix`` is called with the dense array to produce the sparse
    input; both frames must match in sparse and in dense form.
    """
    tm.skip_if_no_package('scipy')

    # 2x2 array holding 1..4, so every cell carries a distinct value
    dense = np.arange(1, 5).reshape(2, 2)
    try:
        sparse_input = spmatrix(dense)
        assert sparse_input.dtype == dense.dtype
    except (TypeError, AssertionError):
        # The sparse conversion either failed outright or did not keep the
        # dtype of the dense array; that spmatrix/dtype combination is
        # treated as unsupported, so there is nothing further to verify
        return

    from_sparse = pd.SparseDataFrame(sparse_input)
    from_dense = pd.SparseDataFrame(dense)

    tm.assert_sp_frame_equal(from_sparse, from_dense)
    tm.assert_frame_equal(from_sparse.to_dense(), from_dense.to_dense())


class TestSparseDataFrameArithmetic(object):

def test_numeric_op_scalar(self):
Expand Down