Skip to content

Commit 94a7baf

Browse files
committed
fixups
1 parent 6f619b5 commit 94a7baf

File tree

4 files changed

+64
-44
lines changed

4 files changed

+64
-44
lines changed

pandas/core/arrays/sparse.py

+32-21
Original file line numberDiff line numberDiff line change
@@ -686,22 +686,34 @@ def from_spmatrix(cls, data):
686686
Parameters
687687
----------
688688
data : scipy.sparse.spmatrix
689-
This should be a 2-D SciPy sparse where the size
689+
This should be a SciPy sparse matrix where the size
690690
of the second dimension is 1. In other words, a
691691
sparse matrix with a single column.
692692
693693
Returns
694694
-------
695-
SparseArray.
696-
"""
697-
assert data.ndim == 2
695+
SparseArray
698696
697+
Examples
698+
--------
699+
>>> import scipy.sparse
700+
>>> mat = scipy.sparse.coo_matrix((4, 1))
701+
>>> pd.SparseArray.from_spmatrix(mat)
702+
[0.0, 0.0, 0.0, 0.0]
703+
Fill: 0.0
704+
IntIndex
705+
Indices: array([], dtype=int32)
706+
"""
699707
length, ncol = data.shape
700708

701-
assert ncol == 1
709+
if ncol != 1:
710+
raise ValueError(
711+
"'data' must have a single column, not '{}'".format(ncol)
712+
)
702713

703714
arr = data.data
704715
idx, _ = data.nonzero()
716+
idx.sort()
705717
zero = np.array(0, dtype=arr.dtype).item()
706718
dtype = SparseDtype(arr.dtype, zero)
707719
index = IntIndex(length, idx)
@@ -1921,28 +1933,32 @@ def _make_index(length, indices, kind):
19211933
# ----------------------------------------------------------------------------
19221934
# Accessor
19231935

1924-
_validation_msg = "Can only use the '.sparse' accessor with Sparse data."
1936+
1937+
class BaseAccessor(object):
1938+
_validation_msg = "Can only use the '.sparse' accessor with Sparse data."
1939+
1940+
def __init__(self, data=None):
1941+
self._parent = data
1942+
self._validate(data)
1943+
1944+
def _validate(self, data):
1945+
raise NotImplementedError
19251946

19261947

19271948
@delegate_names(SparseArray, ['npoints', 'density', 'fill_value',
19281949
'sp_values'],
19291950
typ='property')
1930-
class SparseAccessor(PandasDelegate):
1951+
class SparseAccessor(BaseAccessor, PandasDelegate):
19311952
"""
19321953
Accessor for sparse data, with conversions from other sparse matrix data types.
19331954
"""
19341955

1935-
def __init__(self, data=None):
1936-
# Store the Series since we need that for to_coo
1937-
self._parent = data
1938-
self._validate(data)
1939-
19401956
def _validate(self, data):
19411957
if not isinstance(data.dtype, SparseDtype):
1942-
raise AttributeError(_validation_msg)
1958+
raise AttributeError(self._validation_msg)
19431959

19441960
def _delegate_property_get(self, name, *args, **kwargs):
1945-
return getattr(self._parent.values, name)
1961+
return getattr(self._parent.array, name)
19461962

19471963
def _delegate_method(self, name, *args, **kwargs):
19481964
if name == 'from_coo':
@@ -2064,17 +2080,12 @@ def to_dense(self):
20642080
name=self._parent.name)
20652081

20662082

2067-
class SparseFrameAccessor(PandasDelegate):
2068-
2069-
def __init__(self, data=None):
2070-
# Store the Series since we need that for to_coo
2071-
self._parent = data
2072-
self._validate(data)
2083+
class SparseFrameAccessor(BaseAccessor, PandasDelegate):
20732084

20742085
def _validate(self, data):
20752086
dtypes = data.dtypes
20762087
if not all(isinstance(t, SparseDtype) for t in dtypes):
2077-
raise AttributeError(_validation_msg)
2088+
raise AttributeError(self._validation_msg)
20782089

20792090
@classmethod
20802091
def from_spmatrix(cls, data, index=None, columns=None):

pandas/core/sparse/frame.py

+3-20
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,11 @@
1919
from pandas.core.dtypes.missing import isna, notna
2020

2121
import pandas.core.algorithms as algos
22-
from pandas.core.arrays.sparse import SparseArray
22+
from pandas.core.arrays.sparse import SparseArray, SparseFrameAccessor
2323
import pandas.core.common as com
2424
from pandas.core.frame import DataFrame
2525
import pandas.core.generic as generic
2626
from pandas.core.index import Index, MultiIndex, ensure_index
27-
import pandas.core.indexes.base as ibase
2827
from pandas.core.internals import (
2928
BlockManager, create_block_manager_from_arrays)
3029
from pandas.core.internals.construction import extract_index, prep_ndarray
@@ -198,7 +197,7 @@ def _init_matrix(self, data, index, columns, dtype=None):
198197
Init self from ndarray or list of lists.
199198
"""
200199
data = prep_ndarray(data, copy=False)
201-
index, columns = self._prep_index(data, index, columns)
200+
index, columns = SparseFrameAccessor._prep_index(data, index, columns)
202201
data = {idx: data[:, i] for i, idx in enumerate(columns)}
203202
return self._init_dict(data, index, columns, dtype)
204203

@@ -207,7 +206,7 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
207206
"""
208207
Init self from scipy.sparse matrix.
209208
"""
210-
index, columns = self._prep_index(data, index, columns)
209+
index, columns = SparseFrameAccessor._prep_index(data, index, columns)
211210
data = data.tocoo()
212211
N = len(index)
213212

@@ -234,21 +233,6 @@ def _init_spmatrix(self, data, index, columns, dtype=None,
234233

235234
return self._init_dict(sdict, index, columns, dtype)
236235

237-
def _prep_index(self, data, index, columns):
238-
N, K = data.shape
239-
if index is None:
240-
index = ibase.default_index(N)
241-
if columns is None:
242-
columns = ibase.default_index(K)
243-
244-
if len(columns) != K:
245-
raise ValueError('Column length mismatch: {columns} vs. {K}'
246-
.format(columns=len(columns), K=K))
247-
if len(index) != N:
248-
raise ValueError('Index length mismatch: {index} vs. {N}'
249-
.format(index=len(index), N=N))
250-
return index, columns
251-
252236
def to_coo(self):
253237
"""
254238
Return the contents of the frame as a sparse SciPy COO matrix.
@@ -271,7 +255,6 @@ def to_coo(self):
271255
float32. By numpy.find_common_type convention, mixing int64 and
272256
uint64 will result in a float64 dtype.
273257
"""
274-
from pandas.core.arrays.sparse import SparseFrameAccessor
275258
return SparseFrameAccessor(self).to_coo()
276259

277260
def __array_wrap__(self, result):

pandas/tests/arrays/sparse/test_accessor.py

+11-3
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55

66
import pandas as pd
77
import pandas.util.testing as tm
8+
import pandas.util._test_decorators as td
89

910

1011
class TestSeriesAccessor(object):
11-
# TODO: collect other accessor tests
12+
# TODO: collect other Series accessor tests
1213
def test_to_dense(self):
1314
s = pd.Series([0, 1, 0, 10], dtype='Sparse[int64]')
1415
result = s.sparse.to_dense()
@@ -17,15 +18,22 @@ def test_to_dense(self):
1718

1819

1920
class TestFrameAccessor(object):
21+
22+
def test_accessor_raises(self):
23+
df = pd.DataFrame({"A": [0, 1]})
24+
with pytest.raises(AttributeError, match='sparse'):
25+
df.sparse
26+
2027
@pytest.mark.parametrize('format', ['csc', 'csr', 'coo'])
2128
@pytest.mark.parametrize("labels", [
2229
None,
2330
list(string.ascii_letters[:10]),
2431
])
2532
@pytest.mark.parametrize('dtype', ['float64', 'int64'])
33+
@td.skip_if_no_scipy
2634
def test_from_spmatrix(self, format, labels, dtype):
27-
pytest.importorskip("scipy")
2835
import scipy.sparse
36+
2937
sp_dtype = pd.SparseDtype(dtype, np.array(0, dtype=dtype).item())
3038

3139
mat = scipy.sparse.eye(10, format=format, dtype=dtype)
@@ -39,8 +47,8 @@ def test_from_spmatrix(self, format, labels, dtype):
3947
).astype(sp_dtype)
4048
tm.assert_frame_equal(result, expected)
4149

50+
@td.skip_if_no_scipy
4251
def test_to_coo(self):
43-
pytest.importorskip("scipy")
4452
import scipy.sparse
4553

4654
df = pd.DataFrame({

pandas/tests/arrays/sparse/test_array.py

+18
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,24 @@ def test_constructor_inferred_fill_value(self, data, fill_value):
172172
else:
173173
assert result == fill_value
174174

175+
@pytest.mark.parametrize('format', ['coo', 'csc', 'csr'])
176+
def test_from_spmatrix(self, format):
177+
pytest.importorskip('scipy')
178+
import scipy.sparse
179+
180+
mat = scipy.sparse.random(10, 1, density=0.5, format=format)
181+
result = SparseArray.from_spmatrix(mat)
182+
np.testing.assert_array_equal(mat.data, result.sp_values)
183+
184+
def test_from_spmatrix_raises(self):
185+
pytest.importorskip('scipy')
186+
import scipy.sparse
187+
188+
mat = scipy.sparse.eye(5, 4, format='csc')
189+
190+
with pytest.raises(ValueError, match="not '4'"):
191+
SparseArray.from_spmatrix(mat)
192+
175193
@pytest.mark.parametrize('scalar,dtype', [
176194
(False, SparseDtype(bool, False)),
177195
(0.0, SparseDtype('float64', 0)),

0 commit comments

Comments
 (0)