Skip to content

Commit 2e0e075

Browse files
committed
clean up to_from scipy sparse tests
1 parent 72c2a3f commit 2e0e075

File tree

2 files changed

+168
-164
lines changed

2 files changed

+168
-164
lines changed

pandas/tests/sparse/frame/test_frame.py

-164
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,14 @@
77
from numpy import nan
88
import numpy as np
99
import pandas as pd
10-
from distutils.version import LooseVersion
1110

1211
from pandas import Series, DataFrame, bdate_range, Panel
13-
from pandas.core.dtypes.common import (
14-
is_bool_dtype,
15-
is_float_dtype,
16-
is_object_dtype,
17-
is_float)
1812
from pandas.core.indexes.datetimes import DatetimeIndex
1913
from pandas.tseries.offsets import BDay
2014
from pandas.util import testing as tm
2115
from pandas.compat import lrange
2216
from pandas import compat
2317
from pandas.core.sparse import frame as spf
24-
import pandas.util._test_decorators as td
2518

2619
from pandas._libs.sparse import BlockIndex, IntIndex
2720
from pandas.core.sparse.api import SparseSeries, SparseDataFrame, SparseArray
@@ -1171,163 +1164,6 @@ def test_notna(self):
11711164
tm.assert_frame_equal(res.to_dense(), exp)
11721165

11731166

1174-
@td.skip_if_no_scipy
@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a numeric sparse matrix through ``SparseDataFrame``.

    Builds a 3x3 array of ``dtype``, converts it with ``spmatrix``, wraps
    the result in a ``SparseDataFrame`` and checks the frame contents, the
    matrix returned by ``to_coo()`` and the resulting dtypes.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this file section; presumably one of the
        SciPy sparse matrix classes -- TODO confirm against conftest.
    index, columns : list or None
        Labels forwarded to the ``SparseDataFrame`` constructor.
    fill_value : None, int or float
        Forwarded as ``default_fill_value``.
    dtype : type
        Element type of the source array.
    """
    # GH 4343
    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179: add an off-diagonal entry so the matrix is not symmetric
    arr[0, 1] = dtype(2)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to object
    # and compare in that generic form
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible: a missing or float fill value
    # is expected to upcast non-float, non-object data to float, while bool
    # input is expected to stay bool
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not is_object_dtype(dtype) and
                  not is_float_dtype(dtype))
    res_dtype = (bool if is_bool_dtype(dtype) else
                 float if was_upcast else
                 dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
1227-
1228-
1229-
@td.skip_if_no_scipy
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])  # noqa: F811
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype sparse matrix through ``SparseDataFrame``.

    Same checks as the numeric round-trip test, but for a 2x2 identity
    array of ``object`` dtype with fixed index and column labels.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this file section; compared against
        ``scipy.sparse.dok_matrix`` below, so presumably a SciPy sparse
        matrix class -- TODO confirm against conftest.
    fill_value : None, int or float
        Forwarded as ``default_fill_value``.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')
    import scipy
    if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(
            scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = pd.SparseDataFrame(spm, index=index, columns=columns,
                             default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to object
    # and compare in that generic form
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = pd.SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
1276-
1277-
1278-
@td.skip_if_no_scipy
def test_from_scipy_correct_ordering(spmatrix):
    """Check a frame built from a sparse matrix keeps element order.

    A 2x2 array holding 1..4 is converted with ``spmatrix``; the
    ``SparseDataFrame`` built from that matrix must equal the one built
    directly from the array.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this file section; presumably a SciPy
        sparse matrix class -- TODO confirm against conftest.
    """
    # GH 16179
    arr = np.arange(1, 5).reshape(2, 2)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = pd.SparseDataFrame(spm)
    expected = pd.SparseDataFrame(arr)
    tm.assert_sp_frame_equal(sdf, expected)
    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
1295-
1296-
1297-
@td.skip_if_no_scipy
def test_from_scipy_fillna(spmatrix):
    """Check ``fillna`` on a frame built from a sparse matrix with NaNs.

    The source array is a 3x3 identity whose first column has NaN in rows
    1 and 2; after ``fillna(-1.0)`` the frame must match a hand-built
    expectation whose columns all carry ``fill_value == -1``.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this file section; presumably a SciPy
        sparse matrix class -- TODO confirm against conftest.
    """
    # GH 16112
    arr = np.eye(3)
    arr[1:, 0] = np.nan

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = pd.SparseDataFrame(spm).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    expected = pd.SparseDataFrame({
        0: pd.SparseSeries([1., -1, -1]),
        1: pd.SparseSeries([np.nan, 1, np.nan]),
        2: pd.SparseSeries([np.nan, np.nan, 1]),
    }, default_fill_value=-1)

    # fill_value is expected to be what .fillna() above was called with
    # We don't use -1 as initial fill_value in expected SparseSeries
    # construction because this way we obtain "compressed" SparseArrays,
    # avoiding having to construct them ourselves
    for col in expected:
        expected[col].fill_value = -1

    tm.assert_sp_frame_equal(sdf, expected)
1329-
1330-
13311167
class TestSparseDataFrameArithmetic(object):
13321168

13331169
def test_numeric_op_scalar(self):
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import pytest
2+
import numpy as np
3+
from warnings import catch_warnings
4+
from pandas.util import testing as tm
5+
from pandas import SparseDataFrame, SparseSeries
6+
from distutils.version import LooseVersion
7+
from pandas.core.dtypes.common import (
8+
is_bool_dtype,
9+
is_float_dtype,
10+
is_object_dtype,
11+
is_float)
12+
13+
14+
# Skip this whole module at import time when SciPy cannot be imported;
# otherwise bind the imported module to ``scipy`` for use by the tests.
scipy = pytest.importorskip('scipy')
15+
16+
17+
@pytest.mark.parametrize('index', [None, list('abc')])  # noqa: F811
@pytest.mark.parametrize('columns', [None, list('def')])
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])
@pytest.mark.parametrize('dtype', [bool, int, float, np.uint16])
def test_from_to_scipy(spmatrix, index, columns, fill_value, dtype):
    """Round-trip a numeric sparse matrix through ``SparseDataFrame``.

    Builds a 3x3 array of ``dtype``, converts it with ``spmatrix``, wraps
    the result in a ``SparseDataFrame`` and checks the frame contents, the
    matrix returned by ``to_coo()`` and the resulting dtypes.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this module; presumably one of the SciPy
        sparse matrix classes -- TODO confirm against conftest.
    index, columns : list or None
        Labels forwarded to the ``SparseDataFrame`` constructor.
    fill_value : None, int or float
        Forwarded as ``default_fill_value``.
    dtype : type
        Element type of the source array.
    """
    # GH 4343
    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(3, dtype=dtype)
    # GH 16179: add an off-diagonal entry so the matrix is not symmetric
    arr[0, 1] = dtype(2)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to object
    # and compare in that generic form
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal
    assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible: a missing or float fill value
    # is expected to upcast non-float, non-object data to float, while bool
    # input is expected to stay bool
    was_upcast = ((fill_value is None or is_float(fill_value)) and
                  not is_object_dtype(dtype) and
                  not is_float_dtype(dtype))
    res_dtype = (bool if is_bool_dtype(dtype) else
                 float if was_upcast else
                 dtype)
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype

    # However, adding a str column results in an upcast to object
    sdf['strings'] = np.arange(len(sdf)).astype(str)
    assert sdf.to_coo().dtype == np.object_
69+
70+
71+
@pytest.mark.parametrize('fill_value', [None, 0, np.nan])  # noqa: F811
def test_from_to_scipy_object(spmatrix, fill_value):
    """Round-trip an object-dtype sparse matrix through ``SparseDataFrame``.

    Same checks as the numeric round-trip test, but for a 2x2 identity
    array of ``object`` dtype with fixed index and column labels.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this module; compared against
        ``scipy.sparse.dok_matrix`` below, so presumably a SciPy sparse
        matrix class -- TODO confirm against conftest.
    fill_value : None, int or float
        Forwarded as ``default_fill_value``.
    """
    # GH 4343
    dtype = object
    columns = list('cd')
    index = list('ab')

    if (spmatrix is scipy.sparse.dok_matrix and LooseVersion(
            scipy.__version__) >= LooseVersion('0.19.0')):
        pytest.skip("dok_matrix from object does not work in SciPy >= 0.19")

    # Make one ndarray and from it one sparse matrix, both to be used for
    # constructing frames and comparing results
    arr = np.eye(2, dtype=dtype)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = SparseDataFrame(spm, index=index, columns=columns,
                          default_fill_value=fill_value)

    # Expected result construction is kind of tricky for all
    # dtype-fill_value combinations; easiest to cast everything to object
    # and compare in that generic form
    rarr = arr.astype(object)
    rarr[arr == 0] = np.nan
    expected = SparseDataFrame(rarr, index=index, columns=columns).fillna(
        fill_value if fill_value is not None else np.nan)

    # Assert frame is as expected
    sdf_obj = sdf.astype(object)
    tm.assert_sp_frame_equal(sdf_obj, expected)
    tm.assert_frame_equal(sdf_obj.to_dense(), expected.to_dense())

    # Assert spmatrices equal; warnings raised while comparing the
    # object-dtype matrices are recorded and discarded here
    with catch_warnings(record=True):
        assert dict(sdf.to_coo().todok()) == dict(spm.todok())

    # Ensure dtype is preserved if possible
    res_dtype = object
    tm.assert_contains_all(sdf.dtypes, {np.dtype(res_dtype)})
    assert sdf.to_coo().dtype == res_dtype
118+
119+
120+
def test_from_scipy_correct_ordering(spmatrix):
    """Check a frame built from a sparse matrix keeps element order.

    A 2x2 array holding 1..4 is converted with ``spmatrix``; the
    ``SparseDataFrame`` built from that matrix must equal the one built
    directly from the array.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this module; presumably a SciPy sparse
        matrix class -- TODO confirm against conftest.
    """
    # GH 16179
    arr = np.arange(1, 5).reshape(2, 2)

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = SparseDataFrame(spm)
    expected = SparseDataFrame(arr)
    tm.assert_sp_frame_equal(sdf, expected)
    tm.assert_frame_equal(sdf.to_dense(), expected.to_dense())
136+
137+
138+
def test_from_scipy_fillna(spmatrix):
    """Check ``fillna`` on a frame built from a sparse matrix with NaNs.

    The source array is a 3x3 identity whose first column has NaN in rows
    1 and 2; after ``fillna(-1.0)`` the frame must match a hand-built
    expectation whose columns all carry ``fill_value == -1``.

    Parameters
    ----------
    spmatrix : callable
        Fixture defined outside this module; presumably a SciPy sparse
        matrix class -- TODO confirm against conftest.
    """
    # GH 16112
    arr = np.eye(3)
    arr[1:, 0] = np.nan

    # Report unsupported spmatrix/dtype combinations as skipped instead of
    # silently returning, which pytest would count as a pass.
    try:
        spm = spmatrix(arr)
    except TypeError:
        pytest.skip('{!r} does not support dtype {}'.format(
            spmatrix, arr.dtype))
    if spm.dtype != arr.dtype:
        pytest.skip('{!r} changed dtype {} to {}'.format(
            spmatrix, arr.dtype, spm.dtype))

    sdf = SparseDataFrame(spm).fillna(-1.0)

    # Returning frame should fill all nan values with -1.0
    expected = SparseDataFrame({
        0: SparseSeries([1., -1, -1]),
        1: SparseSeries([np.nan, 1, np.nan]),
        2: SparseSeries([np.nan, np.nan, 1]),
    }, default_fill_value=-1)

    # fill_value is expected to be what .fillna() above was called with
    # We don't use -1 as initial fill_value in expected SparseSeries
    # construction because this way we obtain "compressed" SparseArrays,
    # avoiding having to construct them ourselves
    for col in expected:
        expected[col].fill_value = -1

    tm.assert_sp_frame_equal(sdf, expected)

0 commit comments

Comments
 (0)