Skip to content

Commit 1a71b94

Browse files
jbrockmendel AlexKirko
authored and committed
REF: method-specific tests for cov, corr, corrwith, count, round (pandas-dev#30437)
1 parent 3a30aff commit 1a71b94

File tree

8 files changed

+784
-732
lines changed

8 files changed

+784
-732
lines changed
+36
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
from pandas import DataFrame, Series
2+
import pandas.util.testing as tm
3+
4+
5+
class TestDataFrameCount:
6+
def test_count(self):
7+
# corner case
8+
frame = DataFrame()
9+
ct1 = frame.count(1)
10+
assert isinstance(ct1, Series)
11+
12+
ct2 = frame.count(0)
13+
assert isinstance(ct2, Series)
14+
15+
# GH#423
16+
df = DataFrame(index=range(10))
17+
result = df.count(1)
18+
expected = Series(0, index=df.index)
19+
tm.assert_series_equal(result, expected)
20+
21+
df = DataFrame(columns=range(10))
22+
result = df.count(0)
23+
expected = Series(0, index=df.columns)
24+
tm.assert_series_equal(result, expected)
25+
26+
df = DataFrame()
27+
result = df.count()
28+
expected = Series(0, index=[])
29+
tm.assert_series_equal(result, expected)
30+
31+
def test_count_objects(self, float_string_frame):
32+
dm = DataFrame(float_string_frame._series)
33+
df = DataFrame(float_string_frame._series)
34+
35+
tm.assert_series_equal(dm.count(), df.count())
36+
tm.assert_series_equal(dm.count(1), df.count(1))
+289
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,289 @@
1+
import warnings
2+
3+
import numpy as np
4+
import pytest
5+
6+
import pandas.util._test_decorators as td
7+
8+
import pandas as pd
9+
from pandas import DataFrame, Series, isna
10+
import pandas.util.testing as tm
11+
12+
13+
class TestDataFrameCov:
14+
def test_cov(self, float_frame, float_string_frame):
15+
# min_periods no NAs (corner case)
16+
expected = float_frame.cov()
17+
result = float_frame.cov(min_periods=len(float_frame))
18+
19+
tm.assert_frame_equal(expected, result)
20+
21+
result = float_frame.cov(min_periods=len(float_frame) + 1)
22+
assert isna(result.values).all()
23+
24+
# with NAs
25+
frame = float_frame.copy()
26+
frame["A"][:5] = np.nan
27+
frame["B"][5:10] = np.nan
28+
result = float_frame.cov(min_periods=len(float_frame) - 8)
29+
expected = float_frame.cov()
30+
expected.loc["A", "B"] = np.nan
31+
expected.loc["B", "A"] = np.nan
32+
33+
# regular
34+
float_frame["A"][:5] = np.nan
35+
float_frame["B"][:10] = np.nan
36+
cov = float_frame.cov()
37+
38+
tm.assert_almost_equal(cov["A"]["C"], float_frame["A"].cov(float_frame["C"]))
39+
40+
# exclude non-numeric types
41+
result = float_string_frame.cov()
42+
expected = float_string_frame.loc[:, ["A", "B", "C", "D"]].cov()
43+
tm.assert_frame_equal(result, expected)
44+
45+
# Single column frame
46+
df = DataFrame(np.linspace(0.0, 1.0, 10))
47+
result = df.cov()
48+
expected = DataFrame(
49+
np.cov(df.values.T).reshape((1, 1)), index=df.columns, columns=df.columns
50+
)
51+
tm.assert_frame_equal(result, expected)
52+
df.loc[0] = np.nan
53+
result = df.cov()
54+
expected = DataFrame(
55+
np.cov(df.values[1:].T).reshape((1, 1)),
56+
index=df.columns,
57+
columns=df.columns,
58+
)
59+
tm.assert_frame_equal(result, expected)
60+
61+
62+
class TestDataFrameCorr:
    """Tests for ``DataFrame.corr()``, as opposed to ``DataFrame.corrwith``."""

    @staticmethod
    def _check_method(frame, method="pearson"):
        """Assert the A/C cell of ``frame.corr`` matches ``Series.corr``."""
        matrix = frame.corr(method=method)
        pairwise = frame["A"].corr(frame["C"], method=method)
        tm.assert_almost_equal(matrix["A"]["C"], pairwise)

    @td.skip_if_no_scipy
    def test_corr_pearson(self, float_frame):
        """Pearson correlation on a frame with NaNs in "A" and "B"."""
        float_frame["A"][:5] = np.nan
        float_frame["B"][5:10] = np.nan

        self._check_method(float_frame, "pearson")

    @td.skip_if_no_scipy
    def test_corr_kendall(self, float_frame):
        """Kendall correlation on a frame with NaNs in "A" and "B"."""
        float_frame["A"][:5] = np.nan
        float_frame["B"][5:10] = np.nan

        self._check_method(float_frame, "kendall")

    @td.skip_if_no_scipy
    def test_corr_spearman(self, float_frame):
        """Spearman correlation on a frame with NaNs in "A" and "B"."""
        float_frame["A"][:5] = np.nan
        float_frame["B"][5:10] = np.nan

        self._check_method(float_frame, "spearman")

    # ---------------------------------------------------------------------

    @td.skip_if_no_scipy
    def test_corr_non_numeric(self, float_frame, float_string_frame):
        """``corr`` on a mixed frame equals ``corr`` on its A-D columns."""
        float_frame["A"][:5] = np.nan
        float_frame["B"][5:10] = np.nan

        # exclude non-numeric types
        numeric_only = float_string_frame.loc[:, ["A", "B", "C", "D"]]
        tm.assert_frame_equal(float_string_frame.corr(), numeric_only.corr())

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("meth", ["pearson", "kendall", "spearman"])
    def test_corr_nooverlap(self, meth):
        """Columns sharing no observed rows correlate to NaN."""
        # nothing in common
        nan = np.nan
        frame = DataFrame(
            {
                "A": [1, 1.5, 1, nan, nan, nan],
                "B": [nan, nan, nan, 1, 1.5, 1],
                "C": [nan, nan, nan, nan, nan, nan],
            }
        )
        matrix = frame.corr(meth)

        for pair in (("A", "B"), ("B", "A")):
            assert isna(matrix.loc[pair])
        for col in ("A", "B"):
            assert matrix.loc[col, col] == 1
        assert isna(matrix.loc["C", "C"])

    @td.skip_if_no_scipy
    @pytest.mark.parametrize("meth", ["pearson", "spearman"])
    def test_corr_constant(self, meth):
        """Constant columns yield an all-NaN correlation matrix."""
        # constant --> all NA
        nan = np.nan
        frame = DataFrame(
            {
                "A": [1, 1, 1, nan, nan, nan],
                "B": [nan, nan, nan, 1, 1, 1],
            }
        )
        matrix = frame.corr(meth)
        assert isna(matrix.values).all()

    @td.skip_if_no_scipy
    def test_corr_int_and_boolean(self):
        """A bool column and an int column correlate perfectly."""
        # when dtypes of pandas series are different
        # then ndarray will have dtype=object,
        # so it need to be properly handled
        frame = DataFrame({"a": [True, False], "b": [1, 0]})
        labels = ["a", "b"]
        all_ones = DataFrame(np.ones((2, 2)), index=labels, columns=labels)

        for meth in ["pearson", "kendall", "spearman"]:
            # RuntimeWarnings raised inside corr are ignored for this call
            with warnings.catch_warnings(record=True):
                warnings.simplefilter("ignore", RuntimeWarning)
                observed = frame.corr(meth)
            tm.assert_frame_equal(observed, all_ones)

    def test_corr_cov_independent_index_column(self):
        """Result index and columns are equal but distinct objects."""
        # GH#14617
        values = np.random.randn(4 * 10).reshape(10, 4)
        frame = pd.DataFrame(values, columns=list("abcd"))
        for method in ["cov", "corr"]:
            compute = getattr(frame, method)
            matrix = compute()
            assert matrix.index is not matrix.columns
            assert matrix.index.equals(matrix.columns)

    def test_corr_invalid_method(self):
        """An unknown ``method`` string raises ``ValueError``."""
        # GH#22298
        frame = pd.DataFrame(np.random.normal(size=(10, 2)))
        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
        with pytest.raises(ValueError, match=msg):
            frame.corr(method="____")

    def test_corr_int(self):
        """Smoke test: ``cov`` and ``corr`` run on an integer frame."""
        # dtypes other than float64 GH#1761
        int_frame = DataFrame({"a": [1, 2, 3, 4], "b": [1, 2, 3, 4]})

        int_frame.cov()
        int_frame.corr()
172+
173+
174+
class TestDataFrameCorrWith:
175+
def test_corrwith(self, datetime_frame):
176+
a = datetime_frame
177+
noise = Series(np.random.randn(len(a)), index=a.index)
178+
179+
b = datetime_frame.add(noise, axis=0)
180+
181+
# make sure order does not matter
182+
b = b.reindex(columns=b.columns[::-1], index=b.index[::-1][10:])
183+
del b["B"]
184+
185+
colcorr = a.corrwith(b, axis=0)
186+
tm.assert_almost_equal(colcorr["A"], a["A"].corr(b["A"]))
187+
188+
rowcorr = a.corrwith(b, axis=1)
189+
tm.assert_series_equal(rowcorr, a.T.corrwith(b.T, axis=0))
190+
191+
dropped = a.corrwith(b, axis=0, drop=True)
192+
tm.assert_almost_equal(dropped["A"], a["A"].corr(b["A"]))
193+
assert "B" not in dropped
194+
195+
dropped = a.corrwith(b, axis=1, drop=True)
196+
assert a.index[-1] not in dropped.index
197+
198+
# non time-series data
199+
index = ["a", "b", "c", "d", "e"]
200+
columns = ["one", "two", "three", "four"]
201+
df1 = DataFrame(np.random.randn(5, 4), index=index, columns=columns)
202+
df2 = DataFrame(np.random.randn(4, 4), index=index[:4], columns=columns)
203+
correls = df1.corrwith(df2, axis=1)
204+
for row in index[:4]:
205+
tm.assert_almost_equal(correls[row], df1.loc[row].corr(df2.loc[row]))
206+
207+
def test_corrwith_with_objects(self):
208+
df1 = tm.makeTimeDataFrame()
209+
df2 = tm.makeTimeDataFrame()
210+
cols = ["A", "B", "C", "D"]
211+
212+
df1["obj"] = "foo"
213+
df2["obj"] = "bar"
214+
215+
result = df1.corrwith(df2)
216+
expected = df1.loc[:, cols].corrwith(df2.loc[:, cols])
217+
tm.assert_series_equal(result, expected)
218+
219+
result = df1.corrwith(df2, axis=1)
220+
expected = df1.loc[:, cols].corrwith(df2.loc[:, cols], axis=1)
221+
tm.assert_series_equal(result, expected)
222+
223+
def test_corrwith_series(self, datetime_frame):
224+
result = datetime_frame.corrwith(datetime_frame["A"])
225+
expected = datetime_frame.apply(datetime_frame["A"].corr)
226+
227+
tm.assert_series_equal(result, expected)
228+
229+
def test_corrwith_matches_corrcoef(self):
230+
df1 = DataFrame(np.arange(10000), columns=["a"])
231+
df2 = DataFrame(np.arange(10000) ** 2, columns=["a"])
232+
c1 = df1.corrwith(df2)["a"]
233+
c2 = np.corrcoef(df1["a"], df2["a"])[0][1]
234+
235+
tm.assert_almost_equal(c1, c2)
236+
assert c1 < 1
237+
238+
def test_corrwith_mixed_dtypes(self):
239+
# GH#18570
240+
df = pd.DataFrame(
241+
{"a": [1, 4, 3, 2], "b": [4, 6, 7, 3], "c": ["a", "b", "c", "d"]}
242+
)
243+
s = pd.Series([0, 6, 7, 3])
244+
result = df.corrwith(s)
245+
corrs = [df["a"].corr(s), df["b"].corr(s)]
246+
expected = pd.Series(data=corrs, index=["a", "b"])
247+
tm.assert_series_equal(result, expected)
248+
249+
def test_corrwith_index_intersection(self):
250+
df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
251+
df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
252+
253+
result = df1.corrwith(df2, drop=True).index.sort_values()
254+
expected = df1.columns.intersection(df2.columns).sort_values()
255+
tm.assert_index_equal(result, expected)
256+
257+
def test_corrwith_index_union(self):
258+
df1 = pd.DataFrame(np.random.random(size=(10, 2)), columns=["a", "b"])
259+
df2 = pd.DataFrame(np.random.random(size=(10, 3)), columns=["a", "b", "c"])
260+
261+
result = df1.corrwith(df2, drop=False).index.sort_values()
262+
expected = df1.columns.union(df2.columns).sort_values()
263+
tm.assert_index_equal(result, expected)
264+
265+
def test_corrwith_dup_cols(self):
266+
# GH#21925
267+
df1 = pd.DataFrame(np.vstack([np.arange(10)] * 3).T)
268+
df2 = df1.copy()
269+
df2 = pd.concat((df2, df2[0]), axis=1)
270+
271+
result = df1.corrwith(df2)
272+
expected = pd.Series(np.ones(4), index=[0, 0, 1, 2])
273+
tm.assert_series_equal(result, expected)
274+
275+
@td.skip_if_no_scipy
276+
def test_corrwith_spearman(self):
277+
# GH#21925
278+
df = pd.DataFrame(np.random.random(size=(100, 3)))
279+
result = df.corrwith(df ** 2, method="spearman")
280+
expected = Series(np.ones(len(result)))
281+
tm.assert_series_equal(result, expected)
282+
283+
@td.skip_if_no_scipy
284+
def test_corrwith_kendall(self):
285+
# GH#21925
286+
df = pd.DataFrame(np.random.random(size=(100, 3)))
287+
result = df.corrwith(df ** 2, method="kendall")
288+
expected = Series(np.ones(len(result)))
289+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)