Skip to content

Commit 92d708c

Browse files
committed
Split off duplicates/unique tests into separate file
1 parent c06442b commit 92d708c

File tree

2 files changed

+142
-126
lines changed

2 files changed

+142
-126
lines changed

pandas/tests/series/test_analytics.py

-126
Original file line number | Diff line number | Diff line change
@@ -907,105 +907,6 @@ def test_matmul(self):
907 907
pytest.raises(Exception, a.dot, a.values[:3])
908 908
pytest.raises(ValueError, a.dot, b.T)
909 909

910-
def test_value_counts_nunique(self):
911-
912-
# basics.rst doc example
913-
series = Series(np.random.randn(500))
914-
series[20:500] = np.nan
915-
series[10:20] = 5000
916-
result = series.nunique()
917-
assert result == 11
918-
919-
# GH 18051
920-
s = pd.Series(pd.Categorical([]))
921-
assert s.nunique() == 0
922-
s = pd.Series(pd.Categorical([np.nan]))
923-
assert s.nunique() == 0
924-
925-
def test_unique(self):
926-
927-
# 714 also, dtype=float
928-
s = Series([1.2345] * 100)
929-
s[::2] = np.nan
930-
result = s.unique()
931-
assert len(result) == 2
932-
933-
s = Series([1.2345] * 100, dtype='f4')
934-
s[::2] = np.nan
935-
result = s.unique()
936-
assert len(result) == 2
937-
938-
# NAs in object arrays #714
939-
s = Series(['foo'] * 100, dtype='O')
940-
s[::2] = np.nan
941-
result = s.unique()
942-
assert len(result) == 2
943-
944-
# decision about None
945-
s = Series([1, 2, 3, None, None, None], dtype=object)
946-
result = s.unique()
947-
expected = np.array([1, 2, 3, None], dtype=object)
948-
tm.assert_numpy_array_equal(result, expected)
949-
950-
# GH 18051
951-
s = pd.Series(pd.Categorical([]))
952-
tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
953-
check_dtype=False)
954-
s = pd.Series(pd.Categorical([np.nan]))
955-
tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
956-
check_dtype=False)
957-
958-
@pytest.mark.parametrize(
959-
'keep, expected',
960-
[
961-
('first', Series([False, False, False, False, True, True, False])),
962-
('last', Series([False, True, True, False, False, False, False])),
963-
(False, Series([False, True, True, False, True, True, False]))
964-
])
965-
def test_drop_duplicates_non_bool(self, any_numpy_dtype, keep, expected):
966-
tc = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(any_numpy_dtype))
967-
968-
assert_series_equal(tc.duplicated(keep=keep), expected)
969-
assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
970-
sc = tc.copy()
971-
sc.drop_duplicates(keep=keep, inplace=True)
972-
assert_series_equal(sc, tc[~expected])
973-
974-
@pytest.mark.parametrize('keep, expected',
975-
[('first', Series([False, False, True, True])),
976-
('last', Series([True, True, False, False])),
977-
(False, Series([True, True, True, True]))])
978-
def test_drop_duplicates_bool(self, keep, expected):
979-
tc = Series([True, False, True, False])
980-
981-
assert_series_equal(tc.duplicated(keep=keep), expected)
982-
assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
983-
sc = tc.copy()
984-
sc.drop_duplicates(keep=keep, inplace=True)
985-
assert_series_equal(sc, tc[~expected])
986-
987-
@pytest.mark.parametrize('keep, expected', [
988-
('first', Series([False, False, True, False, True], name='name')),
989-
('last', Series([True, True, False, False, False], name='name')),
990-
(False, Series([True, True, True, False, True], name='name'))
991-
])
992-
def test_duplicated_keep(self, keep, expected):
993-
s = Series(['a', 'b', 'b', 'c', 'a'], name='name')
994-
995-
result = s.duplicated(keep=keep)
996-
tm.assert_series_equal(result, expected)
997-
998-
@pytest.mark.parametrize('keep, expected', [
999-
('first', Series([False, False, True, False, True])),
1000-
('last', Series([True, True, False, False, False])),
1001-
(False, Series([True, True, True, False, True]))
1002-
])
1003-
def test_duplicated_nan_none(self, keep, expected):
1004-
s = Series([np.nan, 3, 3, None, np.nan], dtype=object)
1005-
1006-
result = s.duplicated(keep=keep)
1007-
tm.assert_series_equal(result, expected)
1008-
1009 910
def test_clip(self):
1010 911
val = self.ts.median()
1011 912

@@ -1419,10 +1320,6 @@ def test_empty_timeseries_redections_return_nat(self):
1419 1320
assert Series([], dtype=dtype).min() is pd.NaT
1420 1321
assert Series([], dtype=dtype).max() is pd.NaT
1421 1322

1422-
def test_unique_data_ownership(self):
1423-
# it works! #1807
1424-
Series(Series(["a", "c", "b"]).unique()).sort_values()
1425-
1426 1323
def test_repeat(self):
1427 1324
s = Series(np.random.randn(3), index=['a', 'b', 'c'])
1428 1325

@@ -1499,29 +1396,6 @@ def test_searchsorted_sorter(self):
1499 1396
e = np.array([0, 2], dtype=np.intp)
1500 1397
tm.assert_numpy_array_equal(r, e)
1501 1398

1502-
def test_is_unique(self):
1503-
# GH11946
1504-
s = Series(np.random.randint(0, 10, size=1000))
1505-
assert not s.is_unique
1506-
s = Series(np.arange(1000))
1507-
assert s.is_unique
1508-
1509-
def test_is_unique_class_ne(self, capsys):
1510-
# GH 20661
1511-
class Foo(object):
1512-
def __init__(self, val):
1513-
self._value = val
1514-
1515-
def __ne__(self, other):
1516-
raise Exception("NEQ not supported")
1517-
1518-
li = [Foo(i) for i in range(5)]
1519-
s = pd.Series(li, index=[i for i in range(5)])
1520-
_, err = capsys.readouterr()
1521-
s.is_unique
1522-
_, err = capsys.readouterr()
1523-
assert len(err) == 0
1524-
1525 1399
def test_is_monotonic(self):
1526 1400

1527 1401
s = Series(np.random.randint(0, 10, size=1000))
+142
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,142 @@
1+
# coding=utf-8
2+
# pylint: disable-msg=E1101,W0612
3+
4+
import pytest
5+
6+
import numpy as np
7+
import pandas as pd
8+
9+
from pandas import Series
10+
11+
from pandas.util.testing import assert_series_equal
12+
import pandas.util.testing as tm
13+
from .common import TestData
14+
15+
16+
class TestSeriesDuplicates(TestData):
17+
18+
def test_value_counts_nunique(self):
19+
20+
# basics.rst doc example
21+
series = Series(np.random.randn(500))
22+
series[20:500] = np.nan
23+
series[10:20] = 5000
24+
result = series.nunique()
25+
assert result == 11
26+
27+
# GH 18051
28+
s = pd.Series(pd.Categorical([]))
29+
assert s.nunique() == 0
30+
s = pd.Series(pd.Categorical([np.nan]))
31+
assert s.nunique() == 0
32+
33+
def test_unique(self):
34+
35+
# 714 also, dtype=float
36+
s = Series([1.2345] * 100)
37+
s[::2] = np.nan
38+
result = s.unique()
39+
assert len(result) == 2
40+
41+
s = Series([1.2345] * 100, dtype='f4')
42+
s[::2] = np.nan
43+
result = s.unique()
44+
assert len(result) == 2
45+
46+
# NAs in object arrays #714
47+
s = Series(['foo'] * 100, dtype='O')
48+
s[::2] = np.nan
49+
result = s.unique()
50+
assert len(result) == 2
51+
52+
# decision about None
53+
s = Series([1, 2, 3, None, None, None], dtype=object)
54+
result = s.unique()
55+
expected = np.array([1, 2, 3, None], dtype=object)
56+
tm.assert_numpy_array_equal(result, expected)
57+
58+
# GH 18051
59+
s = pd.Series(pd.Categorical([]))
60+
tm.assert_categorical_equal(s.unique(), pd.Categorical([]),
61+
check_dtype=False)
62+
s = pd.Series(pd.Categorical([np.nan]))
63+
tm.assert_categorical_equal(s.unique(), pd.Categorical([np.nan]),
64+
check_dtype=False)
65+
66+
def test_unique_data_ownership(self):
67+
# it works! #1807
68+
Series(Series(["a", "c", "b"]).unique()).sort_values()
69+
70+
def test_is_unique(self):
71+
# GH11946
72+
s = Series(np.random.randint(0, 10, size=1000))
73+
assert not s.is_unique
74+
s = Series(np.arange(1000))
75+
assert s.is_unique
76+
77+
def test_is_unique_class_ne(self, capsys):
78+
# GH 20661
79+
class Foo(object):
80+
def __init__(self, val):
81+
self._value = val
82+
83+
def __ne__(self, other):
84+
raise Exception("NEQ not supported")
85+
86+
li = [Foo(i) for i in range(5)]
87+
s = pd.Series(li, index=[i for i in range(5)])
88+
_, err = capsys.readouterr()
89+
s.is_unique
90+
_, err = capsys.readouterr()
91+
assert len(err) == 0
92+
93+
@pytest.mark.parametrize(
94+
'keep, expected',
95+
[
96+
('first', Series([False, False, False, False, True, True, False])),
97+
('last', Series([False, True, True, False, False, False, False])),
98+
(False, Series([False, True, True, False, True, True, False]))
99+
])
100+
def test_drop_duplicates_non_bool(self, any_numpy_dtype, keep, expected):
101+
tc = Series([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(any_numpy_dtype))
102+
103+
assert_series_equal(tc.duplicated(keep=keep), expected)
104+
assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
105+
sc = tc.copy()
106+
sc.drop_duplicates(keep=keep, inplace=True)
107+
assert_series_equal(sc, tc[~expected])
108+
109+
@pytest.mark.parametrize('keep, expected',
110+
[('first', Series([False, False, True, True])),
111+
('last', Series([True, True, False, False])),
112+
(False, Series([True, True, True, True]))])
113+
def test_drop_duplicates_bool(self, keep, expected):
114+
tc = Series([True, False, True, False])
115+
116+
assert_series_equal(tc.duplicated(keep=keep), expected)
117+
assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
118+
sc = tc.copy()
119+
sc.drop_duplicates(keep=keep, inplace=True)
120+
assert_series_equal(sc, tc[~expected])
121+
122+
@pytest.mark.parametrize('keep, expected', [
123+
('first', Series([False, False, True, False, True], name='name')),
124+
('last', Series([True, True, False, False, False], name='name')),
125+
(False, Series([True, True, True, False, True], name='name'))
126+
])
127+
def test_duplicated_keep(self, keep, expected):
128+
s = Series(['a', 'b', 'b', 'c', 'a'], name='name')
129+
130+
result = s.duplicated(keep=keep)
131+
tm.assert_series_equal(result, expected)
132+
133+
@pytest.mark.parametrize('keep, expected', [
134+
('first', Series([False, False, True, False, True])),
135+
('last', Series([True, True, False, False, False])),
136+
(False, Series([True, True, True, False, True]))
137+
])
138+
def test_duplicated_nan_none(self, keep, expected):
139+
s = Series([np.nan, 3, 3, None, np.nan], dtype=object)
140+
141+
result = s.duplicated(keep=keep)
142+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)