|
7 | 7 | import sys
|
8 | 8 | import pytest
|
9 | 9 |
|
| 10 | +from string import ascii_lowercase |
10 | 11 | from numpy import nan
|
11 | 12 | from numpy.random import randn
|
12 | 13 | import numpy as np
|
13 | 14 |
|
14 |
| -from pandas.compat import lrange |
| 15 | +from pandas.compat import lrange, product |
15 | 16 | from pandas import (compat, isnull, notnull, DataFrame, Series,
|
16 | 17 | MultiIndex, date_range, Timestamp)
|
17 | 18 | import pandas as pd
|
@@ -1120,73 +1121,6 @@ def __nonzero__(self):
|
1120 | 1121 | self.assertTrue(r1.all())
|
1121 | 1122 |
|
1122 | 1123 | # ----------------------------------------------------------------------
|
1123 |
| - # Top / bottom |
1124 |
| - |
1125 |
| - def test_nlargest(self): |
1126 |
| - # GH10393 |
1127 |
| - from string import ascii_lowercase |
1128 |
| - df = pd.DataFrame({'a': np.random.permutation(10), |
1129 |
| - 'b': list(ascii_lowercase[:10])}) |
1130 |
| - result = df.nlargest(5, 'a') |
1131 |
| - expected = df.sort_values('a', ascending=False).head(5) |
1132 |
| - tm.assert_frame_equal(result, expected) |
1133 |
| - |
1134 |
| - def test_nlargest_multiple_columns(self): |
1135 |
| - from string import ascii_lowercase |
1136 |
| - df = pd.DataFrame({'a': np.random.permutation(10), |
1137 |
| - 'b': list(ascii_lowercase[:10]), |
1138 |
| - 'c': np.random.permutation(10).astype('float64')}) |
1139 |
| - result = df.nlargest(5, ['a', 'b']) |
1140 |
| - expected = df.sort_values(['a', 'b'], ascending=False).head(5) |
1141 |
| - tm.assert_frame_equal(result, expected) |
1142 |
| - |
1143 |
| - def test_nsmallest(self): |
1144 |
| - from string import ascii_lowercase |
1145 |
| - df = pd.DataFrame({'a': np.random.permutation(10), |
1146 |
| - 'b': list(ascii_lowercase[:10])}) |
1147 |
| - result = df.nsmallest(5, 'a') |
1148 |
| - expected = df.sort_values('a').head(5) |
1149 |
| - tm.assert_frame_equal(result, expected) |
1150 |
| - |
1151 |
| - def test_nsmallest_multiple_columns(self): |
1152 |
| - from string import ascii_lowercase |
1153 |
| - df = pd.DataFrame({'a': np.random.permutation(10), |
1154 |
| - 'b': list(ascii_lowercase[:10]), |
1155 |
| - 'c': np.random.permutation(10).astype('float64')}) |
1156 |
| - result = df.nsmallest(5, ['a', 'c']) |
1157 |
| - expected = df.sort_values(['a', 'c']).head(5) |
1158 |
| - tm.assert_frame_equal(result, expected) |
1159 |
| - |
1160 |
| - def test_nsmallest_nlargest_duplicate_index(self): |
1161 |
| - # GH 13412 |
1162 |
| - df = pd.DataFrame({'a': [1, 2, 3, 4], |
1163 |
| - 'b': [4, 3, 2, 1], |
1164 |
| - 'c': [0, 1, 2, 3]}, |
1165 |
| - index=[0, 0, 1, 1]) |
1166 |
| - result = df.nsmallest(4, 'a') |
1167 |
| - expected = df.sort_values('a').head(4) |
1168 |
| - tm.assert_frame_equal(result, expected) |
1169 |
| - |
1170 |
| - result = df.nlargest(4, 'a') |
1171 |
| - expected = df.sort_values('a', ascending=False).head(4) |
1172 |
| - tm.assert_frame_equal(result, expected) |
1173 |
| - |
1174 |
| - result = df.nsmallest(4, ['a', 'c']) |
1175 |
| - expected = df.sort_values(['a', 'c']).head(4) |
1176 |
| - tm.assert_frame_equal(result, expected) |
1177 |
| - |
1178 |
| - result = df.nsmallest(4, ['c', 'a']) |
1179 |
| - expected = df.sort_values(['c', 'a']).head(4) |
1180 |
| - tm.assert_frame_equal(result, expected) |
1181 |
| - |
1182 |
| - result = df.nlargest(4, ['a', 'c']) |
1183 |
| - expected = df.sort_values(['a', 'c'], ascending=False).head(4) |
1184 |
| - tm.assert_frame_equal(result, expected) |
1185 |
| - |
1186 |
| - result = df.nlargest(4, ['c', 'a']) |
1187 |
| - expected = df.sort_values(['c', 'a'], ascending=False).head(4) |
1188 |
| - tm.assert_frame_equal(result, expected) |
1189 |
| - # ---------------------------------------------------------------------- |
1190 | 1124 | # Isin
|
1191 | 1125 |
|
1192 | 1126 | def test_isin(self):
|
@@ -1965,3 +1899,111 @@ def test_dot(self):
|
1965 | 1899 |
|
1966 | 1900 | with tm.assertRaisesRegexp(ValueError, 'aligned'):
|
1967 | 1901 | df.dot(df2)
|
| 1902 | + |
| 1903 | + |
| 1904 | +@pytest.fixture |
| 1905 | +def df_duplicates(): |
| 1906 | + return pd.DataFrame({'a': [1, 2, 3, 4, 4], |
| 1907 | + 'b': [1, 1, 1, 1, 1], |
| 1908 | + 'c': [0, 1, 2, 5, 4]}, |
| 1909 | + index=[0, 0, 1, 1, 1]) |
| 1910 | + |
| 1911 | + |
| 1912 | +@pytest.fixture |
| 1913 | +def df_strings(): |
| 1914 | + return pd.DataFrame({'a': np.random.permutation(10), |
| 1915 | + 'b': list(ascii_lowercase[:10]), |
| 1916 | + 'c': np.random.permutation(10).astype('float64')}) |
| 1917 | + |
| 1918 | + |
| 1919 | +class TestNLargestNSmallest(object): |
| 1920 | + |
| 1921 | + # ---------------------------------------------------------------------- |
| 1922 | + # Top / bottom |
| 1923 | + @pytest.mark.parametrize( |
| 1924 | + 'n, order', |
| 1925 | + product(range(1, 11), |
| 1926 | + [['a'], |
| 1927 | + ['c'], |
| 1928 | + ['a', 'b'], |
| 1929 | + ['a', 'c'], |
| 1930 | + ['b', 'a'], |
| 1931 | + ['b', 'c'], |
| 1932 | + ['a', 'b', 'c'], |
| 1933 | + ['c', 'a', 'b'], |
| 1934 | + ['c', 'b', 'a'], |
| 1935 | + ['b', 'c', 'a'], |
| 1936 | + ['b', 'a', 'c'], |
| 1937 | +
|
| 1938 | + # dups! |
| 1939 | + ['b', 'c', 'c'], |
| 1940 | +
|
| 1941 | + ])) |
| 1942 | + def test_n(self, df_strings, n, order): |
| 1943 | + # GH10393 |
| 1944 | + df = df_strings |
| 1945 | + |
| 1946 | + error_msg = ( |
| 1947 | + "'b' has dtype: object, cannot use method 'nsmallest' " |
| 1948 | + "with this dtype" |
| 1949 | + ) |
| 1950 | + if 'b' in order: |
| 1951 | + with pytest.raises(TypeError) as exception: |
| 1952 | + df.nsmallest(n, order) |
| 1953 | + assert exception.value, error_msg |
| 1954 | + else: |
| 1955 | + result = df.nsmallest(n, order) |
| 1956 | + expected = df.sort_values(order).head(n) |
| 1957 | + tm.assert_frame_equal(result, expected) |
| 1958 | + |
| 1959 | + if 'b' in order: |
| 1960 | + with pytest.raises(TypeError) as exception: |
| 1961 | + df.nsmallest(n, order) |
| 1962 | + assert exception.value, error_msg |
| 1963 | + else: |
| 1964 | + result = df.nlargest(n, order) |
| 1965 | + expected = df.sort_values(order, ascending=False).head(n) |
| 1966 | + tm.assert_frame_equal(result, expected) |
| 1967 | + |
| 1968 | + def test_n_error(self, df_strings): |
| 1969 | + # b alone raises a TypeError |
| 1970 | + df = df_strings |
| 1971 | + with pytest.raises(TypeError): |
| 1972 | + df.nsmallest(1, 'b') |
| 1973 | + with pytest.raises(TypeError): |
| 1974 | + df.nlargest(1, 'b') |
| 1975 | + |
| 1976 | + def test_n_identical_values(self): |
| 1977 | + # GH15297 |
| 1978 | + df = pd.DataFrame({'a': [1] * 5, 'b': [1, 2, 3, 4, 5]}) |
| 1979 | + |
| 1980 | + result = df.nlargest(3, 'a') |
| 1981 | + expected = pd.DataFrame( |
| 1982 | + {'a': [1] * 3, 'b': [1, 2, 3]}, index=[0, 1, 2] |
| 1983 | + ) |
| 1984 | + tm.assert_frame_equal(result, expected) |
| 1985 | + |
| 1986 | + result = df.nsmallest(3, 'a') |
| 1987 | + expected = pd.DataFrame({'a': [1] * 3, 'b': [1, 2, 3]}) |
| 1988 | + tm.assert_frame_equal(result, expected) |
| 1989 | + |
| 1990 | + @pytest.mark.parametrize( |
| 1991 | + 'n, order', |
| 1992 | + product([1, 2, 3, 4, 5], |
| 1993 | + [['a', 'b', 'c'], |
| 1994 | + ['c', 'b', 'a'], |
| 1995 | + ['a'], |
| 1996 | + ['b'], |
| 1997 | + ['a', 'b'], |
| 1998 | + ['c', 'b']])) |
| 1999 | + def test_n_duplicate_index(self, df_duplicates, n, order): |
| 2000 | + # GH 13412 |
| 2001 | + |
| 2002 | + df = df_duplicates |
| 2003 | + result = df.nsmallest(n, order) |
| 2004 | + expected = df.sort_values(order).head(n) |
| 2005 | + tm.assert_frame_equal(result, expected) |
| 2006 | + |
| 2007 | + result = df.nlargest(n, order) |
| 2008 | + expected = df.sort_values(order, ascending=False).head(n) |
| 2009 | + tm.assert_frame_equal(result, expected) |
0 commit comments