diff --git a/ci/doctests.sh b/ci/doctests.sh index 2af5dbd26aeb1..2b5edc5aa1172 100755 --- a/ci/doctests.sh +++ b/ci/doctests.sh @@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then fi pytest --doctest-modules -v pandas/core/series.py \ - -k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict" + -k"-nonzero -reindex -searchsorted -to_dict" if [ $? -ne "0" ]; then RET=1 diff --git a/pandas/core/series.py b/pandas/core/series.py index a4d403e4bcd94..8ce58ed6f0554 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2743,17 +2743,20 @@ def nlargest(self, n=5, keep='first'): Parameters ---------- - n : int - Return this many descending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + n : int, default 5 + Return this many descending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + - ``first`` : take the first occurrences based on the index order + - ``last`` : take the last occurrences based on the index order + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. Returns ------- - top_n : Series - The n largest values in the Series, in sorted order + Series + The `n` largest values in the Series, sorted in decreasing order. Notes ----- @@ -2762,23 +2765,70 @@ def nlargest(self, n=5, keep='first'): See Also -------- - Series.nsmallest + Series.nsmallest: Get the `n` smallest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. 
Examples -------- - >>> s = pd.Series(np.random.randn(10**6)) - >>> s.nlargest(10) # only sorts up to the N requested - 219921 4.644710 - 82124 4.608745 - 421689 4.564644 - 425277 4.447014 - 718691 4.414137 - 43154 4.403520 - 283187 4.313922 - 595519 4.273635 - 503969 4.250236 - 121637 4.240952 - dtype: float64 + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Malta": 434000, "Maldives": 434000, + ... "Brunei": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... "Anguilla": 11300, "Montserrat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Montserrat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nlargest() + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3``. Default `keep` value is 'first' + so Malta will be kept. + + >>> s.nlargest(3) + France 65000000 + Italy 59000000 + Malta 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` and keeping the last duplicates. + Brunei will be kept since it is the last with value 434000 based on + the index order. + + >>> s.nlargest(3, keep='last') + France 65000000 + Italy 59000000 + Brunei 434000 + dtype: int64 + + The `n` largest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has five elements due to the three duplicates. 
+ + >>> s.nlargest(3, keep='all') + France 65000000 + Italy 59000000 + Malta 434000 + Maldives 434000 + Brunei 434000 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest() @@ -2788,17 +2838,20 @@ def nsmallest(self, n=5, keep='first'): Parameters ---------- - n : int - Return this many ascending sorted values - keep : {'first', 'last'}, default 'first' - Where there are duplicate values: - - ``first`` : take the first occurrence. - - ``last`` : take the last occurrence. + n : int, default 5 + Return this many ascending sorted values. + keep : {'first', 'last', 'all'}, default 'first' + When there are duplicate values that cannot all fit in a + Series of `n` elements: + - ``first`` : take the first occurrences based on the index order + - ``last`` : take the last occurrences based on the index order + - ``all`` : keep all occurrences. This can result in a Series of + size larger than `n`. Returns ------- - bottom_n : Series - The n smallest values in the Series, in sorted order + Series + The `n` smallest values in the Series, sorted in increasing order. Notes ----- @@ -2807,23 +2860,69 @@ def nsmallest(self, n=5, keep='first'): See Also -------- - Series.nlargest + Series.nlargest: Get the `n` largest elements. + Series.sort_values: Sort Series by values. + Series.head: Return the first `n` rows. Examples -------- - >>> s = pd.Series(np.random.randn(10**6)) - >>> s.nsmallest(10) # only sorts up to the N requested - 288532 -4.954580 - 732345 -4.835960 - 64803 -4.812550 - 446457 -4.609998 - 501225 -4.483945 - 669476 -4.472935 - 973615 -4.401699 - 621279 -4.355126 - 773916 -4.347355 - 359919 -4.331927 - dtype: float64 + >>> countries_population = {"Italy": 59000000, "France": 65000000, + ... "Brunei": 434000, "Malta": 434000, + ... "Maldives": 434000, "Iceland": 337000, + ... "Nauru": 11300, "Tuvalu": 11300, + ... 
"Anguilla": 11300, "Monserat": 5200} + >>> s = pd.Series(countries_population) + >>> s + Italy 59000000 + France 65000000 + Brunei 434000 + Malta 434000 + Maldives 434000 + Iceland 337000 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Monserat 5200 + dtype: int64 + + The `n` largest elements where ``n=5`` by default. + + >>> s.nsmallest() + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + Iceland 337000 + dtype: int64 + + The `n` smallest elements where ``n=3``. Default `keep` value is + 'first' so Nauru and Tuvalu will be kept. + + >>> s.nsmallest(3) + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` and keeping the last + duplicates. Anguilla and Tuvalu will be kept since they are the last + with value 11300 based on the index order. + + >>> s.nsmallest(3, keep='last') + Monserat 5200 + Anguilla 11300 + Tuvalu 11300 + dtype: int64 + + The `n` smallest elements where ``n=3`` with all duplicates kept. Note + that the returned Series has four elements due to the three duplicates. + + >>> s.nsmallest(3, keep='all') + Monserat 5200 + Nauru 11300 + Tuvalu 11300 + Anguilla 11300 + dtype: int64 """ return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()