@@ -4694,76 +4694,87 @@ def nlargest(self, n, columns, keep='first'):
4694
4694
4695
4695
Examples
4696
4696
--------
4697
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4698
- ... 'b': list('abdcef'),
4699
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4697
+ >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000,
4698
+ ... 434000, 434000, 337000, 11300,
4699
+ ... 11300, 11300],
4700
+ ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128,
4701
+ ... 17036, 182, 38, 311],
4702
+ ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN",
4703
+ ... "IS", "NR", "TV", "AI"]},
4704
+ ... index=["Italy", "France", "Malta",
4705
+ ... "Maldives", "Brunei", "Iceland",
4706
+ ... "Nauru", "Tuvalu", "Anguilla"])
4700
4707
>>> df
4701
- a b c
4702
- 0 1 a 1.0
4703
- 1 10 b 2.0
4704
- 2 8 d NaN
4705
- 3 11 c 3.0
4706
- 4 8 e 4.0
4707
- 5 2 f 9.0
4708
+ population GDP alpha-2
4709
+ Italy 59000000 1937894 IT
4710
+ France 65000000 2583560 FR
4711
+ Malta 434000 12011 MT
4712
+ Maldives 434000 4520 MV
4713
+ Brunei 434000 12128 BN
4714
+ Iceland 337000 17036 IS
4715
+ Nauru 11300 182 NR
4716
+ Tuvalu 11300 38 TV
4717
+ Anguilla 11300 311 AI
4708
4718
4709
4719
In the following example, we will use ``nlargest`` to select the three
4710
- rows having the largest values in column "a ".
4720
+ rows having the largest values in column "population".
4711
4721
4712
- >>> df.nlargest(3, 'a ')
4713
- a b c
4714
- 3 11 c 3.0
4715
- 1 10 b 2.0
4716
- 2 8 d NaN
4722
+ >>> df.nlargest(3, 'population')
4723
+ population GDP alpha-2
4724
+ France 65000000 2583560 FR
4725
+ Italy 59000000 1937894 IT
4726
+ Malta 434000 12011 MT
4717
4727
4718
4728
When using ``keep='last'``, ties are resolved in reverse order:
4719
4729
4720
- >>> df.nlargest(3, 'a ', keep='last')
4721
- a b c
4722
- 3 11 c 3.0
4723
- 1 10 b 2.0
4724
- 4 8 e 4.0
4730
+ >>> df.nlargest(3, 'population', keep='last')
4731
+ population GDP alpha-2
4732
+ France 65000000 2583560 FR
4733
+ Italy 59000000 1937894 IT
4734
+ Brunei 434000 12128 BN
4725
4735
4726
4736
When using ``keep='all'``, all duplicate items are maintained:
4727
4737
4728
- >>> df.nlargest(3, 'a', keep='all')
4729
- a b c
4730
- 3 11 c 3.0
4731
- 1 10 b 2.0
4732
- 2 8 d NaN
4733
- 4 8 e 4.0
4738
+ >>> df.nlargest(3, 'population', keep='all')
4739
+ population GDP alpha-2
4740
+ France 65000000 2583560 FR
4741
+ Italy 59000000 1937894 IT
4742
+ Malta 434000 12011 MT
4743
+ Maldives 434000 4520 MV
4744
+ Brunei 434000 12128 BN
4734
4745
4735
- To order by the largest values in column "a" and then "c", we can
4736
- specify multiple columns like in the next example.
4737
-
4738
- >>> df.nlargest(3, ['a', 'c'])
4739
- a b c
4740
- 4 8 e 4.0
4741
- 3 11 c 3.0
4742
- 1 10 b 2.0
4743
-
4744
- Attempting to use ``nlargest`` on non-numeric dtypes will raise a
4745
- ``TypeError``:
4746
-
4747
- >>> df.nlargest(3, 'b')
4746
+ To order by the largest values in column "population" and then "GDP",
4747
+ we can specify multiple columns like in the next example.
4748
4748
4749
- Traceback (most recent call last):
4750
- TypeError: Column 'b' has dtype object, cannot use method 'nlargest'
4749
+ >>> df.nlargest(3, ['population', 'GDP'])
4750
+ population GDP alpha-2
4751
+ France 65000000 2583560 FR
4752
+ Italy 59000000 1937894 IT
4753
+ Brunei 434000 12128 BN
4751
4754
"""
4752
4755
return algorithms .SelectNFrame (self ,
4753
4756
n = n ,
4754
4757
keep = keep ,
4755
4758
columns = columns ).nlargest ()
4756
4759
4757
4760
def nsmallest (self , n , columns , keep = 'first' ):
4758
- """Get the rows of a DataFrame sorted by the `n` smallest
4759
- values of `columns`.
4761
+ """
4762
+ Return the first `n` rows ordered by `columns` in ascending order.
4763
+
4764
+ Return the first `n` rows with the smallest values in `columns`, in
4765
+ ascending order. The columns that are not specified are returned as
4766
+ well, but not used for ordering.
4767
+
4768
+ This method is equivalent to
4769
+ ``df.sort_values(columns, ascending=True).head(n)``, but more
4770
+ performant.
4760
4771
4761
4772
Parameters
4762
4773
----------
4763
4774
n : int
4764
- Number of items to retrieve
4775
+ Number of items to retrieve.
4765
4776
columns : list or str
4766
- Column name or names to order by
4777
+ Column name or names to order by.
4767
4778
keep : {'first', 'last', 'all'}, default 'first'
4768
4779
Where there are duplicate values:
4769
4780
@@ -4778,62 +4789,70 @@ def nsmallest(self, n, columns, keep='first'):
4778
4789
-------
4779
4790
DataFrame
4780
4791
4792
+ See Also
4793
+ --------
4794
+ DataFrame.nlargest : Return the first `n` rows ordered by `columns` in
4795
+ descending order.
4796
+ DataFrame.sort_values : Sort DataFrame by the values.
4797
+ DataFrame.head : Return the first `n` rows without re-ordering.
4798
+
4781
4799
Examples
4782
4800
--------
4783
- >>> df = pd.DataFrame({'a': [1, 10, 8, 11, 8, 2],
4784
- ... 'b': list('abdcef'),
4785
- ... 'c': [1.0, 2.0, np.nan, 3.0, 4.0, 9.0]})
4801
+ >>> df = pd.DataFrame({'population': [59000000, 65000000, 434000,
4802
+ ... 434000, 434000, 337000, 11300,
4803
+ ... 11300, 11300],
4804
+ ... 'GDP': [1937894, 2583560 , 12011, 4520, 12128,
4805
+ ... 17036, 182, 38, 311],
4806
+ ... 'alpha-2': ["IT", "FR", "MT", "MV", "BN",
4807
+ ... "IS", "NR", "TV", "AI"]},
4808
+ ... index=["Italy", "France", "Malta",
4809
+ ... "Maldives", "Brunei", "Iceland",
4810
+ ... "Nauru", "Tuvalu", "Anguilla"])
4786
4811
>>> df
4787
- a b c
4788
- 0 1 a 1.0
4789
- 1 10 b 2.0
4790
- 2 8 d NaN
4791
- 3 11 c 3.0
4792
- 4 8 e 4.0
4793
- 5 2 f 9.0
4812
+ population GDP alpha-2
4813
+ Italy 59000000 1937894 IT
4814
+ France 65000000 2583560 FR
4815
+ Malta 434000 12011 MT
4816
+ Maldives 434000 4520 MV
4817
+ Brunei 434000 12128 BN
4818
+ Iceland 337000 17036 IS
4819
+ Nauru 11300 182 NR
4820
+ Tuvalu 11300 38 TV
4821
+ Anguilla 11300 311 AI
4794
4822
4795
4823
In the following example, we will use ``nsmallest`` to select the
4796
4824
three rows having the smallest values in column "population".
4797
4825
4798
- >>> df.nsmallest(3, 'a ')
4799
- a b c
4800
- 0 1 a 1.0
4801
- 5 2 f 9.0
4802
- 2 8 d NaN
4826
+ >>> df.nsmallest(3, 'population')
4827
+ population GDP alpha-2
4828
+ Nauru 11300 182 NR
4829
+ Tuvalu 11300 38 TV
4830
+ Anguilla 11300 311 AI
4803
4831
4804
4832
When using ``keep='last'``, ties are resolved in reverse order:
4805
4833
4806
- >>> df.nsmallest(3, 'a ', keep='last')
4807
- a b c
4808
- 0 1 a 1.0
4809
- 5 2 f 9.0
4810
- 4 8 e 4.0
4834
+ >>> df.nsmallest(3, 'population', keep='last')
4835
+ population GDP alpha-2
4836
+ Anguilla 11300 311 AI
4837
+ Tuvalu 11300 38 TV
4838
+ Nauru 11300 182 NR
4811
4839
4812
4840
When using ``keep='all'``, all duplicate items are maintained:
4813
4841
4814
- >>> df.nsmallest(3, 'a', keep='all')
4815
- a b c
4816
- 0 1 a 1.0
4817
- 5 2 f 9.0
4818
- 2 8 d NaN
4819
- 4 8 e 4.0
4842
+ >>> df.nsmallest(3, 'population', keep='all')
4843
+ population GDP alpha-2
4844
+ Nauru 11300 182 NR
4845
+ Tuvalu 11300 38 TV
4846
+ Anguilla 11300 311 AI
4820
4847
4821
4848
To order by the smallest values in column "population" and then "GDP", we can
4822
4849
specify multiple columns like in the next example.
4823
4850
4824
- >>> df.nsmallest(3, ['a', 'c'])
4825
- a b c
4826
- 0 1 a 1.0
4827
- 5 2 f 9.0
4828
- 4 8 e 4.0
4829
-
4830
- Attempting to use ``nsmallest`` on non-numeric dtypes will raise a
4831
- ``TypeError``:
4832
-
4833
- >>> df.nsmallest(3, 'b')
4834
-
4835
- Traceback (most recent call last):
4836
- TypeError: Column 'b' has dtype object, cannot use method 'nsmallest'
4851
+ >>> df.nsmallest(3, ['population', 'GDP'])
4852
+ population GDP alpha-2
4853
+ Tuvalu 11300 38 TV
4854
+ Nauru 11300 182 NR
4855
+ Anguilla 11300 311 AI
4837
4856
"""
4838
4857
return algorithms .SelectNFrame (self ,
4839
4858
n = n ,
0 commit comments