From 28222cd07a56ba76e6ed8134da2e0176c1ad9809 Mon Sep 17 00:00:00 2001 From: Tuhin Mahmud Date: Sat, 10 Mar 2018 13:48:21 -0600 Subject: [PATCH 1/8] DOC:Improve the docstring of DataFrame.iloc() --- pandas/core/indexing.py | 32 ++++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ec2874b3bae95..75454834869fc 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1629,7 +1629,8 @@ def _getitem_axis(self, key, axis=None): class _iLocIndexer(_LocationIndexer): - """Purely integer-location based indexing for selection by position. + """ + Purely integer-location based indexing for selection by position. ``.iloc[]`` is primarily integer position based (from ``0`` to ``length-1`` of the axis), but may also be used with a boolean @@ -1648,8 +1649,35 @@ class _iLocIndexer(_LocationIndexer): out-of-bounds, except *slice* indexers which allow out-of-bounds indexing (this conforms with python/numpy *slice* semantics). - See more at :ref:`Selection by Position ` + See Also + -------- + DataFrame.ix : A primarily label-location based indexer, with integer position fallback. + DataFrame.loc : Fast integer location scalar accessor. + Examples + -------- + >>> import pandas as pd + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... 
{'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] + >>> df = pd.DataFrame(mydict) + >>> print(df.head()) + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + >>> print(df.iloc[0]) + a 1 + b 2 + c 3 + d 4 + Name: 0, dtype: int64 + >>> print(df.iloc[0:2]) + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + + ref:`Selection by Position ` """ _valid_types = ("integer, integer slice (START point is INCLUDED, END " From 9b103c1f0f7328cb0f8097103da604bb29a425e8 Mon Sep 17 00:00:00 2001 From: Tuhin Mahmud Date: Sat, 10 Mar 2018 14:08:23 -0600 Subject: [PATCH 2/8] DOC:Improve the docstring of DataFrame.iloc() --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 75454834869fc..32494f9d9e17d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1651,7 +1651,7 @@ class _iLocIndexer(_LocationIndexer): See Also -------- - DataFrame.ix : A primarily label-location based indexer, with integer position fallback. + DataFrame.ix : A primarily label-location based indexer DataFrame.loc : Fast integer location scalar accessor. Examples From 76271f6805e7dcf6e86c9d61f71cd473a016d528 Mon Sep 17 00:00:00 2001 From: Tuhin Mahmud Date: Sat, 10 Mar 2018 15:43:47 -0600 Subject: [PATCH 3/8] DOC:Improve the docstring of DataFrame.iloc() --- pandas/core/indexing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 32494f9d9e17d..634e854292cf3 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1649,35 +1649,35 @@ class _iLocIndexer(_LocationIndexer): out-of-bounds, except *slice* indexers which allow out-of-bounds indexing (this conforms with python/numpy *slice* semantics). + ref:`Selection by Position ` + See Also -------- - DataFrame.ix : A primarily label-location based indexer - DataFrame.loc : Fast integer location scalar accessor. 
+ DataFrame.iat : Fast integer location scalar accessor. + DataFrame.loc : Purely label-location based indexer for selection by label. Examples -------- >>> import pandas as pd >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, - ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, - ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] >>> df = pd.DataFrame(mydict) - >>> print(df.head()) + >>> df a b c d 0 1 2 3 4 1 100 200 300 400 2 1000 2000 3000 4000 - >>> print(df.iloc[0]) + >>> df.iloc[0] a 1 b 2 c 3 d 4 Name: 0, dtype: int64 - >>> print(df.iloc[0:2]) + >>> df.iloc[0:2] a b c d - 0 1 2 3 4 + 0 1 2 3 4 1 100 200 300 400 - - ref:`Selection by Position ` """ _valid_types = ("integer, integer slice (START point is INCLUDED, END " From 9cda098960a57885673f694fa6dbeb028745da0a Mon Sep 17 00:00:00 2001 From: Tuhin Mahmud Date: Sat, 10 Mar 2018 16:47:27 -0600 Subject: [PATCH 4/8] DOC:Improve the docstring of DataFrame.iloc() --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 634e854292cf3..54ea4bd6a4760 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1643,13 +1643,13 @@ class _iLocIndexer(_LocationIndexer): - A slice object with ints, e.g. ``1:7``. - A boolean array. - A ``callable`` function with one argument (the calling Series, DataFrame - or Panel) and that returns valid output for indexing (one of the above) + or Panel) and that returns valid output for indexing (one of the above). ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing (this conforms with python/numpy *slice* semantics). - ref:`Selection by Position ` + ref:`Selection by Position `. 
See Also -------- From 1ede54b901b21fd53fe24203cfbfb1bf378e893e Mon Sep 17 00:00:00 2001 From: Tuhin Mahmud Date: Sun, 11 Mar 2018 13:23:11 -0500 Subject: [PATCH 5/8] DOC:Improve the docstring of DataFrame.iloc() added 5 types of examples for iloc * Select using integer. * Select via index slicing. * Select using boolean array. * Select using callable function. * Multi index selection. Updated indentation removed import --- pandas/core/indexing.py | 64 +++++++++++++++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 54ea4bd6a4760..a0e570af636a7 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1655,29 +1655,77 @@ class _iLocIndexer(_LocationIndexer): -------- DataFrame.iat : Fast integer location scalar accessor. DataFrame.loc : Purely label-location based indexer for selection by label. + Series.iloc : Purely integer-location based indexing for + selection by position. Examples -------- - >>> import pandas as pd + >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, - ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, - ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] - >>> df = pd.DataFrame(mydict) + ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, + ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] + >>> df=pd.DataFrame(mydict) >>> df a b c d 0 1 2 3 4 1 100 200 300 400 2 1000 2000 3000 4000 + + Select using integer. + + >>> type(df.iloc[0]) + <class 'pandas.core.series.Series'> >>> df.iloc[0] a 1 b 2 c 3 d 4 Name: 0, dtype: int64 - >>> df.iloc[0:2] - a b c d - 0 1 2 3 4 - 1 100 200 300 400 + >>> type(df.iloc[[0]]) + <class 'pandas.core.frame.DataFrame'> + >>> df.iloc[[0]] + a b c d + 0 1 2 3 4 + + Multi index selection. + + >>> df.iloc[0,1] + 2 + + Select using list + + >>> df.iloc[[0,2],[1,3]] + b d + 0 2 4 + 2 2000 4000 + + Select via index slicing. 
+ + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 + >>> df.iloc[1:3, 0:3] + a b c + 1 100 200 300 + 2 1000 2000 3000 + + Select using boolean array. + + >>> df.iloc[:,[True,False,True,False]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 + + Select using callable function. + + >>> df.iloc[:, lambda df: [0, 2]] + a c + 0 1 3 + 1 100 300 + 2 1000 3000 """ _valid_types = ("integer, integer slice (START point is INCLUDED, END " From a72f864149c27dc669139b73b33dc280aa22d22a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 10:13:54 -0500 Subject: [PATCH 6/8] Updated examples. --- pandas/core/indexing.py | 65 ++++++++++++++++++++++++++++++----------- 1 file changed, 48 insertions(+), 17 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a0e570af636a7..ad42bdf4679bd 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1644,12 +1644,14 @@ class _iLocIndexer(_LocationIndexer): - A boolean array. - A ``callable`` function with one argument (the calling Series, DataFrame or Panel) and that returns valid output for indexing (one of the above). + This is useful in method chains, when you don't have a reference to the + calling object, but would like to base your selection on some value. ``.iloc`` will raise ``IndexError`` if a requested indexer is out-of-bounds, except *slice* indexers which allow out-of-bounds indexing (this conforms with python/numpy *slice* semantics). - ref:`Selection by Position <indexing.integer>`. + See more at :ref:`Selection by Position <indexing.integer>`. See Also -------- @@ -1664,14 +1666,16 @@ class _iLocIndexer(_LocationIndexer): >>> mydict = [{'a': 1, 'b': 2, 'c': 3, 'd': 4}, ... {'a': 100, 'b': 200, 'c': 300, 'd': 400}, ... {'a': 1000, 'b': 2000, 'c': 3000, 'd': 4000 }] - >>> df=pd.DataFrame(mydict) + >>> df = pd.DataFrame(mydict) >>> df a b c d 0 1 2 3 4 1 100 200 300 400 2 1000 2000 3000 4000 - Select using integer. + **Indexing just the rows** + + With a scalar integer. 
>>> type(df.iloc[0]) <class 'pandas.core.series.Series'> @@ -1681,31 +1685,58 @@ class _iLocIndexer(_LocationIndexer): c 3 d 4 Name: 0, dtype: int64 - >>> type(df.iloc[[0]]) - <class 'pandas.core.frame.DataFrame'> + + With a list of integers. + >>> df.iloc[[0]] a b c d 0 1 2 3 4 + >>> type(df.iloc[[0]]) + <class 'pandas.core.frame.DataFrame'> - Multi index selection. + >>> df.iloc[[0, 1]] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 - >>> df.iloc[0,1] - 2 + With a slice object. - Select using list + >>> df.iloc[:3] + a b c d + 0 1 2 3 4 + 1 100 200 300 400 + 2 1000 2000 3000 4000 - >>> df.iloc[[0,2],[1,3]] - b d - 0 2 4 - 2 2000 4000 + With a boolean mask the same length as the index. - Select via index slicing. + >>> df.iloc[[True, False, True]] + a b c d + 0 1 2 3 4 + 2 1000 2000 3000 4000 - >>> df.iloc[:3] + With a callable, useful in method chains. The `x` passed + to the ``lambda`` is the DataFrame being sliced. This selects + the rows whose index label is even. + + >>> df.iloc[lambda x: x.index % 2 == 0] a b c d 0 1 2 3 4 - 1 100 200 300 400 2 1000 2000 3000 4000 + + **Indexing both axes** + + With scalars. + + >>> df.iloc[0, 1] + 2 + + With lists. + + >>> df.iloc[[0, 2], [1, 3]] + b d + 0 2 4 + 2 2000 4000 + >>> df.iloc[1:3, 0:3] a b c 1 100 200 300 @@ -1713,7 +1744,7 @@ class _iLocIndexer(_LocationIndexer): Select using boolean array. - >>> df.iloc[:,[True,False,True,False]] + >>> df.iloc[:, [True, False, True, False]] a c 0 1 3 1 100 300 From 3d10bd8b506297a1c5e5eef2c04e7127fb7fa952 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 10:15:40 -0500 Subject: [PATCH 7/8] Consistency --- pandas/core/indexing.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ad42bdf4679bd..c963b724d3116 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1725,12 +1725,12 @@ class _iLocIndexer(_LocationIndexer): **Indexing both axes** - With scalars. + With scalar integers. >>> df.iloc[0, 1] 2 - With lists. + With lists of integers. 
>>> df.iloc[[0, 2], [1, 3]] b d @@ -1742,7 +1742,7 @@ class _iLocIndexer(_LocationIndexer): 1 100 200 300 2 1000 2000 3000 - Select using boolean array. + With a boolean array whose length matches the columns. >>> df.iloc[:, [True, False, True, False]] a c @@ -1750,7 +1750,7 @@ class _iLocIndexer(_LocationIndexer): 1 100 300 2 1000 3000 - Select using callable function. + With a callable function that expects the Series or DataFrame. >>> df.iloc[:, lambda df: [0, 2]] a c From f553cebb5e4e9703c9dfd15631c5b6b4005c2cac Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 10:21:00 -0500 Subject: [PATCH 8/8] Example updates --- pandas/core/indexing.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c963b724d3116..05adee98f518e 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1699,7 +1699,7 @@ class _iLocIndexer(_LocationIndexer): 0 1 2 3 4 1 100 200 300 400 - With a slice object. + With a `slice` object. >>> df.iloc[:3] a b c d @@ -1725,6 +1725,9 @@ class _iLocIndexer(_LocationIndexer): **Indexing both axes** + You can mix the indexer types for the index and columns. Use ``:`` to + select the entire axis. + With scalar integers. >>> df.iloc[0, 1] @@ -1737,6 +1740,8 @@ class _iLocIndexer(_LocationIndexer): 0 2 4 2 2000 4000 + With `slice` objects. + >>> df.iloc[1:3, 0:3] a b c 1 100 200 300