From dc9aabfae7e9659bc08fa733b13c137fea817c2a Mon Sep 17 00:00:00 2001 From: BielStela Date: Sat, 10 Mar 2018 13:41:54 +0100 Subject: [PATCH 1/7] DOC: improved hexbin plot docstring --- pandas/plotting/_core.py | 53 ++++++++++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 98fdcf8f94ae0..4c9bd246b51d9 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2874,25 +2874,62 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Hexbin plot + Make hexagonal binning plots. + + Make an hexagonal binning plot of `x` versus `y`, where `x`, + `y` are 1-D sequences of the same length, `N`. If `C` is `None` + (the default), this is an histogram of the number of occurrences + of the observations at (x[i],y[i]). + + If `C` is specified, specifies values at given coordinates + (x[i],y[i]). These values are accumulated for each hexagonal + bin and then reduced according to `reduce_C_function`, + having as default + the numpy's mean function (np.mean). (If *C* is + specified, it must also be a 1-D sequence of the same length + as `x` and `y`.) Parameters ---------- - x, y : label or position, optional - Coordinates for each point. + x : label or position, optional + Coordinates for x point. + y : label or position, optional + Coordinates for y point. C : label or position, optional The value at each `(x, y)` point. - reduce_C_function : callable, optional + reduce_C_function : callable, optional, default `mean` Function of one argument that reduces all the values in a bin to a single number (e.g. `mean`, `max`, `sum`, `std`). - gridsize : int, optional - Number of bins. - `**kwds` : optional + gridsize : int, optional, default 100 + The number of hexagons in the x-direction. + The corresponding number of hexagons in the y-direction is + chosen in a way that the hexagons are approximately regular. + Alternatively, + gridsize can be a tuple with two elements specifying the number of + hexagons in the x-direction and the y-direction. + kwds : optional Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. Returns ------- - axes : matplotlib.AxesSubplot or np.array of them + axes : matplotlib.AxesSubplot or np.array of them. + + See Also + -------- + matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib. + + Examples + -------- + + .. plot:: + :context: close-figs + + >>> from sklearn.datasets import load_iris + >>> iris = load_iris() + >>> df = pd.DataFrame(iris.data, columns=iris.feature_names) + >>> hexbin = df.plot.hexbin(x='sepal length (cm)', + ... y='sepal width (cm)', + ... gridsize=10, cmap='viridis') """ if reduce_C_function is not None: kwds['reduce_C_function'] = reduce_C_function From 17d653b57af8f6adb5aab049acb36d1725403297 Mon Sep 17 00:00:00 2001 From: BielStela Date: Sun, 11 Mar 2018 14:40:27 +0100 Subject: [PATCH 2/7] minor fixes from comments and new example using np.random.randn() --- pandas/plotting/_core.py | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 4c9bd246b51d9..cd898ad1775e8 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2874,27 +2874,25 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Make hexagonal binning plots. + Make hexagonal binning plot. - Make an hexagonal binning plot of `x` versus `y`, where `x`, - `y` are 1-D sequences of the same length, `N`. If `C` is `None` + Make an hexagonal binning plot of `x` versus `y`. If `C` is `None` (the default), this is an histogram of the number of occurrences of the observations at (x[i],y[i]). If `C` is specified, specifies values at given coordinates (x[i],y[i]). These values are accumulated for each hexagonal bin and then reduced according to `reduce_C_function`, - having as default - the numpy's mean function (np.mean). (If *C* is - specified, it must also be a 1-D sequence of the same length - as `x` and `y`.) + having as default the numpy's mean function (np.mean). + (If `C` is specified, it must also be a 1-D sequence + of the same length as `x` and `y`.) Parameters ---------- - x : label or position, optional - Coordinates for x point. - y : label or position, optional - Coordinates for y point. + x : label or position + Coordinates for x points. + y : label or position + Coordinates for y points. C : label or position, optional The value at each `(x, y)` point. reduce_C_function : callable, optional, default `mean` @@ -2905,18 +2903,19 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, The corresponding number of hexagons in the y-direction is chosen in a way that the hexagons are approximately regular. Alternatively, - gridsize can be a tuple with two elements specifying the number of - hexagons in the x-direction and the y-direction. + gridsize can be a tuple with two elements specifying the number + of hexagons in the x-direction and the y-direction. kwds : optional Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. Returns ------- - axes : matplotlib.AxesSubplot or np.array of them. + axes : matplotlib.AxesSubplot. See Also -------- - matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib. + matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib, + the matplotlib function that is used under the hood. Examples -------- @@ -2924,12 +2923,10 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, .. plot:: :context: close-figs - >>> from sklearn.datasets import load_iris - >>> iris = load_iris() - >>> df = pd.DataFrame(iris.data, columns=iris.feature_names) - >>> hexbin = df.plot.hexbin(x='sepal length (cm)', - ... y='sepal width (cm)', - ... gridsize=10, cmap='viridis') + >>> n = 100000 + >>> df = pd.DataFrame({'x':np.random.randn(n), + ... 'y':np.random.randn(n)}) + >>> hexbin = df.plot.hexbin(x='x', y='y', cmap='viridis') """ if reduce_C_function is not None: kwds['reduce_C_function'] = reduce_C_function From c686467fc41d5e2a8905c1b1bcf877a807183ac0 Mon Sep 17 00:00:00 2001 From: BielStela Date: Wed, 14 Mar 2018 12:56:30 +0100 Subject: [PATCH 3/7] Minor fixes to hexbin docstring and new example using C parameter --- pandas/plotting/_core.py | 57 +++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index cd898ad1775e8..0f5696bea91d3 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2874,39 +2874,39 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Make hexagonal binning plot. + Generate an hexagonal binning plot. - Make an hexagonal binning plot of `x` versus `y`. If `C` is `None` + Generate an hexagonal binning plot of `x` versus `y`. If `C` is `None` (the default), this is an histogram of the number of occurrences - of the observations at (x[i],y[i]). + of the observations at ``(x[i], y[i])``. If `C` is specified, specifies values at given coordinates - (x[i],y[i]). These values are accumulated for each hexagonal + ``(x[i], y[i])``. These values are accumulated for each hexagonal bin and then reduced according to `reduce_C_function`, - having as default the numpy's mean function (np.mean). + having as default the numpy's mean function (:meth:`numpy.mean`). (If `C` is specified, it must also be a 1-D sequence of the same length as `x` and `y`.) Parameters ---------- - x : label or position - Coordinates for x points. - y : label or position - Coordinates for y points. - C : label or position, optional - The value at each `(x, y)` point. - reduce_C_function : callable, optional, default `mean` + x : int or str + The column label or position for x points. + y : int or str + The column label or position for y points. + C : int or str, optional + The column label or position for the value of `(x, y)` point. + reduce_C_function : callable, default `np.mean` Function of one argument that reduces all the values in a bin to - a single number (e.g. `mean`, `max`, `sum`, `std`). - gridsize : int, optional, default 100 + a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). + gridsize : int or tuple of (int, int), optional, default 100 The number of hexagons in the x-direction. The corresponding number of hexagons in the y-direction is chosen in a way that the hexagons are approximately regular. Alternatively, gridsize can be a tuple with two elements specifying the number of hexagons in the x-direction and the y-direction. - kwds : optional - Keyword arguments to pass on to :py:meth:`pandas.DataFrame.plot`. + **kwds : optional + Additional keyword arguments are documented in :meth:`pandas.DataFrame.plot`. Returns ------- @@ -2915,18 +2915,39 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, See Also -------- matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib, - the matplotlib function that is used under the hood. + the matplotlib function that is used under the hood. Examples -------- + The following examples are generated with random data. .. plot:: :context: close-figs >>> n = 100000 + >>> # Make a dataframe with normal distributed data >>> df = pd.DataFrame({'x':np.random.randn(n), ... 'y':np.random.randn(n)}) - >>> hexbin = df.plot.hexbin(x='x', y='y', cmap='viridis') + >>> ax = df.plot.hexbin(x='x', y='y', cmap='inferno') + + The next example uses `C` and `np.sum` as `reduce_C_function`. + Note that `'observations'` values ranges from 1 to 5 but the result + plot shows values up to more than 25. This is because of the `reduce_C_function`. + + .. plot:: + :context: close-figs + + >>> n=500 + >>> df = pd.DataFrame({ + ... 'coord_x':np.random.uniform(-3, 3, size=n), + ... 'coord_y':np.random.uniform(30, 50, size=n), + ... 'observations':np.random.randint(1,5, size=n) + ... }) + >>> ax = df.plot.hexbin(x='coord_x', + ... y='coord_y', + ... C='observations', + ... reduce_C_function=np.sum, + ... gridsize=10) """ if reduce_C_function is not None: kwds['reduce_C_function'] = reduce_C_function From 697e9fa0f6c82a525f965782df0f6aa40028d37a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Mar 2018 13:22:13 +0100 Subject: [PATCH 4/7] Update _core.py --- pandas/plotting/_core.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index de6a02ff834b5..3eb6bf2798670 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2962,19 +2962,20 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, reduce_C_function : callable, default `np.mean` Function of one argument that reduces all the values in a bin to a single number (e.g. `np.mean`, `np.max`, `np.sum`, `np.std`). - gridsize : int or tuple of (int, int), optional, default 100 + gridsize : int or tuple of (int, int), default 100 The number of hexagons in the x-direction. The corresponding number of hexagons in the y-direction is chosen in a way that the hexagons are approximately regular. - Alternatively, - gridsize can be a tuple with two elements specifying the number - of hexagons in the x-direction and the y-direction. - **kwds : optional - Additional keyword arguments are documented in :meth:`pandas.DataFrame.plot`. + Alternatively, gridsize can be a tuple with two elements + specifying the number of hexagons in the x-direction and the + y-direction. + **kwds + Additional keyword arguments are documented in + :meth:`pandas.DataFrame.plot`. Returns ------- - axes : matplotlib.AxesSubplot. + axes : matplotlib.AxesSubplot See Also -------- @@ -2990,22 +2991,23 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, >>> n = 100000 >>> # Make a dataframe with normal distributed data - >>> df = pd.DataFrame({'x':np.random.randn(n), - ... 'y':np.random.randn(n)}) + >>> df = pd.DataFrame({'x': np.random.randn(n), + ... 'y': np.random.randn(n)}) >>> ax = df.plot.hexbin(x='x', y='y', cmap='inferno') - + The next example uses `C` and `np.sum` as `reduce_C_function`. Note that `'observations'` values ranges from 1 to 5 but the result - plot shows values up to more than 25. This is because of the `reduce_C_function`. + plot shows values up to more than 25. This is because of the + `reduce_C_function`. .. plot:: :context: close-figs - >>> n=500 + >>> n = 500 >>> df = pd.DataFrame({ - ... 'coord_x':np.random.uniform(-3, 3, size=n), - ... 'coord_y':np.random.uniform(30, 50, size=n), - ... 'observations':np.random.randint(1,5, size=n) + ... 'coord_x': np.random.uniform(-3, 3, size=n), + ... 'coord_y': np.random.uniform(30, 50, size=n), + ... 'observations': np.random.randint(1,5, size=n) ... }) >>> ax = df.plot.hexbin(x='coord_x', ... y='coord_y', From b31b687e81fee226230ae9b3b127118d8dbaab90 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 14 Mar 2018 07:28:55 -0500 Subject: [PATCH 5/7] Grammar --- pandas/plotting/_core.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 3298b5b3d103b..36d9e84e0d406 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -3174,18 +3174,18 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Generate an hexagonal binning plot. + Generate a hexagonal binning plot. - Generate an hexagonal binning plot of `x` versus `y`. If `C` is `None` - (the default), this is an histogram of the number of occurrences + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences of the observations at ``(x[i], y[i])``. If `C` is specified, specifies values at given coordinates ``(x[i], y[i])``. These values are accumulated for each hexagonal bin and then reduced according to `reduce_C_function`, - having as default the numpy's mean function (:meth:`numpy.mean`). + which defaults to NumPy's mean function (:meth:`numpy.mean`). (If `C` is specified, it must also be a 1-D sequence - of the same length as `x` and `y`.) + of the same length as `x` and `y`, or a column label.) Parameters ---------- From 4d7b73cf52aac32fe58ad14b31e9857f30b645f9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Mar 2018 13:32:01 +0100 Subject: [PATCH 6/7] edit in plot (lower gridsize) --- pandas/plotting/_core.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 36d9e84e0d406..18fff62015980 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -3174,18 +3174,18 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Generate a hexagonal binning plot. + Generate an hexagonal binning plot. - Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` - (the default), this is a histogram of the number of occurrences + Generate an hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is an histogram of the number of occurrences of the observations at ``(x[i], y[i])``. If `C` is specified, specifies values at given coordinates ``(x[i], y[i])``. These values are accumulated for each hexagonal bin and then reduced according to `reduce_C_function`, - which defaults to NumPy's mean function (:meth:`numpy.mean`). + having as default the numpy's mean function (:meth:`numpy.mean`). (If `C` is specified, it must also be a 1-D sequence - of the same length as `x` and `y`, or a column label.) + of the same length as `x` and `y`.) Parameters ---------- @@ -3225,11 +3225,11 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, .. plot:: :context: close-figs - >>> n = 100000 + >>> n = 10000 >>> # Make a dataframe with normal distributed data >>> df = pd.DataFrame({'x': np.random.randn(n), ... 'y': np.random.randn(n)}) - >>> ax = df.plot.hexbin(x='x', y='y', cmap='inferno') + >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20) The next example uses `C` and `np.sum` as `reduce_C_function`. Note that `'observations'` values ranges from 1 to 5 but the result @@ -3249,7 +3249,8 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, ... y='coord_y', ... C='observations', ... reduce_C_function=np.sum, - ... gridsize=10) + ... gridsize=10, + ... cmap="viridis") """ if reduce_C_function is not None: kwds['reduce_C_function'] = reduce_C_function From 347a012ce51f9604dfb52f664592217b0260fd3b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 14 Mar 2018 13:39:10 +0100 Subject: [PATCH 7/7] add back changes + changes for feedback --- pandas/plotting/_core.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 18fff62015980..0e28b1cbd1cad 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -3174,18 +3174,18 @@ def scatter(self, x, y, s=None, c=None, **kwds): def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, **kwds): """ - Generate an hexagonal binning plot. + Generate a hexagonal binning plot. - Generate an hexagonal binning plot of `x` versus `y`. If `C` is `None` - (the default), this is an histogram of the number of occurrences + Generate a hexagonal binning plot of `x` versus `y`. If `C` is `None` + (the default), this is a histogram of the number of occurrences of the observations at ``(x[i], y[i])``. If `C` is specified, specifies values at given coordinates ``(x[i], y[i])``. These values are accumulated for each hexagonal bin and then reduced according to `reduce_C_function`, - having as default the numpy's mean function (:meth:`numpy.mean`). + having as default the NumPy's mean function (:meth:`numpy.mean`). (If `C` is specified, it must also be a 1-D sequence - of the same length as `x` and `y`.) + of the same length as `x` and `y`, or a column label.) Parameters ---------- @@ -3211,22 +3211,24 @@ def hexbin(self, x, y, C=None, reduce_C_function=None, gridsize=None, Returns ------- - axes : matplotlib.AxesSubplot + matplotlib.AxesSubplot + The matplotlib ``Axes`` on which the hexbin is plotted. See Also -------- + DataFrame.plot : Make plots of a DataFrame. matplotlib.pyplot.hexbin : hexagonal binning plot using matplotlib, the matplotlib function that is used under the hood. Examples -------- - The following examples are generated with random data. + The following examples are generated with random data from + a normal distribution. .. plot:: :context: close-figs >>> n = 10000 - >>> # Make a dataframe with normal distributed data >>> df = pd.DataFrame({'x': np.random.randn(n), ... 'y': np.random.randn(n)}) >>> ax = df.plot.hexbin(x='x', y='y', gridsize=20)