From 018276c1eaf7710f4df990467e5bacb8718c43ab Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sat, 10 Mar 2018 13:01:25 +0100 Subject: [PATCH 1/8] [WIP] DOC Fixes #8447 added examples --- pandas/plotting/_core.py | 110 ++++++++++++++++++++++++++++++++++----- 1 file changed, 96 insertions(+), 14 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 6c3d07124215b..b892cc0cd0713 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1995,52 +1995,134 @@ def plot_series(data, kind='line', ax=None, # Series unique _shared_docs['boxplot'] = """ - Make a box plot from DataFrame column optionally grouped by some columns or - other inputs + Make a box-and-whisker plot from DataFrame column optionally grouped + by some columns or other inputs. The box extends from the Q1 to Q3 + quartile values of the data, with a line at the median (Q2). + The whiskers extend from the edges of box to show the range of the data. + Flier points (outliers) are those past the end of the whiskers. + The position of the whiskers is set by default to 1.5 IQR (`whis=1.5``) + from the edge of the box. + + For further details see + Wikipedia's entry for `boxplot `_. Parameters ---------- - data : the pandas object holding the data column : column name or list of names, or vector - Can be any valid input to groupby + Can be any valid input to groupby. by : string or sequence - Column in the DataFrame to group by - ax : Matplotlib axes object, optional + Column in the DataFrame to groupby. + ax : Matplotlib axes object, (default `None`) + The matplotlib axes to be used by boxplot. fontsize : int or string + The font-size used by matplotlib. rot : label rotation angle + The rotation angle of labels. + grid : boolean( default `True`) + Setting this to True will show the grid. figsize : A tuple (width, height) in inches - grid : Setting this to True will show the grid + The size of the figure to create in inches by default. layout : tuple (optional) - (rows, columns) for the layout of the plot + Tuple (rows, columns) used for the layout of the plot. return_type : {None, 'axes', 'dict', 'both'}, default None The kind of object to return. The default is ``axes`` 'axes' returns the matplotlib axes the boxplot is drawn on; 'dict' returns a dictionary whose values are the matplotlib Lines of the boxplot; 'both' returns a namedtuple with the axes and dict. - When grouping with ``by``, a Series mapping columns to ``return_type`` is returned, unless ``return_type`` is None, in which case a NumPy array of axes is returned with the same shape as ``layout``. See the prose documentation for more. - - `**kwds` : Keyword Arguments + kwds : Keyword Arguments (optional) All other plotting keyword arguments to be passed to - matplotlib's boxplot function + matplotlib's function. Returns ------- lines : dict ax : matplotlib Axes - (ax, lines): namedtuple + (ax, lines): namedtuple + + See Also + -------- + matplotlib.pyplot.boxplot: Make a box and whisker plot. Notes ----- Use ``return_type='dict'`` when you want to tweak the appearance of the lines after plotting. In this case a dict containing the Lines making up the boxes, caps, fliers, medians, and whiskers is returned. - """ + Examples + -------- + .. plot:: + :context: close-figs + + >>> np.random.seed(1234) + + >>> df = pd.DataFrame({ + ... u'stratifying_var': np.random.uniform(0, 100, 20), + ... u'price': np.random.normal(100, 5, 20), + ... u'demand': np.random.normal(100, 10, 20)}) + + >>> df[u'quartiles'] = pd.qcut( + ... df[u'stratifying_var'], 4, + ... labels=[u'0-25%%', u'25-50%%', u'50-75%%', u'75-100%%']) + + >>> df + stratifying_var price demand quartiles + 0 19.151945 106.605791 108.416747 0-25%% + 1 62.210877 92.265472 123.909605 50-75%% + 2 43.772774 98.986768 100.761996 25-50%% + 3 78.535858 96.720153 94.335541 75-100%% + 4 77.997581 100.967107 100.361419 50-75%% + 5 27.259261 102.767195 79.250224 0-25%% + 6 27.646426 106.590758 102.477922 0-25%% + 7 80.187218 97.653474 91.028432 75-100%% + 8 95.813935 103.377770 98.632052 75-100%% + 9 87.593263 90.914864 100.182892 75-100%% + 10 35.781727 99.084457 107.554140 0-25%% + 11 50.099513 105.294846 102.152686 25-50%% + 12 68.346294 98.010799 108.410088 50-75%% + 13 71.270203 101.687188 85.541899 50-75%% + 14 37.025075 105.237893 85.980267 25-50%% + 15 56.119619 105.229691 98.990818 25-50%% + 16 50.308317 104.318586 94.517576 25-50%% + 17 1.376845 99.389542 98.553805 0-25%% + 18 77.282662 100.623565 103.540203 50-75%% + 19 88.264119 98.386026 99.644870 75-100%% + + To plot the boxplot of the ``demand`` just put: + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles') + + Use ``grid=False`` to hide the grid: + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles', grid=False) + + Optionally, the layout can be changed by setting ``layout=(rows, cols)``: + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=[u'price',u'demand'], + ... by=u'quartiles', layout=(1,2), + ... figsize=(8,5)) + + .. plot:: + :context: close-figs + + >>> boxplot = df.boxplot(column=[u'price',u'demand'], + ... by=u'quartiles', layout=(2,1), + ... figsize=(5,8)) + """ @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(data, column=None, by=None, ax=None, fontsize=None, From f42600fe99ff0bb723ef7a0afca0a6727d7a406f Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Mon, 12 Mar 2018 00:48:17 +0100 Subject: [PATCH 2/8] [WIP] DOC Fixes #8447 created new example and fixed issues --- pandas/plotting/_core.py | 168 +++++++++++++++++++++------------------ 1 file changed, 91 insertions(+), 77 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index b892cc0cd0713..a5de7922f0cc0 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1995,58 +1995,87 @@ def plot_series(data, kind='line', ax=None, # Series unique _shared_docs['boxplot'] = """ - Make a box-and-whisker plot from DataFrame column optionally grouped - by some columns or other inputs. The box extends from the Q1 to Q3 - quartile values of the data, with a line at the median (Q2). - The whiskers extend from the edges of box to show the range of the data. - Flier points (outliers) are those past the end of the whiskers. - The position of the whiskers is set by default to 1.5 IQR (`whis=1.5``) - from the edge of the box. + Make a box plot from DataFrame columns. + + Make a box-and-whisker plot from DataFrame columns optionally grouped + by some other columns. A box plot is a method for graphically depicting + groups of numerical data through their quartiles. + The box extends from the Q1 to Q3 quartile values of the data, + with a line at the median (Q2).The whiskers extend from the edges + of box to show the range of the data. The position of the whiskers + is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the box. + Outlier points are those past the end of the whiskers. For further details see - Wikipedia's entry for `boxplot `_. + Wikipedia's entry for `boxplot `_. Parameters ---------- - column : column name or list of names, or vector + column : str or list of str, optional + Column name or list of names, or vector. Can be any valid input to groupby. - by : string or sequence + by : str or array-like Column in the DataFrame to groupby. - ax : Matplotlib axes object, (default `None`) + ax : object of class matplotlib.axes.Axes, default `None` The matplotlib axes to be used by boxplot. - fontsize : int or string - The font-size used by matplotlib. - rot : label rotation angle - The rotation angle of labels. - grid : boolean( default `True`) + fontsize : float or str + Tick label font size in points or as a string (e.g., ‘large’) + (see `matplotlib.axes.Axes.tick_params + `_). + rot : int or float, default 0 + The rotation angle of labels (in degrees) + with respect to the screen coordinate sytem. + grid : boolean, default `True` Setting this to True will show the grid. figsize : A tuple (width, height) in inches - The size of the figure to create in inches by default. - layout : tuple (optional) - Tuple (rows, columns) used for the layout of the plot. - return_type : {None, 'axes', 'dict', 'both'}, default None - The kind of object to return. The default is ``axes`` - 'axes' returns the matplotlib axes the boxplot is drawn on; - 'dict' returns a dictionary whose values are the matplotlib - Lines of the boxplot; - 'both' returns a namedtuple with the axes and dict. - When grouping with ``by``, a Series mapping columns to ``return_type`` - is returned, unless ``return_type`` is None, in which case a NumPy - array of axes is returned with the same shape as ``layout``. - See the prose documentation for more. - kwds : Keyword Arguments (optional) + The size of the figure to create in matplotlib. + layout : tuple (rows, columns) (optional) + For example, (3, 5) will display the subplots + using 3 columns and 5 rows, starting from the top-left. + return_type : {None, 'axes', 'dict', 'both'}, default 'axes' + The kind of object to return. The default is ``axes``. + + * 'axes' returns the matplotlib axes the boxplot is drawn on. + * 'dict' returns a dictionary whose values are the matplotlib + Lines of the boxplot. + * 'both' returns a namedtuple with the axes and dict. + * when grouping with ``by``, a Series mapping columns to + ``return_type`` is returned (i.e. + ``df.boxplot(column=['Col1','Col2'], by='var',return_type='axes')`` + may return ``Series([AxesSubplot(..),AxesSubplot(..)], + index=['Col1','Col2'])``). + + If ``return_type`` is `None`, a NumPy array + of axes with the same shape as ``layout`` is returned + (i.e. ``df.boxplot(column=['Col1','Col2'], + by='var',return_type=None)`` may return a + ``array([, + ], + dtype=object)``). + **kwds : Keyword Arguments (optional) All other plotting keyword arguments to be passed to - matplotlib's function. + `matplotlib.pyplot.boxplot `_. Returns ------- - lines : dict - ax : matplotlib Axes - (ax, lines): namedtuple + result: + Options: + + * ax : object of class + matplotlib.axes.Axes (for ``return_type='axes'``) + * lines : dict (for ``return_type='dict'``) + * (ax, lines): namedtuple (for ``return_type='both'``) + * :class:`~pandas.Series` (for ``return_type != None`` + and data grouped with ``by``) + * :class:`~numpy.array` (for ``return_type=None`` + and data grouped with ``by``) See Also -------- matplotlib.pyplot.boxplot: Make a box and whisker plot. + matplotlib.pyplot.hist: Make a hsitogram. Notes ----- @@ -2056,72 +2085,57 @@ def plot_series(data, kind='line', ax=None, # Series unique Examples -------- + + Boxplots can be created for every column in the dataframe + by ``df.boxplot()`` or indicating the columns to be used: + .. plot:: :context: close-figs >>> np.random.seed(1234) + >>> df = pd.DataFrame(np.random.rand(10,4), + ... columns=['Col1', 'Col2', 'Col3', 'Col4']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) - >>> df = pd.DataFrame({ - ... u'stratifying_var': np.random.uniform(0, 100, 20), - ... u'price': np.random.normal(100, 5, 20), - ... u'demand': np.random.normal(100, 10, 20)}) - - >>> df[u'quartiles'] = pd.qcut( - ... df[u'stratifying_var'], 4, - ... labels=[u'0-25%%', u'25-50%%', u'50-75%%', u'75-100%%']) - - >>> df - stratifying_var price demand quartiles - 0 19.151945 106.605791 108.416747 0-25%% - 1 62.210877 92.265472 123.909605 50-75%% - 2 43.772774 98.986768 100.761996 25-50%% - 3 78.535858 96.720153 94.335541 75-100%% - 4 77.997581 100.967107 100.361419 50-75%% - 5 27.259261 102.767195 79.250224 0-25%% - 6 27.646426 106.590758 102.477922 0-25%% - 7 80.187218 97.653474 91.028432 75-100%% - 8 95.813935 103.377770 98.632052 75-100%% - 9 87.593263 90.914864 100.182892 75-100%% - 10 35.781727 99.084457 107.554140 0-25%% - 11 50.099513 105.294846 102.152686 25-50%% - 12 68.346294 98.010799 108.410088 50-75%% - 13 71.270203 101.687188 85.541899 50-75%% - 14 37.025075 105.237893 85.980267 25-50%% - 15 56.119619 105.229691 98.990818 25-50%% - 16 50.308317 104.318586 94.517576 25-50%% - 17 1.376845 99.389542 98.553805 0-25%% - 18 77.282662 100.623565 103.540203 50-75%% - 19 88.264119 98.386026 99.644870 75-100%% - - To plot the boxplot of the ``demand`` just put: + Boxplots of variables distributions grouped by a third variable values + can be created using the option ``by``. For instance: .. plot:: :context: close-figs - >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles') + >>> df = pd.DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] ) + >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) + >>> boxplot = df.boxplot(by='X') - Use ``grid=False`` to hide the grid: + A list of strings (i.e. ``['X','Y']``) containing can be passed to boxplot + in order to group the data by combination of the variables in the x-axis: .. plot:: :context: close-figs - >>> boxplot = df.boxplot(column=u'demand', by=u'quartiles', grid=False) + >>> df = pd.DataFrame(np.random.rand(10,3), + ... columns=['Col1', 'Col2', 'Col3']) + >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) + >>> df['Y'] = pd.Series(['A','B','A','B','A','B','A','B','A','B']) + >>> boxplot = df.boxplot(column=['Col1','Col2'], by=['X','Y']) - Optionally, the layout can be changed by setting ``layout=(rows, cols)``: + The layout of boxplot can be adjusted giving a tuple to ``layout``: .. plot:: :context: close-figs - >>> boxplot = df.boxplot(column=[u'price',u'demand'], - ... by=u'quartiles', layout=(1,2), - ... figsize=(8,5)) + >>> df = pd.DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) + >>> boxplot = df.boxplot(by='X', layout=(2,1)) + + Additional formatting can be done to the boxplot, like suppressing the grid + (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) + or changing the fontsize (i.e. ``fontsize=15``): .. plot:: :context: close-figs - >>> boxplot = df.boxplot(column=[u'price',u'demand'], - ... by=u'quartiles', layout=(2,1), - ... figsize=(5,8)) + >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) """ @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) From e9afeb7c373f34d9e65c0c93303599eff3a75e2c Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sun, 18 Mar 2018 16:08:29 +0100 Subject: [PATCH 3/8] [WIP] DOC Fixes #8447 fixed issues and added examples for return_type --- pandas/plotting/_core.py | 120 ++++++++++++++++++++++++++++----------- 1 file changed, 88 insertions(+), 32 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index a5de7922f0cc0..31bb26b870b75 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2013,24 +2013,22 @@ def plot_series(data, kind='line', ax=None, # Series unique ---------- column : str or list of str, optional Column name or list of names, or vector. - Can be any valid input to groupby. - by : str or array-like - Column in the DataFrame to groupby. - ax : object of class matplotlib.axes.Axes, default `None` + Can be any valid input to :meth:`pandas.DataFrame.groupby`. + by : str or array-like, optional + Column in the DataFrame to :meth:`pandas.DataFrame.groupby`. + One box-plot will be done per value of columns in `by`. + ax : object of class matplotlib.axes.Axes, optional The matplotlib axes to be used by boxplot. fontsize : float or str - Tick label font size in points or as a string (e.g., ‘large’) - (see `matplotlib.axes.Axes.tick_params - `_). + Tick label font size in points or as a string (e.g., ‘large’). rot : int or float, default 0 The rotation angle of labels (in degrees) with respect to the screen coordinate sytem. - grid : boolean, default `True` + grid : boolean, default True Setting this to True will show the grid. figsize : A tuple (width, height) in inches The size of the figure to create in matplotlib. - layout : tuple (rows, columns) (optional) + layout : tuple (rows, columns), optional For example, (3, 5) will display the subplots using 3 columns and 5 rows, starting from the top-left. return_type : {None, 'axes', 'dict', 'both'}, default 'axes' @@ -2041,22 +2039,13 @@ def plot_series(data, kind='line', ax=None, # Series unique Lines of the boxplot. * 'both' returns a namedtuple with the axes and dict. * when grouping with ``by``, a Series mapping columns to - ``return_type`` is returned (i.e. - ``df.boxplot(column=['Col1','Col2'], by='var',return_type='axes')`` - may return ``Series([AxesSubplot(..),AxesSubplot(..)], - index=['Col1','Col2'])``). + ``return_type`` is returned. If ``return_type`` is `None`, a NumPy array - of axes with the same shape as ``layout`` is returned - (i.e. ``df.boxplot(column=['Col1','Col2'], - by='var',return_type=None)`` may return a - ``array([, - ], - dtype=object)``). - **kwds : Keyword Arguments (optional) + of axes with the same shape as ``layout`` is returned. + **kwds : Keyword Arguments, optional All other plotting keyword arguments to be passed to - `matplotlib.pyplot.boxplot `_. + :func:`matplotlib.pyplot.boxplot`. Returns ------- @@ -2074,8 +2063,8 @@ def plot_series(data, kind='line', ax=None, # Series unique See Also -------- - matplotlib.pyplot.boxplot: Make a box and whisker plot. - matplotlib.pyplot.hist: Make a hsitogram. + matplotlib.pyplot.boxplot : Make a box and whisker plot. + matplotlib.pyplot.hist : Make a histogram. Notes ----- @@ -2093,27 +2082,27 @@ def plot_series(data, kind='line', ax=None, # Series unique :context: close-figs >>> np.random.seed(1234) - >>> df = pd.DataFrame(np.random.rand(10,4), + >>> df = pd.DataFrame(np.random.randn(10,4), ... columns=['Col1', 'Col2', 'Col3', 'Col4']) >>> boxplot = df.boxplot(column=['Col1', 'Col2', 'Col3']) - Boxplots of variables distributions grouped by a third variable values - can be created using the option ``by``. For instance: + Boxplots of variables distributions grouped by the values of a third + variable can be created using the option ``by``. For instance: .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] ) + >>> df = pd.DataFrame(np.random.randn(10,2), columns=['Col1', 'Col2'] ) >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) >>> boxplot = df.boxplot(by='X') - A list of strings (i.e. ``['X','Y']``) containing can be passed to boxplot + A list of strings (i.e. ``['X','Y']``) can be passed to boxplot in order to group the data by combination of the variables in the x-axis: .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.rand(10,3), + >>> df = pd.DataFrame(np.random.randn(10,3), ... columns=['Col1', 'Col2', 'Col3']) >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) >>> df['Y'] = pd.Series(['A','B','A','B','A','B','A','B','A','B']) @@ -2124,7 +2113,7 @@ def plot_series(data, kind='line', ax=None, # Series unique .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2']) + >>> df = pd.DataFrame(np.random.randn(10,2), columns=['Col1', 'Col2']) >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) >>> boxplot = df.boxplot(by='X', layout=(2,1)) @@ -2136,6 +2125,73 @@ def plot_series(data, kind='line', ax=None, # Series unique :context: close-figs >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) + + + The parameter ``return_type`` can be used to select the type of element + returned by `boxplot`. When ``return_type='axes'`` is selected, + the matplotlib axes on which the boxplot is drawn are returned: + + >>> df.boxplot(column=['Col1','Col2'], return_type='axes') + + + If selecting ``return_type='dict'`` a dictionary containing the + lines is returned: + + >>> df.boxplot(column=['Col1','Col2'], return_type='dict') + {'boxes': [, + ], + 'caps': [, + , + , + ], + 'fliers': [, + ], + 'means': [], + 'medians': [, + ], + 'whiskers': [, + , + , + ]} + + If selecting ``return_type='both'``, a namedtuple with matplotlib axes and + Line objets is returned: + + >>> df.boxplot(column=['Col1','Col2'], return_type='both') + Boxplot(ax=, lines={'whiskers': + [, + , + , + ], + 'caps': [, + , + , + ], + 'boxes': [, + ], + 'medians': [, + ], + 'fliers': [, + ], 'means': []}) + + When grouping with ``by``, a Series mapping columns to ``return_type`` + is returned: + + + >>> df.boxplot(column=['Col1','Col2'], by='X', return_type='axes') + Col1 AxesSubplot(0.1,0.15;0.363636x0.75) + Col2 AxesSubplot(0.536364,0.15;0.363636x0.75) + dtype: object + + If ``return_type`` is `None`, a NumPy array of axes with the same shape + as ``layout`` is returned: + + >>> df.boxplot(column=['Col1','Col2'], by='X', return_type=None) + array([, + ], + dtype=object) + """ @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) From 0c2398dd3dc4d01e84529d8b178806fdc67c802f Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Wed, 21 Mar 2018 17:18:44 +0100 Subject: [PATCH 4/8] [WIP] DOC Fixes #8447 fixed issues and shortened the number of examples for return_type --- pandas/plotting/_core.py | 47 ++-------------------------------------- 1 file changed, 2 insertions(+), 45 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 31bb26b870b75..381f72e868c7c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2020,7 +2020,7 @@ def plot_series(data, kind='line', ax=None, # Series unique ax : object of class matplotlib.axes.Axes, optional The matplotlib axes to be used by boxplot. fontsize : float or str - Tick label font size in points or as a string (e.g., ‘large’). + Tick label font size in points or as a string (e.g., `large`). rot : int or float, default 0 The rotation angle of labels (in degrees) with respect to the screen coordinate sytem. @@ -2126,7 +2126,6 @@ def plot_series(data, kind='line', ax=None, # Series unique >>> boxplot = df.boxplot(grid=False, rot=45, fontsize=15) - The parameter ``return_type`` can be used to select the type of element returned by `boxplot`. When ``return_type='axes'`` is selected, the matplotlib axes on which the boxplot is drawn are returned: @@ -2134,51 +2133,9 @@ def plot_series(data, kind='line', ax=None, # Series unique >>> df.boxplot(column=['Col1','Col2'], return_type='axes') - If selecting ``return_type='dict'`` a dictionary containing the - lines is returned: - - >>> df.boxplot(column=['Col1','Col2'], return_type='dict') - {'boxes': [, - ], - 'caps': [, - , - , - ], - 'fliers': [, - ], - 'means': [], - 'medians': [, - ], - 'whiskers': [, - , - , - ]} - - If selecting ``return_type='both'``, a namedtuple with matplotlib axes and - Line objets is returned: - - >>> df.boxplot(column=['Col1','Col2'], return_type='both') - Boxplot(ax=, lines={'whiskers': - [, - , - , - ], - 'caps': [, - , - , - ], - 'boxes': [, - ], - 'medians': [, - ], - 'fliers': [, - ], 'means': []}) - When grouping with ``by``, a Series mapping columns to ``return_type`` is returned: - >>> df.boxplot(column=['Col1','Col2'], by='X', return_type='axes') Col1 AxesSubplot(0.1,0.15;0.363636x0.75) Col2 AxesSubplot(0.536364,0.15;0.363636x0.75) @@ -2191,9 +2148,9 @@ def plot_series(data, kind='line', ax=None, # Series unique array([, ], dtype=object) - """ + @Appender(_shared_docs['boxplot'] % _shared_doc_kwargs) def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0, grid=True, figsize=None, layout=None, return_type=None, From a723bf22547bab47c96a70d601c0002b22e7931a Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Wed, 21 Mar 2018 17:47:20 +0100 Subject: [PATCH 5/8] [WIP] DOC Fixes #8447 Printing result types for different return_type values --- pandas/plotting/_core.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 381f72e868c7c..557cbf99cc7e6 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2130,24 +2130,25 @@ def plot_series(data, kind='line', ax=None, # Series unique returned by `boxplot`. When ``return_type='axes'`` is selected, the matplotlib axes on which the boxplot is drawn are returned: - >>> df.boxplot(column=['Col1','Col2'], return_type='axes') - + >>> boxplot = df.boxplot(column=['Col1','Col2'], return_type='axes') + >>> type(boxplot) + When grouping with ``by``, a Series mapping columns to ``return_type`` is returned: - >>> df.boxplot(column=['Col1','Col2'], by='X', return_type='axes') - Col1 AxesSubplot(0.1,0.15;0.363636x0.75) - Col2 AxesSubplot(0.536364,0.15;0.363636x0.75) - dtype: object + >>> boxplot = df.boxplot(column=['Col1','Col2'], by='X', + ... return_type='axes') + >>> type(boxplot) + If ``return_type`` is `None`, a NumPy array of axes with the same shape as ``layout`` is returned: - >>> df.boxplot(column=['Col1','Col2'], by='X', return_type=None) - array([, - ], - dtype=object) + >>> boxplot = df.boxplot(column=['Col1','Col2'], by='X', + ... return_type=None) + >>> type(boxplot) + """ From 8eccba13f77c78b1ad54b7b1becee666191d6b0b Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Sun, 25 Mar 2018 09:52:45 +0200 Subject: [PATCH 6/8] [WIP] DOC Fixes #8447 Replaced matplotlib.hist by Series.hist and corrected format issues --- pandas/plotting/_core.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 557cbf99cc7e6..59630b830c016 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2001,9 +2001,9 @@ def plot_series(data, kind='line', ax=None, # Series unique by some other columns. A box plot is a method for graphically depicting groups of numerical data through their quartiles. The box extends from the Q1 to Q3 quartile values of the data, - with a line at the median (Q2).The whiskers extend from the edges + with a line at the median (Q2). The whiskers extend from the edges of box to show the range of the data. The position of the whiskers - is set by default to 1.5*IQR (IQR = Q3 - Q1) from the edges of the box. + is set by default to `1.5 * IQR (IQR = Q3 - Q1)` from the edges of the box. Outlier points are those past the end of the whiskers. For further details see @@ -2031,7 +2031,7 @@ def plot_series(data, kind='line', ax=None, # Series unique layout : tuple (rows, columns), optional For example, (3, 5) will display the subplots using 3 columns and 5 rows, starting from the top-left. - return_type : {None, 'axes', 'dict', 'both'}, default 'axes' + return_type : {'axes', 'dict', 'both'} or None, default 'axes' The kind of object to return. The default is ``axes``. * 'axes' returns the matplotlib axes the boxplot is drawn on. @@ -2049,22 +2049,23 @@ def plot_series(data, kind='line', ax=None, # Series unique Returns ------- - result: - Options: - - * ax : object of class - matplotlib.axes.Axes (for ``return_type='axes'``) - * lines : dict (for ``return_type='dict'``) - * (ax, lines): namedtuple (for ``return_type='both'``) - * :class:`~pandas.Series` (for ``return_type != None`` - and data grouped with ``by``) - * :class:`~numpy.array` (for ``return_type=None`` - and data grouped with ``by``) + result : + + The return type depends on the `return_type` parameter: + + * 'axes' : object of class matplotlib.axes.Axes + * 'dict' : dict of matplotlib.lines.Line2D objects + * 'both' : a nametuple with strucure (ax, lines) + + For data grouped with ``by``: + + * :class:`~pandas.Series` + * :class:`~numpy.array` (for ``return_type = None``) See Also -------- - matplotlib.pyplot.boxplot : Make a box and whisker plot. - matplotlib.pyplot.hist : Make a histogram. + Series.plot.hist: Make a histogram. + matplotlib.pyplot.boxplot : Matplotlib equivalent plot. Notes ----- From 487352bdf8b025cf68322b2744dbd69e941e0121 Mon Sep 17 00:00:00 2001 From: Mabel Villalba Date: Mon, 2 Apr 2018 15:30:02 +0200 Subject: [PATCH 7/8] [WIP] DOC Fixes #8447 Added a comma in the description --- pandas/plotting/_core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 59630b830c016..f57794cf1b396 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -1997,7 +1997,7 @@ def plot_series(data, kind='line', ax=None, # Series unique _shared_docs['boxplot'] = """ Make a box plot from DataFrame columns. - Make a box-and-whisker plot from DataFrame columns optionally grouped + Make a box-and-whisker plot from DataFrame columns, optionally grouped by some other columns. A box plot is a method for graphically depicting groups of numerical data through their quartiles. The box extends from the Q1 to Q3 quartile values of the data, From 39bb1668a6a1aee0a69ea65f68cb4e17c2f76046 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 2 Apr 2018 08:47:51 -0500 Subject: [PATCH 8/8] PEP8 in the examples --- pandas/plotting/_core.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index f57794cf1b396..b8485eed758a1 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2043,7 +2043,7 @@ def plot_series(data, kind='line', ax=None, # Series unique If ``return_type`` is `None`, a NumPy array of axes with the same shape as ``layout`` is returned. - **kwds : Keyword Arguments, optional + **kwds All other plotting keyword arguments to be passed to :func:`matplotlib.pyplot.boxplot`. @@ -2093,11 +2093,13 @@ def plot_series(data, kind='line', ax=None, # Series unique .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.randn(10,2), columns=['Col1', 'Col2'] ) - >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) + >>> df = pd.DataFrame(np.random.randn(10, 2), + ... columns=['Col1', 'Col2']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) >>> boxplot = df.boxplot(by='X') - A list of strings (i.e. ``['X','Y']``) can be passed to boxplot + A list of strings (i.e. ``['X', 'Y']``) can be passed to boxplot in order to group the data by combination of the variables in the x-axis: .. plot:: @@ -2105,18 +2107,19 @@ def plot_series(data, kind='line', ax=None, # Series unique >>> df = pd.DataFrame(np.random.randn(10,3), ... columns=['Col1', 'Col2', 'Col3']) - >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) - >>> df['Y'] = pd.Series(['A','B','A','B','A','B','A','B','A','B']) - >>> boxplot = df.boxplot(column=['Col1','Col2'], by=['X','Y']) + >>> df['X'] = pd.Series(['A', 'A', 'A', 'A', 'A', + ... 'B', 'B', 'B', 'B', 'B']) + >>> df['Y'] = pd.Series(['A', 'B', 'A', 'B', 'A', + ... 'B', 'A', 'B', 'A', 'B']) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by=['X', 'Y']) The layout of boxplot can be adjusted giving a tuple to ``layout``: .. plot:: :context: close-figs - >>> df = pd.DataFrame(np.random.randn(10,2), columns=['Col1', 'Col2']) - >>> df['X'] = pd.Series(['A','A','A','A','A','B','B','B','B','B']) - >>> boxplot = df.boxplot(by='X', layout=(2,1)) + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', + ... layout=(2, 1)) Additional formatting can be done to the boxplot, like suppressing the grid (``grid=False``), rotating the labels in the x-axis (i.e. ``rot=45``) @@ -2138,7 +2141,7 @@ def plot_series(data, kind='line', ax=None, # Series unique When grouping with ``by``, a Series mapping columns to ``return_type`` is returned: - >>> boxplot = df.boxplot(column=['Col1','Col2'], by='X', + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', ... return_type='axes') >>> type(boxplot) @@ -2146,7 +2149,7 @@ def plot_series(data, kind='line', ax=None, # Series unique If ``return_type`` is `None`, a NumPy array of axes with the same shape as ``layout`` is returned: - >>> boxplot = df.boxplot(column=['Col1','Col2'], by='X', + >>> boxplot = df.boxplot(column=['Col1', 'Col2'], by='X', ... return_type=None) >>> type(boxplot)