From 565579b19d57fd24f15fb0d0017b952375be1780 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 11:47:50 -0400 Subject: [PATCH 1/7] fix PR09,PR08 docstring errors in pandas.plotting fixes the errors along with minor changes to standardize kwds -> **kwargs closes #28687 --- pandas/plotting/_misc.py | 93 +++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 48 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index a8e86d9dfa997..76a1861ee0407 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -14,9 +14,9 @@ def table(ax, data, rowLabels=None, colLabels=None, **kwargs): ---------- ax : Matplotlib axes object data : DataFrame or Series - data for table contents - kwargs : keywords, optional - keyword arguments which passed to matplotlib.table.table. + Data for table contents. + **kwargs + Keyword arguments to be passed to matplotlib.table.table. If `rowLabels` or `colLabels` is not specified, data index or column name will be used. @@ -82,7 +82,7 @@ def scatter_matrix( density_kwds=None, hist_kwds=None, range_padding=0.05, - **kwds + **kwargs ): """ Draw a matrix of scatter plots. @@ -91,28 +91,26 @@ def scatter_matrix( ---------- frame : DataFrame alpha : float, optional - amount of transparency applied + Amount of transparency applied. figsize : (float,float), optional - a tuple (width, height) in inches + A tuple (width, height) in inches. ax : Matplotlib axis object, optional grid : bool, optional - setting this to True will show the grid + Setting this to True will show the grid. diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal + Pick between 'kde' and 'hist' for either Kernel Density Estimation or + Histogram plot in the diagonal. marker : str, optional - Matplotlib marker type, default '.' 
- hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function + Matplotlib marker type, default '.'. + hist_kwds : keywords + Keyword arguments to be passed to hist function. + density_kwds : keywords + Keyword arguments to be passed to kernel density estimate plot. + range_padding : float, default 0.05 + Relative extension of axis range in x and y with respect to + (x_max - x_min) or (y_max - y_min). + **kwargs + Keyword arguments to be passed to scatter function. Returns ------- @@ -136,7 +134,7 @@ def scatter_matrix( density_kwds=density_kwds, hist_kwds=hist_kwds, range_padding=range_padding, - **kwds + **kwargs ) @@ -215,8 +213,8 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): @deprecate_kwarg(old_arg_name="data", new_arg_name="frame") def andrews_curves( - frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwds -): + frame, class_column, ax=None, samples=200, color=None, colormap=None, + **kwargs): """ Generate a matplotlib plot of Andrews curves, for visualising clusters of multivariate data. @@ -233,17 +231,17 @@ def andrews_curves( Parameters ---------- frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) + Data to be plotted, preferably normalized to (0.0, 1.0). class_column : Name of the column containing class names ax : matplotlib axes object, default None samples : Number of points to plot in each curve color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. colormap : str or matplotlib colormap object, default None Colormap to select colors from. 
If string, load colormap with that name from matplotlib. - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- @@ -257,7 +255,7 @@ def andrews_curves( samples=samples, color=color, colormap=colormap, - **kwds + **kwargs ) @@ -327,7 +325,7 @@ def parallel_coordinates( axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds + **kwargs ): """ Parallel coordinates plotting. @@ -336,30 +334,29 @@ def parallel_coordinates( ---------- frame : DataFrame class_column : str - Column name containing class names + Column name containing class names. cols : list, optional - A list of column names to use + A list of column names to use. ax : matplotlib.axis, optional - matplotlib axis object + Matplotlib axis object. color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. use_columns : bool, optional - If true, columns will be used as xticks + If true, columns will be used as xticks. xticks : list or tuple, optional - A list of values to use for xticks + A list of values to use for xticks. colormap : str or matplotlib colormap, default None Colormap to use for line colors. axvlines : bool, optional - If true, vertical lines will be added at each xtick + If true, vertical lines will be added at each xtick. axvlines_kwds : keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels : bool, False - Sort class_column labels, useful when assigning colors + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. .. versionadded:: 0.20.0 - - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. 
Returns ------- @@ -388,7 +385,7 @@ def parallel_coordinates( axvlines=axvlines, axvlines_kwds=axvlines_kwds, sort_labels=sort_labels, - **kwds + **kwargs ) @@ -411,7 +408,7 @@ def lag_plot(series, lag=1, ax=None, **kwds): return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) -def autocorrelation_plot(series, ax=None, **kwds): +def autocorrelation_plot(series, ax=None, **kwargs): """ Autocorrelation plot for time series. @@ -419,15 +416,15 @@ def autocorrelation_plot(series, ax=None, **kwds): ---------- series : Time series ax : Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- class:`matplotlib.axis.Axes` """ plot_backend = _get_plot_backend("matplotlib") - return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwds) + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) def tsplot(series, plotf, ax=None, **kwargs): From ad3280e25db55fc6af9b5527a57829f151b994f1 Mon Sep 17 00:00:00 2001 From: Jack Bicknell Date: Mon, 30 Sep 2019 17:22:41 +0100 Subject: [PATCH 2/7] DOC: Fixed PR08 docstring errors in pandas.DataFrame (#28655) --- pandas/core/frame.py | 32 ++++++++++++++++++++------------ pandas/core/generic.py | 22 +++++++++++----------- pandas/plotting/_core.py | 4 +++- 3 files changed, 34 insertions(+), 24 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e4a44a89998e3..16f34fee5e1ff 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2066,7 +2066,7 @@ def to_feather(self, fname): Parameters ---------- fname : str - string file path + String file path. 
""" from pandas.io.feather_format import to_feather @@ -4772,6 +4772,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): Only consider certain columns for identifying duplicates, by default use all of the columns keep : {'first', 'last', False}, default 'first' + Determines which duplicates (if any) to keep. - ``first`` : Drop duplicates except for the first occurrence. - ``last`` : Drop duplicates except for the last occurrence. - False : Drop all duplicates. @@ -4806,10 +4807,10 @@ def duplicated(self, subset=None, keep="first"): Only consider certain columns for identifying duplicates, by default use all of the columns keep : {'first', 'last', False}, default 'first' - - ``first`` : Mark duplicates as ``True`` except for the - first occurrence. - - ``last`` : Mark duplicates as ``True`` except for the - last occurrence. + Determines which duplicates (if any) to mark. + + - ``first`` : Mark duplicates as ``True`` except for the first occurrence. + - ``last`` : Mark duplicates as ``True`` except for the last occurrence. - False : Mark all duplicates as ``True``. Returns @@ -6233,8 +6234,8 @@ def unstack(self, level=-1, fill_value=None): ---------- level : int, str, or list of these, default -1 (last level) Level(s) of index to unstack, can pass level name - fill_value : replace NaN with this value if the unstack produces - missing values + fill_value : int, string or dict + Replace NaN with this value if the unstack produces missing values Returns ------- @@ -6665,6 +6666,8 @@ def apply( by result_type='broadcast'. raw : bool, default False + Determines if row or column is passed as a Series or ndarray object: + * ``False`` : passes each row or column as a Series to the function. 
* ``True`` : the passed function will receive ndarray objects @@ -7357,6 +7360,8 @@ def corr(self, method="pearson", min_periods=1): Parameters ---------- method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + * pearson : standard correlation coefficient * kendall : Kendall Tau correlation coefficient * spearman : Spearman rank correlation @@ -7556,10 +7561,13 @@ def corrwith(self, other, axis=0, drop=False, method="pearson"): other : DataFrame, Series Object with which to compute correlations. axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' to compute column-wise, 1 or 'columns' for row-wise. + The axis to use. 0 or 'index' to compute column-wise, 1 or 'columns' for + row-wise. drop : bool, default False Drop missing indices from result. method : {'pearson', 'kendall', 'spearman'} or callable + Method of correlation: + * pearson : standard correlation coefficient * kendall : Kendall Tau correlation coefficient * spearman : Spearman rank correlation @@ -7939,8 +7947,8 @@ def idxmin(self, axis=0, skipna=True): Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : bool, default True + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise + skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -7976,8 +7984,8 @@ def idxmax(self, axis=0, skipna=True): Parameters ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 - 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : bool, default True + The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise + skipna : boolean, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index a3b9bec494854..cb21588c8ba1a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2559,10 +2559,10 @@ def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs): path : str, buffer-like, or None Destination for the serialized object. If None, return generated bytes - append : bool whether to append to an existing msgpack - (default is False) - compress : type of compressor (zlib or blosc), default to None (no - compression) + append : bool, default False + Whether to append to an existing msgpack. + compress : str, default None + Type of compressor (zlib, blosc or None). Returns ------- @@ -2797,10 +2797,10 @@ def to_clipboard(self, excel=True, sep=None, **kwargs): Parameters ---------- excel : bool, default True - - True, use the provided separator, writing in a csv format for - allowing easy pasting into excel. - - False, write a string representation of the object to the - clipboard. + Produce output in a csv format for easy pasting into excel. + + - True, use the provided separator for csv pasting. + - False, write a string representation of the object to the clipboard. sep : str, default ``'\t'`` Field delimiter. @@ -5024,15 +5024,15 @@ def sample( Parameters ---------- func : function - function to apply to the %(klass)s. + Function to apply to the %(klass)s. ``args``, and ``kwargs`` are passed into ``func``. Alternatively a ``(callable, data_keyword)`` tuple where ``data_keyword`` is a string indicating the keyword of ``callable`` that expects the %(klass)s. args : iterable, optional - positional arguments passed into ``func``. + Positional arguments passed into ``func``. kwargs : mapping, optional - a dictionary of keyword arguments passed into ``func``. + A dictionary of keyword arguments passed into ``func``. 
Returns ------- diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 8724382d9ec55..966a18e11a620 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -514,6 +514,8 @@ class PlotAccessor(PandasObject): Allows plotting of one column versus another. Only used if data is a DataFrame. kind : str + The kind of plot to produce: + - 'line' : line plot (default) - 'bar' : vertical bar plot - 'barh' : horizontal bar plot @@ -537,7 +539,7 @@ class PlotAccessor(PandasObject): legend : False/True/'reverse' Place legend on axis subplots style : list or dict - matplotlib line style per column + The matplotlib line style per column logx : bool or 'sym', default False Use log scaling or symlog scaling on x axis .. versionchanged:: 0.25.0 From 5946a94fbc3e1189e87bc46209be2480e6fc7b0d Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 12:52:30 -0400 Subject: [PATCH 3/7] fix argument ordering actual parameters ordering doesn't match docstring ordering --- pandas/plotting/_misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 76a1861ee0407..47ed834ea3637 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -102,10 +102,10 @@ def scatter_matrix( Histogram plot in the diagonal. marker : str, optional Matplotlib marker type, default '.'. - hist_kwds : keywords - Keyword arguments to be passed to hist function. density_kwds : keywords Keyword arguments to be passed to kernel density estimate plot. + hist_kwds : keywords + Keyword arguments to be passed to hist function. range_padding : float, default 0.05 Relative extension of axis range in x and y with respect to (x_max - x_min) or (y_max - y_min). 
From 08d67fbec6307af15d043d308be65295c868ca52 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 11:47:50 -0400 Subject: [PATCH 4/7] fix PR09,PR08 docstring errors in pandas.plotting fixes the errors along with minor changes to standardize kwds -> **kwargs closes #28687 --- pandas/plotting/_misc.py | 93 +++++++++++++++++++--------------------- 1 file changed, 45 insertions(+), 48 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index a8e86d9dfa997..76a1861ee0407 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -14,9 +14,9 @@ def table(ax, data, rowLabels=None, colLabels=None, **kwargs): ---------- ax : Matplotlib axes object data : DataFrame or Series - data for table contents - kwargs : keywords, optional - keyword arguments which passed to matplotlib.table.table. + Data for table contents. + **kwargs + Keyword arguments to be passed to matplotlib.table.table. If `rowLabels` or `colLabels` is not specified, data index or column name will be used. @@ -82,7 +82,7 @@ def scatter_matrix( density_kwds=None, hist_kwds=None, range_padding=0.05, - **kwds + **kwargs ): """ Draw a matrix of scatter plots. @@ -91,28 +91,26 @@ def scatter_matrix( ---------- frame : DataFrame alpha : float, optional - amount of transparency applied + Amount of transparency applied. figsize : (float,float), optional - a tuple (width, height) in inches + A tuple (width, height) in inches. ax : Matplotlib axis object, optional grid : bool, optional - setting this to True will show the grid + Setting this to True will show the grid. diagonal : {'hist', 'kde'} - pick between 'kde' and 'hist' for - either Kernel Density Estimation or Histogram - plot in the diagonal + Pick between 'kde' and 'hist' for either Kernel Density Estimation or + Histogram plot in the diagonal. marker : str, optional - Matplotlib marker type, default '.' 
- hist_kwds : other plotting keyword arguments - To be passed to hist function - density_kwds : other plotting keyword arguments - To be passed to kernel density estimate plot - range_padding : float, optional - relative extension of axis range in x and y - with respect to (x_max - x_min) or (y_max - y_min), - default 0.05 - kwds : other plotting keyword arguments - To be passed to scatter function + Matplotlib marker type, default '.'. + hist_kwds : keywords + Keyword arguments to be passed to hist function. + density_kwds : keywords + Keyword arguments to be passed to kernel density estimate plot. + range_padding : float, default 0.05 + Relative extension of axis range in x and y with respect to + (x_max - x_min) or (y_max - y_min). + **kwargs + Keyword arguments to be passed to scatter function. Returns ------- @@ -136,7 +134,7 @@ def scatter_matrix( density_kwds=density_kwds, hist_kwds=hist_kwds, range_padding=range_padding, - **kwds + **kwargs ) @@ -215,8 +213,8 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): @deprecate_kwarg(old_arg_name="data", new_arg_name="frame") def andrews_curves( - frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwds -): + frame, class_column, ax=None, samples=200, color=None, colormap=None, + **kwargs): """ Generate a matplotlib plot of Andrews curves, for visualising clusters of multivariate data. @@ -233,17 +231,17 @@ def andrews_curves( Parameters ---------- frame : DataFrame - Data to be plotted, preferably normalized to (0.0, 1.0) + Data to be plotted, preferably normalized to (0.0, 1.0). class_column : Name of the column containing class names ax : matplotlib axes object, default None samples : Number of points to plot in each curve color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. colormap : str or matplotlib colormap object, default None Colormap to select colors from. 
If string, load colormap with that name from matplotlib. - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- @@ -257,7 +255,7 @@ def andrews_curves( samples=samples, color=color, colormap=colormap, - **kwds + **kwargs ) @@ -327,7 +325,7 @@ def parallel_coordinates( axvlines=True, axvlines_kwds=None, sort_labels=False, - **kwds + **kwargs ): """ Parallel coordinates plotting. @@ -336,30 +334,29 @@ def parallel_coordinates( ---------- frame : DataFrame class_column : str - Column name containing class names + Column name containing class names. cols : list, optional - A list of column names to use + A list of column names to use. ax : matplotlib.axis, optional - matplotlib axis object + Matplotlib axis object. color : list or tuple, optional - Colors to use for the different classes + Colors to use for the different classes. use_columns : bool, optional - If true, columns will be used as xticks + If true, columns will be used as xticks. xticks : list or tuple, optional - A list of values to use for xticks + A list of values to use for xticks. colormap : str or matplotlib colormap, default None Colormap to use for line colors. axvlines : bool, optional - If true, vertical lines will be added at each xtick + If true, vertical lines will be added at each xtick. axvlines_kwds : keywords, optional - Options to be passed to axvline method for vertical lines - sort_labels : bool, False - Sort class_column labels, useful when assigning colors + Options to be passed to axvline method for vertical lines. + sort_labels : bool, default False + Sort class_column labels, useful when assigning colors. .. versionadded:: 0.20.0 - - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. 
Returns ------- @@ -388,7 +385,7 @@ def parallel_coordinates( axvlines=axvlines, axvlines_kwds=axvlines_kwds, sort_labels=sort_labels, - **kwds + **kwargs ) @@ -411,7 +408,7 @@ def lag_plot(series, lag=1, ax=None, **kwds): return plot_backend.lag_plot(series=series, lag=lag, ax=ax, **kwds) -def autocorrelation_plot(series, ax=None, **kwds): +def autocorrelation_plot(series, ax=None, **kwargs): """ Autocorrelation plot for time series. @@ -419,15 +416,15 @@ def autocorrelation_plot(series, ax=None, **kwds): ---------- series : Time series ax : Matplotlib axis object, optional - kwds : keywords - Options to pass to matplotlib plotting method + **kwargs + Options to pass to matplotlib plotting method. Returns ------- class:`matplotlib.axis.Axes` """ plot_backend = _get_plot_backend("matplotlib") - return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwds) + return plot_backend.autocorrelation_plot(series=series, ax=ax, **kwargs) def tsplot(series, plotf, ax=None, **kwargs): From 04dd7cf231f39a5465e1a580a87306dca8758cf9 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 12:52:30 -0400 Subject: [PATCH 5/7] fix argument ordering actual parameters ordering doesn't match docstring ordering --- pandas/plotting/_misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 76a1861ee0407..47ed834ea3637 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -102,10 +102,10 @@ def scatter_matrix( Histogram plot in the diagonal. marker : str, optional Matplotlib marker type, default '.'. - hist_kwds : keywords - Keyword arguments to be passed to hist function. density_kwds : keywords Keyword arguments to be passed to kernel density estimate plot. + hist_kwds : keywords + Keyword arguments to be passed to hist function. range_padding : float, default 0.05 Relative extension of axis range in x and y with respect to (x_max - x_min) or (y_max - y_min). 
From 9af4f17e1b798c67687d07c4cde5ba37334d26a8 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 14:02:33 -0400 Subject: [PATCH 6/7] fix formatting error forgot to run black --- pandas/plotting/_misc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index 47ed834ea3637..74ce60c6116a9 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -213,8 +213,8 @@ def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds): @deprecate_kwarg(old_arg_name="data", new_arg_name="frame") def andrews_curves( - frame, class_column, ax=None, samples=200, color=None, colormap=None, - **kwargs): + frame, class_column, ax=None, samples=200, color=None, colormap=None, **kwargs +): """ Generate a matplotlib plot of Andrews curves, for visualising clusters of multivariate data. From b6e9cd211e7562dec1b1f3ee4fb780cc64197893 Mon Sep 17 00:00:00 2001 From: Josiah Baker Date: Mon, 30 Sep 2019 23:54:28 -0400 Subject: [PATCH 7/7] change boolean -> bool --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 16f34fee5e1ff..9467978f13d30 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7948,7 +7948,7 @@ def idxmin(self, axis=0, skipna=True): ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : boolean, default True + skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA. @@ -7985,7 +7985,7 @@ def idxmax(self, axis=0, skipna=True): ---------- axis : {0 or 'index', 1 or 'columns'}, default 0 The axis to use. 0 or 'index' for row-wise, 1 or 'columns' for column-wise - skipna : boolean, default True + skipna : bool, default True Exclude NA/null values. If an entire row/column is NA, the result will be NA.