From 44469421fb8c1735f7ebecc2de0bb332e7725bbc Mon Sep 17 00:00:00 2001 From: benjamin Date: Fri, 14 Dec 2018 23:34:58 +0000 Subject: [PATCH 01/23] DOC: Fix docstrings with the sections in the wrong order #24280 --- pandas/_libs/interval.pyx | 56 +++++++++++++++--------------- pandas/_libs/tslibs/timedeltas.pyx | 16 ++++----- pandas/_libs/tslibs/timestamps.pyx | 8 ++--- pandas/core/accessor.py | 8 ++--- pandas/core/frame.py | 24 ++++++------- pandas/core/groupby/generic.py | 14 ++++---- pandas/core/groupby/groupby.py | 28 ++++++++++++--- pandas/core/resample.py | 2 +- pandas/core/series.py | 2 +- pandas/core/window.py | 30 ++++++++-------- 10 files changed, 104 insertions(+), 84 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index dae88d3b707bf..8ca98ea0b5f2e 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -158,6 +158,29 @@ cdef class Interval(IntervalMixin): Whether the interval is closed on the left-side, right-side, both or neither. See the Notes for more detailed explanation. + See Also + -------- + IntervalIndex : An Index of Interval objects that are all closed on the + same side. + cut : Convert continuous data into discrete bins (Categorical + of Interval objects). + qcut : Convert continuous data into bins (Categorical of Interval objects) + based on quantiles. + Period : Represents a period of time. 
+ """ + _typ = "interval" + + cdef readonly object left + """Left bound for the interval""" + + cdef readonly object right + """Right bound for the interval""" + + cdef readonly str closed + """ + Whether the interval is closed on the left-side, right-side, both or + neither + Notes ----- The parameters `left` and `right` must be from the same type, you must be @@ -226,29 +249,6 @@ cdef class Interval(IntervalMixin): >>> volume_1 = pd.Interval('Ant', 'Dog', closed='both') >>> 'Bee' in volume_1 True - - See Also - -------- - IntervalIndex : An Index of Interval objects that are all closed on the - same side. - cut : Convert continuous data into discrete bins (Categorical - of Interval objects). - qcut : Convert continuous data into bins (Categorical of Interval objects) - based on quantiles. - Period : Represents a period of time. - """ - _typ = "interval" - - cdef readonly object left - """Left bound for the interval""" - - cdef readonly object right - """Right bound for the interval""" - - cdef readonly str closed - """ - Whether the interval is closed on the left-side, right-side, both or - neither """ def __init__(self, left, right, str closed='right'): @@ -387,6 +387,11 @@ cdef class Interval(IntervalMixin): bool ``True`` if the two intervals overlap, else ``False``. 
+ See Also + -------- + IntervalArray.overlaps : The corresponding method for IntervalArray + IntervalIndex.overlaps : The corresponding method for IntervalIndex + Examples -------- >>> i1 = pd.Interval(0, 2) @@ -409,11 +414,6 @@ cdef class Interval(IntervalMixin): >>> i6 = pd.Interval(1, 2, closed='neither') >>> i4.overlaps(i6) False - - See Also - -------- - IntervalArray.overlaps : The corresponding method for IntervalArray - IntervalIndex.overlaps : The corresponding method for IntervalIndex """ if not isinstance(other, Interval): msg = '`other` must be an Interval, got {other}' diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index b0bead2f66ce4..904089cacf537 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1059,6 +1059,10 @@ cdef class _Timedelta(timedelta): ------- formatted : str + See Also + -------- + Timestamp.isoformat + Notes ----- The longest component is days, whose value may be larger than @@ -1081,10 +1085,6 @@ cdef class _Timedelta(timedelta): 'P0DT0H0M10S' >>> pd.Timedelta(days=500.5).isoformat() 'P500DT12H0MS' - - See Also - -------- - Timestamp.isoformat """ components = self.components seconds = '{}.{:0>3}{:0>3}{:0>3}'.format(components.seconds, @@ -1210,14 +1210,14 @@ class Timedelta(_Timedelta): """ Round the Timedelta to the specified resolution - Returns - ------- - a new Timedelta rounded to the given resolution of `freq` - Parameters ---------- freq : a freq string indicating the rounding resolution + Returns + ------- + a new Timedelta rounded to the given resolution of `freq` + Raises ------ ValueError if the freq cannot be converted diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 472ac0ee6d45c..eda2b2fb6ca98 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -776,10 +776,6 @@ class Timestamp(_Timestamp): """ Round the Timestamp to the specified resolution - Returns - 
------- - a new Timestamp rounded to the given resolution of `freq` - Parameters ---------- freq : a freq string indicating the rounding resolution @@ -802,6 +798,10 @@ class Timestamp(_Timestamp): .. versionadded:: 0.24.0 + Returns + ------- + a new Timestamp rounded to the given resolution of `freq` + Raises ------ ValueError if the freq cannot be converted diff --git a/pandas/core/accessor.py b/pandas/core/accessor.py index 93b4ce31a1e25..961488ff12e58 100644 --- a/pandas/core/accessor.py +++ b/pandas/core/accessor.py @@ -201,6 +201,10 @@ def decorator(accessor): Name under which the accessor should be registered. A warning is issued if this name conflicts with a preexisting attribute. +See Also +-------- +%(others)s + Notes ----- When accessed, your accessor will be initialized with the pandas object @@ -250,10 +254,6 @@ def plot(self): (5.0, 10.0) >>> ds.geo.plot() # plots data on a map - -See Also --------- -%(others)s """ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6b74fd7e06de9..13cc77a7d9e79 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6046,6 +6046,17 @@ def _gotitem(self, axis=0)``. `agg` is an alias for `aggregate`. Use the alias. + + See Also + -------- + DataFrame.apply : Perform any type of operations. + DataFrame.transform : Perform transformation type operations. + pandas.core.groupby.GroupBy : Perform operations over groups. + pandas.core.resample.Resampler : Perform operations over resampled bins. + pandas.core.window.Rolling : Perform operations over rolling window. + pandas.core.window.Expanding : Perform operations over expanding window. + pandas.core.window.EWM : Perform operation over exponential weighted + window. Examples -------- @@ -6078,23 +6089,12 @@ def _gotitem(self, 2 8.0 3 NaN dtype: float64 - - See Also - -------- - DataFrame.apply : Perform any type of operations. - DataFrame.transform : Perform transformation type operations. - pandas.core.groupby.GroupBy : Perform operations over groups. 
- pandas.core.resample.Resampler : Perform operations over resampled bins. - pandas.core.window.Rolling : Perform operations over rolling window. - pandas.core.window.Expanding : Perform operations over expanding window. - pandas.core.window.EWM : Perform operation over exponential weighted - window. """) - @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) + @Appender(_agg_doc) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 47ac1260d5179..7b823921d4f90 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1247,6 +1247,12 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 _agg_doc = dedent(""" + See Also + -------- + pandas.DataFrame.groupby.apply + pandas.DataFrame.groupby.transform + pandas.DataFrame.aggregate + Examples -------- @@ -1294,19 +1300,13 @@ class DataFrameGroupBy(NDFrameGroupBy): A 1 1 2 0.590716 2 3 4 0.704907 - - See Also - -------- - pandas.DataFrame.groupby.apply - pandas.DataFrame.groupby.transform - pandas.DataFrame.aggregate """) - @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', versionadded='', axis='')) + @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4b915922cef93..2044897fbfef0 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1080,7 +1080,6 @@ def count(self): raise NotImplementedError @Substitution(name='groupby') - @Appender(_doc_template) def mean(self, *args, **kwargs): """ Compute mean of groups, excluding missing values. 
@@ -1089,6 +1088,12 @@ def mean(self, *args, **kwargs): ------- pandas.Series or pandas.DataFrame + See Also + -------- + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s + Examples -------- >>> df = pd.DataFrame({'A': [1, 1, 2, 1, 2], @@ -1528,7 +1533,6 @@ def backfill(self, limit=None): bfill = backfill @Substitution(name='groupby') - @Appender(_doc_template) def nth(self, n, dropna=None): """ Take the nth row from each group if n is an int, or a subset of rows @@ -1547,6 +1551,12 @@ def nth(self, n, dropna=None): apply the specified dropna operation before counting which row is the nth row. Needs to be None, 'any' or 'all' + See Also + -------- + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s + Examples -------- @@ -2032,7 +2042,6 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, return (filled / shifted) - 1 @Substitution(name='groupby') - @Appender(_doc_template) def head(self, n=5): """ Returns first n rows of each group. @@ -2040,6 +2049,12 @@ def head(self, n=5): Essentially equivalent to ``.apply(lambda x: x.head(n))``, except ignores as_index flag. + See Also + -------- + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s + Examples -------- @@ -2059,7 +2074,6 @@ def head(self, n=5): return self._selected_obj[mask] @Substitution(name='groupby') - @Appender(_doc_template) def tail(self, n=5): """ Returns last n rows of each group. @@ -2067,6 +2081,12 @@ def tail(self, n=5): Essentially equivalent to ``.apply(lambda x: x.tail(n))``, except ignores as_index flag. 
+ See Also + -------- + pandas.Series.%(name)s + pandas.DataFrame.%(name)s + pandas.Panel.%(name)s + Examples -------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 7b842d141e839..33bc8c18e925b 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -253,11 +253,11 @@ def pipe(self, func, *args, **kwargs): 2013-01-01 00:00:04 5 NaN """) - @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', versionadded='', axis='')) + @Appender(_agg_doc) def aggregate(self, func, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index f9c9c3ab81937..413309a0547d3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3302,10 +3302,10 @@ def _gotitem(self, key, ndim, subset=None): dtype: int64 """) - @Appender(_agg_doc) @Appender(generic._shared_docs['aggregate'] % dict( versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) + @Appender(_agg_doc) def aggregate(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) diff --git a/pandas/core/window.py b/pandas/core/window.py index 8c4803a732dd8..9fe85d27d2b3d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1293,6 +1293,13 @@ def kurt(self, **kwargs): Series or DataFrame Returned object type is determined by the caller of the %(name)s calculation. + + See Also + -------- + pandas.Series.quantile : Computes value at the given quantile over all data + in Series. + pandas.DataFrame.quantile : Computes values at the given quantile over + requested axis in DataFrame. Examples -------- @@ -1310,13 +1317,6 @@ def kurt(self, **kwargs): 2 2.5 3 3.5 dtype: float64 - - See Also - -------- - pandas.Series.quantile : Computes value at the given quantile over all data - in Series. - pandas.DataFrame.quantile : Computes values at the given quantile over - requested axis in DataFrame. 
""") def quantile(self, quantile, interpolation='linear', **kwargs): @@ -1645,11 +1645,11 @@ def _validate_freq(self): 9 0.212668 -1.647453 """) - @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', klass='Series/DataFrame', axis='')) + @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1884,6 +1884,12 @@ def _get_window(self, other=None): return max(length, other) _agg_doc = dedent(""" + See Also + -------- + pandas.DataFrame.expanding.aggregate + pandas.DataFrame.rolling.aggregate + pandas.DataFrame.aggregate + Examples -------- @@ -1913,19 +1919,13 @@ def _get_window(self, other=None): 7 0.680292 0.132049 0.548693 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 - - See Also - -------- - pandas.DataFrame.expanding.aggregate - pandas.DataFrame.rolling.aggregate - pandas.DataFrame.aggregate """) - @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', klass='Series/DataFrame', axis='')) + @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) From baa295002978e071254589ab8d99e1a93121e852 Mon Sep 17 00:00:00 2001 From: benjamin Date: Fri, 14 Dec 2018 23:48:59 +0000 Subject: [PATCH 02/23] DOC: Fix docstrings with the sections in the wrong order #24280 --- pandas/_libs/interval.pyx | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx index 8ca98ea0b5f2e..1484a1299b246 100644 --- a/pandas/_libs/interval.pyx +++ b/pandas/_libs/interval.pyx @@ -167,19 +167,6 @@ cdef class Interval(IntervalMixin): qcut : Convert continuous data into bins (Categorical of Interval objects) based on quantiles. Period : Represents a period of time. 
- """ - _typ = "interval" - - cdef readonly object left - """Left bound for the interval""" - - cdef readonly object right - """Right bound for the interval""" - - cdef readonly str closed - """ - Whether the interval is closed on the left-side, right-side, both or - neither Notes ----- @@ -250,6 +237,19 @@ cdef class Interval(IntervalMixin): >>> 'Bee' in volume_1 True """ + _typ = "interval" + + cdef readonly object left + """Left bound for the interval""" + + cdef readonly object right + """Right bound for the interval""" + + cdef readonly str closed + """ + Whether the interval is closed on the left-side, right-side, both or + neither + """ def __init__(self, left, right, str closed='right'): # note: it is faster to just do these checks than to use a special From c73c5f041952d1e6cda384b2d04f728bcdb92871 Mon Sep 17 00:00:00 2001 From: benjamin Date: Fri, 14 Dec 2018 23:59:46 +0000 Subject: [PATCH 03/23] Removal of whitespace to satisfy flake8 checks. --- pandas/core/frame.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/window.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 13cc77a7d9e79..b1c408a1221d5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6046,7 +6046,7 @@ def _gotitem(self, axis=0)``. `agg` is an alias for `aggregate`. Use the alias. - + See Also -------- DataFrame.apply : Perform any type of operations. 
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 7b823921d4f90..4893eb3002731 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1252,7 +1252,7 @@ class DataFrameGroupBy(NDFrameGroupBy): pandas.DataFrame.groupby.apply pandas.DataFrame.groupby.transform pandas.DataFrame.aggregate - + Examples -------- diff --git a/pandas/core/window.py b/pandas/core/window.py index 9fe85d27d2b3d..0d251a5d9d21c 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1293,7 +1293,7 @@ def kurt(self, **kwargs): Series or DataFrame Returned object type is determined by the caller of the %(name)s calculation. - + See Also -------- pandas.Series.quantile : Computes value at the given quantile over all data @@ -1889,7 +1889,7 @@ def _get_window(self, other=None): pandas.DataFrame.expanding.aggregate pandas.DataFrame.rolling.aggregate pandas.DataFrame.aggregate - + Examples -------- From 14fe7296f5c973b07e5414545c5b2a1479aafaad Mon Sep 17 00:00:00 2001 From: benjamin Date: Sun, 16 Dec 2018 16:22:25 +0000 Subject: [PATCH 04/23] Refactored _doc_template to _common_see_also and used with the substitution decorator in docstrings where commonly repeated --- pandas/core/groupby/groupby.py | 72 +++++++++++++--------------------- 1 file changed, 28 insertions(+), 44 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 40b6bf278ead9..f7c6ccdc25395 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -41,7 +41,7 @@ class providing the base-class of operations. 
from pandas.core.series import Series from pandas.core.sorting import get_group_index_sorter -_doc_template = """ +_common_see_also = """ See Also -------- pandas.Series.%(name)s @@ -1044,7 +1044,7 @@ def result_to_bool(result): val_test=val_test, skipna=skipna) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def any(self, skipna=True): """ Returns True if any value in the group is truthful, else False. @@ -1057,7 +1057,7 @@ def any(self, skipna=True): return self._bool_agg('any', skipna) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def all(self, skipna=True): """ Returns True if all values in the group are truthful, else False. @@ -1070,7 +1070,7 @@ def all(self, skipna=True): return self._bool_agg('all', skipna) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def count(self): """ Compute count of group, excluding missing values. @@ -1079,7 +1079,7 @@ def count(self): # defined here for API doc raise NotImplementedError - @Substitution(name='groupby') + @Substitution(name='groupby', see_also=_common_see_also) def mean(self, *args, **kwargs): """ Compute mean of groups, excluding missing values. @@ -1088,11 +1088,7 @@ def mean(self, *args, **kwargs): ------- pandas.Series or pandas.DataFrame - See Also - -------- - pandas.Series.%(name)s - pandas.DataFrame.%(name)s - pandas.Panel.%(name)s + %(see_also)s Examples -------- @@ -1142,7 +1138,7 @@ def mean(self, *args, **kwargs): return self._python_agg_general(f) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def median(self, **kwargs): """ Compute median of groups, excluding missing values. @@ -1163,7 +1159,7 @@ def f(x): return self._python_agg_general(f) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def std(self, ddof=1, *args, **kwargs): """ Compute standard deviation of groups, excluding missing values. 
@@ -1181,7 +1177,7 @@ def std(self, ddof=1, *args, **kwargs): return np.sqrt(self.var(ddof=ddof, **kwargs)) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def var(self, ddof=1, *args, **kwargs): """ Compute variance of groups, excluding missing values. @@ -1207,7 +1203,7 @@ def var(self, ddof=1, *args, **kwargs): return self._python_agg_general(f) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def sem(self, ddof=1): """ Compute standard error of the mean of groups, excluding missing values. @@ -1223,7 +1219,7 @@ def sem(self, ddof=1): return self.std(ddof=ddof) / np.sqrt(self.count()) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def size(self): """ Compute group sizes. @@ -1247,7 +1243,7 @@ def groupby_function(name, alias, npfunc, _local_template = "Compute %(f)s of group values" @Substitution(name='groupby', f=name) - @Appender(_doc_template) + @Appender(_common_see_also) @Appender(_local_template) def f(self, **kwargs): if 'numeric_only' not in kwargs: @@ -1312,7 +1308,7 @@ def last(x): numeric_only=False) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def ohlc(self): """ Compute sum of values, excluding missing values. @@ -1441,7 +1437,7 @@ def resample(self, rule, *args, **kwargs): return get_resampler_for_grouping(self, rule, *args, **kwargs) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def rolling(self, *args, **kwargs): """ Return a rolling grouper, providing rolling functionality per group. 
@@ -1450,7 +1446,7 @@ def rolling(self, *args, **kwargs): return RollingGroupby(self, *args, **kwargs) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def expanding(self, *args, **kwargs): """ Return an expanding grouper, providing expanding @@ -1532,7 +1528,7 @@ def backfill(self, limit=None): return self._fill('bfill', limit=limit) bfill = backfill - @Substitution(name='groupby') + @Substitution(name='groupby', see_also=_common_see_also) def nth(self, n, dropna=None): """ Take the nth row from each group if n is an int, or a subset of rows @@ -1551,11 +1547,7 @@ def nth(self, n, dropna=None): apply the specified dropna operation before counting which row is the nth row. Needs to be None, 'any' or 'all' - See Also - -------- - pandas.Series.%(name)s - pandas.DataFrame.%(name)s - pandas.Panel.%(name)s + %(see_also)s Examples -------- @@ -1818,7 +1810,7 @@ def cumcount(self, ascending=True): return Series(cumcounts, index) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def rank(self, method='average', ascending=True, na_option='keep', pct=False, axis=0): """ @@ -1855,7 +1847,7 @@ def rank(self, method='average', ascending=True, na_option='keep', na_option=na_option, pct=pct, axis=axis) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def cumprod(self, axis=0, *args, **kwargs): """ Cumulative product for each group. @@ -1868,7 +1860,7 @@ def cumprod(self, axis=0, *args, **kwargs): return self._cython_transform('cumprod', **kwargs) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def cumsum(self, axis=0, *args, **kwargs): """ Cumulative sum for each group. 
@@ -1881,7 +1873,7 @@ def cumsum(self, axis=0, *args, **kwargs): return self._cython_transform('cumsum', **kwargs) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def cummin(self, axis=0, **kwargs): """ Cumulative min for each group. @@ -1892,7 +1884,7 @@ def cummin(self, axis=0, **kwargs): return self._cython_transform('cummin', numeric_only=False) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def cummax(self, axis=0, **kwargs): """ Cumulative max for each group. @@ -2001,7 +1993,7 @@ def _get_cythonized_result(self, how, grouper, aggregate=False, return self._wrap_transformed_output(output) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def shift(self, periods=1, freq=None, axis=0): """ Shift each group by periods observations. @@ -2024,7 +2016,7 @@ def shift(self, periods=1, freq=None, axis=0): periods=periods) @Substitution(name='groupby') - @Appender(_doc_template) + @Appender(_common_see_also) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, axis=0): """ @@ -2041,7 +2033,7 @@ def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, shifted = fill_grp.shift(periods=periods, freq=freq) return (filled / shifted) - 1 - @Substitution(name='groupby') + @Substitution(name='groupby', see_also=_common_see_also) def head(self, n=5): """ Returns first n rows of each group. @@ -2049,11 +2041,7 @@ def head(self, n=5): Essentially equivalent to ``.apply(lambda x: x.head(n))``, except ignores as_index flag. - See Also - -------- - pandas.Series.%(name)s - pandas.DataFrame.%(name)s - pandas.Panel.%(name)s + %(see_also)s Examples -------- @@ -2073,7 +2061,7 @@ def head(self, n=5): mask = self._cumcount_array() < n return self._selected_obj[mask] - @Substitution(name='groupby') + @Substitution(name='groupby', see_also=_common_see_also) def tail(self, n=5): """ Returns last n rows of each group. 
@@ -2081,11 +2069,7 @@ def tail(self, n=5): Essentially equivalent to ``.apply(lambda x: x.tail(n))``, except ignores as_index flag. - See Also - -------- - pandas.Series.%(name)s - pandas.DataFrame.%(name)s - pandas.Panel.%(name)s + %(see_also)s Examples -------- From 23350ded288164cc4f0b00434b9cc69c71554d2f Mon Sep 17 00:00:00 2001 From: benjamin Date: Sun, 16 Dec 2018 16:25:51 +0000 Subject: [PATCH 05/23] Updated CI checks to include GL07 docstring validation. --- ci/code_checks.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index ac92cf492b91d..407540b66e927 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -213,7 +213,7 @@ fi if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then MSG='Validate docstrings (GL09, GL06, SS04, PR03, PR05, EX04)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL09,GL06,SS04,PR03,PR05,EX04 + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL09,GL06,GL07,SS04,PR03,PR05,EX04 RET=$(($RET + $?)) ; echo $MSG "DONE" fi From 3d18f6b6a9a12c25b7c2886c94a2a40753bb1991 Mon Sep 17 00:00:00 2001 From: benjamin Date: Sun, 16 Dec 2018 16:59:02 +0000 Subject: [PATCH 06/23] Docstring appends were in the wrong order - this corrects that. Although aggregate method docstrings still have the issue of sharing a docstring with Notes directly after Returns. This means any further appends with a See Also section are out of order. --- pandas/core/frame.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/resample.py | 2 +- pandas/core/series.py | 2 +- pandas/core/window.py | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b1c408a1221d5..3d5250dd02fb7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6091,10 +6091,10 @@ def _gotitem(self, dtype: float64 """) + @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='.. 
versionadded:: 0.20.0', **_shared_doc_kwargs)) - @Appender(_agg_doc) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 4893eb3002731..b0c3080b72d4d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1302,11 +1302,11 @@ class DataFrameGroupBy(NDFrameGroupBy): 2 3 4 0.704907 """) + @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', versionadded='', axis='')) - @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 54bf8e872eec4..9920fcbcbd2b8 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -253,11 +253,11 @@ def pipe(self, func, *args, **kwargs): 2013-01-01 00:00:04 5 NaN """) + @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( klass='DataFrame', versionadded='', axis='')) - @Appender(_agg_doc) def aggregate(self, func, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index abc2dfd0ac539..9ba9cdc818a5e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3302,10 +3302,10 @@ def _gotitem(self, key, ndim, subset=None): dtype: int64 """) + @Appender(_agg_doc) @Appender(generic._shared_docs['aggregate'] % dict( versionadded='.. 
versionadded:: 0.20.0', **_shared_doc_kwargs)) - @Appender(_agg_doc) def aggregate(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) diff --git a/pandas/core/window.py b/pandas/core/window.py index 0d251a5d9d21c..ffcf06df81712 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1645,11 +1645,11 @@ def _validate_freq(self): 9 0.212668 -1.647453 """) + @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', klass='Series/DataFrame', axis='')) - @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1921,11 +1921,11 @@ def _get_window(self, other=None): 9 -0.286980 0.618493 -0.694496 """) + @Appender(_agg_doc) @Appender(_shared_docs['aggregate'] % dict( versionadded='', klass='Series/DataFrame', axis='')) - @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) From a1db80b105872441dfcd2da76a68a2423ab0596d Mon Sep 17 00:00:00 2001 From: benjamin Date: Mon, 17 Dec 2018 18:32:15 +0000 Subject: [PATCH 07/23] Use of format on _shared_docs['aggregate'] docstring usage to get the order right. --- pandas/core/frame.py | 12 +++++++----- pandas/core/generic.py | 7 ++++++- pandas/core/groupby/generic.py | 10 +++++++--- pandas/core/resample.py | 10 +++++++--- pandas/core/series.py | 16 ++++++++-------- pandas/core/window.py | 20 ++++++++++++++------ 6 files changed, 49 insertions(+), 26 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3d5250dd02fb7..fb7b462c3dba0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6037,7 +6037,7 @@ def _gotitem(self, # TODO: _shallow_copy(subset)? return subset[key] - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" The aggregation operations are always performed over an axis, either the index (default) or the column axis. 
This behavior is different from `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, @@ -6057,7 +6057,9 @@ def _gotitem(self, pandas.core.window.Expanding : Perform operations over expanding window. pandas.core.window.EWM : Perform operation over exponential weighted window. + """) + _agg_examples_doc = dedent(""" Examples -------- >>> df = pd.DataFrame([[1, 2, 3], @@ -6091,10 +6093,10 @@ def _gotitem(self, dtype: float64 """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( - versionadded='.. versionadded:: 0.20.0', - **_shared_doc_kwargs)) + @Appender(_shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict(versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0df4a067dda44..0ef50347f0949 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6,6 +6,7 @@ import operator import warnings import weakref +from textwrap import dedent import numpy as np @@ -4895,7 +4896,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None, def pipe(self, func, *args, **kwargs): return com._pipe(self, func, *args, **kwargs) - _shared_docs['aggregate'] = (""" + _shared_docs['aggregate'] = dedent(""" Aggregate using one or more operations over the specified axis. %(versionadded)s @@ -4926,11 +4927,15 @@ def pipe(self, func, *args, **kwargs): if Series.agg is called with single function, returns a scalar if Series.agg is called with several functions, returns a Series + {see_also} + Notes ----- `agg` is an alias for `aggregate`. Use the alias. A passed user-defined-function will be passed a Series for evaluation. 
+ + {examples} """) _shared_docs['transform'] = (""" diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b0c3080b72d4d..0e656906e894f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1246,13 +1246,15 @@ class DataFrameGroupBy(NDFrameGroupBy): _block_agg_axis = 1 - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" See Also -------- pandas.DataFrame.groupby.apply pandas.DataFrame.groupby.transform pandas.DataFrame.aggregate + """) + _agg_examples_doc = dedent(""" Examples -------- @@ -1302,8 +1304,10 @@ class DataFrameGroupBy(NDFrameGroupBy): 2 3 4 0.704907 """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( + @Appender(_shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict( klass='DataFrame', versionadded='', axis='')) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 9920fcbcbd2b8..2b2381e3dfd20 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -211,13 +211,15 @@ def _assure_grouper(self): def pipe(self, func, *args, **kwargs): return super(Resampler, self).pipe(func, *args, **kwargs) - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" See Also -------- pandas.DataFrame.groupby.aggregate pandas.DataFrame.resample.transform pandas.DataFrame.aggregate + """) + _agg_examples_doc = dedent(""" Examples -------- >>> s = pd.Series([1,2,3,4,5], @@ -253,8 +255,10 @@ def pipe(self, func, *args, **kwargs): 2013-01-01 00:00:04 5 NaN """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( + @Appender(_shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict( klass='DataFrame', versionadded='', axis='')) diff --git a/pandas/core/series.py b/pandas/core/series.py index 9ba9cdc818a5e..c737515afcdca 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3275,16 +3275,16 @@ def _gotitem(self, key, ndim, subset=None): """ 
return self - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" See Also -------- Series.apply : Invoke function on a Series. - Series.transform : Transform function producing - a Series with like indexes. + Series.transform : Transform function producing a Series with like indexes. + """) + _agg_examples_doc = dedent(""" Examples -------- - >>> s = pd.Series([1, 2, 3, 4]) >>> s 0 1 @@ -3302,10 +3302,10 @@ def _gotitem(self, key, ndim, subset=None): dtype: int64 """) - @Appender(_agg_doc) - @Appender(generic._shared_docs['aggregate'] % dict( - versionadded='.. versionadded:: 0.20.0', - **_shared_doc_kwargs)) + @Appender(generic._shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict(versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) def aggregate(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) diff --git a/pandas/core/window.py b/pandas/core/window.py index ffcf06df81712..c0828ea66f0b2 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1595,12 +1595,14 @@ def _validate_freq(self): "compatible with a datetimelike " "index".format(self.window)) - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" See Also -------- pandas.Series.rolling pandas.DataFrame.rolling + """) + _agg_examples_doc = dedent(""" Examples -------- @@ -1645,8 +1647,10 @@ def _validate_freq(self): 9 0.212668 -1.647453 """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( + @Appender(_shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict( versionadded='', klass='Series/DataFrame', axis='')) @@ -1883,13 +1887,15 @@ def _get_window(self, other=None): other = self.min_periods or -1 return max(length, other) - _agg_doc = dedent(""" + _agg_see_also_doc = dedent(""" See Also -------- pandas.DataFrame.expanding.aggregate pandas.DataFrame.rolling.aggregate pandas.DataFrame.aggregate + """) + _agg_examples_doc 
= dedent(""" Examples -------- @@ -1921,8 +1927,10 @@ def _get_window(self, other=None): 9 -0.286980 0.618493 -0.694496 """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( + @Appender(_shared_docs['aggregate'].format( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc + ) % dict( versionadded='', klass='Series/DataFrame', axis='')) From 64af1fcf45ee6c4770bf76da4324f43cbfc2d687 Mon Sep 17 00:00:00 2001 From: benjamin Date: Tue, 18 Dec 2018 10:08:44 +0000 Subject: [PATCH 08/23] Updated to code check order and included GL07 in messages. --- ci/code_checks.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 407540b66e927..a8ca81bde9dcd 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -212,8 +212,8 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then - MSG='Validate docstrings (GL09, GL06, SS04, PR03, PR05, EX04)' ; echo $MSG - $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL09,GL06,GL07,SS04,PR03,PR05,EX04 + MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, EX04)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04 RET=$(($RET + $?)) ; echo $MSG "DONE" fi From 3e549186a1a4af2cfa9fd8df51fc0be95e53d07e Mon Sep 17 00:00:00 2001 From: benjamin Date: Tue, 18 Dec 2018 10:09:29 +0000 Subject: [PATCH 09/23] Use of substitution to clean up docstring order fix. 
--- pandas/core/frame.py | 9 +++--- pandas/core/generic.py | 4 +-- pandas/core/groupby/generic.py | 30 +++++++++--------- pandas/core/resample.py | 13 ++++---- pandas/core/series.py | 9 +++--- pandas/core/window.py | 57 ++++++++++++++++++---------------- 6 files changed, 63 insertions(+), 59 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fb7b462c3dba0..d8fdc97555327 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6093,10 +6093,11 @@ def _gotitem(self, dtype: float64 """) - @Appender(_shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict(versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='.. versionadded:: 0.20.0', + **_shared_doc_kwargs) + @Appender(_shared_docs['aggregate']) def aggregate(self, func, axis=0, *args, **kwargs): axis = self._get_axis_number(axis) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0ef50347f0949..c51d5fe6f27f2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4927,7 +4927,7 @@ def pipe(self, func, *args, **kwargs): if Series.agg is called with single function, returns a scalar if Series.agg is called with several functions, returns a Series - {see_also} + %(see_also)s Notes ----- @@ -4935,7 +4935,7 @@ def pipe(self, func, *args, **kwargs): A passed user-defined-function will be passed a Series for evaluation. 
- {examples} + %(examples)s """) _shared_docs['transform'] = (""" diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0e656906e894f..63b378f59ba16 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -706,10 +706,9 @@ def _selection_name(self): else: return self._selection - _agg_doc = dedent(""" + _agg_examples_doc = dedent(""" Examples -------- - >>> s = pd.Series([1, 2, 3, 4]) >>> s @@ -733,13 +732,14 @@ def _selection_name(self): min max 1 1 2 2 3 4 + """) + _agg_see_also_doc = dedent(""" See Also -------- pandas.Series.groupby.apply pandas.Series.groupby.transform pandas.Series.aggregate - """) @Appender(_apply_docs['template'] @@ -748,11 +748,12 @@ def _selection_name(self): def apply(self, func, *args, **kwargs): return super(SeriesGroupBy, self).apply(func, *args, **kwargs) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( - klass='Series', - versionadded='', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='Series', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, func_or_funcs, *args, **kwargs): _level = kwargs.pop('_level', None) if isinstance(func_or_funcs, compat.string_types): @@ -1304,13 +1305,12 @@ class DataFrameGroupBy(NDFrameGroupBy): 2 3 4 0.704907 """) - @Appender(_shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict( - klass='DataFrame', - versionadded='', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='DataFrame', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, arg, *args, **kwargs): return super(DataFrameGroupBy, self).aggregate(arg, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 2b2381e3dfd20..ef43da6d5f490 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -255,13 +255,12 @@ def 
pipe(self, func, *args, **kwargs): 2013-01-01 00:00:04 5 NaN """) - @Appender(_shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict( - klass='DataFrame', - versionadded='', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='DataFrame', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, func, *args, **kwargs): self._set_binner() diff --git a/pandas/core/series.py b/pandas/core/series.py index c737515afcdca..773f2d17cf0fc 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3302,10 +3302,11 @@ def _gotitem(self, key, ndim, subset=None): dtype: int64 """) - @Appender(generic._shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict(versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs)) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='.. versionadded:: 0.20.0', + **_shared_doc_kwargs) + @Appender(generic._shared_docs['aggregate']) def aggregate(self, func, axis=0, *args, **kwargs): # Validate the axis parameter self._get_axis_number(axis) diff --git a/pandas/core/window.py b/pandas/core/window.py index c0828ea66f0b2..2ea709c4cc61d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -693,7 +693,7 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks, obj) - _agg_doc = dedent(""" + _agg_examples_doc = dedent(""" Examples -------- @@ -723,19 +723,21 @@ def f(arg, *args, **kwargs): 7 0.906020 1.283573 0.085482 8 -0.096361 0.818139 0.472290 9 0.070889 0.134399 -0.031308 + """) + _agg_see_also_doc = dedent(""" See Also -------- pandas.DataFrame.rolling.aggregate pandas.DataFrame.aggregate - """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( - versionadded='', - klass='Series/DataFrame', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + 
versionadded='', + klass='Series/DataFrame', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, arg, *args, **kwargs): result, how = self._aggregate(arg, *args, **kwargs) if result is None: @@ -1647,13 +1649,12 @@ def _validate_freq(self): 9 0.212668 -1.647453 """) - @Appender(_shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict( - versionadded='', - klass='Series/DataFrame', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='Series/Dataframe', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, arg, *args, **kwargs): return super(Rolling, self).aggregate(arg, *args, **kwargs) @@ -1927,13 +1928,12 @@ def _get_window(self, other=None): 9 -0.286980 0.618493 -0.694496 """) - @Appender(_shared_docs['aggregate'].format( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc - ) % dict( - versionadded='', - klass='Series/DataFrame', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='Series/Dataframe', + axis='') + @Appender(_shared_docs['aggregate']) def aggregate(self, arg, *args, **kwargs): return super(Expanding, self).aggregate(arg, *args, **kwargs) @@ -2192,7 +2192,7 @@ def __init__(self, obj, com=None, span=None, halflife=None, alpha=None, def _constructor(self): return EWM - _agg_doc = dedent(""" + _agg_examples_doc = dedent(""" Examples -------- @@ -2222,17 +2222,20 @@ def _constructor(self): 7 0.680292 0.132049 0.548693 8 0.067236 0.948257 0.163353 9 -0.286980 0.618493 -0.694496 + """) + _agg_see_also_doc = dedent(""" See Also -------- pandas.DataFrame.rolling.aggregate """) - @Appender(_agg_doc) - @Appender(_shared_docs['aggregate'] % dict( - versionadded='', - klass='Series/DataFrame', - axis='')) + @Substitution(see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded='', + klass='Series/Dataframe', + axis='') + 
@Appender(_shared_docs['aggregate']) def aggregate(self, arg, *args, **kwargs): return super(EWM, self).aggregate(arg, *args, **kwargs) From 5f64fcf824139bf2560cd07b2f91c4697aa3c2c0 Mon Sep 17 00:00:00 2001 From: benjamin Date: Tue, 18 Dec 2018 10:40:10 +0000 Subject: [PATCH 10/23] Corrected order of see_also and examples string definitions to match docstrings. --- pandas/core/groupby/generic.py | 16 ++++++++-------- pandas/core/window.py | 26 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 63b378f59ba16..33a41ab1cabc4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -706,6 +706,14 @@ def _selection_name(self): else: return self._selection + _agg_see_also_doc = dedent(""" + See Also + -------- + pandas.Series.groupby.apply + pandas.Series.groupby.transform + pandas.Series.aggregate + """) + _agg_examples_doc = dedent(""" Examples -------- @@ -734,14 +742,6 @@ def _selection_name(self): 2 3 4 """) - _agg_see_also_doc = dedent(""" - See Also - -------- - pandas.Series.groupby.apply - pandas.Series.groupby.transform - pandas.Series.aggregate - """) - @Appender(_apply_docs['template'] .format(input='series', examples=_apply_docs['series_examples'])) diff --git a/pandas/core/window.py b/pandas/core/window.py index 2ea709c4cc61d..f7c23172172ac 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -693,6 +693,13 @@ def f(arg, *args, **kwargs): return self._wrap_results(results, blocks, obj) + _agg_see_also_doc = dedent(""" + See Also + -------- + pandas.DataFrame.rolling.aggregate + pandas.DataFrame.aggregate + """) + _agg_examples_doc = dedent(""" Examples -------- @@ -725,13 +732,6 @@ def f(arg, *args, **kwargs): 9 0.070889 0.134399 -0.031308 """) - _agg_see_also_doc = dedent(""" - See Also - -------- - pandas.DataFrame.rolling.aggregate - pandas.DataFrame.aggregate - """) - 
@Substitution(see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded='', @@ -2192,6 +2192,12 @@ def __init__(self, obj, com=None, span=None, halflife=None, alpha=None, def _constructor(self): return EWM + _agg_see_also_doc = dedent(""" + See Also + -------- + pandas.DataFrame.rolling.aggregate + """) + _agg_examples_doc = dedent(""" Examples -------- @@ -2224,12 +2230,6 @@ def _constructor(self): 9 -0.286980 0.618493 -0.694496 """) - _agg_see_also_doc = dedent(""" - See Also - -------- - pandas.DataFrame.rolling.aggregate - """) - @Substitution(see_also=_agg_see_also_doc, examples=_agg_examples_doc, versionadded='', From bdeb8ae0e71a571ce9cdb170ac9aecdba440298f Mon Sep 17 00:00:00 2001 From: benjamin Date: Tue, 18 Dec 2018 10:58:14 +0000 Subject: [PATCH 11/23] Updated name of _agg_see_also_doc to reflect fact it also contains a summary. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d8fdc97555327..7b8cd71e64f25 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6037,7 +6037,7 @@ def _gotitem(self, # TODO: _shallow_copy(subset)? return subset[key] - _agg_see_also_doc = dedent(""" + _agg_summary_and_see_also_doc = dedent(""" The aggregation operations are always performed over an axis, either the index (default) or the column axis. This behavior is different from `numpy` aggregation functions (`mean`, `median`, `prod`, `sum`, `std`, @@ -6093,7 +6093,7 @@ def _gotitem(self, dtype: float64 """) - @Substitution(see_also=_agg_see_also_doc, + @Substitution(see_also=_agg_summary_and_see_also_doc, examples=_agg_examples_doc, versionadded='.. versionadded:: 0.20.0', **_shared_doc_kwargs) From fc5fc692f5bb6b516653d2a83c46e6f86cb9687c Mon Sep 17 00:00:00 2001 From: benjamin Date: Tue, 18 Dec 2018 11:48:41 +0000 Subject: [PATCH 12/23] Fix added import statement to be in the correct order. 
--- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c51d5fe6f27f2..6eb6bc124c80a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -4,9 +4,9 @@ import gc import json import operator +from textwrap import dedent import warnings import weakref -from textwrap import dedent import numpy as np From 91d13fec3adb69b38a4e2a0b60f2784421c261a6 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 26 Dec 2018 21:16:26 +0000 Subject: [PATCH 13/23] Addition of dateutil docs to intersphinx mapping to pull in objects for doc refs. --- doc/source/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 2d1369499dfda..3d87d58026e82 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -368,13 +368,14 @@ intersphinx_mapping = { - 'statsmodels': ('http://www.statsmodels.org/devel/', None), 'matplotlib': ('https://matplotlib.org/', None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None), + 'py': ('https://pylib.readthedocs.io/en/latest/', None), 'python': ('https://docs.python.org/3/', None), - 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), - 'py': ('https://pylib.readthedocs.io/en/latest/', None) + 'statsmodels': ('http://www.statsmodels.org/devel/', None), + 'dateutil': ("https://dateutil.readthedocs.io/en/latest/", None), } import glob autosummary_generate = glob.glob("*.rst") From ae6aa27bd6bb1ff30c11b53a8c8dec61572d391b Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 26 Dec 2018 23:14:43 +0000 Subject: [PATCH 14/23] Reordered listing of mapping to be alphabetical. 
--- doc/source/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/conf.py b/doc/source/conf.py index 3d87d58026e82..8e5daf079390a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -368,6 +368,7 @@ intersphinx_mapping = { + 'dateutil': ("https://dateutil.readthedocs.io/en/latest/", None), 'matplotlib': ('https://matplotlib.org/', None), 'numpy': ('https://docs.scipy.org/doc/numpy/', None), 'pandas-gbq': ('https://pandas-gbq.readthedocs.io/en/latest/', None), @@ -375,7 +376,6 @@ 'python': ('https://docs.python.org/3/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), 'statsmodels': ('http://www.statsmodels.org/devel/', None), - 'dateutil': ("https://dateutil.readthedocs.io/en/latest/", None), } import glob autosummary_generate = glob.glob("*.rst") From 88a77b010ae9b48f359cdf8ef9f9aa6840996f71 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 15:21:30 +0000 Subject: [PATCH 15/23] Raise the ValueError instead of passing. --- pandas/io/sas/sasreader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 2da3775d5a6a7..12c5ba7565b07 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -48,7 +48,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, else: raise ValueError("unable to infer format of SAS file") except ValueError: - pass + raise if format.lower() == 'xport': from pandas.io.sas.sas_xport import XportReader From 15bf0f9be63f0605c22cda6528058e3b625e7455 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 15:45:19 +0000 Subject: [PATCH 16/23] Update docstring to describe format inference. 
--- pandas/io/sas/sasreader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 12c5ba7565b07..488debf225f66 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -16,8 +16,8 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, filepath_or_buffer : string or file-like object Path to the SAS file. format : string {'xport', 'sas7bdat'} or None - If None, file format is inferred. If 'xport' or 'sas7bdat', - uses the corresponding format. + If None, file format is inferred from file extension. If 'xport' or + 'sas7bdat', uses the corresponding format. index : identifier of index column, defaults to None Identifier of column that should be used as index of the DataFrame. encoding : string, default is None From 00484cbb75a13f10cb978ceb787dc6f32b4867a1 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 16:51:10 +0000 Subject: [PATCH 17/23] Added a test, updated whatsnew and removed superfluous try/except. 
--- doc/source/whatsnew/v0.24.0.rst | 1 + pandas/io/sas/sasreader.py | 17 +++++++---------- pandas/tests/io/sas/test_sas.py | 8 ++++++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index e770a9e3c47f8..2a909ecc4c88d 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1623,6 +1623,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) - Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) +- Bug in :func:`read_sas()` in which ``ValueError`` supposedly raised by an inability to infer file format is passed instead of actually raised (:issue:`24548`) Plotting ^^^^^^^^ diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 488debf225f66..9fae0da670bec 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -39,16 +39,13 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, filepath_or_buffer = _stringify_path(filepath_or_buffer) if not isinstance(filepath_or_buffer, compat.string_types): raise ValueError(buffer_error_msg) - try: - fname = filepath_or_buffer.lower() - if fname.endswith(".xpt"): - format = "xport" - elif fname.endswith(".sas7bdat"): - format = "sas7bdat" - else: - raise ValueError("unable to infer format of SAS file") - except ValueError: - raise + fname = filepath_or_buffer.lower() + if fname.endswith(".xpt"): + format = "xport" + elif fname.endswith(".sas7bdat"): + format = 
"sas7bdat" + else: + raise ValueError("unable to infer format of SAS file") if format.lower() == 'xport': from pandas.io.sas.sas_xport import XportReader diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py index 0f6342aa62ac0..a9e71b09de1e8 100644 --- a/pandas/tests/io/sas/test_sas.py +++ b/pandas/tests/io/sas/test_sas.py @@ -1,6 +1,7 @@ import pytest from pandas.compat import StringIO +import pandas.util.testing as tm from pandas import read_sas @@ -15,3 +16,10 @@ def test_sas_buffer_format(self): "name, you must specify a format string") with pytest.raises(ValueError, match=msg): read_sas(b) + + def test_sas_read_no_format_or_extension(self): + # see gh-24548 + msg = ("unable to infer format of SAS file") + with tm.ensure_clean('test_file_no_extension') as path: + with pytest.raises(ValueError, match=msg): + read_sas(path) From 8eb839e5342a8d04135c7be5a21e0cf9a3c191f0 Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 17:08:24 +0000 Subject: [PATCH 18/23] Update to whatsnew entry to improve readability. 
--- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 2a909ecc4c88d..8c392ff8c8371 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1623,7 +1623,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) - Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) -- Bug in :func:`read_sas()` in which ``ValueError`` supposedly raised by an inability to infer file format is passed instead of actually raised (:issue:`24548`) +- Bug in :func:`read_sas()` in which ``ValueError`` was passed by a try/except block when no format kwarg was provided and function attempted to infer a file format from the extension in the file path. (:issue:`24548`) Plotting ^^^^^^^^ From 06a86c4e0148ca23fb55fba16d3c9321845443cb Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 17:34:59 +0000 Subject: [PATCH 19/23] Removal of trailing whitespace. 
--- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 8c392ff8c8371..31e0589d56d6f 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1623,7 +1623,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) - Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) -- Bug in :func:`read_sas()` in which ``ValueError`` was passed by a try/except block when no format kwarg was provided and function attempted to infer a file format from the extension in the file path. (:issue:`24548`) +- Bug in :func:`read_sas()` in which ``ValueError`` was passed by a try/except block when no format kwarg was provided and function attempted to infer a file format from the extension in the file path. (:issue:`24548`) Plotting ^^^^^^^^ From c3291c26f3296d94a4bb67a2e0b39f1fbf6f1fff Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 17:46:21 +0000 Subject: [PATCH 20/23] Triggering CI, pushed previous as github went down. 
From 0dd9b49aedc7c991e803808e123e6bdef9fa79b8 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 13:00:44 -0500 Subject: [PATCH 21/23] whatsnew --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 31e0589d56d6f..f1212bb116fae 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1596,6 +1596,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) +- Bug in :func:`read_sas()` in which an error was incorrect raised on an invalid file format. (:issue:`24548`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_html()` with ``index=False`` misses truncation indicators (...) 
on truncated DataFrame (:issue:`15019`, :issue:`22783`) - Bug in :func:`to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`) @@ -1623,7 +1624,6 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - Bug in :meth:`DataFrame.to_stata`, :class:`pandas.io.stata.StataWriter` and :class:`pandas.io.stata.StataWriter117` where a exception would leave a partially written and invalid dta file (:issue:`23573`) - Bug in :meth:`DataFrame.to_stata` and :class:`pandas.io.stata.StataWriter117` that produced invalid files when using strLs with non-ASCII characters (:issue:`23573`) - Bug in :class:`HDFStore` that caused it to raise ``ValueError`` when reading a Dataframe in Python 3 from fixed format written in Python 2 (:issue:`24510`) -- Bug in :func:`read_sas()` in which ``ValueError`` was passed by a try/except block when no format kwarg was provided and function attempted to infer a file format from the extension in the file path. (:issue:`24548`) Plotting ^^^^^^^^ From 9012e41d984dccf9184391bbcb411f92389477af Mon Sep 17 00:00:00 2001 From: benjamin Date: Wed, 2 Jan 2019 18:09:31 +0000 Subject: [PATCH 22/23] Fixed import order for test. 
--- pandas/tests/io/sas/test_sas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/io/sas/test_sas.py b/pandas/tests/io/sas/test_sas.py index a9e71b09de1e8..34bca1e5b74a1 100644 --- a/pandas/tests/io/sas/test_sas.py +++ b/pandas/tests/io/sas/test_sas.py @@ -1,9 +1,9 @@ import pytest from pandas.compat import StringIO -import pandas.util.testing as tm from pandas import read_sas +import pandas.util.testing as tm class TestSas(object): From dcc071d82b636f49128846ebd402fe68f63b08dc Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 2 Jan 2019 19:43:33 -0500 Subject: [PATCH 23/23] correct doc --- doc/source/whatsnew/v0.24.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 1e21f8a4bc8e3..3566d58f5c641 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1599,7 +1599,7 @@ Notice how we now instead output ``np.nan`` itself instead of a stringified form - :func:`read_sas()` will parse numbers in sas7bdat-files that have width less than 8 bytes correctly. (:issue:`21616`) - :func:`read_sas()` will correctly parse sas7bdat files with many columns (:issue:`22628`) - :func:`read_sas()` will correctly parse sas7bdat files with data page types having also bit 7 set (so page type is 128 + 256 = 384) (:issue:`16615`) -- Bug in :func:`read_sas()` in which an error was incorrect raised on an invalid file format. (:issue:`24548`) +- Bug in :func:`read_sas()` in which an incorrect error was raised on an invalid file format. (:issue:`24548`) - Bug in :meth:`detect_client_encoding` where potential ``IOError`` goes unhandled when importing in a mod_wsgi process due to restricted access to stdout. (:issue:`21552`) - Bug in :func:`to_html()` with ``index=False`` misses truncation indicators (...) 
on truncated DataFrame (:issue:`15019`, :issue:`22783`) - Bug in :func:`to_html()` with ``index=False`` when both columns and row index are ``MultiIndex`` (:issue:`22579`)