From 1811d60120f6d6659a2e261c72c49a10e07a0068 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 2 Nov 2017 14:02:37 -0500 Subject: [PATCH 1/5] DOC: Fix various warnings I'm trying to get our doc build cleaned up. Just fixing warnings in this one (mostly formatting, some references). Section name warnings Warning from computation.rst Remove references to invalid index / columns Various docstring fixups More warnings More warnings --- doc/source/api.rst | 6 +++--- doc/source/computation.rst | 1 + doc/source/conf.py | 16 ++++++++++++---- doc/source/contributing.rst | 2 +- doc/source/io.rst | 2 +- pandas/_libs/period.pyx | 2 +- pandas/_libs/tslib.pyx | 15 +++++++-------- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/core/computation/eval.py | 2 +- pandas/core/frame.py | 3 ++- pandas/core/generic.py | 1 + pandas/core/groupby.py | 4 ++-- pandas/core/indexes/period.py | 14 +++++++------- pandas/io/formats/format.py | 19 +++++++++++++++---- setup.py | 4 ++-- 15 files changed, 57 insertions(+), 36 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index e8b8b3624740d..b5cf593ac0d1f 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1822,7 +1822,7 @@ Interval Properties ~~~~~~~~~~ .. autosummary:: - :toctree generated/ + :toctree: generated/ Interval.closed Interval.closed_left @@ -1843,7 +1843,7 @@ Timedelta Properties ~~~~~~~~~~ .. autosummary:: - :toctree generated/ + :toctree: generated/ Timedelta.asm8 Timedelta.components @@ -1860,7 +1860,7 @@ Properties Methods ~~~~~~~ .. autosummary:: - :toctree generated/ + :toctree: generated/ Timedelta.ceil Timedelta.floor diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 0325e54d18e36..2a358900e340d 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -670,6 +670,7 @@ columns of a ``DataFrame``: .. 
ipython:: python :okexcept: + :okwarning: r.agg({'A' : np.sum, 'B' : lambda x: np.std(x, ddof=1)}) diff --git a/doc/source/conf.py b/doc/source/conf.py index 6eb12324ee461..e006f1809da5a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -358,13 +358,12 @@ # latex_use_modindex = True -# Example configuration for intersphinx: refer to the Python standard library. intersphinx_mapping = { 'statsmodels': ('http://www.statsmodels.org/devel/', None), 'matplotlib': ('http://matplotlib.org/', None), - 'python': ('http://docs.python.org/3', None), - 'numpy': ('http://docs.scipy.org/doc/numpy', None), - 'scipy': ('http://docs.scipy.org/doc/scipy/reference', None), + 'python': ('https://docs.python.org/3/', None), + 'numpy': ('https://docs.scipy.org/doc/numpy/', None), + 'scipy': ('https://docs.scipy.org/doc/scipy/reference/', None), 'py': ('https://pylib.readthedocs.io/en/latest/', None) } import glob @@ -573,6 +572,15 @@ def remove_flags_docstring(app, what, name, obj, options, lines): if what == "attribute" and name.endswith(".flags"): del lines[:] + +suppress_warnings = [ + # We "overwrite" autosummary with our PandasAutosummary, but + # still want the regular autosummary setup to run. So we just + # suppress this warning. + 'app.add_directive' +] + + def setup(app): app.connect("autodoc-process-docstring", remove_flags_docstring) app.add_autodocumenter(AccessorDocumenter) diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index 2a1aa3d0cf17a..40189f0e45518 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -60,7 +60,7 @@ Bug reports must: The issue will then show up to the *pandas* community and be open to comments/ideas from others. -.. _contributing.github +.. 
_contributing.github: Working with the code ===================== diff --git a/doc/source/io.rst b/doc/source/io.rst index 5d6b00a4db72e..36f216601b491 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -115,7 +115,7 @@ header : int or list of ints, default ``'infer'`` names : array-like, default ``None`` List of column names to use. If file contains no header row, then you should explicitly pass ``header=None``. Duplicates in this list will cause - a ``UserWarning`` to be issued. + a ``UserWarning`` to be issued. index_col : int or sequence or ``False``, default ``None`` Column to use as the row labels of the DataFrame. If a sequence is given, a MultiIndex is used. If you have a malformed file with delimiters at the end of diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 40d970c7b20f2..72523a19b9595 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -967,7 +967,7 @@ cdef class _Period(object): def strftime(self, fmt): """ Returns the string representation of the :class:`Period`, depending - on the selected :keyword:`format`. :keyword:`format` must be a string + on the selected ``fmt``. ``fmt`` must be a string containing one or several directives. The method recognizes the same directives as the :func:`time.strftime` function of the standard Python distribution, as well as the specific additional directives ``%f``, diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 6d793b6770113..7db0fb26d0de5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -262,19 +262,16 @@ class Timestamp(_Timestamp): The other two forms mimic the parameters from ``datetime.datetime``. They can be passed by either position or keyword, but not both mixed together. - :func:`datetime.datetime` Parameters - ------------------------------------ + Parameters + ---------- .. 
versionadded:: 0.19.0 year : int month : int day : int - hour : int, optional, default is 0 - minute : int, optional, default is 0 - second : int, optional, default is 0 - microsecond : int, optional, default is 0 - tzinfo : datetime.tzinfo, optional, default is None + hour, minute, second, microsecond : int, optional, default 0 + tzinfo : datetime.tzinfo, optional, default None """ @classmethod @@ -592,11 +589,13 @@ class Timestamp(_Timestamp): tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will be converted to. None will remove timezone holding local time. + ambiguous : bool, 'NaT', default 'raise' - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) + that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 623babe5422a8..18ec82ed2b289 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -820,7 +820,7 @@ cdef class _Timedelta(timedelta): def isoformat(self): """ Format Timedelta as ISO 8601 Duration like - `P[n]Y[n]M[n]DT[n]H[n]M[n]S`, where the `[n]`s are replaced by the + ``P[n]Y[n]M[n]DT[n]H[n]M[n]S``, where the ``[n]`` s are replaced by the values. See https://en.wikipedia.org/wiki/ISO_8601#Durations .. 
versionadded:: 0.20.0 diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index a5df6aea055ab..196f4b2679576 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -202,7 +202,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, you can use to inject an additional collection of namespaces to use for variable lookup. For example, this is used in the :meth:`~pandas.DataFrame.query` method to inject the - :attr:`~pandas.DataFrame.index` and :attr:`~pandas.DataFrame.columns` + ``DataFrame.index`` and ``DataFrame.columns`` variables that refer to their respective :class:`~pandas.DataFrame` instance attributes. level : int, optional diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5f5f785111fb4..70f1ff0a5380d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1692,7 +1692,7 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, classes : str or list or tuple, default None CSS class(es) to apply to the resulting html table escape : boolean, default True - Convert the characters <, >, and & to HTML-safe sequences.= + Convert the characters <, >, and & to HTML-safe sequences. max_rows : int, optional Maximum number of rows to show before truncating. If None, show all. @@ -1703,6 +1703,7 @@ def to_html(self, buf=None, columns=None, col_space=None, header=True, Character recognized as decimal separator, e.g. ',' in Europe .. versionadded:: 0.18.0 + border : int A ``border=border`` attribute is included in the opening `<table>` tag. Default ``pd.options.html.border``.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 48e6f8d4d50d3..f1edfe276dfad 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6278,6 +6278,7 @@ def truncate(self, before=None, after=None, axis=None, copy=True): * 0 or 'index': apply truncation to rows * 1 or 'columns': apply truncation to columns + Default is stat axis for given data type (0 for Series and DataFrames, 1 for Panels) copy : boolean, default is True, diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 1acc8c3ed0bbb..d2cc117ca3fdc 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -742,8 +742,8 @@ def _cumcount_array(self, ascending=True): ascending : bool, default True If False, number in reverse, from length of group - 1 to 0. - Note - ---- + Notes + ----- this is currently implementing sort=False (though the default is sort=True) for groupby in general """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index bd069c1d22403..a6d5690767c10 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -624,9 +624,9 @@ def to_timestamp(self, freq=None, how='start'): Parameters ---------- - freq : string or DateOffset, default 'D' for week or longer, 'S' - otherwise - Target frequency + freq : string or DateOffset, optional + Target frequency. 
The default is 'D' for week or longer, + 'S' otherwise how : {'s', 'e', 'start', 'end'} Returns @@ -1039,8 +1039,8 @@ def tz_convert(self, tz): ------- normalized : DatetimeIndex - Note - ---- + Notes + ----- Not currently implemented for PeriodIndex """ raise NotImplementedError("Not yet implemented for PeriodIndex") @@ -1063,8 +1063,8 @@ def tz_localize(self, tz, infer_dst=False): ------- localized : DatetimeIndex - Note - ---- + Notes + ----- Not currently implemented for PeriodIndex """ raise NotImplementedError("Not yet implemented for PeriodIndex") diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index ab98b9c4e4f49..35a87fbe7b15b 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -84,12 +84,23 @@ "match-parent", "initial", "unset") justify_docstring = """ - justify : {'left', 'right', 'center', 'justify', - 'justify-all', 'start', 'end', 'inherit', - 'match-parent', 'initial', 'unset'}, default None + justify : str, default None How to justify the column labels. If None uses the option from the print configuration (controlled by set_option), 'right' out - of the box.""" + of the box. Valid values are + + * left + * right + * center + * justify + * justify-all + * start + * end + * inherit + * match-parent + * initial + * unset +""" return_docstring = """ diff --git a/setup.py b/setup.py index 572c426f26ae3..dd24c5c14ee69 100755 --- a/setup.py +++ b/setup.py @@ -225,8 +225,8 @@ def build_extensions(self): of the analysis into a form suitable for plotting or tabular display. pandas is the ideal tool for all of these tasks. 
-Note ----- +Notes +----- Windows binaries built against NumPy 1.8.1 """ From 769e0c0c807723aeef79bae0bb273f9b0dfd4285 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 2 Nov 2017 21:03:28 -0500 Subject: [PATCH 2/5] match NaT doc --- pandas/_libs/tslibs/nattype.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index dedc115501cd0..a5861f5865a39 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -478,11 +478,13 @@ class NaTType(_NaT): tz : string, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will be converted to. None will remove timezone holding local time. + ambiguous : bool, 'NaT', default 'raise' - bool contains flags to determine if time is dst or not (note - that this flag is only applicable for ambiguous fall dst dates) + that this flag is only applicable for ambiguous fall dst dates) - 'NaT' will return NaT for an ambiguous time - 'raise' will raise an AmbiguousTimeError for an ambiguous time + errors : 'raise', 'coerce', default 'raise' - 'raise' will raise a NonExistentTimeError if a timestamp is not valid in the specified timezone (e.g. due to a transition from From ae4f942c2322f3ffc19dc79a1a68b29f0df2fe9c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 3 Nov 2017 15:11:43 -0500 Subject: [PATCH 3/5] More warnings --- doc/source/whatsnew/v0.15.2.txt | 2 +- doc/source/whatsnew/v0.19.0.txt | 1 + doc/source/whatsnew/v0.20.0.txt | 1 + pandas/core/config_init.py | 5 +++-- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.15.2.txt b/doc/source/whatsnew/v0.15.2.txt index feba3d6224e65..b908b60334f4c 100644 --- a/doc/source/whatsnew/v0.15.2.txt +++ b/doc/source/whatsnew/v0.15.2.txt @@ -163,7 +163,7 @@ Other enhancements: p.all() - Added support for ``utcfromtimestamp()``, ``fromtimestamp()``, and ``combine()`` on `Timestamp` class (:issue:`5351`). 
-- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here<http://pandas.pydata.org/pandas-docs/version/0.15.2/remote_data.html#remote-data-ga>`__. +- Added Google Analytics (`pandas.io.ga`) basic documentation (:issue:`8835`). See `here <http://pandas.pydata.org/pandas-docs/version/0.15.2/remote_data.html#remote-data-ga>`__. - ``Timedelta`` arithmetic returns ``NotImplemented`` in unknown cases, allowing extensions by custom classes (:issue:`8813`). - ``Timedelta`` now supports arithemtic with ``numpy.ndarray`` objects of the appropriate dtype (numpy 1.8 or newer only) (:issue:`8884`). - Added ``Timedelta.to_timedelta64()`` method to the public API (:issue:`8884`). diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index bc5e278df743f..6093e53029cb6 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -216,6 +216,7 @@ contained the values ``[0, 3]``. **New behavior**: .. ipython:: python + :okwarning: pd.read_csv(StringIO(data), names=names) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 1a7b75266bfdf..fc869956c820e 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -1375,6 +1375,7 @@ Convert to a MultiIndex DataFrame Convert to an xarray DataArray .. ipython:: python + :okwarning: p.to_xarray() diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index 33531e80449d8..59578b96807e1 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -392,8 +392,9 @@ def table_schema_cb(key): cf.register_option('sim_interactive', False, tc_sim_interactive_doc) use_inf_as_null_doc = """ -use_inf_as_null had been deprecated and will be removed in a future version. -Use `use_inf_as_na` instead. +: boolean + use_inf_as_null had been deprecated and will be removed in a future + version. Use `use_inf_as_na` instead.
""" use_inf_as_na_doc = """ From f80197a3b4d6c916668ed7d33137db483287b94b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 3 Nov 2017 09:48:27 -0500 Subject: [PATCH 4/5] DOC: Remove duplicate sections --- pandas/_libs/tslib.pyx | 37 ++++++++++++++++++++++++------------- pandas/core/groupby.py | 9 ++++----- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 7db0fb26d0de5..bf22a3a528259 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -237,15 +237,13 @@ _no_input = object() class Timestamp(_Timestamp): - """TimeStamp is the pandas equivalent of python's Datetime + """Pandas replacement for datetime.datetime + + TimeStamp is the pandas equivalent of python's Datetime and is interchangable with it in most cases. It's the type used for the entries that make up a DatetimeIndex, and other timeseries oriented data structures in pandas. - There are essentially three calling conventions for the constructor. The - primary form accepts four parameters. They can be passed by position or - keyword. - Parameters ---------- ts_input : datetime-like, str, int, float @@ -259,19 +257,32 @@ class Timestamp(_Timestamp): offset : str, DateOffset Deprecated, use freq + year, month, day : int + .. versionadded:: 0.19.0 + hour, minute, second, microsecond : int, optional, default 0 + .. versionadded:: 0.19.0 + tzinfo : datetime.tzinfo, optional, default None + .. versionadded:: 0.19.0 + + Notes + ----- + There are essentially three calling conventions for the constructor. The + primary form accepts four parameters. They can be passed by position or + keyword. + The other two forms mimic the parameters from ``datetime.datetime``. They can be passed by either position or keyword, but not both mixed together. - Parameters - ---------- + Examples + -------- + >>> pd.Timestamp('2017-01-01T12') + Timestamp('2017-01-01 12:00:00') - .. 
versionadded:: 0.19.0 + >>> pd.Timestamp(2017, 1, 1, 12) + Timestamp('2017-01-01 12:00:00') - year : int - month : int - day : int - hour, minute, second, microsecond : int, optional, default 0 - tzinfo : datetime.tzinfo, optional, default None + >>> pd.Timestamp(year=2017, month=1, day=1, hour=12) + Timestamp('2017-01-01 12:00:00') """ @classmethod diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d2cc117ca3fdc..8db75accc84e5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1257,7 +1257,6 @@ def expanding(self, *args, **kwargs): return ExpandingGroupby(self, *args, **kwargs) @Substitution(name='groupby') - @Appender(_doc_template) def pad(self, limit=None): """ Forward fill the values @@ -1269,6 +1268,8 @@ def pad(self, limit=None): See Also -------- + Series.pad + DataFrame.pad Series.fillna DataFrame.fillna """ @@ -1276,7 +1277,6 @@ def pad(self, limit=None): ffill = pad @Substitution(name='groupby') - @Appender(_doc_template) def backfill(self, limit=None): """ Backward fill the values @@ -1288,6 +1288,8 @@ def backfill(self, limit=None): See Also -------- + Series.backfill + DataFrame.backfill Series.fillna DataFrame.fillna """ @@ -1450,7 +1452,6 @@ def nth(self, n, dropna=None): return result @Substitution(name='groupby') - @Appender(_doc_template) def ngroup(self, ascending=True): """ Number each group from 0 to the number of groups - 1. @@ -1507,7 +1508,6 @@ def ngroup(self, ascending=True): See also -------- .cumcount : Number the rows in each group. - """ self._set_group_selection() @@ -1519,7 +1519,6 @@ def ngroup(self, ascending=True): return result @Substitution(name='groupby') - @Appender(_doc_template) def cumcount(self, ascending=True): """ Number each item in each group from 0 to the length of that group - 1. 
From c1b25e91d3636105b125b1aaa801f9e795c37fbb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 8 Nov 2017 16:40:13 -0600 Subject: [PATCH 5/5] fixup --- pandas/_libs/tslibs/timedeltas.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 18ec82ed2b289..869ff5ee77bda 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -881,7 +881,7 @@ class Timedelta(_Timedelta): Parameters ---------- value : Timedelta, timedelta, np.timedelta64, string, or integer - unit : string, [D,h,m,s,ms,us,ns] + unit : string, {'ns', 'us', 'ms', 's', 'm', 'h', 'D'}, optional Denote the unit of the input, if input is an integer. Default 'ns'. days, seconds, microseconds, milliseconds, minutes, hours, weeks : numeric, optional