diff --git a/.travis.yml b/.travis.yml
index 40baee2c03ea0..502f06fbd329a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -111,17 +111,20 @@ before_script:
 
 script:
   - echo "script start"
-  - ci/run_build_docs.sh
   - ci/script_single.sh
   - ci/script_multi.sh
   - ci/lint.sh
-  - ci/doctests.sh
+  - ci/docs/doctests.sh
+  - ci/docs/build_docs.sh
+  - ci/docs/lint_docs.sh
   - echo "checking imports"
   - source activate pandas && python ci/check_imports.py
   - echo "script done"
 
 after_success:
-  - ci/upload_coverage.sh
+  - source activate pandas
+  - ci/upload_coverage.sh
+  - ci/docs/upload_docs.sh
 
 after_script:
   - echo "after_script start"
diff --git a/ci/docs/build_docs.sh b/ci/docs/build_docs.sh
new file mode 100755
index 0000000000000..93d0cc1455a7d
--- /dev/null
+++ b/ci/docs/build_docs.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+set -e
+
+cd "$TRAVIS_BUILD_DIR"
+echo "inside $0"
+
+if [ "$DOC" ]; then
+
+  echo "[building docs]"
+
+  source activate pandas
+
+  mv "$TRAVIS_BUILD_DIR"/doc /tmp
+  mv "$TRAVIS_BUILD_DIR/LICENSE" /tmp  # included in the docs.
+  cd /tmp/doc
+
+  echo './make.py 2>&1 | tee doc-build.log'
+  ./make.py 2>&1 | tee doc-build.log
+else
+  echo "[skipping docs]"
+fi
+
+exit 0
diff --git a/ci/doctests.sh b/ci/docs/doctests.sh
similarity index 100%
rename from ci/doctests.sh
rename to ci/docs/doctests.sh
diff --git a/ci/docs/lint_docs.sh b/ci/docs/lint_docs.sh
new file mode 100755
index 0000000000000..c9c9b35606290
--- /dev/null
+++ b/ci/docs/lint_docs.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+set -e
+
+echo "inside $0"
+
+if [ "$DOC" ]; then
+  cd /tmp/doc
+
+  echo "[linting docs]"
+
+  echo './make.py lint_log --log-file=doc-build.log'
+  ./make.py lint_log --log-file=doc-build.log
+else
+  echo "[skipping doc lint]"
+fi
diff --git a/ci/build_docs.sh b/ci/docs/upload_docs.sh
similarity index 61%
rename from ci/build_docs.sh
rename to ci/docs/upload_docs.sh
index f445447e3565c..16b69b1e452d6 100755
--- a/ci/build_docs.sh
+++ b/ci/docs/upload_docs.sh
@@ -1,29 +1,9 @@
 #!/bin/bash
+set -e
 
-if [ "${TRAVIS_OS_NAME}" != "linux" ]; then
-  echo "not doing build_docs on non-linux"
-  exit 0
-fi
-
-cd "$TRAVIS_BUILD_DIR"
 echo "inside $0"
 
-if [ "$DOC" ]; then
-
-  echo "Will build docs"
-
-  source activate pandas
-
-  mv "$TRAVIS_BUILD_DIR"/doc /tmp
-  mv "$TRAVIS_BUILD_DIR/LICENSE" /tmp  # included in the docs.
-  cd /tmp/doc
-
-  echo ###############################
-  echo # Log file for the doc build #
-  echo ###############################
-
-  echo ./make.py
-  ./make.py
+if [ "${DOC}" ] && [ "${TRAVIS_PULL_REQUEST}" = "false" ]; then
 
   echo ########################
   echo # Create and send docs #
@@ -51,6 +31,8 @@ if [ "$DOC" ]; then
 
   git remote -v
   git push origin gh-pages -f
+else
+  echo "[skipping doc upload]"
 fi
 
 exit 0
diff --git a/ci/run_build_docs.sh b/ci/run_build_docs.sh
deleted file mode 100755
index 2909b9619552e..0000000000000
--- a/ci/run_build_docs.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/bash
-
-echo "inside $0"
-
-"$TRAVIS_BUILD_DIR"/ci/build_docs.sh 2>&1
-
-# wait until subprocesses finish (build_docs.sh)
-wait
-
-exit 0
diff --git a/doc/.gitignore b/doc/.gitignore
new file mode 100644
index 0000000000000..a5dd6ec150da1
--- /dev/null
+++ b/doc/.gitignore
@@ -0,0 +1 @@
+doc-build.log
diff --git a/doc/make.py b/doc/make.py
index d85747458148d..f31ec079cf5ef 100755
--- a/doc/make.py
+++ b/doc/make.py
@@ -14,9 +14,12 @@
 import importlib
 import sys
 import os
+import re
+import textwrap
 import shutil
 # import subprocess
 import argparse
+from collections import namedtuple
 from contextlib import contextmanager
 import webbrowser
 import jinja2
@@ -78,7 +81,7 @@ class DocBuilder:
     script.
     """
     def __init__(self, num_jobs=1, include_api=True, single_doc=None,
-                 verbosity=0):
+                 verbosity=0, warnings_are_errors=False, log_file=None):
         self.num_jobs = num_jobs
         self.include_api = include_api
         self.verbosity = verbosity
@@ -87,6 +90,8 @@ def __init__(self, num_jobs=1, include_api=True, single_doc=None,
         if single_doc is not None:
             self._process_single_doc(single_doc)
         self.exclude_patterns = self._exclude_patterns
+        self.warnings_are_errors = warnings_are_errors
+        self.log_file = log_file
 
         self._generate_index()
         if self.single_doc_type == 'docstring':
@@ -135,6 +140,12 @@ def _process_single_doc(self, single_doc):
             try:
                 obj = pandas  # noqa: F821
                 for name in single_doc.split('.'):
+                    try:
+                        # for names not in the top-level namespace by default,
+                        # e.g. pandas.io.formats.style.Styler
+                        importlib.import_module('.'.join([obj.__name__, name]))
+                    except ImportError:
+                        pass
                     obj = getattr(obj, name)
             except AttributeError:
                 raise ValueError('Single document not understood, it should '
@@ -227,10 +238,10 @@ def _sphinx_build(self, kind):
         if kind not in ('html', 'latex', 'spelling'):
             raise ValueError('kind must be html, latex or '
                              'spelling, not {}'.format(kind))
-
         self._run_os('sphinx-build',
                      '-j{}'.format(self.num_jobs),
                      '-b{}'.format(kind),
+                     '-W' if self.warnings_are_errors else '',
                      '-{}'.format(
                          'v' * self.verbosity) if self.verbosity else '',
                      '-d{}'.format(os.path.join(BUILD_PATH, 'doctrees')),
@@ -317,6 +328,61 @@ def spellcheck(self):
                 ' Check pandas/doc/build/spelling/output.txt'
                 ' for more details.')
 
+    def lint_log(self):
+        with open(self.log_file) as f:
+            log = f.read()
+
+        tokens = tokenize_log(log)
+        failed = [tok for tok in tokens if tok.kind != 'OK']
+        if failed:
+            report_failures(failed)
+            sys.exit(1)
+
+
+# ------
+# Linter
+# ------
+
+LinterToken = namedtuple("LinterToken", ['kind', 'value'])
+IPY_ERROR = r'(?P<IPY_ERROR>>>>-*\n.*?<<<-*\n)'
+SPHINX_WARNING = r'(?P<SPHINX_WARNING>^[^\n]*?: WARNING:.*?$\n?)'
+OK = r'(?P<OK>^.*?\n)'
+
+
+def tokenize_log(log):
+    master_pat = re.compile("|".join([IPY_ERROR, SPHINX_WARNING, OK]),
+                            flags=re.MULTILINE | re.DOTALL)
+
+    def generate_tokens(pat, text):
+        scanner = pat.scanner(text)
+        for m in iter(scanner.match, None):
+            yield LinterToken(m.lastgroup, m.group(m.lastgroup))
+
+    tok = list(generate_tokens(master_pat, log))
+    return tok
+
+
+def report_failures(failed):
+    tpl = textwrap.dedent("""\
+        {n} failure{s}
+
+        {individual}
+        """)
+    joined = []
+    for i, tok in enumerate(failed):
+        line = "Failure [{}]: {}".format(i, tok.value.strip())
+        joined.append(line)
+    joined = '\n'.join(joined)
+
+    print(tpl.format(n=len(failed),
+                     s="s" if len(failed) != 1 else "",
+                     individual=joined))
+
+
+# ---
+# CLI
+# ---
+
 def main():
     cmds = [method for method in dir(DocBuilder) if not method.startswith('_')]
@@ -349,6 +415,13 @@ def main():
     argparser.add_argument('-v', action='count', dest='verbosity', default=0,
                            help=('increase verbosity (can be repeated), '
                                  'passed to the sphinx build command'))
+    argparser.add_argument("--warnings-are-errors",
+                           default=False,
+                           action="store_true",
+                           help="Whether to fail the build on warnings.")
+    argparser.add_argument("--log-file",
+                           default="doc-build.log",
+                           help="Log file of the build to lint for warnings.")
 
     args = argparser.parse_args()
     if args.command not in cmds:
@@ -368,7 +441,8 @@ def main():
         os.environ['MPLBACKEND'] = 'module://matplotlib.backends.backend_agg'
 
     builder = DocBuilder(args.num_jobs, not args.no_api, args.single,
-                         args.verbosity)
+                         args.verbosity, args.warnings_are_errors,
+                         args.log_file)
     getattr(builder, args.command)()
diff --git a/doc/source/api.rst b/doc/source/api.rst
index e4b055c14ec27..073ed8a082a11 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -2603,3 +2603,12 @@ objects.
    generated/pandas.Series.ix
    generated/pandas.Series.imag
    generated/pandas.Series.real
+
+
+.. Can't convince sphinx to generate toctree for this class attribute.
+.. So we do it manually to avoid a warning
+
+.. toctree::
+   :hidden:
+
+   generated/pandas.api.extensions.ExtensionDtype.na_value
diff --git a/doc/source/basics.rst b/doc/source/basics.rst
index c18b94fea9a28..6eeb97349100a 100644
--- a/doc/source/basics.rst
+++ b/doc/source/basics.rst
@@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases.
 * :ref:`Categorical <categorical>`
 * :ref:`Datetime with Timezone <timeseries.timezone_series>`
 * :ref:`Period <timeseries.periods>`
-* :ref:`Interval <advanced.indexing>`
+* :ref:`Interval <advanced.intervalindex>`
 
 Pandas uses the ``object`` dtype for storing strings.
diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst
index 65e151feeba67..11904359b8384 100644
--- a/doc/source/contributing.rst
+++ b/doc/source/contributing.rst
@@ -345,6 +345,11 @@ Some other important things to know about the docs:
   Every method should be included in a ``toctree`` in ``api.rst``, else Sphinx
   will emit a warning.
 
+* The pandas CI system does not allow warnings in the documentation build.
+  If the cause of a warning is not obvious from the build output, you can
+  elevate warnings to errors with ``python make.py --warnings-are-errors``,
+  which halts the build as soon as a warning is encountered.
+
 .. note::
 
     The ``.rst`` files are used to automatically generate Markdown and HTML versions
diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst
index f6fa9e9f86143..a4dc99383a562 100644
--- a/doc/source/cookbook.rst
+++ b/doc/source/cookbook.rst
@@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
 .. ipython:: python
 
    df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]})
-
   gb = df.groupby('A')
 
    def replace(g):
       mask = g < 0
-      g.loc[mask] = g[~mask].mean()
-      return g
+      return g.where(~mask, g[~mask].mean())
 
    gb.transform(replace)
diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst
index 1014982fea21a..2e1d573ed4192 100644
--- a/doc/source/ecosystem.rst
+++ b/doc/source/ecosystem.rst
@@ -73,8 +73,8 @@ large data to thin clients.
 `seaborn <https://seaborn.pydata.org>`__
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Seaborn is a Python visualization library based on `matplotlib
-<https://matplotlib.org>`__. It provides a high-level, dataset-oriented
+Seaborn is a Python visualization library based on
+`matplotlib <https://matplotlib.org>`__. It provides a high-level, dataset-oriented
 interface for creating attractive statistical graphics. The plotting functions
 in seaborn understand pandas objects and leverage pandas grouping operations
 internally to support concise specification of complex visualizations. Seaborn
@@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying
 (Note: HTML tables may or may not be
 compatible with non-HTML Jupyter output formats.)
 
-See :ref:`Options and Settings <options>` and :ref:`<options.available>`
+See :ref:`Options and Settings <options>` and :ref:`options.available`
 for pandas ``display.`` settings.
 
 `quantopian/qgrid <https://github.com/quantopian/qgrid>`__
@@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard.
 Most pandas classes, methods and data attributes can be autocompleted in
 Spyder's `Editor <https://docs.spyder-ide.org/editor.html>`__ and
 `IPython Console <https://docs.spyder-ide.org/ipythonconsole.html>`__,
-and Spyder's `Help pane`__ can retrieve
+and Spyder's `Help pane <https://docs.spyder-ide.org/help.html>`__ can retrieve
 and render Numpydoc documentation on pandas objects in rich text with Sphinx
 both automatically and on-demand.
diff --git a/doc/source/io.rst b/doc/source/io.rst
index c2c8c1c17700f..84530b2b560d0 100644
--- a/doc/source/io.rst
+++ b/doc/source/io.rst
@@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
 CSV & Text files
 ----------------
 
-The two workhorse functions for reading text files (a.k.a. flat files) are
-:func:`read_csv` and :func:`read_table`. They both use the same parsing code to
-intelligently convert tabular data into a ``DataFrame`` object. See the
-:ref:`cookbook` for some advanced strategies.
+The workhorse function for reading text files (a.k.a. flat files) is
+:func:`read_csv`. See the :ref:`cookbook` for some advanced strategies.
 
 Parsing options
 '''''''''''''''
 
-The functions :func:`read_csv` and :func:`read_table` accept the following
-common arguments:
+:func:`read_csv` accepts the following common arguments:
 
 Basic
 +++++
@@ -780,8 +777,8 @@ Date Handling
 Specifying Date Columns
 +++++++++++++++++++++++
 
-To better facilitate working with datetime data, :func:`read_csv` and
-:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser``
+To better facilitate working with datetime data, :func:`read_csv`
+uses the keyword arguments ``parse_dates`` and ``date_parser``
 to allow users to specify a variety of columns and date/time formats to turn the
 input text data into ``datetime`` objects.
@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns:
 
    print(open('data/mindex_ex.csv').read())
 
-The ``index_col`` argument to ``read_csv`` can take a list of
+The ``index_col`` argument to ``read_csv`` can take a list of
 column numbers to turn multiple columns into a ``MultiIndex`` for the index of the
 returned object:
@@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``.
 
 .. ipython:: python
 
-    print(open('tmp2.sv').read())
-    pd.read_csv('tmp2.sv', sep=None, engine='python')
+   print(open('tmp2.sv').read())
+   pd.read_csv('tmp2.sv', sep=None, engine='python')
 
 .. _io.multiple_files:
@@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following:
 .. ipython:: python
 
    print(open('tmp.sv').read())
-   table = pd.read_table('tmp.sv', sep='|')
+   table = pd.read_csv('tmp.sv', sep='|')
    table
 
-By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return
+By specifying a ``chunksize`` to ``read_csv``, the return
 value will be an iterable object of type ``TextFileReader``:
 
 .. ipython:: python
 
-   reader = pd.read_table('tmp.sv', sep='|', chunksize=4)
+   reader = pd.read_csv('tmp.sv', sep='|', chunksize=4)
   reader
 
    for chunk in reader:
@@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object:
 
 .. ipython:: python
 
-   reader = pd.read_table('tmp.sv', sep='|', iterator=True)
+   reader = pd.read_csv('tmp.sv', sep='|', iterator=True)
    reader.get_chunk(5)
 
 .. ipython:: python
@@ -3067,7 +3064,7 @@ Clipboard
 
 A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method,
 which takes the contents of the clipboard buffer and passes them to the
-``read_table`` method. For instance, you can copy the following text to the
+``read_csv`` method. For instance, you can copy the following text to the
 clipboard (CTRL-C on many operating systems):
 
 .. code-block:: python
diff --git a/doc/source/text.rst b/doc/source/text.rst
index 61583a179e572..d01c48695d0d6 100644
--- a/doc/source/text.rst
+++ b/doc/source/text.rst
@@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi
 
    s
    u
-   s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-')
+   s.str.cat([u.values,
+              u.index.astype(str).values], na_rep='-')
 
 All elements must match in length to the calling ``Series`` (or ``Index``), except
 those having an index if ``join`` is not None:
 
 .. ipython:: python
 
    v
-   s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-')
+   s.str.cat([u, v], join='outer', na_rep='-')
 
 If using ``join='right'`` on a list of ``others`` that contains different indexes,
 the union of these indexes will be used as the basis for the final concatenation:
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
index 71bc064ffb0c2..85b0abe421eb2 100644
--- a/doc/source/timeseries.rst
+++ b/doc/source/timeseries.rst
@@ -753,18 +753,28 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost:
 Iterating through groups
 ------------------------
 
-With the :ref:`Resampler` object in hand, iterating through the grouped data is very
+With the ``Resampler`` object in hand, iterating through the grouped data is very
 natural and functions similarly to :py:func:`itertools.groupby`:
 
 .. ipython:: python
 
-   resampled = df.resample('H')
+   small = pd.Series(
+       range(6),
+       index=pd.to_datetime(['2017-01-01T00:00:00',
+                             '2017-01-01T00:30:00',
+                             '2017-01-01T00:31:00',
+                             '2017-01-01T01:00:00',
+                             '2017-01-01T03:00:00',
+                             '2017-01-01T03:05:00'])
+   )
+   resampled = small.resample('H')
 
    for name, group in resampled:
-       print(name)
-       print(group)
+       print("Group: ", name)
+       print("-" * 27)
+       print(group, end="\n\n")
 
-See :ref:`groupby.iterating-label`.
+See :ref:`groupby.iterating-label` or :meth:`Resampler.__iter__` for more.
 
 .. _timeseries.components:
@@ -910,26 +920,22 @@ It's definitely worth exploring the ``pandas.tseries.offsets`` module and the
 various docstrings for the classes.
 
 These operations (``apply``, ``rollforward`` and ``rollback``) preserve time
-(hour, minute, etc) information by default. To reset time, use ``normalize=True``
-when creating the offset instance. If ``normalize=True``, the result is
-normalized after the function is applied.
-
+(hour, minute, etc) information by default. To reset time, use ``normalize``
+before or after applying the operation (depending on whether you want the
+time information included in the operation).
 
 .. ipython:: python
 
+   ts = pd.Timestamp('2014-01-01 09:00')
    day = Day()
-   day.apply(pd.Timestamp('2014-01-01 09:00'))
-
-   day = Day(normalize=True)
-   day.apply(pd.Timestamp('2014-01-01 09:00'))
+   day.apply(ts)
+   day.apply(ts).normalize()
 
+   ts = pd.Timestamp('2014-01-01 22:00')
    hour = Hour()
-   hour.apply(pd.Timestamp('2014-01-01 22:00'))
-
-   hour = Hour(normalize=True)
-   hour.apply(pd.Timestamp('2014-01-01 22:00'))
-   hour.apply(pd.Timestamp('2014-01-01 23:00'))
-
+   hour.apply(ts)
+   hour.apply(ts).normalize()
+   hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize()
 
 .. _timeseries.dayvscalendarday:
@@ -1488,6 +1494,7 @@ time. The method for this is :meth:`~Series.shift`, which is available on all of
 the pandas objects.
 
 .. ipython:: python
+
    ts = pd.Series(range(len(rng)), index=rng)
    ts = ts[:5]
    ts.shift(1)
diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt
index a3213136d998a..e38ba54d4b058 100644
--- a/doc/source/whatsnew/v0.18.0.txt
+++ b/doc/source/whatsnew/v0.18.0.txt
@@ -373,7 +373,7 @@ New Behavior:
    s = pd.Series([1,2,3], index=np.arange(3.))
    s
    s.index
-   print(s.to_csv(path=None))
+   print(s.to_csv(path_or_buf=None, header=False))
 
 Changes to dtype assignment behaviors
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 3c0818343208a..819f24254b2ce 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of
 URLs and paths are now inferred using their file extensions. Additionally,
 support for bz2 compression in the python 2 C-engine improved (:issue:`14874`).
 
-.. ipython:: python
+.. code-block:: python
 
    url = 'https://github.com/{repo}/raw/{branch}/{path}'.format(
        repo = 'pandas-dev/pandas',
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
index 2f70d4e5946a0..f278dbd43a59d 100644
--- a/doc/source/whatsnew/v0.24.0.txt
+++ b/doc/source/whatsnew/v0.24.0.txt
@@ -8,6 +8,8 @@ v0.24.0 (Month XX, 2018)
 Starting January 1, 2019, pandas feature releases will support Python 3 only.
 See :ref:`install.dropping-27` for more.
 
+This is a deliberate error :meth:`notaclass` to test the failure.
+
 .. _whatsnew_0240.enhancements:
 
 New features
@@ -246,7 +248,6 @@ UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`)
 
 .. code-block:: ipython
 
-
     In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30")
     Out[2]: Timestamp('2015-11-18 10:00:00')
@@ -284,6 +285,7 @@ Passing ``utc=True`` will mimic the previous behavior but will correctly indicat
 that the dates have been converted to UTC
 
 .. ipython:: python
+
    pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True)
 
 .. _whatsnew_0240.api_breaking.calendarday:
@@ -450,7 +452,7 @@ Previous Behavior:
 
     Out[3]: Int64Index([0, 1, 2], dtype='int64')
 
-.. _whatsnew_0240.api.timedelta64_subtract_nan
+.. _whatsnew_0240.api.timedelta64_subtract_nan:
 
 Addition/Subtraction of ``NaN`` from :class:`DataFrame`
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -461,9 +463,10 @@ all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and
 ``Series`` behavior (:issue:`22163`)
 
 .. ipython:: python
+   :okexcept:
 
-    df = pd.DataFrame([pd.Timedelta(days=1)])
-    df - np.nan
+   df = pd.DataFrame([pd.Timedelta(days=1)])
+   df - np.nan
 
 Previous Behavior:
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 75baeab402734..34633d5707264 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -2061,9 +2061,10 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None,
             .. versionadded:: 0.19.0
 
         compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
-            default 'infer'
+
             A string representing the compression to use in the output file,
-            only used when the first argument is a filename.
+            only used when the first argument is a filename. By default, the
+            compression is inferred from the filename.
 
             .. versionadded:: 0.21.0
             .. versionchanged:: 0.24.0
@@ -4198,6 +4199,10 @@ def head(self, n=5):
         on position. It is useful for quickly testing if your object
         has the right type of data in it.
 
+        .. ipython:: python
+
+            2 / 0
+
         Parameters
         ----------
         n : int, default 5
@@ -9503,8 +9508,11 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         path_or_buf : string or file handle, default None
             File path or object, if None is provided the result is returned as
             a string.
+
             .. versionchanged:: 0.24.0
-               Was previously named "path" for Series.
+
+               Was previously named "path" for Series.
+
         sep : character, default ','
             Field delimiter for the output file.
         na_rep : string, default ''
@@ -9516,8 +9524,11 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         header : boolean or list of string, default True
             Write out the column names. If a list of strings is given it is
             assumed to be aliases for the column names
+
             .. versionchanged:: 0.24.0
-               Previously defaulted to False for Series.
+
+               Previously defaulted to False for Series.
+
         index : boolean, default True
             Write row names (index)
         index_label : string or sequence, or False, default None
@@ -9531,14 +9542,16 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         encoding : string, optional
             A string representing the encoding to use in the output file,
             defaults to 'ascii' on Python 2 and 'utf-8' on Python 3.
-        compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None},
-            default 'infer'
-            If 'infer' and `path_or_buf` is path-like, then detect compression
-            from the following extensions: '.gz', '.bz2', '.zip' or '.xz'
-            (otherwise no compression).
+        compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}
+
+            If 'infer' (the default) and `path_or_buf` is path-like, then
+            detect compression from the following extensions: '.gz', '.bz2',
+            '.zip' or '.xz' (otherwise no compression).
 
             .. versionchanged:: 0.24.0
+
                'infer' option added and set to default
+
         line_terminator : string, default ``'\n'``
             The newline character or character sequence to use in the output
             file
@@ -9555,7 +9568,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
         chunksize : int or None
             rows to write at a time
         tupleize_cols : boolean, default False
+
             .. deprecated:: 0.21.0
+
                This argument will be removed and will always write each row
                of the multi-index as a separate row in the CSV file.
@@ -9569,7 +9584,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None,
             European data
 
             .. versionchanged:: 0.24.0
-               The order of arguments for Series was changed.
+
+               The order of arguments for Series was changed.
         """
 
         df = self if isinstance(self, ABCDataFrame) else self.to_frame()
diff --git a/pandas/core/series.py b/pandas/core/series.py
index fdb9ef59c1d3e..a913b956cbc16 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -2745,6 +2745,7 @@ def nlargest(self, n=5, keep='first'):
         keep : {'first', 'last', 'all'}, default 'first'
             When there are duplicate values that cannot all fit in a
             Series of `n` elements:
+
             - ``first`` : take the first occurrences based on the index order
             - ``last`` : take the last occurrences based on the index order
             - ``all`` : keep all occurrences. This can result in a Series of
@@ -2840,6 +2841,7 @@ def nsmallest(self, n=5, keep='first'):
         keep : {'first', 'last', 'all'}, default 'first'
             When there are duplicate values that cannot all fit in a
             Series of `n` elements:
+
             - ``first`` : take the first occurrences based on the index order
             - ``last`` : take the last occurrences based on the index order
             - ``all`` : keep all occurrences. This can result in a Series of
diff --git a/pandas/core/window.py b/pandas/core/window.py
index 66f48f403c941..5cdf62d5a5537 100644
--- a/pandas/core/window.py
+++ b/pandas/core/window.py
@@ -1404,7 +1404,7 @@ def _get_cov(X, Y):
             otherwise defaults to `False`.
             Not relevant for :class:`~pandas.Series`.
         **kwargs
-            Under Review.
+            Unused.
 
         Returns
         -------
@@ -1430,7 +1430,7 @@ def _get_cov(X, Y):
         all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise`
         set to `True`.
 
-        Function will return `NaN`s for correlations of equal valued sequences;
+        Function will return ``NaN`` for correlations of equal valued sequences;
         this is the result of a 0/0 division error.
 
         When `pairwise` is set to `False`, only matching columns between `self` and
@@ -1446,7 +1446,7 @@ def _get_cov(X, Y):
         Examples
         --------
         The below example shows a rolling calculation with a window size of
-        four matching the equivalent function call using `numpy.corrcoef`.
+        four matching the equivalent function call using :func:`numpy.corrcoef`.
 
         >>> v1 = [3, 3, 3, 5, 8]
         >>> v2 = [3, 4, 4, 4, 8]
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
index b175dd540a518..f4bb53ba4f218 100644
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ -1073,6 +1073,7 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100,
             percent of the cell's width.
         align : {'left', 'zero', 'mid'}, default 'left'
             How to align the bars with the cells.
+
             - 'left' : the min value starts at the left of the cell.
             - 'zero' : a value of zero is located at the center of the cell.
             - 'mid' : the center of the cell is at (max-min)/2, or
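Note on the new ``lint_log`` machinery in ``doc/make.py`` above: the tokenizer classifies the entire build log with a single alternation of named regex groups, and ``m.lastgroup`` reports which alternative matched. A minimal standalone sketch of that behavior follows; the sample log lines are made up for illustration:

    import re
    from collections import namedtuple

    LinterToken = namedtuple("LinterToken", ["kind", "value"])

    # Same patterns as in the diff; the group names are what m.lastgroup
    # reports for each match.
    IPY_ERROR = r'(?P<IPY_ERROR>>>>-*\n.*?<<<-*\n)'
    SPHINX_WARNING = r'(?P<SPHINX_WARNING>^[^\n]*?: WARNING:.*?$\n?)'
    OK = r'(?P<OK>^.*?\n)'

    def tokenize_log(log):
        master_pat = re.compile("|".join([IPY_ERROR, SPHINX_WARNING, OK]),
                                flags=re.MULTILINE | re.DOTALL)
        # Pattern.scanner is the same (undocumented but long-standing)
        # CPython API the diff itself relies on.
        scanner = master_pat.scanner(log)
        return [LinterToken(m.lastgroup, m.group(m.lastgroup))
                for m in iter(scanner.match, None)]

    sample = ("reading sources... [100%] whatsnew/v0.24.0\n"
              "v0.24.0.txt:11: WARNING: py:meth reference target not found\n"
              "build succeeded, 1 warning.\n")

    for tok in tokenize_log(sample):
        print(tok.kind, "->", tok.value.rstrip())
    # OK -> reading sources... [100%] whatsnew/v0.24.0
    # SPHINX_WARNING -> v0.24.0.txt:11: WARNING: py:meth reference target not found
    # OK -> build succeeded, 1 warning.

Because ``OK`` matches lazily up to the next newline, anything that is not an ipython-directive failure block or a Sphinx warning degrades to a one-line ``OK`` token, which ``lint_log`` then filters out before reporting failures.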
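One clarification on the cookbook change: ``where(cond, other)`` keeps values where ``cond`` is True and substitutes ``other`` where it is False, so replacing the negative entries requires inverting the mask. A runnable sketch of the corrected example:

    import pandas as pd

    df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, -1, 1, 2]})
    gb = df.groupby('A')

    def replace(g):
        mask = g < 0
        # Keep the non-negative values (~mask); replace the negatives with
        # the mean of the group's non-negative values.
        return g.where(~mask, g[~mask].mean())

    print(gb.transform(replace))
    # Column B becomes [1.0, 1.0, 1.0, 2.0]: the single negative entry is
    # replaced by the mean of its group's non-negative values.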