From bb3bca62b1484fab8a8a215b5a8ee6d42870781a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 11:09:31 -0500 Subject: [PATCH 01/21] Fixed link to Resmpaler (cherry picked from commit 9f0a948947cf959a8750cbf11b2a52a1ccb47eaf) --- doc/source/timeseries.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 71bc064ffb0c2..6ddb257f6c55b 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -753,7 +753,7 @@ regularity will result in a ``DatetimeIndex``, although frequency is lost: Iterating through groups ------------------------ -With the :ref:`Resampler` object in hand, iterating through the grouped data is very +With the ``Resampler`` object in hand, iterating through the grouped data is very natural and functions similarly to :py:func:`itertools.groupby`: .. ipython:: python @@ -764,7 +764,7 @@ natural and functions similarly to :py:func:`itertools.groupby`: print(name) print(group) -See :ref:`groupby.iterating-label`. +See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more. .. _timeseries.components: From 54f2635f194d8bac6b819336fd5b58335f03fa8b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 20:48:23 -0500 Subject: [PATCH 02/21] FutureWarning from groupby. Warning about elementwise comparison failing when we indexed a dataframe with boolean dataframe (cherry picked from commit 1168273efa0f941eae734645dc2d3d82ae778016) --- doc/source/cookbook.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index f6fa9e9f86143..a4dc99383a562 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -505,13 +505,11 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to .. 
ipython:: python df = pd.DataFrame({'A' : [1, 1, 2, 2], 'B' : [1, -1, 1, 2]}) - gb = df.groupby('A') def replace(g): - mask = g < 0 - g.loc[mask] = g[~mask].mean() - return g + mask = g < 0 + return g.where(~mask, g[~mask].mean()) gb.transform(replace) From 1e8e95007b5a7f29c23c3a74af64a5104904fc49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 20:48:57 -0500 Subject: [PATCH 03/21] Purge read_table (cherry picked from commit 41c8297142bc95522c199a0f623abcf77d45a8a5) --- doc/source/io.rst | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/doc/source/io.rst b/doc/source/io.rst index c2c8c1c17700f..84530b2b560d0 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -66,16 +66,13 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like CSV & Text files ---------------- -The two workhorse functions for reading text files (a.k.a. flat files) are -:func:`read_csv` and :func:`read_table`. They both use the same parsing code to -intelligently convert tabular data into a ``DataFrame`` object. See the -:ref:`cookbook` for some advanced strategies. +The workhorse function for reading text files (a.k.a. flat files) is +:func:`read_csv`. See the :ref:`cookbook` for some advanced strategies. Parsing options ''''''''''''''' -The functions :func:`read_csv` and :func:`read_table` accept the following -common arguments: +:func:`read_csv` accepts the following common arguments: Basic +++++ @@ -780,8 +777,8 @@ Date Handling Specifying Date Columns +++++++++++++++++++++++ -To better facilitate working with datetime data, :func:`read_csv` and -:func:`read_table` use the keyword arguments ``parse_dates`` and ``date_parser`` +To better facilitate working with datetime data, :func:`read_csv` +uses the keyword arguments ``parse_dates`` and ``date_parser`` to allow users to specify a variety of columns and date/time formats to turn the input text data into ``datetime`` objects. 
@@ -1434,7 +1431,7 @@ Suppose you have data indexed by two columns: print(open('data/mindex_ex.csv').read()) -The ``index_col`` argument to ``read_csv`` and ``read_table`` can take a list of +The ``index_col`` argument to ``read_csv`` can take a list of column numbers to turn multiple columns into a ``MultiIndex`` for the index of the returned object: @@ -1505,8 +1502,8 @@ class of the csv module. For this, you have to specify ``sep=None``. .. ipython:: python - print(open('tmp2.sv').read()) - pd.read_csv('tmp2.sv', sep=None, engine='python') + print(open('tmp2.sv').read()) + pd.read_csv('tmp2.sv', sep=None, engine='python') .. _io.multiple_files: @@ -1528,16 +1525,16 @@ rather than reading the entire file into memory, such as the following: .. ipython:: python print(open('tmp.sv').read()) - table = pd.read_table('tmp.sv', sep='|') + table = pd.read_csv('tmp.sv', sep='|') table -By specifying a ``chunksize`` to ``read_csv`` or ``read_table``, the return +By specifying a ``chunksize`` to ``read_csv``, the return value will be an iterable object of type ``TextFileReader``: .. ipython:: python - reader = pd.read_table('tmp.sv', sep='|', chunksize=4) + reader = pd.read_csv('tmp.sv', sep='|', chunksize=4) reader for chunk in reader: @@ -1548,7 +1545,7 @@ Specifying ``iterator=True`` will also return the ``TextFileReader`` object: .. ipython:: python - reader = pd.read_table('tmp.sv', sep='|', iterator=True) + reader = pd.read_csv('tmp.sv', sep='|', iterator=True) reader.get_chunk(5) .. ipython:: python @@ -3067,7 +3064,7 @@ Clipboard A handy way to grab data is to use the :meth:`~DataFrame.read_clipboard` method, which takes the contents of the clipboard buffer and passes them to the -``read_table`` method. For instance, you can copy the following text to the +``read_csv`` method. For instance, you can copy the following text to the clipboard (CTRL-C on many operating systems): .. 
code-block:: python From baf1ee23b1ebe82ca86031dec09e3cad11915759 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 20:49:26 -0500 Subject: [PATCH 04/21] Removed nested list example (cherry picked from commit 2e76e845b23f036d96b80fa164d49b577481303e) --- doc/source/text.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/text.rst b/doc/source/text.rst index 61583a179e572..d01c48695d0d6 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -312,14 +312,15 @@ All one-dimensional list-likes can be combined in a list-like container (includi s u - s.str.cat([u.values, ['A', 'B', 'C', 'D'], map(str, u.index)], na_rep='-') + s.str.cat([u.values, + u.index.astype(str).values], na_rep='-') All elements must match in length to the calling ``Series`` (or ``Index``), except those having an index if ``join`` is not None: .. ipython:: python v - s.str.cat([u, v, ['A', 'B', 'C', 'D']], join='outer', na_rep='-') + s.str.cat([u, v], join='outer', na_rep='-') If using ``join='right'`` on a list of ``others`` that contains different indexes, the union of these indexes will be used as the basis for the final concatenation: From 3a0bac30116897871f46633073d072e6a0a5c8bb Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 21:45:27 -0500 Subject: [PATCH 05/21] Fixed resample __iter__ (cherry picked from commit a70f86df6b57b67cf5c7035f1f0c4afd7642da31) --- doc/source/timeseries.rst | 41 +++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 6ddb257f6c55b..85b0abe421eb2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -758,11 +758,21 @@ natural and functions similarly to :py:func:`itertools.groupby`: .. 
ipython:: python - resampled = df.resample('H') + small = pd.Series( + range(6), + index=pd.to_datetime(['2017-01-01T00:00:00', + '2017-01-01T00:30:00', + '2017-01-01T00:31:00', + '2017-01-01T01:00:00', + '2017-01-01T03:00:00', + '2017-01-01T03:05:00']) + ) + resampled = small.resample('H') for name, group in resampled: - print(name) - print(group) + print("Group: ", name) + print("-" * 27) + print(group, end="\n\n") See :ref:`groupby.iterating-label` or :class:`Resampler.__iter__` for more. @@ -910,26 +920,22 @@ It's definitely worth exploring the ``pandas.tseries.offsets`` module and the various docstrings for the classes. These operations (``apply``, ``rollforward`` and ``rollback``) preserve time -(hour, minute, etc) information by default. To reset time, use ``normalize=True`` -when creating the offset instance. If ``normalize=True``, the result is -normalized after the function is applied. - +(hour, minute, etc) information by default. To reset time, use ``normalize`` +before or after applying the operation (depending on whether you want the +time information included in the operation). .. ipython:: python + ts = pd.Timestamp('2014-01-01 09:00') day = Day() - day.apply(pd.Timestamp('2014-01-01 09:00')) - - day = Day(normalize=True) - day.apply(pd.Timestamp('2014-01-01 09:00')) + day.apply(ts) + day.apply(ts).normalize() + ts = pd.Timestamp('2014-01-01 22:00') hour = Hour() - hour.apply(pd.Timestamp('2014-01-01 22:00')) - - hour = Hour(normalize=True) - hour.apply(pd.Timestamp('2014-01-01 22:00')) - hour.apply(pd.Timestamp('2014-01-01 23:00')) - + hour.apply(ts) + hour.apply(ts).normalize() + hour.apply(pd.Timestamp("2014-01-01 23:30")).normalize() .. _timeseries.dayvscalendarday: @@ -1488,6 +1494,7 @@ time. The method for this is :meth:`~Series.shift`, which is available on all of the pandas objects. .. 
ipython:: python + ts = pd.Series(range(len(rng)), index=rng) ts = ts[:5] ts.shift(1) From a53199e2973af3ea7848ec6c660336a8cb1e725f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 21:52:51 -0500 Subject: [PATCH 06/21] Old whatsnew (cherry picked from commit e4a8b064b9c93378430ebcf972e999349f84f05b) --- doc/source/whatsnew/v0.18.0.txt | 2 +- doc/source/whatsnew/v0.20.0.txt | 2 +- doc/source/whatsnew/v0.24.0.txt | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.18.0.txt b/doc/source/whatsnew/v0.18.0.txt index a3213136d998a..e38ba54d4b058 100644 --- a/doc/source/whatsnew/v0.18.0.txt +++ b/doc/source/whatsnew/v0.18.0.txt @@ -373,7 +373,7 @@ New Behavior: s = pd.Series([1,2,3], index=np.arange(3.)) s s.index - print(s.to_csv(path=None)) + print(s.to_csv(path_or_buf=None, header=False)) Changes to dtype assignment behaviors ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3c0818343208a..819f24254b2ce 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of URLs and paths are now inferred using their file extensions. Additionally, support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). -.. ipython:: python +.. code-block:::: python url = 'https://github.com/{repo}/raw/{branch}/{path}'.format( repo = 'pandas-dev/pandas', diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2f70d4e5946a0..87c59b6410c04 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -461,9 +461,10 @@ all-``NaT``. This is for compatibility with ``TimedeltaIndex`` and ``Series`` behavior (:issue:`22163`) .. 
ipython:: python + :okexcept: - df = pd.DataFrame([pd.Timedelta(days=1)]) - df - np.nan + df = pd.DataFrame([pd.Timedelta(days=1)]) + df - np.nan Previous Behavior: From d7d51a4719ca547c6d7b14e3bdd80696c5551bb8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 21:52:59 -0500 Subject: [PATCH 07/21] Ecosystem (cherry picked from commit 693eead60d96d4297d9b55a6e86eeeb2b2797220) --- doc/source/ecosystem.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index 1014982fea21a..7fffcadd8ee8c 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -73,8 +73,8 @@ large data to thin clients. `seaborn `__ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Seaborn is a Python visualization library based on `matplotlib -`__. It provides a high-level, dataset-oriented +Seaborn is a Python visualization library based on +`matplotlib `__. It provides a high-level, dataset-oriented interface for creating attractive statistical graphics. The plotting functions in seaborn understand pandas objects and leverage pandas grouping operations internally to support concise specification of complex visualizations. Seaborn @@ -140,7 +140,7 @@ which are utilized by Jupyter Notebook for displaying (Note: HTML tables may or may not be compatible with non-HTML Jupyter output formats.) -See :ref:`Options and Settings ` and :ref:`` +See :ref:`Options and Settings ` and :ref:`options.available ` for pandas ``display.`` settings. `quantopian/qgrid `__ @@ -169,7 +169,7 @@ or the clipboard into a new pandas DataFrame via a sophisticated import wizard. Most pandas classes, methods and data attributes can be autocompleted in Spyder's `Editor `__ and `IPython Console `__, -and Spyder's `Help pane`__ can retrieve +and Spyder's `Help pane `__ can retrieve and render Numpydoc documentation on pandas objects in rich text with Sphinx both automatically and on-demand. 
From 65d46ff1048b6a2fb1970ab2bb8d73febf875e92 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 22:06:26 -0500 Subject: [PATCH 08/21] to_csv (cherry picked from commit e6b2c09284f2d68e38c1d5df06a966817a802d63) --- pandas/core/generic.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 75baeab402734..54d679db02958 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9503,8 +9503,11 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, path_or_buf : string or file handle, default None File path or object, if None is provided the result is returned as a string. + .. versionchanged:: 0.24.0 - Was previously named "path" for Series. + + Was previously named "path" for Series. + sep : character, default ',' Field delimiter for the output file. na_rep : string, default '' @@ -9516,8 +9519,11 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, header : boolean or list of string, default True Write out the column names. If a list of strings is given it is assumed to be aliases for the column names + .. versionchanged:: 0.24.0 - Previously defaulted to False for Series. + + Previously defaulted to False for Series. + index : boolean, default True Write row names (index) index_label : string or sequence, or False, default None @@ -9531,14 +9537,16 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, encoding : string, optional A string representing the encoding to use in the output file, defaults to 'ascii' on Python 2 and 'utf-8' on Python 3. - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, - default 'infer' - If 'infer' and `path_or_buf` is path-like, then detect compression - from the following extensions: '.gz', '.bz2', '.zip' or '.xz' - (otherwise no compression). 
+ compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} + + If 'infer' (the default) and `path_or_buf` is path-like, then + detect compression from the following extensions: '.gz', '.bz2', + '.zip' or '.xz' (otherwise no compression). .. versionchanged:: 0.24.0 + 'infer' option added and set to default + line_terminator : string, default ``'\n'`` The newline character or character sequence to use in the output file @@ -9555,7 +9563,9 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, chunksize : int or None rows to write at a time tupleize_cols : boolean, default False + .. deprecated:: 0.21.0 + This argument will be removed and will always write each row of the multi-index as a separate row in the CSV file. @@ -9569,7 +9579,8 @@ def to_csv(self, path_or_buf=None, sep=",", na_rep='', float_format=None, European data .. versionchanged:: 0.24.0 - The order of arguments for Series was changed. + + The order of arguments for Series was changed. """ df = self if isinstance(self, ABCDataFrame) else self.to_frame() From 51ebb11414ce2811b8a164b39dddcc543317a6fa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 17 Sep 2018 22:07:59 -0500 Subject: [PATCH 09/21] to_json (cherry picked from commit a7f0b3841367afc5ed641287e1bb971243477d0f) --- pandas/core/generic.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 54d679db02958..4e589cb7e105f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2061,9 +2061,10 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, .. versionadded:: 0.19.0 compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, - default 'infer' + A string representing the compression to use in the output file, - only used when the first argument is a filename. + only used when the first argument is a filename. By default, the + compression is inferred from the filename. .. versionadded:: 0.21.0 .. 
versionchanged:: 0.24.0 From de799dad2a2f55ac29dfefe1a85ca5882e67d4d6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:29:39 -0500 Subject: [PATCH 10/21] Handle subpackages better (cherry picked from commit ff3d2dd7620eb9b4cf928dd4b66dadc7bb65b011) --- doc/make.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/make.py b/doc/make.py index d85747458148d..89766f89cda75 100755 --- a/doc/make.py +++ b/doc/make.py @@ -135,6 +135,12 @@ def _process_single_doc(self, single_doc): try: obj = pandas # noqa: F821 for name in single_doc.split('.'): + try: + # for names not in the top-level namespace by default, + # e.g. pandas.io.formats.style.Styler + importlib.import_module('.'.join([obj.__name__, name])) + except ModuleNotFoundError: + pass obj = getattr(obj, name) except AttributeError: raise ValueError('Single document not understood, it should ' From 3331dbf866e176bcb3871a33ff10ef1e5c8953f8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:30:04 -0500 Subject: [PATCH 11/21] Fixed unexpected indent (cherry picked from commit e54424981814f2ec5a5047e3ff33953340058b3a) --- pandas/io/formats/style.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index b175dd540a518..f4bb53ba4f218 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1073,6 +1073,7 @@ def bar(self, subset=None, axis=0, color='#d65f5f', width=100, percent of the cell's width. align : {'left', 'zero',' mid'}, default 'left' How to align the bars with the cells. + - 'left' : the min value starts at the left of the cell. - 'zero' : a value of zero is located at the center of the cell. - 'mid' : the center of the cell is at (max-min)/2, or From 5bd85b8588328fd2cc42332f62eb80cec503017b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:33:31 -0500 Subject: [PATCH 12/21] Fixed "inline interpreted text..." 
(cherry picked from commit 8bdb9202e95264f6e1009e8afb66c071c03b9f76) --- pandas/core/window.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/window.py b/pandas/core/window.py index 66f48f403c941..5cdf62d5a5537 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1404,7 +1404,7 @@ def _get_cov(X, Y): otherwise defaults to `False`. Not relevant for :class:`~pandas.Series`. **kwargs - Under Review. + Unused. Returns ------- @@ -1430,7 +1430,7 @@ def _get_cov(X, Y): all 1's), except for :class:`~pandas.DataFrame` inputs with `pairwise` set to `True`. - Function will return `NaN`s for correlations of equal valued sequences; + Function will return ``NaN`` for correlations of equal valued sequences; this is the result of a 0/0 division error. When `pairwise` is set to `False`, only matching columns between `self` and @@ -1446,7 +1446,7 @@ def _get_cov(X, Y): Examples -------- The below example shows a rolling calculation with a window size of - four matching the equivalent function call using `numpy.corrcoef`. + four matching the equivalent function call using :func:`numpy.corrcoef`. >>> v1 = [3, 3, 3, 5, 8] >>> v2 = [3, 4, 4, 4, 8] From 482565cc5bae93166294f382080f1d2d62c104b9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:36:20 -0500 Subject: [PATCH 13/21] Fixed "malformed hyperlink target" (cherry picked from commit ae0f8ff028076ece045736a847ced25ddc5f5086) --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 87c59b6410c04..3be91b6581a55 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -450,7 +450,7 @@ Previous Behavior: Out[3]: Int64Index([0, 1, 2], dtype='int64') -.. _whatsnew_0240.api.timedelta64_subtract_nan +.. 
_whatsnew_0240.api.timedelta64_subtract_nan: Addition/Subtraction of ``NaN`` from :class:`DataFrame` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From 810a553322271443f2002bf68b5a0c18735ba318 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:56:49 -0500 Subject: [PATCH 14/21] Fixed unexpected indentation (cherry picked from commit b1908660804fe897ebf38c71aa85e1811bc1e73a) --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index fdb9ef59c1d3e..a913b956cbc16 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2745,6 +2745,7 @@ def nlargest(self, n=5, keep='first'): keep : {'first', 'last', 'all'}, default 'first' When there are duplicate values that cannot all fit in a Series of `n` elements: + - ``first`` : take the first occurrences based on the index order - ``last`` : take the last occurrences based on the index order - ``all`` : keep all occurrences. This can result in a Series of @@ -2840,6 +2841,7 @@ def nsmallest(self, n=5, keep='first'): keep : {'first', 'last', 'all'}, default 'first' When there are duplicate values that cannot all fit in a Series of `n` elements: + - ``first`` : take the first occurrences based on the index order - ``last`` : take the last occurrences based on the index order - ``all`` : keep all occurrences. 
This can result in a Series of From befc9cd6375373c6b761c5ef6952e56a67c737c8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 10:59:16 -0500 Subject: [PATCH 15/21] newline after directive (cherry picked from commit a46e4c7b513e7930e6a5e1b6e530ce223f9613a6) --- doc/source/whatsnew/v0.24.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3be91b6581a55..4fbfb94a1bd28 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -246,7 +246,6 @@ UTC offset (:issue:`17697`, :issue:`11736`, :issue:`22457`) .. code-block:: ipython - In [2]: pd.to_datetime("2015-11-18 15:30:00+05:30") Out[2]: Timestamp('2015-11-18 10:00:00') @@ -284,6 +283,7 @@ Passing ``utc=True`` will mimic the previous behavior but will correctly indicat that the dates have been converted to UTC .. ipython:: python + pd.to_datetime(["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30"], utc=True) .. _whatsnew_0240.api_breaking.calendarday: From 968d53324c36b9db587c25effa346bc330a4d08a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 11:00:41 -0500 Subject: [PATCH 16/21] Maybe fix na_value not included in toctree (cherry picked from commit 74af53db4164670dbeafbd58ae45467d15eb8044) --- doc/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/api.rst b/doc/source/api.rst index e4b055c14ec27..924aaa2ce1762 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2570,6 +2570,7 @@ objects. api.extensions.register_series_accessor api.extensions.register_index_accessor api.extensions.ExtensionDtype + api.extensions.ExtensionDtype.na_value api.extensions.ExtensionArray .. This is to prevent warnings in the doc build. 
We don't want to encourage From d4edeebddf7397c586f2945ed5ac57cc178d4bad Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 11:04:44 -0500 Subject: [PATCH 17/21] Fixed no link to na_value (cherry picked from commit 1668c654422dd5841c32b789a556388d42182192) --- doc/source/api.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index 924aaa2ce1762..073ed8a082a11 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -2570,7 +2570,6 @@ objects. api.extensions.register_series_accessor api.extensions.register_index_accessor api.extensions.ExtensionDtype - api.extensions.ExtensionDtype.na_value api.extensions.ExtensionArray .. This is to prevent warnings in the doc build. We don't want to encourage @@ -2604,3 +2603,12 @@ objects. generated/pandas.Series.ix generated/pandas.Series.imag generated/pandas.Series.real + + +.. Can't convince sphinx to generate toctree for this class attribute. +.. So we do it manually to avoid a warning + +.. toctree:: + :hidden: + + generated/pandas.api.extensions.ExtensionDtype.na_value From 766a430f81ea8c3ce17b66929a8e281e3929db86 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 18 Sep 2018 11:04:56 -0500 Subject: [PATCH 18/21] Fixed II ref (cherry picked from commit dda2bfc6741bb0d1b07a485cb0910d7e0425c313) --- doc/source/basics.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/basics.rst b/doc/source/basics.rst index c18b94fea9a28..6eeb97349100a 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -1935,7 +1935,7 @@ NumPy's type-system for a few cases. * :ref:`Categorical ` * :ref:`Datetime with Timezone ` * :ref:`Period ` -* :ref:`Interval ` +* :ref:`Interval ` Pandas uses the ``object`` dtype for storing strings. 
From 2947688bd38a811d4f7fe2e1fc0bee31e62b752a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Sep 2018 09:25:34 -0500 Subject: [PATCH 19/21] revert make.py changes --- doc/make.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/make.py b/doc/make.py index 89766f89cda75..d85747458148d 100755 --- a/doc/make.py +++ b/doc/make.py @@ -135,12 +135,6 @@ def _process_single_doc(self, single_doc): try: obj = pandas # noqa: F821 for name in single_doc.split('.'): - try: - # for names not in the top-level namespace by default, - # e.g. pandas.io.formats.style.Styler - importlib.import_module('.'.join([obj.__name__, name])) - except ModuleNotFoundError: - pass obj = getattr(obj, name) except AttributeError: raise ValueError('Single document not understood, it should ' From 03b99027d60da26cc8e82fdc9693e13db10c455d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Sep 2018 10:40:45 -0500 Subject: [PATCH 20/21] Fixup failing doctest --- pandas/core/series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4d6d3ca47dc7e..83f80c305c5eb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2065,10 +2065,10 @@ def autocorr(self, lag=1): Examples -------- >>> s = pd.Series([0.25, 0.5, 0.2, -0.05]) - >>> s.autocorr() - 0.1035526330902407 - >>> s.autocorr(lag=2) - -0.9999999999999999 + >>> s.autocorr() # doctest: +ELLIPSIS + 0.10355... + >>> s.autocorr(lag=2) # doctest: +ELLIPSIS + -0.99999... If the Pearson correlation is not well defined, then 'NaN' is returned. 
From c9b1921e269d13186a6d23834fe5a2f4827db858 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 26 Sep 2018 11:24:35 -0500 Subject: [PATCH 21/21] fixup --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/core/generic.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 819f24254b2ce..9f5fbdc195f34 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -186,7 +186,7 @@ Previously, only ``gzip`` compression was supported. By default, compression of URLs and paths are now inferred using their file extensions. Additionally, support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). -.. code-block:::: python +.. code-block:: python url = 'https://github.com/{repo}/raw/{branch}/{path}'.format( repo = 'pandas-dev/pandas', diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 70e52acc9f3d6..393e7caae5fab 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2060,7 +2060,8 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, like. .. versionadded:: 0.19.0 - compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, + + compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None} A string representing the compression to use in the output file, only used when the first argument is a filename. By default, the