From 83042218695fb15d8bac2945b487fb52b4f8854e Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 1 Sep 2017 15:57:27 -0700 Subject: [PATCH 01/26] Implement _is_utc in timezones --- pandas/_libs/index.pyx | 7 +----- pandas/_libs/period.pyx | 2 +- pandas/_libs/tslib.pyx | 4 ++-- pandas/_libs/tslibs/__init__.py | 2 ++ pandas/_libs/tslibs/timezones.pxd | 4 ++++ pandas/_libs/tslibs/timezones.pyx | 40 +++++++++++++++++++++++++++++++ setup.py | 11 ++++++--- 7 files changed, 58 insertions(+), 12 deletions(-) create mode 100644 pandas/_libs/tslibs/__init__.py create mode 100644 pandas/_libs/tslibs/timezones.pxd create mode 100644 pandas/_libs/tslibs/timezones.pyx diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 42ba0c1cadaec..bf4d53683c9b7 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -17,6 +17,7 @@ cimport tslib from hashtable cimport HashTable +from tslibs.timezones cimport _is_utc from pandas._libs import tslib, algos, hashtable as _hash from pandas._libs.tslib import Timestamp, Timedelta from datetime import datetime, timedelta @@ -32,9 +33,6 @@ cdef extern from "datetime.h": cdef int64_t iNaT = util.get_nat() -from dateutil.tz import tzutc as _du_utc -import pytz -UTC = pytz.utc PyDateTime_IMPORT @@ -559,9 +557,6 @@ cdef inline _to_i8(object val): return ival return val -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _du_utc) - cdef class MultiIndexObjectEngine(ObjectEngine): """ diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 816b7ebfff86d..2f15e2c105209 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -33,9 +33,9 @@ from lib cimport is_null_datetimelike, is_period from pandas._libs import tslib, lib from pandas._libs.tslib import (Timedelta, Timestamp, iNaT, NaT, _get_utcoffset) +from tslibs.timezones cimport _is_utc from tslib cimport ( maybe_get_tz, - _is_utc, _is_tzlocal, _get_dst_info, _nat_scalar_rules) diff --git a/pandas/_libs/tslib.pyx 
b/pandas/_libs/tslib.pyx index 50e0b77c6d3a0..a03dbf5411351 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -107,6 +107,8 @@ cdef int64_t NPY_NAT = util.get_nat() iNaT = NPY_NAT +from tslibs.timezones cimport _is_utc + cdef inline object create_timestamp_from_ts( int64_t value, pandas_datetimestruct dts, object tz, object freq): @@ -1702,8 +1704,6 @@ def _localize_pydatetime(object dt, object tz): def get_timezone(tz): return _get_zone(tz) -cdef inline bint _is_utc(object tz): - return tz is UTC or isinstance(tz, _dateutil_tzutc) cdef inline object _get_zone(object tz): """ diff --git a/pandas/_libs/tslibs/__init__.py b/pandas/_libs/tslibs/__init__.py new file mode 100644 index 0000000000000..f3aa0424f0376 --- /dev/null +++ b/pandas/_libs/tslibs/__init__.py @@ -0,0 +1,2 @@ +# -*- coding: utf-8 -*- +# cython: profile=False diff --git a/pandas/_libs/tslibs/timezones.pxd b/pandas/_libs/tslibs/timezones.pxd new file mode 100644 index 0000000000000..0708282abe1d0 --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pxd @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +cdef bint _is_utc(object tz) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx new file mode 100644 index 0000000000000..5e425ad15bc0b --- /dev/null +++ b/pandas/_libs/tslibs/timezones.pyx @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# cython: profile=False + +from pandas.compat import string_types, is_platform_windows + +cdef extern from "Python.h": + Py_ssize_t PY_SSIZE_T_MAX + +cdef extern from "datetime.h": + void PyDateTime_IMPORT() + +# import datetime C API +PyDateTime_IMPORT + +import numpy as np +cimport numpy as np +from numpy cimport ndarray, int64_t, float64_t +np.import_array() + + +# dateutil compat +from dateutil.tz import (tzoffset, + tzlocal as _dateutil_tzlocal, + tzfile as _dateutil_tzfile, + tzutc as _dateutil_tzutc, + tzstr as _dateutil_tzstr) + +if is_platform_windows(): + from dateutil.zoneinfo import gettz as 
_dateutil_gettz +else: + from dateutil.tz import gettz as _dateutil_gettz + + +from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo +import pytz +UTC = pytz.utc + + +cdef inline bint _is_utc(object tz): + return tz is UTC or isinstance(tz, _dateutil_tzutc) diff --git a/setup.py b/setup.py index 444db5bc4d275..404e12b89ab75 100755 --- a/setup.py +++ b/setup.py @@ -341,6 +341,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/window.pyx', 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', + 'pandas/_libs/tslibs/timezones.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -483,12 +484,15 @@ def pxd(name): + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', 'pxdfiles': ['_libs/src/util', '_libs/lib'], - 'depends': tseries_depends, + 'depends': tseries_depends + [ + 'pandas/_libs/tslibs/timezones'], 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', - 'depends': tseries_depends, + 'depends': tseries_depends + [ + 'pandas/_libs/tslibs/timezones'], 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, @@ -496,7 +500,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], 'pxdfiles': ['_libs/src/util', '_libs/hashtable'], - 'depends': _pxi_dep['index']}, + 'depends': _pxi_dep['index'] + [ + 'pandas/_libs/tslibs/timezones']}, '_libs.algos': {'pyxfile': '_libs/algos', 'pxdfiles': ['_libs/src/util', '_libs/algos', '_libs/hashtable'], 'depends': _pxi_dep['algos']}, From 82b1da7a8cced9bb760805821bb2e6b72ae6ada0 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 1 Sep 2017 17:43:20 -0700 Subject: [PATCH 02/26] Dont declare _is_utc in tslib --- pandas/_libs/tslib.pxd | 1 - 1 file 
changed, 1 deletion(-) diff --git a/pandas/_libs/tslib.pxd b/pandas/_libs/tslib.pxd index aa8cbcb2cedc7..1d81c3cc15cd8 100644 --- a/pandas/_libs/tslib.pxd +++ b/pandas/_libs/tslib.pxd @@ -3,7 +3,6 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object, bint, bint) cpdef convert_to_timedelta64(object, object) cpdef object maybe_get_tz(object) -cdef bint _is_utc(object) cdef bint _is_tzlocal(object) cdef object _get_dst_info(object) cdef bint _nat_scalar_rules[6] From a74e7faf7c631bec45a342b22498faf61548d76c Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 5 Sep 2017 07:21:26 -0700 Subject: [PATCH 03/26] Remove unused imports --- pandas/_libs/tslibs/timezones.pyx | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/pandas/_libs/tslibs/timezones.pyx b/pandas/_libs/tslibs/timezones.pyx index 5e425ad15bc0b..43709e77b70d5 100644 --- a/pandas/_libs/tslibs/timezones.pyx +++ b/pandas/_libs/tslibs/timezones.pyx @@ -1,37 +1,9 @@ # -*- coding: utf-8 -*- # cython: profile=False -from pandas.compat import string_types, is_platform_windows - -cdef extern from "Python.h": - Py_ssize_t PY_SSIZE_T_MAX - -cdef extern from "datetime.h": - void PyDateTime_IMPORT() - -# import datetime C API -PyDateTime_IMPORT - -import numpy as np -cimport numpy as np -from numpy cimport ndarray, int64_t, float64_t -np.import_array() - - # dateutil compat -from dateutil.tz import (tzoffset, - tzlocal as _dateutil_tzlocal, - tzfile as _dateutil_tzfile, - tzutc as _dateutil_tzutc, - tzstr as _dateutil_tzstr) - -if is_platform_windows(): - from dateutil.zoneinfo import gettz as _dateutil_gettz -else: - from dateutil.tz import gettz as _dateutil_gettz - +from dateutil.tz import tzutc as _dateutil_tzutc -from pytz.tzinfo import BaseTzInfo as _pytz_BaseTzInfo import pytz UTC = pytz.utc From b110f965c4c7e3a494c076086a3c499e8fc8d449 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Sat, 2 Sep 2017 12:50:55 +0100 Subject: [PATCH 
04/26] DOC: Cleaned references to pandas ` .. ipython:: python diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 3c6572229802d..4652ccbf0ad34 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -73,7 +73,7 @@ index is passed, one will be created having values ``[0, ..., len(data) - 1]``. .. note:: - Starting in v0.8.0, pandas supports non-unique index values. If an operation + pandas supports non-unique index values. If an operation that does not support duplicate index values is attempted, an exception will be raised at that time. The reason for being lazy is nearly all performance-based (there are many instances in computations, like parts of GroupBy, where the index @@ -698,7 +698,7 @@ DataFrame in tabular form, though it won't always fit the console width: print(baseball.iloc[-20:, :12].to_string()) -New since 0.10.0, wide DataFrames will now be printed across multiple rows by +Wide DataFrames will be printed across multiple rows by default: .. ipython:: python @@ -845,19 +845,16 @@ DataFrame objects with mixed-type columns, all of the data will get upcasted to .. note:: - Unfortunately Panel, being less commonly used than Series and DataFrame, + Panel, being less commonly used than Series and DataFrame, has been slightly neglected feature-wise. A number of methods and options - available in DataFrame are not available in Panel. This will get worked - on, of course, in future releases. And faster if you join me in working on - the codebase. + available in DataFrame are not available in Panel. .. _dsintro.to_panel: From DataFrame using ``to_panel`` method ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -This method was introduced in v0.7 to replace ``LongPanel.to_long``, and converts -a DataFrame with a two-level index to a Panel. +``to_panel`` converts a DataFrame with a two-level index to a Panel. .. 
ipython:: python :okwarning: diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 937d682d238b3..53c0b771555f8 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -140,7 +140,7 @@ columns: In [5]: grouped = df.groupby(get_letter_type, axis=1) -Starting with 0.8, pandas Index objects now support duplicate values. If a +pandas Index objects support duplicate values. If a non-unique index is used as the group key in a groupby operation, all values for the same index value will be considered to be in one group and thus the output of aggregation functions will only contain unique index values: @@ -288,8 +288,6 @@ chosen level: s.sum(level='second') -.. versionadded:: 0.6 - Grouping with multiple levels is supported. .. ipython:: python diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 53a259ad6eb15..4687e46490562 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -66,8 +66,6 @@ See the :ref:`cookbook` for some advanced strategies Different Choices for Indexing ------------------------------ -.. versionadded:: 0.11.0 - Object selection has had a number of user-requested additions in order to support more explicit location based indexing. Pandas now supports three types of multi-axis indexing. diff --git a/doc/source/io.rst b/doc/source/io.rst index f55c72bae5a20..f68358764a40e 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -364,7 +364,7 @@ warn_bad_lines : boolean, default ``True`` Specifying column data types '''''''''''''''''''''''''''' -Starting with v0.10, you can indicate the data type for the whole DataFrame or +You can indicate the data type for the whole DataFrame or individual columns: .. ipython:: python @@ -3346,7 +3346,7 @@ Read/Write API '''''''''''''' ``HDFStore`` supports an top-level API using ``read_hdf`` for reading and ``to_hdf`` for writing, -similar to how ``read_csv`` and ``to_csv`` work. (new in 0.11.0) +similar to how ``read_csv`` and ``to_csv`` work. .. 
ipython:: python @@ -3791,7 +3791,7 @@ indexed dimension as the ``where``. .. note:: - Indexes are automagically created (starting ``0.10.1``) on the indexables + Indexes are automagically created on the indexables and any data columns you specify. This behavior can be turned off by passing ``index=False`` to ``append``. @@ -3878,7 +3878,7 @@ create a new table!) Iterator ++++++++ -Starting in ``0.11.0``, you can pass, ``iterator=True`` or ``chunksize=number_in_a_chunk`` +You can pass ``iterator=True`` or ``chunksize=number_in_a_chunk`` to ``select`` and ``select_as_multiple`` to return an iterator on the results. The default is 50,000 rows returned in a chunk. @@ -3986,8 +3986,8 @@ of rows in an object. Multiple Table Queries ++++++++++++++++++++++ -New in 0.10.1 are the methods ``append_to_multiple`` and -``select_as_multiple``, that can perform appending/selecting from +The methods ``append_to_multiple`` and +``select_as_multiple`` can perform appending/selecting from multiple tables at once. The idea is to have one table (call it the selector table) that you index most/all of the columns, and perform your queries. The other table(s) are data tables with an index matching the @@ -4291,7 +4291,7 @@ Pass ``min_itemsize`` on the first table creation to a-priori specify the minimu ``min_itemsize`` can be an integer, or a dict mapping a column name to an integer. You can pass ``values`` as a key to allow all *indexables* or *data_columns* to have this min_itemsize. -Starting in 0.11.0, passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. +Passing a ``min_itemsize`` dict will cause all passed columns to be created as *data_columns* automatically. .. 
note:: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index d54288baa389b..64a321d67a825 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -67,9 +67,8 @@ arise and we wish to also consider that "missing" or "not available" or "NA". .. note:: - Prior to version v0.10.0 ``inf`` and ``-inf`` were also - considered to be "NA" in computations. This is no longer the case by - default; use the ``mode.use_inf_as_na`` option to recover it. + If you want to consider ``inf`` and ``-inf`` to be "NA" in computations, + you can set ``pandas.options.mode.use_inf_as_na = True``. .. _missing.isna: @@ -485,8 +484,8 @@ respectively: Replacing Generic Values ~~~~~~~~~~~~~~~~~~~~~~~~ -Often times we want to replace arbitrary values with other values. New in v0.8 -is the ``replace`` method in Series/DataFrame that provides an efficient yet +Often times we want to replace arbitrary values with other values. The +``replace`` method in Series/DataFrame provides an efficient yet flexible way to perform such replacements. For a Series, you can replace a single value or a list of values by another diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index ce4a920ad77b5..aded5e4402df2 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1069,8 +1069,7 @@ Offset Aliases ~~~~~~~~~~~~~~ A number of string aliases are given to useful common time series -frequencies. We will refer to these aliases as *offset aliases* -(referred to as *time rules* prior to v0.8.0). +frequencies. We will refer to these aliases as *offset aliases*. .. csv-table:: :header: "Alias", "Description" diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index fb799c642131d..c637246537ca1 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -306,8 +306,6 @@ subplots: df.diff().hist(color='k', alpha=0.5, bins=50) -.. 
versionadded:: 0.10.0 - The ``by`` keyword can be specified to plot grouped histograms: .. ipython:: python @@ -831,8 +829,6 @@ and take a :class:`Series` or :class:`DataFrame` as an argument. Scatter Matrix Plot ~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.7.3 - You can create a scatter plot matrix using the ``scatter_matrix`` method in ``pandas.plotting``: @@ -859,8 +855,6 @@ You can create a scatter plot matrix using the Density Plot ~~~~~~~~~~~~ -.. versionadded:: 0.8.0 - You can create density plots using the :meth:`Series.plot.kde` and :meth:`DataFrame.plot.kde` methods. .. ipython:: python From d8ea8f4982f1fa1dd3847aa14ddcaad623706198 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 4 Sep 2017 16:32:34 -0700 Subject: [PATCH 05/26] Remove unused _day and _month attrs (#17431) closes #17429 --- pandas/_libs/tslib.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index a03dbf5411351..8bd9df373f7ff 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -831,8 +831,6 @@ class NaTType(_NaT): cdef _NaT base base = _NaT.__new__(cls, 1, 1, 1) - base._day = -1 - base._month = -1 base.value = NPY_NAT return base From 42e5e4d02895ff4c2d64f06b785a3fe06614aef3 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Tue, 5 Sep 2017 11:30:31 +0100 Subject: [PATCH 06/26] DOC: Clean-up references to v12 to v14 (both included) (#17420) --- doc/source/advanced.rst | 21 ++--------- doc/source/basics.rst | 10 +----- doc/source/comparison_with_r.rst | 4 --- doc/source/cookbook.rst | 2 +- doc/source/enhancingperf.rst | 36 ++++++------------- doc/source/groupby.rst | 19 ---------- doc/source/indexing.rst | 23 ++---------- doc/source/install.rst | 2 +- doc/source/io.rst | 61 +++++++++----------------------- doc/source/merging.rst | 2 -- doc/source/missing_data.rst | 9 ----- doc/source/options.rst | 2 +- doc/source/text.rst | 2 -- doc/source/timedeltas.rst | 2 -- doc/source/timeseries.rst | 10 +++--- 
doc/source/visualization.rst | 16 --------- 16 files changed, 43 insertions(+), 178 deletions(-) diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 711c3e9a95d05..4af476cd5a7e1 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -270,9 +270,6 @@ Passing a list of labels or tuples works similar to reindexing: Using slicers ~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - -In 0.14.0 we added a new way to slice multi-indexed objects. You can slice a multi-index by providing multiple indexers. You can provide any of the selectors as if you are indexing by label, see :ref:`Selection by Label `, @@ -384,7 +381,7 @@ selecting data at a particular level of a MultiIndex easier. .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[(slice(None),'one'),:] You can also select on the columns with :meth:`~pandas.MultiIndex.xs`, by @@ -397,7 +394,7 @@ providing the axis argument .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,(slice(None),'one')] :meth:`~pandas.MultiIndex.xs` also allows selection with multiple keys @@ -408,11 +405,9 @@ providing the axis argument .. ipython:: python - # using the slicers (new in 0.14.0) + # using the slicers df.loc[:,('bar','one')] -.. versionadded:: 0.13.0 - You can pass ``drop_level=False`` to :meth:`~pandas.MultiIndex.xs` to retain the level that was selected @@ -743,16 +738,6 @@ Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``ND Float64Index ~~~~~~~~~~~~ -.. note:: - - As of 0.14.0, ``Float64Index`` is backed by a native ``float64`` dtype - array. Prior to 0.14.0, ``Float64Index`` was backed by an ``object`` dtype - array. Using a ``float64`` dtype in the backend speeds up arithmetic - operations by about 30x and boolean indexing operations on the - ``Float64Index`` itself are about 2x as fast. - -.. 
versionadded:: 0.13.0 - By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation. This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the same. diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 35eb14eda238f..5880703b1d271 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -347,7 +347,7 @@ That is because NaNs do not compare as equals: np.nan == np.nan -So, as of v0.13.1, NDFrames (such as Series, DataFrames, and Panels) +So, NDFrames (such as Series, DataFrames, and Panels) have an :meth:`~DataFrame.equals` method for testing equality, with NaNs in corresponding locations treated as equal. @@ -1104,10 +1104,6 @@ Applying with a ``Panel`` will pass a ``Series`` to the applied function. If the function returns a ``Series``, the result of the application will be a ``Panel``. If the applied function reduces to a scalar, the result of the application will be a ``DataFrame``. -.. note:: - - Prior to 0.13.1 ``apply`` on a ``Panel`` would only work on ``ufuncs`` (e.g. ``np.sum/np.max``). - .. ipython:: python import pandas.util.testing as tm @@ -1800,8 +1796,6 @@ Series has the :meth:`~Series.searchsorted` method, which works similar to smallest / largest values ~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - ``Series`` has the :meth:`~Series.nsmallest` and :meth:`~Series.nlargest` methods which return the smallest or largest :math:`n` values. For a large ``Series`` this can be much faster than sorting the entire Series and calling ``head(n)`` on the result. @@ -2168,8 +2162,6 @@ Selecting columns based on ``dtype`` .. _basics.selectdtypes: -.. versionadded:: 0.14.1 - The :meth:`~DataFrame.select_dtypes` method implements subsetting of columns based on their ``dtype``. 
diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index 194e022e34c7c..f895cdc25e620 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -247,8 +247,6 @@ For more details and examples see :ref:`the reshaping documentation |subset|_ ~~~~~~~~~~ -.. versionadded:: 0.13 - The :meth:`~pandas.DataFrame.query` method is similar to the base R ``subset`` function. In R you might want to get the rows of a ``data.frame`` where one column's values are less than another column's values: @@ -277,8 +275,6 @@ For more details and examples see :ref:`the query documentation |with|_ ~~~~~~~~ -.. versionadded:: 0.13 - An expression using a data.frame called ``df`` in R with the columns ``a`` and ``b`` would be evaluated using ``with`` like so: diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index 32e7a616fe856..f51c3e679b36f 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -818,7 +818,7 @@ The :ref:`Concat ` docs. The :ref:`Join ` d df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=['A', 'B', 'C']) df2 = df1.copy() -ignore_index is needed in pandas < v0.13, and depending on df construction +Depending on df construction, ``ignore_index`` may be needed .. ipython:: python diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 685a8690a53d5..264bd1de1fc77 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -213,17 +213,18 @@ the rows, applying our ``integrate_f_typed``, and putting this in the zeros arra .. warning:: - In 0.13.0 since ``Series`` has internaly been refactored to no longer sub-class ``ndarray`` - but instead subclass ``NDFrame``, you can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter - to a cython function. Instead pass the actual ``ndarray`` using the ``.values`` attribute of the Series. + You can **not pass** a ``Series`` directly as a ``ndarray`` typed parameter + to a cython function. 
Instead pass the actual ``ndarray`` using the + ``.values`` attribute of the Series. The reason is that the cython + definition is specific to an ndarray and not the passed Series. - Prior to 0.13.0 + So, do not do this: .. code-block:: python apply_integrate_f(df['a'], df['b'], df['N']) - Use ``.values`` to get the underlying ``ndarray`` + But rather, use ``.values`` to get the underlying ``ndarray`` .. code-block:: python @@ -399,10 +400,8 @@ Read more in the `numba docs `__. .. _enhancingperf.eval: -Expression Evaluation via :func:`~pandas.eval` (Experimental) -------------------------------------------------------------- - -.. versionadded:: 0.13 +Expression Evaluation via :func:`~pandas.eval` +----------------------------------------------- The top-level function :func:`pandas.eval` implements expression evaluation of :class:`~pandas.Series` and :class:`~pandas.DataFrame` objects. @@ -539,10 +538,8 @@ Now let's do the same thing but with comparisons: of type ``bool`` or ``np.bool_``. Again, you should perform these kinds of operations in plain Python. -The ``DataFrame.eval`` method (Experimental) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -.. versionadded:: 0.13 +The ``DataFrame.eval`` method +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the top level :func:`pandas.eval` function you can also evaluate an expression in the "context" of a :class:`~pandas.DataFrame`. @@ -646,19 +643,6 @@ whether the query modifies the original frame. Local Variables ~~~~~~~~~~~~~~~ -In pandas version 0.14 the local variable API has changed. In pandas 0.13.x, -you could refer to local variables the same way you would in standard Python. -For example, - -.. code-block:: python - - df = pd.DataFrame(np.random.randn(5, 2), columns=['a', 'b']) - newcol = np.random.randn(len(df)) - df.eval('b + newcol') - - UndefinedVariableError: name 'newcol' is not defined - -As you can see from the exception generated, this syntax is no longer allowed. 
You must *explicitly reference* any local variable that you want to use in an expression by placing the ``@`` character in front of the name. For example, diff --git a/doc/source/groupby.rst b/doc/source/groupby.rst index 53c0b771555f8..e1231b9a4a200 100644 --- a/doc/source/groupby.rst +++ b/doc/source/groupby.rst @@ -766,8 +766,6 @@ missing values with the ``ffill()`` method. Filtration ---------- -.. versionadded:: 0.12 - The ``filter`` method returns a subset of the original object. Suppose we want to take only elements that belong to groups with a group sum greater than 2. @@ -858,8 +856,6 @@ In this example, we chopped the collection of time series into yearly chunks then independently called :ref:`fillna ` on the groups. -.. versionadded:: 0.14.1 - The ``nlargest`` and ``nsmallest`` methods work on ``Series`` style groupbys: .. ipython:: python @@ -1048,19 +1044,6 @@ Just like for a DataFrame or Series you can call head and tail on a groupby: This shows the first or last n rows from each group. -.. warning:: - - Before 0.14.0 this was implemented with a fall-through apply, - so the result would incorrectly respect the as_index flag: - - .. code-block:: python - - >>> g.head(1): # was equivalent to g.apply(lambda x: x.head(1)) - A B - A - 1 0 1 2 - 5 2 5 6 - .. _groupby.nth: Taking the nth row of each group @@ -1113,8 +1096,6 @@ You can also select multiple rows from each group by specifying multiple nth val Enumerate group items ~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - To see the order in which each row appears within its group, use the ``cumcount`` method: diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 4687e46490562..a6e7df57be4e5 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -248,8 +248,6 @@ as an attribute: - In any of these cases, standard indexing will still work, e.g. ``s['1']``, ``s['min']``, and ``s['index']`` will access the corresponding element or column. 
- - The ``Series/Panel`` accesses are available starting in 0.13.0. - If you are using the IPython environment, you may also use tab-completion to see these accessible attributes. @@ -529,7 +527,6 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. .. ipython:: python # these are allowed in python/numpy. - # Only works in Pandas starting from v0.14.0. x = list('abcdef') x x[4:10] @@ -539,14 +536,8 @@ Out of range slice indexes are handled gracefully just as in Python/Numpy. s.iloc[4:10] s.iloc[8:10] -.. note:: - - Prior to v0.14.0, ``iloc`` would not accept out of bounds indexers for - slices, e.g. a value that exceeds the length of the object being indexed. - - -Note that this could result in an empty axis (e.g. an empty DataFrame being -returned) +Note that using slices that go out of bounds can result in +an empty axis (e.g. an empty DataFrame being returned) .. ipython:: python @@ -745,8 +736,6 @@ Finally, one can also set a seed for ``sample``'s random number generator using Setting With Enlargement ------------------------ -.. versionadded:: 0.13 - The ``.loc/[]`` operations can perform enlargement when setting a non-existant key for that axis. In the ``Series`` case this is effectively an appending operation @@ -1020,8 +1009,6 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels) df2[ df2[1:4] > 0 ] = 3 df2 -.. versionadded:: 0.13 - Where can also accept ``axis`` and ``level`` parameters to align the input when performing the ``where``. @@ -1064,8 +1051,6 @@ as condition and ``other`` argument. The :meth:`~pandas.DataFrame.query` Method (Experimental) --------------------------------------------------------- -.. versionadded:: 0.13 - :class:`~pandas.DataFrame` objects have a :meth:`~pandas.DataFrame.query` method that allows selection using an expression. @@ -1506,8 +1491,6 @@ The name, if set, will be shown in the console display: Setting metadata ~~~~~~~~~~~~~~~~ -.. 
versionadded:: 0.13.0 - Indexes are "mostly immutable", but it is possible to set and change their metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and ``labels``). @@ -1790,7 +1773,7 @@ Evaluation order matters Furthermore, in chained expressions, the order may determine whether a copy is returned or not. If an expression will set values on a copy of a slice, then a ``SettingWithCopy`` -exception will be raised (this raise/warn behavior is new starting in 0.13.0) +warning will be issued. You can control the action of a chained assignment via the option ``mode.chained_assignment``, which can take the values ``['raise','warn',None]``, where showing a warning is the default. diff --git a/doc/source/install.rst b/doc/source/install.rst index f92c43839ee31..8dc8224ea6cb2 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -107,7 +107,7 @@ following command:: To install a specific pandas version:: - conda install pandas=0.13.1 + conda install pandas=0.20.3 To install other packages, IPython for example:: diff --git a/doc/source/io.rst b/doc/source/io.rst index f68358764a40e..33523ea171f3a 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1310,8 +1310,6 @@ column widths for contiguous columns: The parser will take care of extra white spaces around the columns so it's ok to have extra separation between the columns in the file. -.. versionadded:: 0.13.0 - By default, ``read_fwf`` will try to infer the file's ``colspecs`` by using the first 100 rows of the file. It can do it only in cases when the columns are aligned and correctly separated by the provided ``delimiter`` (default delimiter @@ -1407,8 +1405,7 @@ Reading columns with a ``MultiIndex`` By specifying list of row locations for the ``header`` argument, you can read in a ``MultiIndex`` for the columns. Specifying non-consecutive -rows will skip the intervening rows. In order to have the pre-0.13 behavior -of tupleizing columns, specify ``tupleize_cols=True``. 
+rows will skip the intervening rows. .. ipython:: python @@ -1418,7 +1415,7 @@ of tupleizing columns, specify ``tupleize_cols=True``. print(open('mi.csv').read()) pd.read_csv('mi.csv',header=[0,1,2,3],index_col=[0,1]) -Starting in 0.13.0, ``read_csv`` will be able to interpret a more common format +``read_csv`` is also able to interpret a more common format of multi-columns indices. .. ipython:: python @@ -2012,8 +2009,6 @@ The speedup is less noticeable for smaller datasets: Normalization ''''''''''''' -.. versionadded:: 0.13.0 - pandas provides a utility function to take a dict or list of dicts and *normalize* this semi-structured data into a flat table. @@ -2198,8 +2193,6 @@ Reading HTML Content We **highly encourage** you to read the :ref:`HTML Table Parsing gotchas ` below regarding the issues surrounding the BeautifulSoup4/html5lib/lxml parsers. -.. versionadded:: 0.12.0 - The top-level :func:`~pandas.io.html.read_html` function can accept an HTML string/file/URL and will parse HTML tables into list of pandas DataFrames. Let's look at a few examples. @@ -2653,10 +2646,6 @@ of sheet names can simply be passed to ``read_excel`` with no loss in performanc # equivalent using the read_excel function data = read_excel('path_to_file.xls', ['Sheet1', 'Sheet2'], index_col=None, na_values=['NA']) -.. versionadded:: 0.12 - -``ExcelFile`` has been moved to the top level namespace. - .. versionadded:: 0.17 ``read_excel`` can take an ``ExcelFile`` object as input @@ -2716,9 +2705,6 @@ Using a list to get multiple sheets: ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. - -.. versionadded:: 0.13 - Sheets can be specified by sheet index or sheet name, using an integer or string, respectively. 
@@ -2866,9 +2852,9 @@ Files with a ``.xls`` extension will be written using ``xlwt`` and those with a ``.xlsx`` extension will be written using ``xlsxwriter`` (if available) or ``openpyxl``. -The DataFrame will be written in a way that tries to mimic the REPL output. One -difference from 0.12.0 is that the ``index_label`` will be placed in the second -row instead of the first. You can get the previous behaviour by setting the +The DataFrame will be written in a way that tries to mimic the REPL output. +The ``index_label`` will be placed in the second +row instead of the first. You can place it in the first row by setting the ``merge_cells`` option in ``to_excel()`` to ``False``: .. code-block:: python @@ -2945,8 +2931,6 @@ Added support for Openpyxl >= 2.2 Excel writer engines '''''''''''''''''''' -.. versionadded:: 0.13 - ``pandas`` chooses an Excel writer via two methods: 1. the ``engine`` keyword argument @@ -3074,14 +3058,19 @@ any pickled pandas object (or any other pickled object) from file: Loading pickled data received from untrusted sources can be unsafe. - See: http://docs.python.org/2.7/library/pickle.html + See: https://docs.python.org/3.6/library/pickle.html .. warning:: - Several internal refactorings, 0.13 (:ref:`Series Refactoring `), and 0.15 (:ref:`Index Refactoring `), - preserve compatibility with pickles created prior to these versions. However, these must - be read with ``pd.read_pickle``, rather than the default python ``pickle.load``. - See `this question `__ + Several internal refactorings have been done while still preserving + compatibility with pickles created with older versions of pandas. However, + for such cases, pickled dataframes, series etc, must be read with + ``pd.read_pickle``, rather than ``pickle.load``. + + See `here `__ + and `here `__ + for some examples of compatibility-breaking changes. See + `this question `__ for a detailed explanation. .. 
_io.pickle.compression: @@ -3150,9 +3139,7 @@ The default is to 'infer msgpack ------- -.. versionadded:: 0.13.0 - -Starting in 0.13.0, pandas is supporting the ``msgpack`` format for +pandas supports the ``msgpack`` format for object serialization. This is a lightweight portable binary format, similar to binary JSON, that is highly space efficient, and provides good performance both on the writing (serialization), and reading (deserialization). @@ -3424,10 +3411,6 @@ This is also true for the major axis of a ``Panel``: Fixed Format '''''''''''' -.. note:: - - This was prior to 0.13.0 the ``Storer`` format. - The examples above show storing using ``put``, which write the HDF5 to ``PyTables`` in a fixed array format, called the ``fixed`` format. These types of stores are **not** appendable once written (though you can simply remove them and rewrite). Nor are they **queryable**; they must be @@ -3460,8 +3443,6 @@ other sessions. In addition, delete & query type operations are supported. This format is specified by ``format='table'`` or ``format='t'`` to ``append`` or ``put`` or ``to_hdf`` -.. versionadded:: 0.13 - This format can be set as an option as well ``pd.set_option('io.hdf.default_format','table')`` to enable ``put/append/to_hdf`` to by default store in the ``table`` format. @@ -3765,9 +3746,7 @@ space. These are in terms of the total number of rows in a table. Using timedelta64[ns] +++++++++++++++++++++ -.. versionadded:: 0.13 - -Beginning in 0.13.0, you can store and query using the ``timedelta64[ns]`` type. Terms can be +You can store and query using the ``timedelta64[ns]`` type. Terms can be specified in the format: ``()``, where float may be signed (and fractional), and unit can be ``D,s,ms,us,ns`` for the timedelta. Here's an example: @@ -3889,8 +3868,6 @@ The default is 50,000 rows returned in a chunk. .. note:: - .. 
versionadded:: 0.12.0 - You can also use the iterator with ``read_hdf`` which will open, then automatically close the store when finished iterating. @@ -4603,8 +4580,6 @@ included in Python's standard library by default. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. -.. versionadded:: 0.14.0 - If SQLAlchemy is not installed, a fallback is only provided for sqlite (and for mysql for backwards compatibility, but this is deprecated and will be removed in a future version). @@ -4937,8 +4912,6 @@ Full documentation can be found `here `__ Stata Format ------------ -.. versionadded:: 0.12.0 - .. _io.stata_writer: Writing to Stata format diff --git a/doc/source/merging.rst b/doc/source/merging.rst index d956f1ca54e6b..a5ee1b1a9384c 100644 --- a/doc/source/merging.rst +++ b/doc/source/merging.rst @@ -1053,8 +1053,6 @@ As you can see, this drops any rows where there was no match. Joining a single Index to a Multi-index ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14.0 - You can join a singly-indexed ``DataFrame`` with a level of a multi-indexed ``DataFrame``. The level will match on the name of the index of the singly-indexed frame against a level name of the multi-indexed frame. diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 64a321d67a825..65b411ccd4af2 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -263,8 +263,6 @@ and ``bfill()`` is equivalent to ``fillna(method='bfill')`` Filling with a PandasObject ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.12 - You can also fillna using a dict or Series that is alignable. The labels of the dict or index of the Series must match the columns of the frame you wish to fill. The use case of this is to fill a DataFrame with the mean of that column. @@ -280,8 +278,6 @@ use case of this is to fill a DataFrame with the mean of that column. dff.fillna(dff.mean()) dff.fillna(dff.mean()['B':'C']) -.. 
versionadded:: 0.13 - Same result as above, but is aligning the 'fill' value which is a Series in this case. @@ -320,11 +316,6 @@ examined :ref:`in the API `. Interpolation ~~~~~~~~~~~~~ -.. versionadded:: 0.13.0 - - :meth:`~pandas.DataFrame.interpolate`, and :meth:`~pandas.Series.interpolate` have - revamped interpolation methods and functionality. - .. versionadded:: 0.17.0 The ``limit_direction`` keyword argument was added. diff --git a/doc/source/options.rst b/doc/source/options.rst index 51d02bc89692a..1592caf90546c 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -306,7 +306,7 @@ display.float_format None The callable should accept a fl See core.format.EngFormatter for an example. display.large_repr truncate For DataFrames exceeding max_rows/max_cols, the repr (and HTML repr) can show - a truncated table (the default from 0.13), + a truncated table (the default), or switch to the view from df.info() (the behaviour in earlier versions of pandas). allowable settings, ['truncate', 'info'] diff --git a/doc/source/text.rst b/doc/source/text.rst index e3e4b24d17f44..85b8aa6aa1857 100644 --- a/doc/source/text.rst +++ b/doc/source/text.rst @@ -211,8 +211,6 @@ Extracting Substrings Extract first match in each subject (extract) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -.. versionadded:: 0.13.0 - .. warning:: In version 0.18.0, ``extract`` gained the ``expand`` argument. When diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index 07effcfdff33b..daa2c262c8c86 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -242,8 +242,6 @@ Numeric reduction operation for ``timedelta64[ns]`` will return ``Timedelta`` ob Frequency Conversion -------------------- -.. versionadded:: 0.13 - Timedelta Series, ``TimedeltaIndex``, and ``Timedelta`` scalars can be converted to other 'frequencies' by dividing by another timedelta, or by astyping to a specific timedelta type. 
These operations yield Series and propagate ``NaT`` -> ``nan``. Note that division by the numpy scalar is true division, while astyping is equivalent of floor division. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index aded5e4402df2..c86c58c3183f6 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -177,7 +177,7 @@ you can pass the ``dayfirst`` flag: .. note:: Specifying a ``format`` argument will potentially speed up the conversion - considerably and on versions later then 0.13.0 explicitly specifying + considerably and explicitly specifying a format string of '%Y%m%d' takes a faster path still. If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. @@ -1946,9 +1946,11 @@ These can easily be converted to a ``PeriodIndex`` Time Zone Handling ------------------ -Pandas provides rich support for working with timestamps in different time zones using ``pytz`` and ``dateutil`` libraries. -``dateutil`` support is new in 0.14.1 and currently only supported for fixed offset and tzfile zones. The default library is ``pytz``. -Support for ``dateutil`` is provided for compatibility with other applications e.g. if you use ``dateutil`` in other python packages. +Pandas provides rich support for working with timestamps in different time +zones using ``pytz`` and ``dateutil`` libraries. ``dateutil`` currently is only +supported for fixed offset and tzfile zones. The default library is ``pytz``. +Support for ``dateutil`` is provided for compatibility with other +applications e.g. if you use ``dateutil`` in other python packages. Working with Time Zones ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index c637246537ca1..839390c8778aa 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -512,8 +512,6 @@ Compare to: Area Plot ~~~~~~~~~ -.. 
versionadded:: 0.14 - You can create area plots with :meth:`Series.plot.area` and :meth:`DataFrame.plot.area`. Area plots are stacked by default. To produce stacked area plot, each column must be either all positive or all negative values. @@ -550,8 +548,6 @@ To produce an unstacked plot, pass ``stacked=False``. Alpha value is set to 0.5 Scatter Plot ~~~~~~~~~~~~ -.. versionadded:: 0.13 - Scatter plot can be drawn by using the :meth:`DataFrame.plot.scatter` method. Scatter plot requires numeric columns for x and y axis. These can be specified by ``x`` and ``y`` keywords each. @@ -619,8 +615,6 @@ See the :meth:`scatter ` method and the Hexagonal Bin Plot ~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - You can create hexagonal bin plots with :meth:`DataFrame.plot.hexbin`. Hexbin plots can be a useful alternative to scatter plots if your data are too dense to plot each point individually. @@ -682,8 +676,6 @@ See the :meth:`hexbin ` method and the Pie plot ~~~~~~~~ -.. versionadded:: 0.14 - You can create a pie plot with :meth:`DataFrame.plot.pie` or :meth:`Series.plot.pie`. If your data includes any ``NaN``, they will be automatically filled with 0. A ``ValueError`` will be raised if there are any negative values in your data. @@ -1365,8 +1357,6 @@ Another option is passing an ``ax`` argument to :meth:`Series.plot` to plot on a Plotting With Error Bars ~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with error bars is now supported in the :meth:`DataFrame.plot` and :meth:`Series.plot` Horizontal and vertical errorbars can be supplied to the ``xerr`` and ``yerr`` keyword arguments to :meth:`~DataFrame.plot()`. The error values can be specified using a variety of formats. @@ -1407,8 +1397,6 @@ Here is an example of one way to easily plot group means with standard deviation Plotting Tables ~~~~~~~~~~~~~~~ -.. versionadded:: 0.14 - Plotting with matplotlib table is now supported in :meth:`DataFrame.plot` and :meth:`Series.plot` with a ``table`` keyword. 
The ``table`` keyword can accept ``bool``, :class:`DataFrame` or :class:`Series`. The simple way to draw a table is to specify ``table=True``. Data will be transposed to meet matplotlib's default layout. .. ipython:: python @@ -1585,10 +1573,6 @@ available in matplotlib. Although this formatting does not provide the same level of refinement you would get when plotting via pandas, it can be faster when plotting a large number of points. -.. note:: - - The speed up for large data sets only applies to pandas 0.14.0 and later. - .. ipython:: python :suppress: From 870e6a45a2510191d2b91c36f296f5619e853f0e Mon Sep 17 00:00:00 2001 From: s-weigand Date: Wed, 6 Sep 2017 14:03:39 +0200 Subject: [PATCH 07/26] BUG: Plotting Timedelta on y-axis #16953 (#17430) * implemented fix for GH issue #16953 * added tests for fix of issue #16953 * changed comments for git issue to pandas style GH# * changed linelength in tests, so all lines are less than 80 characters * added whatsnew entry * swaped conversion and filtering of values, for plot to also work with object dtypes * refomated code, so len(line) < 80 * changed whatsnew with timedelta and datetime dtypes * added support for datetimetz and extended tests * added reason to pytest.mark.xfail --- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/plotting/_core.py | 8 ++- pandas/tests/plotting/test_frame.py | 76 +++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 81e52266f972e..1f3bf00c87767 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -432,7 +432,7 @@ I/O Plotting ^^^^^^^^ - Bug in plotting methods using ``secondary_y`` and ``fontsize`` not setting secondary axis font size (:issue:`12565`) - +- Bug when plotting ``timedelta`` and ``datetime`` dtypes on y-axis (:issue:`16953`) Groupby/Resample/Rolling ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/plotting/_core.py 
b/pandas/plotting/_core.py index e5b9497993172..a0b7e93efd05c 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -342,7 +342,13 @@ def _compute_plot_data(self): label = 'None' data = data.to_frame(name=label) - numeric_data = data._convert(datetime=True)._get_numeric_data() + # GH16953, _convert is needed as fallback, for ``Series`` + # with ``dtype == object`` + data = data._convert(datetime=True, timedelta=True) + numeric_data = data.select_dtypes(include=[np.number, + "datetime", + "datetimetz", + "timedelta"]) try: is_empty = numeric_data.empty diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 67098529a0111..f3b287a8889c3 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -380,6 +380,82 @@ def test_subplots_timeseries(self): self._check_ticks_props(ax, xlabelsize=7, xrot=45, ylabelsize=7) + def test_subplots_timeseries_y_axis(self): + # GH16953 + data = {"numeric": np.array([1, 2, 5]), + "timedelta": [pd.Timedelta(-10, unit="s"), + pd.Timedelta(10, unit="m"), + pd.Timedelta(10, unit="h")], + "datetime_no_tz": [pd.to_datetime("2017-08-01 00:00:00"), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")], + "datetime_all_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00", + utc=True), + pd.to_datetime("2017-08-02 00:00:00", + utc=True)], + "text": ["This", "should", "fail"]} + testdata = DataFrame(data) + + ax_numeric = testdata.plot(y="numeric") + assert (ax_numeric.get_lines()[0].get_data()[1] == + testdata["numeric"].values).all() + ax_timedelta = testdata.plot(y="timedelta") + assert (ax_timedelta.get_lines()[0].get_data()[1] == + testdata["timedelta"].values).all() + ax_datetime_no_tz = testdata.plot(y="datetime_no_tz") + assert (ax_datetime_no_tz.get_lines()[0].get_data()[1] == + testdata["datetime_no_tz"].values).all() + ax_datetime_all_tz = testdata.plot(y="datetime_all_tz") + 
assert (ax_datetime_all_tz.get_lines()[0].get_data()[1] == + testdata["datetime_all_tz"].values).all() + with pytest.raises(TypeError): + testdata.plot(y="text") + + @pytest.mark.xfail(reason='no support for period, categorical, ' + 'datetime_mixed_tz') + def test_subplots_timeseries_y_axis_not_supported(self): + """ + This test will fail for: + period: + since period isn't yet implemented in ``select_dtypes`` + and because it will need a custom value converter + + tick formatter (as was done for x-axis plots) + + categorical: + because it will need a custom value converter + + tick formatter (also doesn't work for x-axis, as of now) + + datetime_mixed_tz: + because of the way how pandas handles ``Series`` of + ``datetime`` objects with different timezone, + generally converting ``datetime`` objects in a tz-aware + form could help with this problem + """ + data = {"numeric": np.array([1, 2, 5]), + "period": [pd.Period('2017-08-01 00:00:00', freq='H'), + pd.Period('2017-08-01 02:00', freq='H'), + pd.Period('2017-08-02 00:00:00', freq='H')], + "categorical": pd.Categorical(["c", "b", "a"], + categories=["a", "b", "c"], + ordered=False), + "datetime_mixed_tz": [pd.to_datetime("2017-08-01 00:00:00", + utc=True), + pd.to_datetime("2017-08-01 02:00:00"), + pd.to_datetime("2017-08-02 00:00:00")]} + testdata = pd.DataFrame(data) + ax_period = testdata.plot(x="numeric", y="period") + assert (ax_period.get_lines()[0].get_data()[1] == + testdata["period"].values).all() + ax_categorical = testdata.plot(x="numeric", y="categorical") + assert (ax_categorical.get_lines()[0].get_data()[1] == + testdata["categorical"].values).all() + ax_datetime_mixed_tz = testdata.plot(x="numeric", + y="datetime_mixed_tz") + assert (ax_datetime_mixed_tz.get_lines()[0].get_data()[1] == + testdata["datetime_mixed_tz"].values).all() + @pytest.mark.slow def test_subplots_layout(self): # GH 6667 From 07edd078b00fbf4bc0a6f25b326e15eb6b0c0732 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Wed, 6 Sep
2017 08:23:06 -0400 Subject: [PATCH 08/26] COMPAT: handle pyarrow deprecation of timestamps_to_ms in .from_pandas with pyarrow < 0.6.0 (#17447) closes #17438 --- ci/requirements-3.5.sh | 2 +- doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/io/parquet.py | 18 ++++++++++++++---- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/ci/requirements-3.5.sh b/ci/requirements-3.5.sh index 33db9c28c78a9..d694ad3679ac1 100644 --- a/ci/requirements-3.5.sh +++ b/ci/requirements-3.5.sh @@ -8,4 +8,4 @@ echo "install 35" conda remove -n pandas python-dateutil --force pip install python-dateutil -conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 +conda install -n pandas -c conda-forge feather-format pyarrow=0.5.0 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 1f3bf00c87767..b24a6f067cee4 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -125,7 +125,7 @@ Other Enhancements - :func:`DataFrame.select_dtypes` now accepts scalar values for include/exclude as well as list-like. (:issue:`16855`) - :func:`date_range` now accepts 'YS' in addition to 'AS' as an alias for start of year (:issue:`9313`) - :func:`date_range` now accepts 'Y' in addition to 'A' as an alias for end of year (:issue:`9313`) -- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. +- Integration with `Apache Parquet `__, including a new top-level :func:`read_parquet` and :func:`DataFrame.to_parquet` method, see :ref:`here `. (:issue:`15838`, :issue:`17438`) - :func:`DataFrame.add_prefix` and :func:`DataFrame.add_suffix` now accept strings containing the '%' character. (:issue:`17151`) - `read_*` methods can now infer compression from non-string paths, such as ``pathlib.Path`` objects (:issue:`17206`). 
- :func:`pd.read_sas()` now recognizes much more of the most frequently used date (datetime) formats in SAS7BDAT files (:issue:`15871`). diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index 09603fd6fdcce..4b507b7f5df6f 100644 --- a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -58,13 +58,23 @@ def __init__(self): "\nor via pip\n" "pip install -U pyarrow\n") + self._pyarrow_lt_050 = LooseVersion(pyarrow.__version__) < '0.5.0' + self._pyarrow_lt_060 = LooseVersion(pyarrow.__version__) < '0.6.0' self.api = pyarrow - def write(self, df, path, compression='snappy', **kwargs): + def write(self, df, path, compression='snappy', + coerce_timestamps='ms', **kwargs): path, _, _ = get_filepath_or_buffer(path) - table = self.api.Table.from_pandas(df, timestamps_to_ms=True) - self.api.parquet.write_table( - table, path, compression=compression, **kwargs) + if self._pyarrow_lt_060: + table = self.api.Table.from_pandas(df, timestamps_to_ms=True) + self.api.parquet.write_table( + table, path, compression=compression, **kwargs) + + else: + table = self.api.Table.from_pandas(df) + self.api.parquet.write_table( + table, path, compression=compression, + coerce_timestamps=coerce_timestamps, **kwargs) def read(self, path): path, _, _ = get_filepath_or_buffer(path) From 708e4e691535f5bf2f6330e254ac873ba70ba0a3 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Wed, 6 Sep 2017 15:55:12 +0100 Subject: [PATCH 09/26] DOC/TST: Add examples to MultiIndex.get_level_values + related changes (#17414) --- pandas/core/indexes/base.py | 12 ++++++++++-- pandas/core/indexes/multi.py | 23 +++++++++++++++++++++-- pandas/tests/indexes/test_base.py | 6 ++++++ 3 files changed, 37 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 6a30eaefaaae7..a9098126a38e3 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2529,15 +2529,23 @@ def set_value(self, arr, key, value): def _get_level_values(self, level): """ Return 
an Index of values for requested level, equal to the length - of the index + of the index. Parameters ---------- - level : int + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``self``, as there is only one level in the Index. + + See also + --------- + pandas.MultiIndex.get_level_values : get values for a level of a + MultiIndex """ self._validate_index_level(level) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index d7d5b6d128a2c..8b2cf0e7c0b40 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -882,15 +882,34 @@ def _get_level_values(self, level): def get_level_values(self, level): """ Return vector of label values for requested level, - equal to the length of the index + equal to the length of the index. Parameters ---------- - level : int or level name + level : int or str + ``level`` is either the integer position of the level in the + MultiIndex, or the name of the level. Returns ------- values : Index + ``values`` is a level of this MultiIndex converted to + a single :class:`Index` (or subclass thereof). 
+ + Examples + --------- + + Create a MultiIndex: + + >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) + >>> mi.names = ['level_1', 'level_2'] + + Get level values by supplying level as either integer or name: + + >>> mi.get_level_values(0) + Index(['a', 'b', 'c'], dtype='object', name='level_1') + >>> mi.get_level_values('level_2') + Index(['d', 'e', 'f'], dtype='object', name='level_2') """ level = self._get_level_number(level) values = self._get_level_values(level) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index aa32e75ba0d58..f96dbdcfb8acf 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1438,6 +1438,12 @@ def test_get_level_values(self): result = self.strIndex.get_level_values(0) tm.assert_index_equal(result, self.strIndex) + # test for name (GH 17414) + index_with_name = self.strIndex.copy() + index_with_name.name = 'a' + result = index_with_name.get_level_values('a') + tm.assert_index_equal(result, index_with_name) + def test_slice_keep_name(self): idx = Index(['a', 'b'], name='asdf') assert idx.name == idx[1:].name From b5073791c29f717821056f0c0454bf0551896127 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:14:05 -0700 Subject: [PATCH 10/26] Dont re-pin total_seconds as it is already implemented (#17432) --- pandas/_libs/tslib.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 8bd9df373f7ff..1a35d5b4c0311 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -858,6 +858,9 @@ class NaTType(_NaT): return (__nat_unpickle, (None, )) def total_seconds(self): + """ + Total duration of timedelta in seconds (to ns precision) + """ # GH 10939 return np.nan @@ -3890,8 +3893,9 @@ for field in fields: _nat_methods = ['date', 'now', 'replace', 'to_pydatetime', 'today', 'round', 'floor', 'ceil', 'tz_convert', 'tz_localize'] -_nan_methods = ['weekday', 
'isoweekday', 'total_seconds'] -_implemented_methods = ['to_datetime', 'to_datetime64', 'isoformat'] +_nan_methods = ['weekday', 'isoweekday'] +_implemented_methods = [ + 'to_datetime', 'to_datetime64', 'isoformat', 'total_seconds'] _implemented_methods.extend(_nat_methods) _implemented_methods.extend(_nan_methods) From 991af8f4df53d7c0b9cf0322dc7c8f3ea0e339cf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 6 Sep 2017 17:46:50 -0700 Subject: [PATCH 11/26] BUG: Return local Timestamp.weekday_name attribute (#17354) (#17377) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/_libs/tslib.pyx | 22 ++++++++++++++++++---- pandas/tests/scalar/test_timestamp.py | 8 ++++++++ 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index b24a6f067cee4..553e622b8560e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -399,6 +399,7 @@ Conversion - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) +- Bug in :attr:`Timestamp.weekday_name` returning a UTC-based weekday name when localized to a timezone (:issue:`17354`) Indexing ^^^^^^^^ diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 1a35d5b4c0311..74c84b8679073 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -534,9 +534,7 @@ class Timestamp(_Timestamp): @property def weekday_name(self): - out = get_date_name_field( - np.array([self.value], dtype=np.int64), 'weekday_name') - return out[0] + return self._get_named_field('weekday_name') @property def dayofyear(self): @@ -1271,13 +1269,29 @@ cdef class _Timestamp(datetime): # same timezone if specified) return datetime.__sub__(self, other) - cpdef _get_field(self, field): + cdef int64_t _maybe_convert_value_to_local(self): + """Convert UTC i8 value to local i8 value if tz exists""" + cdef: + int64_t val val = self.value if self.tz is not None and not _is_utc(self.tz): val = tz_convert_single(self.value, 'UTC', self.tz) + return val + + cpdef _get_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) + cpdef _get_named_field(self, field): + cdef: + int64_t val + val = self._maybe_convert_value_to_local() + out = get_date_name_field(np.array([val], dtype=np.int64), field) + return out[0] + cpdef _get_start_end_field(self, field): month_kw = self.freq.kwds.get( 'startingMonth', self.freq.kwds.get( diff --git a/pandas/tests/scalar/test_timestamp.py b/pandas/tests/scalar/test_timestamp.py index 7cd1a7db0f9fe..8d47ce4802ac6 100644 --- a/pandas/tests/scalar/test_timestamp.py +++ b/pandas/tests/scalar/test_timestamp.py @@ -555,6 +555,14 @@ def check(value, equal): for 
end in ends: assert getattr(ts, end) + @pytest.mark.parametrize('data, expected', + [(Timestamp('2017-08-28 23:00:00'), 'Monday'), + (Timestamp('2017-08-28 23:00:00', tz='EST'), + 'Monday')]) + def test_weekday_name(self, data, expected): + # GH 17354 + assert data.weekday_name == expected + def test_pprint(self): # GH12622 import pprint From c877772ee0c352c16b92d06d3c82788844e57732 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Thu, 7 Sep 2017 02:47:43 +0200 Subject: [PATCH 12/26] BUG: intersection of decreasing RangeIndexes (#17374) closes #17296 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/range.py | 22 +++++++++++++--------- pandas/tests/indexes/test_range.py | 15 +++++++++++++++ 3 files changed, 29 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 553e622b8560e..f7cd8230c8b9b 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -417,6 +417,7 @@ Indexing - Bug in ``.iloc`` when used with inplace addition or assignment and an int indexer on a ``MultiIndex`` causing the wrong indexes to be read from and written to (:issue:`17148`) - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) +- Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) I/O ^^^ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 82412d3a7ef57..b759abaed4e56 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -324,12 +324,13 @@ def intersection(self, other): if not len(self) or not len(other): return RangeIndex._simple_new(None) + first = self[::-1] if self._step < 0 else self + second = other[::-1] if other._step < 0 else other + # check whether intervals intersect # deals with in- and decreasing ranges - int_low = 
max(min(self._start, self._stop + 1), - min(other._start, other._stop + 1)) - int_high = min(max(self._stop, self._start + 1), - max(other._stop, other._start + 1)) + int_low = max(first._start, second._start) + int_high = min(first._stop, second._stop) if int_high <= int_low: return RangeIndex._simple_new(None) @@ -337,21 +338,24 @@ def intersection(self, other): # solve intersection problem # performance hint: for identical step sizes, could use # cheaper alternative - gcd, s, t = self._extended_gcd(self._step, other._step) + gcd, s, t = first._extended_gcd(first._step, second._step) # check whether element sets intersect - if (self._start - other._start) % gcd: + if (first._start - second._start) % gcd: return RangeIndex._simple_new(None) # calculate parameters for the RangeIndex describing the # intersection disregarding the lower bounds - tmp_start = self._start + (other._start - self._start) * \ - self._step // gcd * s - new_step = self._step * other._step // gcd + tmp_start = first._start + (second._start - first._start) * \ + first._step // gcd * s + new_step = first._step * second._step // gcd new_index = RangeIndex(tmp_start, int_high, new_step, fastpath=True) # adjust index to limiting interval new_index._start = new_index._min_fitting_element(int_low) + + if (self._step < 0 and other._step < 0) is not (new_index._step < 0): + new_index = new_index[::-1] return new_index def _min_fitting_element(self, lower_limit): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 5ecf467b57fc5..06c8f0ee392c7 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -610,6 +610,21 @@ def test_intersection(self): other.values))) tm.assert_index_equal(result, expected) + # reversed (GH 17296) + result = other.intersection(self.index) + tm.assert_index_equal(result, expected) + + # GH 17296: intersect two decreasing RangeIndexes + first = RangeIndex(10, -2, -2) + other = RangeIndex(5, -4, -1) + 
expected = first.astype(int).intersection(other.astype(int)) + result = first.intersection(other).astype(int) + tm.assert_index_equal(result, expected) + + # reversed + result = other.intersection(first).astype(int) + tm.assert_index_equal(result, expected) + index = RangeIndex(5) # intersect of non-overlapping indices From f8ce1adcb55aa2596b5f921b5222929908b92075 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 6 Sep 2017 17:51:50 -0700 Subject: [PATCH 13/26] Remove property that re-computed microsecond (#17331) --- asv_bench/benchmarks/timestamp.py | 60 +++++++++++++++++++++++++++++++ doc/source/whatsnew/v0.21.0.txt | 2 +- pandas/_libs/period.pyx | 1 + pandas/_libs/tslib.pyx | 4 --- 4 files changed, 62 insertions(+), 5 deletions(-) create mode 100644 asv_bench/benchmarks/timestamp.py diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py new file mode 100644 index 0000000000000..066479b22739a --- /dev/null +++ b/asv_bench/benchmarks/timestamp.py @@ -0,0 +1,60 @@ +from .pandas_vb_common import * +from pandas import to_timedelta, Timestamp + + +class TimestampProperties(object): + goal_time = 0.2 + + def setup(self): + self.ts = Timestamp('2017-08-25 08:16:14') + + def time_tz(self): + self.ts.tz + + def time_offset(self): + self.ts.offset + + def time_dayofweek(self): + self.ts.dayofweek + + def time_weekday_name(self): + self.ts.weekday_name + + def time_dayofyear(self): + self.ts.dayofyear + + def time_week(self): + self.ts.week + + def time_quarter(self): + self.ts.quarter + + def time_days_in_month(self): + self.ts.days_in_month + + def time_freqstr(self): + self.ts.freqstr + + def time_is_month_start(self): + self.ts.is_month_start + + def time_is_month_end(self): + self.ts.is_month_end + + def time_is_quarter_start(self): + self.ts.is_quarter_start + + def time_is_quarter_end(self): + self.ts.is_quarter_end + + def time_is_year_start(self): + self.ts.is_quarter_end + + def time_is_year_end(self): + self.ts.is_quarter_end + + 
def time_is_leap_year(self): + self.ts.is_quarter_end + + def time_microsecond(self): + self.ts.microsecond diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index f7cd8230c8b9b..33a6db18db3ca 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -382,7 +382,7 @@ Performance Improvements - Improved performance of instantiating :class:`SparseDataFrame` (:issue:`16773`) - :attr:`Series.dt` no longer performs frequency inference, yielding a large speedup when accessing the attribute (:issue:`17210`) - +- :attr:`Timestamp.microsecond` no longer re-computes on attribute access (:issue:`17331`) .. _whatsnew_0210.bug_fixes: diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 2f15e2c105209..106d4c4bae2fc 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- from datetime import datetime, date, timedelta import operator diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 74c84b8679073..30f2982b6f070 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -546,10 +546,6 @@ class Timestamp(_Timestamp): weekofyear = week - @property - def microsecond(self): - return self._get_field('us') - @property def quarter(self): return self._get_field('q') From 57c2d55cd00633cb3d3bc38988f0a2345ba95075 Mon Sep 17 00:00:00 2001 From: topper-123 Date: Thu, 7 Sep 2017 02:00:49 +0100 Subject: [PATCH 14/26] cleaned references to pandas v0.15 and v0.16 in docs (#17442) --- doc/source/10min.rst | 2 +- doc/source/advanced.rst | 15 ++------ doc/source/basics.rst | 8 ++-- doc/source/categorical.rst | 64 +++++--------------------------- doc/source/comparison_with_r.rst | 2 - doc/source/computation.rst | 7 +--- doc/source/cookbook.rst | 6 --- doc/source/dsintro.rst | 2 - doc/source/gotchas.rst | 4 +- doc/source/indexing.rst | 14 ------- doc/source/install.rst | 20 ++++------ doc/source/io.rst | 41 ++++---------------- doc/source/remote_data.rst 
| 11 +++--- doc/source/reshaping.rst | 4 +- doc/source/sparse.rst | 2 - doc/source/timedeltas.rst | 26 +++++-------- doc/source/visualization.rst | 4 -- doc/source/whatsnew/v0.21.0.txt | 1 + 18 files changed, 53 insertions(+), 180 deletions(-) diff --git a/doc/source/10min.rst b/doc/source/10min.rst index def49a641a0ff..ef6b2d6ef2c90 100644 --- a/doc/source/10min.rst +++ b/doc/source/10min.rst @@ -655,7 +655,7 @@ the quarter end: Categoricals ------------ -Since version 0.15, pandas can include categorical data in a ``DataFrame``. For full docs, see the +pandas can include categorical data in a ``DataFrame``. For full docs, see the :ref:`categorical introduction ` and the :ref:`API documentation `. .. ipython:: python diff --git a/doc/source/advanced.rst b/doc/source/advanced.rst index 4af476cd5a7e1..3f145cf955664 100644 --- a/doc/source/advanced.rst +++ b/doc/source/advanced.rst @@ -26,12 +26,6 @@ See the :ref:`Indexing and Selecting Data ` for general indexing docum should be avoided. See :ref:`Returning a View versus Copy ` -.. warning:: - - In 0.15.0 ``Index`` has internally been refactored to no longer sub-class ``ndarray`` - but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be - a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) - See the :ref:`cookbook` for some advanced strategies .. _advanced.hierarchical: @@ -638,12 +632,9 @@ In the following sub-sections we will highlite some other index types. CategoricalIndex ~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.1 - -We introduce a ``CategoricalIndex``, a new type of index object that is useful for supporting -indexing with duplicates. This is a container around a ``Categorical`` (introduced in v0.15.0) -and allows efficient indexing and storage of an index with a large number of duplicated elements. 
Prior to 0.16.1, -setting the index of a ``DataFrame/Series`` with a ``category`` dtype would convert this to regular object-based ``Index``. +``CategoricalIndex`` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` +and allows efficient indexing and storage of an index with a large number of duplicated elements. .. ipython:: python diff --git a/doc/source/basics.rst b/doc/source/basics.rst index 5880703b1d271..42c28df3a6030 100644 --- a/doc/source/basics.rst +++ b/doc/source/basics.rst @@ -719,8 +719,6 @@ on an entire ``DataFrame`` or ``Series``, row- or column-wise, or elementwise. Tablewise Function Application ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.2 - ``DataFrames`` and ``Series`` can of course just be passed into functions. However, if the function needs to be called in a chain, consider using the :meth:`~DataFrame.pipe` method. Compare the following @@ -1860,8 +1858,10 @@ dtypes ------ The main types stored in pandas objects are ``float``, ``int``, ``bool``, -``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, ``category`` (in >= 0.15.0), and ``object``. In addition these dtypes -have item sizes, e.g. ``int64`` and ``int32``. See :ref:`Series with TZ ` for more detail on ``datetime64[ns, tz]`` dtypes. +``datetime64[ns]`` and ``datetime64[ns, tz]`` (in >= 0.17.0), ``timedelta[ns]``, +``category`` and ``object``. In addition these dtypes have item sizes, e.g. +``int64`` and ``int32``. See :ref:`Series with TZ ` +for more detail on ``datetime64[ns, tz]`` dtypes. A convenient :attr:`~DataFrame.dtypes` attribute for DataFrames returns a Series with the data type of each column. diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 02d7920bc4a84..8835c4a1533d0 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -16,13 +16,6 @@ Categorical Data **************** -.. versionadded:: 0.15 - -.. 
note:: - While there was `pandas.Categorical` in earlier versions, the ability to use - categorical data in `Series` and `DataFrame` is new. - - This is an introduction to pandas categorical data type, including a short comparison with R's ``factor``. @@ -295,10 +288,6 @@ Sorting and Order .. _categorical.sort: -.. warning:: - - The default for construction has changed in v0.16.0 to ``ordered=False``, from the prior implicit ``ordered=True`` - If categorical data is ordered (``s.cat.ordered == True``), then the order of the categories has a meaning and certain operations are possible. If the categorical is unordered, ``.min()/.max()`` will raise a `TypeError`. @@ -803,13 +792,11 @@ Following table summarizes the results of ``Categoricals`` related concatenation Getting Data In/Out ------------------- -.. versionadded:: 0.15.2 +You can write data that contains ``category`` dtypes to a ``HDFStore``. +See :ref:`here ` for an example and caveats. -Writing data (`Series`, `Frames`) to a HDF store that contains a ``category`` dtype was implemented -in 0.15.2. See :ref:`here ` for an example and caveats. - -Writing data to and reading data from *Stata* format files was implemented in -0.15.2. See :ref:`here ` for an example and caveats. +It is also possible to write data to and reading data from *Stata* format files. +See :ref:`here ` for an example and caveats. Writing to a CSV file will convert the data, effectively removing any information about the categorical (categories and ordering). So if you read back the CSV file you have to convert the @@ -928,32 +915,6 @@ an ``object`` dtype is a constant times the length of the data. s.astype('category').nbytes -Old style constructor usage -~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -In earlier versions than pandas 0.15, a `Categorical` could be constructed by passing in precomputed -`codes` (called then `labels`) instead of values with categories. The `codes` were interpreted as -pointers to the categories with `-1` as `NaN`. 
This type of constructor usage is replaced by -the special constructor :func:`Categorical.from_codes`. - -Unfortunately, in some special cases, using code which assumes the old style constructor usage -will work with the current pandas version, resulting in subtle bugs: - -.. code-block:: python - - >>> cat = pd.Categorical([1,2], [1,2,3]) - >>> # old version - >>> cat.get_values() - array([2, 3], dtype=int64) - >>> # new version - >>> cat.get_values() - array([1, 2], dtype=int64) - -.. warning:: - If you used `Categoricals` with older versions of pandas, please audit your code before - upgrading and change your code to use the :func:`~pandas.Categorical.from_codes` - constructor. - `Categorical` is not a `numpy` array ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -982,8 +943,7 @@ Dtype comparisons work: dtype == np.str_ np.str_ == dtype -To check if a Series contains Categorical data, with pandas 0.16 or later, use -``hasattr(s, 'cat')``: +To check if a Series contains Categorical data, use ``hasattr(s, 'cat')``: .. ipython:: python @@ -1023,13 +983,13 @@ basic type) and applying along columns will also convert to object. Categorical Index ~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.1 - -A new ``CategoricalIndex`` index type is introduced in version 0.16.1. See the -:ref:`advanced indexing docs ` for a more detailed +``CategoricalIndex`` is a type of index that is useful for supporting +indexing with duplicates. This is a container around a ``Categorical`` +and allows efficient indexing and storage of an index with a large number of duplicated elements. +See the :ref:`advanced indexing docs ` for a more detailed explanation. -Setting the index, will create create a ``CategoricalIndex`` +Setting the index will create a ``CategoricalIndex`` .. 
ipython:: python @@ -1041,10 +1001,6 @@ Setting the index, will create create a ``CategoricalIndex`` # This now sorts by the categories order df.sort_index() -In previous versions (<0.16.1) there is no index of type ``category``, so -setting the index to categorical column will convert the categorical data to a -"normal" dtype first and therefore remove any custom ordering of the categories. - Side Effects ~~~~~~~~~~~~ diff --git a/doc/source/comparison_with_r.rst b/doc/source/comparison_with_r.rst index f895cdc25e620..eb97aeeb7e696 100644 --- a/doc/source/comparison_with_r.rst +++ b/doc/source/comparison_with_r.rst @@ -505,8 +505,6 @@ For more details and examples see :ref:`the reshaping documentation |factor|_ ~~~~~~~~~ -.. versionadded:: 0.15 - pandas has a data type for categorical data. .. code-block:: r diff --git a/doc/source/computation.rst b/doc/source/computation.rst index 76a030d355e33..23699393958cf 100644 --- a/doc/source/computation.rst +++ b/doc/source/computation.rst @@ -924,15 +924,12 @@ EWM has a ``min_periods`` argument, which has the same meaning it does for all the ``.expanding`` and ``.rolling`` methods: no output values will be set until at least ``min_periods`` non-null values are encountered in the (expanding) window. -(This is a change from versions prior to 0.15.0, in which the ``min_periods`` -argument affected only the ``min_periods`` consecutive entries starting at the -first non-null value.) -EWM also has an ``ignore_na`` argument, which deterines how +EWM also has an ``ignore_na`` argument, which determines how intermediate null values affect the calculation of the weights. When ``ignore_na=False`` (the default), weights are calculated based on absolute positions, so that intermediate null values affect the result. -When ``ignore_na=True`` (which reproduces the behavior in versions prior to 0.15.0), +When ``ignore_na=True``, weights are calculated by ignoring intermediate null values. 
For example, assuming ``adjust=True``, if ``ignore_na=False``, the weighted average of ``3, NaN, 5`` would be calculated as diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index f51c3e679b36f..5bb3ba75fe51b 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -256,12 +256,6 @@ Panels pf = pd.Panel({'df1':df1,'df2':df2,'df3':df3});pf - #Assignment using Transpose (pandas < 0.15) - pf = pf.transpose(2,0,1) - pf['E'] = pd.DataFrame(data, rng, cols) - pf = pf.transpose(1,2,0);pf - - #Direct assignment (pandas > 0.15) pf.loc[:,:,'F'] = pd.DataFrame(data, rng, cols);pf `Mask a panel by using np.where and then reconstructing the panel with the new masked values diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 4652ccbf0ad34..ec0a1c7a00bf7 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -453,8 +453,6 @@ available to insert at a particular location in the columns: Assigning New Columns in Method Chains ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - Inspired by `dplyr's `__ ``mutate`` verb, DataFrame has an :meth:`~pandas.DataFrame.assign` diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index a3062b4086673..9e6f98923fca6 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -22,8 +22,8 @@ Frequently Asked Questions (FAQ) DataFrame memory usage ---------------------- -As of pandas version 0.15.0, the memory usage of a dataframe (including -the index) is shown when accessing the ``info`` method of a dataframe. A +The memory usage of a dataframe (including the index) +is shown when accessing the ``info`` method of a dataframe. A configuration option, ``display.memory_usage`` (see :ref:`options`), specifies if the dataframe's memory usage will be displayed when invoking the ``df.info()`` method. 
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index a6e7df57be4e5..88e62b5d301a3 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -47,12 +47,6 @@ advanced indexing. should be avoided. See :ref:`Returning a View versus Copy ` -.. warning:: - - In 0.15.0 ``Index`` has internally been refactored to no longer subclass ``ndarray`` - but instead subclass ``PandasObject``, similarly to the rest of the pandas objects. This should be - a transparent change with only very limited API implications (See the :ref:`Internal Refactoring `) - .. warning:: Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here `. @@ -660,7 +654,6 @@ For getting *multiple* indexers, using ``.get_indexer`` Selecting Random Samples ------------------------ -.. versionadded::0.16.1 A random selection of rows or columns from a Series, DataFrame, or Panel with the :meth:`~DataFrame.sample` method. The method will sample rows by default, and accepts a specific number of rows/columns to return, or a fraction of rows. @@ -1510,8 +1503,6 @@ See :ref:`Advanced Indexing ` for usage of MultiIndexes. ind.name = "bob" ind -.. versionadded:: 0.15.0 - ``set_names``, ``set_levels``, and ``set_labels`` also take an optional `level`` argument @@ -1527,11 +1518,6 @@ Set operations on Index objects .. _indexing.set_ops: -.. warning:: - - In 0.15.0. the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain - index types. ``+`` can be replace by ``.union()`` or ``|``, and ``-`` by ``.difference()``. - The two main operations are ``union (|)``, ``intersection (&)`` These can be directly called as instance methods or used via overloaded operators. Difference is provided via the ``.difference()`` method. 
diff --git a/doc/source/install.rst b/doc/source/install.rst index 8dc8224ea6cb2..c805f84d0faaa 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -18,7 +18,7 @@ Instructions for installing from source, Python version support ---------------------- -Officially Python 2.7, 3.4, 3.5, and 3.6 +Officially Python 2.7, 3.5, and 3.6. Installing pandas ----------------- @@ -183,21 +183,17 @@ installed), make sure you have `pytest >>> import pandas as pd >>> pd.test() - Running unit tests for pandas - pandas version 0.18.0 - numpy version 1.10.2 - pandas is installed in pandas - Python version 2.7.11 |Continuum Analytics, Inc.| - (default, Dec 6 2015, 18:57:58) [GCC 4.2.1 (Apple Inc. build 5577)] - nose version 1.3.7 + running: pytest --skip-slow --skip-network C:\Users\TP\Anaconda3\envs\py36\lib\site-packages\pandas + ============================= test session starts ============================= + platform win32 -- Python 3.6.2, pytest-3.2.1, py-1.4.34, pluggy-0.4.0 + rootdir: C:\Users\TP\Documents\Python\pandasdev\pandas, inifile: setup.cfg + collected 12145 items / 3 skipped + ..................................................................S...... ........S................................................................ ......................................................................... - ---------------------------------------------------------------------- - Ran 9252 tests in 368.339s - - OK (SKIP=117) + ==================== 12130 passed, 12 skipped in 368.339 seconds ===================== Dependencies ------------ diff --git a/doc/source/io.rst b/doc/source/io.rst index 33523ea171f3a..de3150035c446 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -592,8 +592,7 @@ Ignoring line comments and empty lines ++++++++++++++++++++++++++++++++++++++ If the ``comment`` parameter is specified, then completely commented lines will -be ignored. By default, completely blank lines will be ignored as well. 
Both of -these are API changes introduced in version 0.15. +be ignored. By default, completely blank lines will be ignored as well. .. ipython:: python @@ -2701,8 +2700,6 @@ Using a list to get multiple sheets: # Returns the 1st and 4th sheet, as a dictionary of DataFrames. read_excel('path_to_file.xls',sheet_name=['Sheet1',3]) -.. versionadded:: 0.16 - ``read_excel`` can read more than one sheet, by setting ``sheet_name`` to either a list of sheet names, a list of sheet positions, or ``None`` to read all sheets. Sheets can be specified by sheet index or sheet name, using an integer or string, @@ -3241,11 +3238,10 @@ for some advanced strategies .. warning:: - As of version 0.15.0, pandas requires ``PyTables`` >= 3.0.0. Stores written with prior versions of pandas / ``PyTables`` >= 2.3 are fully compatible (this was the previous minimum ``PyTables`` required version). - -.. warning:: - - There is a ``PyTables`` indexing bug which may appear when querying stores using an index. If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. Stores created previously will need to be rewritten using the updated version. + pandas requires ``PyTables`` >= 3.0.0. + There is a indexing bug in ``PyTables`` < 3.2 which may appear when querying stores using an index. + If you see a subset of results being returned, upgrade to ``PyTables`` >= 3.2. + Stores created previously will need to be rewritten using the updated version. .. warning:: @@ -4210,10 +4206,8 @@ object : ``strings`` ``np.nan`` Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - -Writing data to a ``HDFStore`` that contains a ``category`` dtype was implemented -in 0.15.2. Queries work the same as if it was an object array. However, the ``category`` dtyped data is +You can write data that contains ``category`` dtypes to a ``HDFStore``. +Queries work the same as if it was an object array. However, the ``category`` dtyped data is stored in a more efficient manner. .. 
ipython:: python @@ -4228,21 +4222,6 @@ stored in a more efficient manner. result result.dtypes -.. warning:: - - The format of the ``Categorical`` is readable by prior versions of pandas (< 0.15.2), but will retrieve - the data as an integer based column (e.g. the ``codes``). However, the ``categories`` *can* be retrieved - but require the user to select them manually using the explicit meta path. - - The data is stored like so: - - .. ipython:: python - - cstore - - # to get the categories - cstore.select('dfcat/meta/A/meta') - .. ipython:: python :suppress: :okexcept: @@ -4746,8 +4725,6 @@ You can check if a table exists using :func:`~pandas.io.sql.has_table` Schema support '''''''''''''' -.. versionadded:: 0.15.0 - Reading from and writing to different schema's is supported through the ``schema`` keyword in the :func:`~pandas.read_sql_table` and :func:`~pandas.DataFrame.to_sql` functions. Note however that this depends on the database flavor (sqlite does not @@ -4975,8 +4952,6 @@ be used to read the file incrementally. pd.read_stata('stata.dta') -.. versionadded:: 0.16.0 - Specifying a ``chunksize`` yields a :class:`~pandas.io.stata.StataReader` instance that can be used to read ``chunksize`` lines from the file at a time. The ``StataReader`` @@ -5034,8 +5009,6 @@ values will have ``object`` data type. Categorical Data ++++++++++++++++ -.. versionadded:: 0.15.2 - ``Categorical`` data can be exported to *Stata* data files as value labeled data. The exported data consists of the underlying category codes as integer data values and the categories as value labels. 
*Stata* does not have an explicit equivalent diff --git a/doc/source/remote_data.rst b/doc/source/remote_data.rst index 7980133582125..9af66058a7aaa 100644 --- a/doc/source/remote_data.rst +++ b/doc/source/remote_data.rst @@ -11,14 +11,13 @@ Remote Data Access DataReader ---------- -The sub-package ``pandas.io.data`` is removed in favor of a separately -installable `pandas-datareader package +The sub-package ``pandas.io.data`` was deprecated in v.0.17 and removed in +`v.0.19 `__. + Instead there has been created a separately installable `pandas-datareader package `_. This will allow the data -modules to be independently updated to your pandas installation. The API for -``pandas-datareader v0.1.1`` is the same as in ``pandas v0.16.1``. -(:issue:`8961`) +modules to be independently updated on your pandas installation. - You should replace the imports of the following: + For code older than < 0.19 you should replace the imports of the following: .. code-block:: python diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 3dce73b302c7c..fab83222b313f 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -569,8 +569,6 @@ This function is often used along with discretization functions like ``cut``: See also :func:`Series.str.get_dummies `. -.. versionadded:: 0.15.0 - :func:`get_dummies` also accepts a DataFrame. By default all categorical variables (categorical in the statistical sense, those with `object` or `categorical` dtype) are encoded as dummy variables. @@ -675,4 +673,4 @@ handling of NaN: you can use ``df["cat_col"] = pd.Categorical(df["col"])`` or ``df["cat_col"] = df["col"].astype("category")``. For full docs on :class:`~pandas.Categorical`, see the :ref:`Categorical introduction ` and the - :ref:`API documentation `. This feature was introduced in version 0.15. + :ref:`API documentation `. 
diff --git a/doc/source/sparse.rst b/doc/source/sparse.rst index b4884cf1c4141..cf16cee501a3e 100644 --- a/doc/source/sparse.rst +++ b/doc/source/sparse.rst @@ -216,8 +216,6 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you SparseSeries ~~~~~~~~~~~~ -.. versionadded:: 0.16.0 - A :meth:`SparseSeries.to_coo` method is implemented for transforming a ``SparseSeries`` indexed by a ``MultiIndex`` to a ``scipy.sparse.coo_matrix``. The method requires a ``MultiIndex`` with two or more levels. diff --git a/doc/source/timedeltas.rst b/doc/source/timedeltas.rst index daa2c262c8c86..d055c49dc4721 100644 --- a/doc/source/timedeltas.rst +++ b/doc/source/timedeltas.rst @@ -23,13 +23,12 @@ Time Deltas *********** -.. note:: - - Starting in v0.15.0, we introduce a new scalar type ``Timedelta``, which is a subclass of ``datetime.timedelta``, and behaves in a similar manner, - but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, parsing, and attributes. +Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, +seconds. They can be both positive and negative. -Timedeltas are differences in times, expressed in difference units, e.g. days, hours, minutes, seconds. -They can be both positive and negative. +``Timedelta`` is a subclass of ``datetime.timedelta``, and behaves in a similar manner, +but allows compatibility with ``np.timedelta64`` types as well as a host of custom representation, +parsing, and attributes. Parsing ------- @@ -78,15 +77,10 @@ Further, operations among the scalars yield another scalar ``Timedelta``. to_timedelta ~~~~~~~~~~~~ -.. warning:: - - Prior to 0.15.0 ``pd.to_timedelta`` would return a ``Series`` for list-like/Series input, and a ``np.timedelta64`` for scalar input. - It will now return a ``TimedeltaIndex`` for list-like input, ``Series`` for Series input, and ``Timedelta`` for scalar input. 
- - The arguments to ``pd.to_timedelta`` are now ``(arg, unit='ns', box=True)``, previously were ``(arg, box=True, unit='ns')`` as these are more logical. - -Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, or Series from a recognized timedelta format / value into a ``Timedelta`` type. -It will construct Series if the input is a Series, a scalar if the input is scalar-like, otherwise will output a ``TimedeltaIndex``. +Using the top-level ``pd.to_timedelta``, you can convert a scalar, array, list, +or Series from a recognized timedelta format / value into a ``Timedelta`` type. +It will construct Series if the input is a Series, a scalar if the input is +scalar-like, otherwise it will output a ``TimedeltaIndex``. You can parse a single string to a Timedelta: @@ -328,8 +322,6 @@ You can convert a ``Timedelta`` to an `ISO 8601 Duration`_ string with the TimedeltaIndex -------------- -.. versionadded:: 0.15.0 - To generate an index with time delta, you can use either the ``TimedeltaIndex`` or the ``timedelta_range`` constructor. diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 839390c8778aa..b5a261e3acac5 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -229,8 +229,6 @@ To get horizontal bar plots, use the ``barh`` method: Histograms ~~~~~~~~~~ -.. versionadded:: 0.15.0 - Histogram can be drawn by using the :meth:`DataFrame.plot.hist` and :meth:`Series.plot.hist` methods. .. ipython:: python @@ -328,8 +326,6 @@ The ``by`` keyword can be specified to plot grouped histograms: Box Plots ~~~~~~~~~ -.. versionadded:: 0.15.0 - Boxplot can be drawn calling :meth:`Series.plot.box` and :meth:`DataFrame.plot.box`, or :meth:`DataFrame.boxplot` to visualize the distribution of values within each column. 
diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33a6db18db3ca..636bb2dc3e60e 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -485,3 +485,4 @@ Other ^^^^^ - Bug in :func:`eval` where the ``inplace`` parameter was being incorrectly handled (:issue:`16732`) - Several ``NaT`` method docstrings (e.g. :func:`NaT.ctime`) were incorrect (:issue:`17327`) +- The documentation has had references to versions < v0.16 removed and cleaned up (:issue:`17442`, :issue:`17442` & :issue:`#17404`) From 2638a204ecea7f4a9925fa4a05ccc8ebc2999f9d Mon Sep 17 00:00:00 2001 From: Dillon Niederhut Date: Thu, 7 Sep 2017 05:52:11 -0500 Subject: [PATCH 15/26] BUG: revert collision warning (#17298) --- doc/source/indexing.rst | 15 --------------- doc/source/whatsnew/v0.21.0.txt | 24 +++--------------------- pandas/core/generic.py | 8 ++------ pandas/tests/dtypes/test_generic.py | 5 ----- pandas/tests/io/test_pytables.py | 4 ++-- 5 files changed, 7 insertions(+), 49 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 88e62b5d301a3..8474116c38082 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -269,21 +269,6 @@ new column. In 0.21.0 and later, this will raise a ``UserWarning``: 1 2.0 2 3.0 -Similarly, it is possible to create a column with a name which collides with one of Pandas's -built-in methods or attributes, which can cause confusion later when attempting to access -that column as an attribute. This behavior now warns: - -.. code-block:: ipython - - In[4]: df['sum'] = [5., 7., 9.] 
- UserWarning: Column name 'sum' collides with a built-in method, which will cause unexpected attribute behavior - In[5]: df.sum - Out[5]: - - Slicing ranges -------------- diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 636bb2dc3e60e..fa00140fb4abd 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -67,8 +67,8 @@ Improved warnings when attempting to create columns ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ New users are often flummoxed by the relationship between column operations and attribute -access on ``DataFrame`` instances (:issue:`5904` & :issue:`7175`). Two specific instances -of this confusion include attempting to create a new column by setting into an attribute: +access on ``DataFrame`` instances (:issue:`7175`). One specific instance +of this confusion is attempting to create a new column by setting into an attribute: .. code-block:: ipython @@ -86,25 +86,7 @@ This does not raise any obvious exceptions, but also does not create a new colum 1 2.0 2 3.0 -The second source of confusion is creating a column whose name collides with a method or -attribute already in the instance namespace: - -.. code-block:: ipython - - In[4]: df['sum'] = [5., 7., 9.] - -This does not permit that column to be accessed as an attribute: - -.. code-block:: ipython - - In[5]: df.sum - Out[5]: - - -Both of these now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. +Setting a list-like data structure into a new attribute now raise a ``UserWarning`` about the potential for unexpected behavior. See :ref:`Attribute Access `. .. 
_whatsnew_0210.enhancements.other: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index cdb08d8887e05..df5f1a8326acd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1905,10 +1905,6 @@ def _slice(self, slobj, axis=0, kind=None): return result def _set_item(self, key, value): - if isinstance(key, str) and callable(getattr(self, key, None)): - warnings.warn("Column name '{key}' collides with a built-in " - "method, which will cause unexpected attribute " - "behavior".format(key=key), stacklevel=3) self._data.set(key, value) self._clear_item_cache() @@ -3441,8 +3437,8 @@ def __setattr__(self, name, value): object.__setattr__(self, name, value) except (AttributeError, TypeError): if isinstance(self, ABCDataFrame) and (is_list_like(value)): - warnings.warn("Pandas doesn't allow Series to be assigned " - "into nonexistent columns - see " + warnings.warn("Pandas doesn't allow columns to be " + "created via a new attribute name - see " "https://pandas.pydata.org/pandas-docs/" "stable/indexing.html#attribute-access", stacklevel=2) diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 82444d6c94157..bd365f9c3281f 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -48,7 +48,6 @@ def test_abc_types(self): def test_setattr_warnings(): - # GH5904 - Suggestion: Warning for DataFrame colname-methodname clash # GH7175 - GOTCHA: You can't use dot notation to add a column... 
d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 'two': pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])} @@ -78,7 +77,3 @@ def test_setattr_warnings(): # warn when setting column to nonexistent name df.four = df.two + 2 assert df.four.sum() > df.two.sum() - - with tm.assert_produces_warning(UserWarning): - # warn when column has same name as method - df['sum'] = df.two diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b5ecc4d34cd08..9c488cb2389be 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -2011,7 +2011,7 @@ def check(obj, comparator): df['string'] = 'foo' df['float322'] = 1. df['float322'] = df['float322'].astype('float32') - df['boolean'] = df['float322'] > 0 + df['bool'] = df['float322'] > 0 df['time1'] = Timestamp('20130101') df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) @@ -2141,7 +2141,7 @@ def test_table_values_dtypes_roundtrip(self): df1['string'] = 'foo' df1['float322'] = 1. 
df1['float322'] = df1['float322'].astype('float32') - df1['boolean'] = df1['float32'] > 0 + df1['bool'] = df1['float32'] > 0 df1['time1'] = Timestamp('20130101') df1['time2'] = Timestamp('20130102') From a5ee65e3c0c24cf55db81cab460b21b4ee8beef5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 7 Sep 2017 04:28:12 -0700 Subject: [PATCH 16/26] cdef out dtype for _Timestamp._get_field (#17457) --- pandas/_libs/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 30f2982b6f070..b1f794a0030d1 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1277,6 +1277,7 @@ cdef class _Timestamp(datetime): cpdef _get_field(self, field): cdef: int64_t val + ndarray[int32_t] out val = self._maybe_convert_value_to_local() out = get_date_field(np.array([val], dtype=np.int64), field) return int(out[0]) @@ -1284,6 +1285,7 @@ cdef class _Timestamp(datetime): cpdef _get_named_field(self, field): cdef: int64_t val + ndarray[object] out val = self._maybe_convert_value_to_local() out = get_date_name_field(np.array([val], dtype=np.int64), field) return out[0] @@ -1293,9 +1295,7 @@ cdef class _Timestamp(datetime): 'startingMonth', self.freq.kwds.get( 'month', 12)) if self.freq else 12 freqstr = self.freqstr if self.freq else None - val = self.value - if self.tz is not None and not _is_utc(self.tz): - val = tz_convert_single(self.value, 'UTC', self.tz) + val = self._maybe_convert_value_to_local() out = get_start_end_field( np.array([val], dtype=np.int64), field, freqstr, month_kw) return out[0] From 69ca387ebc5cc1e5601087734ca5629e6de63f20 Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 07:35:40 -0400 Subject: [PATCH 17/26] DOC: Add Timestamp, Period, Timedelta, and Interval to api.rst (#17424) --- doc/source/api.rst | 195 ++++++++++++++++++++++++++++++++++++++++ pandas/_libs/period.pyx | 2 +- 2 files changed, 196 insertions(+), 1 deletion(-) diff --git a/doc/source/api.rst 
b/doc/source/api.rst index 12e6c7ad7f630..d34cec86638fb 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1599,6 +1599,201 @@ Conversion TimedeltaIndex.floor TimedeltaIndex.ceil +.. currentmodule:: pandas + +Scalars +------- + +Period +~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period + +Attributes +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.day + Period.dayofweek + Period.dayofyear + Period.days_in_month + Period.daysinmonth + Period.end_time + Period.freq + Period.freqstr + Period.hour + Period.is_leap_year + Period.minute + Period.month + Period.now + Period.ordinal + Period.quarter + Period.qyear + Period.second + Period.start_time + Period.strftime + Period.week + Period.weekday + Period.weekofyear + Period.year + +Methods +~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Period.asfreq + Period.strftime + Period.to_timestamp + +Timestamp +~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timestamp.asm8 + Timestamp.day + Timestamp.dayofweek + Timestamp.dayofyear + Timestamp.days_in_month + Timestamp.daysinmonth + Timestamp.hour + Timestamp.is_leap_year + Timestamp.is_month_end + Timestamp.is_month_start + Timestamp.is_quarter_end + Timestamp.is_quarter_start + Timestamp.is_year_end + Timestamp.is_year_start + Timestamp.max + Timestamp.microsecond + Timestamp.min + Timestamp.month + Timestamp.nanosecond + Timestamp.quarter + Timestamp.resolution + Timestamp.second + Timestamp.tz + Timestamp.tzinfo + Timestamp.value + Timestamp.weekday_name + Timestamp.weekofyear + Timestamp.year + +Methods +~~~~~~~ +.. 
autosummary:: + :toctree: generated/ + + Timestamp.astimezone + Timestamp.ceil + Timestamp.combine + Timestamp.ctime + Timestamp.date + Timestamp.dst + Timestamp.floor + Timestamp.freq + Timestamp.freqstr + Timestamp.from_ordinal + Timestamp.fromtimestamp + Timestamp.isocalendar + Timestamp.isoformat + Timestamp.isoweekday + Timestamp.normalize + Timestamp.now + Timestamp.replace + Timestamp.round + Timestamp.strftime + Timestamp.strptime + Timestamp.time + Timestamp.timetuple + Timestamp.timetz + Timestamp.to_datetime64 + Timestamp.to_julian_date + Timestamp.to_period + Timestamp.to_pydatetime + Timestamp.today + Timestamp.toordinal + Timestamp.tz_convert + Timestamp.tz_localize + Timestamp.tzname + Timestamp.utcfromtimestamp + Timestamp.utcnow + Timestamp.utcoffset + Timestamp.utctimetuple + Timestamp.weekday + +Interval +~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Interval + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree generated/ + + Interval.closed + Interval.closed_left + Interval.closed_right + Interval.left + Interval.mid + Interval.open_left + Interval.open_right + Interval.right + +Timedelta +~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Timedelta + +Properties +~~~~~~~~~~ +.. autosummary:: + :toctree generated/ + + Timedelta.asm8 + Timedelta.components + Timedelta.days + Timedelta.delta + Timedelta.freq + Timedelta.is_populated + Timedelta.max + Timedelta.microseconds + Timedelta.min + Timedelta.nanoseconds + Timedelta.resolution + Timedelta.seconds + Timedelta.value + +Methods +~~~~~~~ +.. autosummary:: + :toctree generated/ + + Timedelta.ceil + Timedelta.floor + Timedelta.isoformat + Timedelta.round + Timdelta.to_pytimedelta + Timedelta.to_timedelta64 + Timedelta.total_seconds + Timedelta.view + Window ------ .. 
currentmodule:: pandas.core.window diff --git a/pandas/_libs/period.pyx b/pandas/_libs/period.pyx index 106d4c4bae2fc..c1d01fd6be948 100644 --- a/pandas/_libs/period.pyx +++ b/pandas/_libs/period.pyx @@ -1102,7 +1102,7 @@ cdef class _Period(object): class Period(_Period): """ - Represents an period of time + Represents a period of time Parameters ---------- From c12062cca8d19e3f21204cffe8aa8eef8959391e Mon Sep 17 00:00:00 2001 From: majiang Date: Thu, 7 Sep 2017 20:41:24 +0900 Subject: [PATCH 18/26] DOC: to_json (#17461) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index df5f1a8326acd..8d16b079ba2c8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1265,7 +1265,7 @@ def to_json(self, path_or_buf=None, orient=None, date_format=None, Parameters ---------- path_or_buf : the path or buffer to write the result string - if this is None, return a StringIO of the converted string + if this is None, return the converted string orient : string * Series From 2be14054d9e66b6c896a41f5db6824024ad4c120 Mon Sep 17 00:00:00 2001 From: jschendel Date: Thu, 7 Sep 2017 05:49:27 -0600 Subject: [PATCH 19/26] BUG: Index._searchsorted_monotonic(..., side='right') returns the left side position for monotonic decreasing indexes (#17272) --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/common.py | 59 +++++++++++++++++-- .../indexes/datetimes/test_datetimelike.py | 4 +- pandas/tests/indexes/period/test_period.py | 4 +- pandas/tests/indexes/test_base.py | 3 +- pandas/tests/indexes/test_numeric.py | 12 ++-- pandas/tests/indexes/test_range.py | 3 +- pandas/tests/indexing/test_interval.py | 56 +++++++++++------- 9 files changed, 111 insertions(+), 33 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index fa00140fb4abd..d3c61adccc7a6 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ 
b/doc/source/whatsnew/v0.21.0.txt @@ -400,6 +400,7 @@ Indexing - Bug in ``.isin()`` in which checking membership in empty ``Series`` objects raised an error (:issue:`16991`) - Bug in ``CategoricalIndex`` reindexing in which specified indices containing duplicates were not being respected (:issue:`17323`) - Bug in intersection of ``RangeIndex`` with negative step (:issue:`17296`) +- Bug in ``IntervalIndex`` where performing a scalar lookup fails for included right endpoints of non-overlapping monotonic decreasing indexes (:issue:`16417`, :issue:`17271`) I/O ^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a9098126a38e3..ef5f68936044a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3465,7 +3465,7 @@ def _searchsorted_monotonic(self, label, side='left'): # everything for it to work (element ordering, search side and # resulting value). pos = self[::-1].searchsorted(label, side='right' if side == 'left' - else 'right') + else 'left') return len(self) - pos raise ValueError('index must be monotonic increasing or decreasing') diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 1fdc08d68eb26..90618cd6e235f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -11,6 +11,7 @@ RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, IntervalIndex, notna, isna) +from pandas.core.indexes.base import InvalidIndexError from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin from pandas.core.dtypes.common import needs_i8_conversion from pandas._libs.tslib import iNaT @@ -138,9 +139,14 @@ def test_get_indexer_consistency(self): if isinstance(index, IntervalIndex): continue - indexer = index.get_indexer(index[0:2]) - assert isinstance(indexer, np.ndarray) - assert indexer.dtype == np.intp + if index.is_unique or isinstance(index, CategoricalIndex): + indexer = index.get_indexer(index[0:2]) + assert isinstance(indexer, 
np.ndarray) + assert indexer.dtype == np.intp + else: + e = "Reindexing only valid with uniquely valued Index objects" + with tm.assert_raises_regex(InvalidIndexError, e): + indexer = index.get_indexer(index[0:2]) indexer, _ = index.get_indexer_non_unique(index[0:2]) assert isinstance(indexer, np.ndarray) @@ -632,7 +638,8 @@ def test_difference_base(self): pass elif isinstance(idx, (DatetimeIndex, TimedeltaIndex)): assert result.__class__ == answer.__class__ - tm.assert_numpy_array_equal(result.asi8, answer.asi8) + tm.assert_numpy_array_equal(result.sort_values().asi8, + answer.sort_values().asi8) else: result = first.difference(case) assert tm.equalContents(result, answer) @@ -954,3 +961,47 @@ def test_join_self_unique(self, how): if index.is_unique: joined = index.join(index, how=how) assert (index == joined).all() + + def test_searchsorted_monotonic(self): + # GH17271 + for index in self.indices.values(): + # not implemented for tuple searches in MultiIndex + # or Intervals searches in IntervalIndex + if isinstance(index, (MultiIndex, IntervalIndex)): + continue + + # nothing to test if the index is empty + if index.empty: + continue + value = index[0] + + # determine the expected results (handle dupes for 'right') + expected_left, expected_right = 0, (index == value).argmin() + if expected_right == 0: + # all values are the same, expected_right should be length + expected_right = len(index) + + # test _searchsorted_monotonic in all cases + # test searchsorted only for increasing + if index.is_monotonic_increasing: + ssm_left = index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + + ss_left = index.searchsorted(value, side='left') + assert expected_left == ss_left + + ss_right = index.searchsorted(value, side='right') + assert expected_right == ss_right + elif index.is_monotonic_decreasing: + ssm_left = 
index._searchsorted_monotonic(value, side='left') + assert expected_left == ssm_left + + ssm_right = index._searchsorted_monotonic(value, side='right') + assert expected_right == ssm_right + else: + # non-monotonic should raise. + with pytest.raises(ValueError): + index._searchsorted_monotonic(value, side='left') diff --git a/pandas/tests/indexes/datetimes/test_datetimelike.py b/pandas/tests/indexes/datetimes/test_datetimelike.py index 3b970ee382521..538e10e6011ec 100644 --- a/pandas/tests/indexes/datetimes/test_datetimelike.py +++ b/pandas/tests/indexes/datetimes/test_datetimelike.py @@ -12,7 +12,9 @@ class TestDatetimeIndex(DatetimeLike): _holder = DatetimeIndex def setup_method(self, method): - self.indices = dict(index=tm.makeDateIndex(10)) + self.indices = dict(index=tm.makeDateIndex(10), + index_dec=date_range('20130110', periods=10, + freq='-1D')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index e24e2ad936e2c..51f7d13cb0638 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -18,7 +18,9 @@ class TestPeriodIndex(DatetimeLike): _multiprocess_can_split_ = True def setup_method(self, method): - self.indices = dict(index=tm.makePeriodIndex(10)) + self.indices = dict(index=tm.makePeriodIndex(10), + index_dec=period_range('20130101', periods=10, + freq='D')[::-1]) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f96dbdcfb8acf..d69fbbcdf4bf6 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -46,7 +46,8 @@ def setup_method(self, method): catIndex=tm.makeCategoricalIndex(100), empty=Index([]), tuples=MultiIndex.from_tuples(lzip( - ['foo', 'bar', 'baz'], [1, 2, 3]))) + ['foo', 'bar', 'baz'], [1, 2, 3])), + repeats=Index([0, 0, 1, 1, 2, 2])) self.setup_indices() def create_index(self): diff 
--git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index 1a0a38c173284..7e7e10e4aeabe 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -181,7 +181,9 @@ class TestFloat64Index(Numeric): def setup_method(self, method): self.indices = dict(mixed=Float64Index([1.5, 2, 3, 4, 5]), - float=Float64Index(np.arange(5) * 2.5)) + float=Float64Index(np.arange(5) * 2.5), + mixed_dec=Float64Index([5, 4, 3, 2, 1.5]), + float_dec=Float64Index(np.arange(4, -1, -1) * 2.5)) self.setup_indices() def create_index(self): @@ -654,7 +656,8 @@ class TestInt64Index(NumericInt): _holder = Int64Index def setup_method(self, method): - self.indices = dict(index=Int64Index(np.arange(0, 20, 2))) + self.indices = dict(index=Int64Index(np.arange(0, 20, 2)), + index_dec=Int64Index(np.arange(19, -1, -1))) self.setup_indices() def create_index(self): @@ -949,8 +952,9 @@ class TestUInt64Index(NumericInt): _holder = UInt64Index def setup_method(self, method): - self.indices = dict(index=UInt64Index([2**63, 2**63 + 10, 2**63 + 15, - 2**63 + 20, 2**63 + 25])) + vals = [2**63, 2**63 + 10, 2**63 + 15, 2**63 + 20, 2**63 + 25] + self.indices = dict(index=UInt64Index(vals), + index_dec=UInt64Index(reversed(vals))) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 06c8f0ee392c7..d206c36ee51c9 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -25,7 +25,8 @@ class TestRangeIndex(Numeric): _compat_props = ['shape', 'ndim', 'size', 'itemsize'] def setup_method(self, method): - self.indices = dict(index=RangeIndex(0, 20, 2, name='foo')) + self.indices = dict(index=RangeIndex(0, 20, 2, name='foo'), + index_dec=RangeIndex(18, -1, -2, name='bar')) self.setup_indices() def create_index(self): diff --git a/pandas/tests/indexing/test_interval.py b/pandas/tests/indexing/test_interval.py index 
be6e5e1cffb2e..31a94abcd99a5 100644 --- a/pandas/tests/indexing/test_interval.py +++ b/pandas/tests/indexing/test_interval.py @@ -3,6 +3,7 @@ import pandas as pd from pandas import Series, DataFrame, IntervalIndex, Interval +from pandas.compat import product import pandas.util.testing as tm @@ -14,16 +15,6 @@ def setup_method(self, method): def test_loc_with_scalar(self): s = self.s - expected = 0 - - result = s.loc[0.5] - assert result == expected - - result = s.loc[1] - assert result == expected - - with pytest.raises(KeyError): - s.loc[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s.loc[:3]) @@ -42,16 +33,6 @@ def test_loc_with_scalar(self): def test_getitem_with_scalar(self): s = self.s - expected = 0 - - result = s[0.5] - assert result == expected - - result = s[1] - assert result == expected - - with pytest.raises(KeyError): - s[0] expected = s.iloc[:3] tm.assert_series_equal(expected, s[:3]) @@ -67,6 +48,41 @@ def test_getitem_with_scalar(self): expected = s.iloc[2:5] tm.assert_series_equal(expected, s[s >= 2]) + @pytest.mark.parametrize('direction, closed', + product(('increasing', 'decreasing'), + ('left', 'right', 'neither', 'both'))) + def test_nonoverlapping_monotonic(self, direction, closed): + tpls = [(0, 1), (2, 3), (4, 5)] + if direction == 'decreasing': + tpls = reversed(tpls) + + idx = IntervalIndex.from_tuples(tpls, closed=closed) + s = Series(list('abc'), idx) + + for key, expected in zip(idx.left, s): + if idx.closed_left: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.right, s): + if idx.closed_right: + assert s[key] == expected + assert s.loc[key] == expected + else: + with pytest.raises(KeyError): + s[key] + with pytest.raises(KeyError): + s.loc[key] + + for key, expected in zip(idx.mid, s): + assert s[key] == expected + assert s.loc[key] == expected + def test_with_interval(self): s = self.s 
From aafa941a0cd5f17fffba88505b3a5fadaa457a71 Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Thu, 7 Sep 2017 14:56:33 +0300 Subject: [PATCH 20/26] COMPAT: Pypy tweaks (#17351) --- doc/source/whatsnew/v0.21.0.txt | 11 ++++- pandas/_libs/src/ujson/python/JSONtoObj.c | 16 +++---- pandas/io/parsers.py | 1 + pandas/tests/indexes/test_base.py | 16 +++++-- pandas/tests/indexes/test_multi.py | 13 +++++- pandas/tests/io/parser/test_parsers.py | 52 ++++++++++++++++++++++- 6 files changed, 92 insertions(+), 17 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d3c61adccc7a6..f50052347cfb5 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -371,13 +371,11 @@ Performance Improvements Bug Fixes ~~~~~~~~~ - Conversion ^^^^^^^^^^ - Bug in assignment against datetime-like data with ``int`` may incorrectly convert to datetime-like (:issue:`14145`) - Bug in assignment against ``int64`` data with ``np.ndarray`` with ``float64`` dtype may keep ``int64`` dtype (:issue:`14001`) -- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, so an approximation is used instead (:issue:`17228`) - Fixed the return type of ``IntervalIndex.is_non_overlapping_monotonic`` to be a Python ``bool`` for consistency with similar attributes/methods. Previously returned a ``numpy.bool_``. 
(:issue:`17237`) - Bug in ``IntervalIndex.is_non_overlapping_monotonic`` when intervals are closed on both sides and overlap at a point (:issue:`16560`) - Bug in :func:`Series.fillna` returns frame when ``inplace=True`` and ``value`` is dict (:issue:`16156`) @@ -463,6 +461,15 @@ Categorical the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) +PyPy +^^^^ + +- Compatibility with PyPy in :func:`read_csv` with ``usecols=[]`` and + :func:`read_json` (:issue:`17351`) +- Split tests into cases for CPython and PyPy where needed, which highlights the fragility + of index matching with ``float('nan')``, ``np.nan`` and ``NAT`` (:issue:`17351`) +- Fix :func:`DataFrame.memory_usage` to support PyPy. Objects on PyPy do not have a fixed size, + so an approximation is used instead (:issue:`17228`) Other ^^^^^ diff --git a/pandas/_libs/src/ujson/python/JSONtoObj.c b/pandas/_libs/src/ujson/python/JSONtoObj.c index b0132532c16af..85cf1d5e5e7a1 100644 --- a/pandas/_libs/src/ujson/python/JSONtoObj.c +++ b/pandas/_libs/src/ujson/python/JSONtoObj.c @@ -409,7 +409,7 @@ JSOBJ Object_npyEndObject(void *prv, JSOBJ obj) { } int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyObject *label; + PyObject *label, *labels; npy_intp labelidx; // add key to label array, value to values array NpyArrContext *npyarr = (NpyArrContext *)obj; @@ -424,11 +424,11 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { if (!npyarr->labels[labelidx]) { npyarr->labels[labelidx] = PyList_New(0); } - + labels = npyarr->labels[labelidx]; // only fill label array once, assumes all column labels are the same // for 2-dimensional arrays. 
- if (PyList_GET_SIZE(npyarr->labels[labelidx]) <= npyarr->elcount) { - PyList_Append(npyarr->labels[labelidx], label); + if (PyList_Check(labels) && PyList_GET_SIZE(labels) <= npyarr->elcount) { + PyList_Append(labels, label); } if (((JSONObjectDecoder *)npyarr->dec)->arrayAddItem(prv, obj, value)) { @@ -439,16 +439,16 @@ int Object_npyObjectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { } int Object_objectAddKey(void *prv, JSOBJ obj, JSOBJ name, JSOBJ value) { - PyDict_SetItem(obj, name, value); + int ret = PyDict_SetItem(obj, name, value); Py_DECREF((PyObject *)name); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } int Object_arrayAddItem(void *prv, JSOBJ obj, JSOBJ value) { - PyList_Append(obj, value); + int ret = PyList_Append(obj, value); Py_DECREF((PyObject *)value); - return 1; + return ret == 0 ? 1 : 0; } JSOBJ Object_newString(void *prv, wchar_t *start, wchar_t *end) { diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 8b1a921536a1d..6adf154aabba7 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1716,6 +1716,7 @@ def _set_noconvert_columns(self): # A set of integers will be converted to a list in # the correct order every single time. 
usecols = list(self.usecols) + usecols.sort() elif (callable(self.usecols) or self.usecols_dtype not in ('empty', None)): # The names attribute should have the correct columns diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d69fbbcdf4bf6..fa73c9fc7b722 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -9,7 +9,7 @@ from pandas.tests.indexes.common import Base from pandas.compat import (range, lrange, lzip, u, - text_type, zip, PY3, PY36) + text_type, zip, PY3, PY36, PYPY) import operator import numpy as np @@ -1370,13 +1370,21 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_not_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, False])) + + @pytest.mark.skipif(not PYPY, reason="np.nan is float('nan') on PyPy") + def test_isin_nan_pypy(self): + tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), + np.array([False, True])) + + def test_isin_nan_common(self): tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([np.nan]), np.array([False, True])) tm.assert_numpy_array_equal(Index(['a', pd.NaT]).isin([pd.NaT]), np.array([False, True])) - tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([float('nan')]), - np.array([False, False])) tm.assert_numpy_array_equal(Index(['a', np.nan]).isin([pd.NaT]), np.array([False, False])) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 798d244468961..86308192c9166 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -14,7 +14,7 @@ from pandas import (CategoricalIndex, DataFrame, Index, MultiIndex, compat, date_range, period_range) -from pandas.compat import PY3, long, lrange, lzip, range, u +from pandas.compat import PY3, long, lrange, lzip, range, u, PYPY 
from pandas.errors import PerformanceWarning, UnsortedIndexError from pandas.core.indexes.base import InvalidIndexError from pandas._libs import lib @@ -2571,13 +2571,22 @@ def test_isin(self): assert len(result) == 0 assert result.dtype == np.bool_ - def test_isin_nan(self): + @pytest.mark.skipif(PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_not_pypy(self): idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), np.array([False, False])) tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), np.array([False, False])) + @pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy") + def test_isin_nan_pypy(self): + idx = MultiIndex.from_arrays([['foo', 'bar'], [1.0, np.nan]]) + tm.assert_numpy_array_equal(idx.isin([('bar', np.nan)]), + np.array([False, True])) + tm.assert_numpy_array_equal(idx.isin([('bar', float('nan'))]), + np.array([False, True])) + def test_isin_level_kwarg(self): idx = MultiIndex.from_arrays([['qux', 'baz', 'foo', 'bar'], np.arange( 4)]) diff --git a/pandas/tests/io/parser/test_parsers.py b/pandas/tests/io/parser/test_parsers.py index 2fee2451c5e36..0ea4757b10e94 100644 --- a/pandas/tests/io/parser/test_parsers.py +++ b/pandas/tests/io/parser/test_parsers.py @@ -3,8 +3,10 @@ import os import pandas.util.testing as tm -from pandas import read_csv, read_table +from pandas import read_csv, read_table, DataFrame from pandas.core.common import AbstractMethodError +from pandas._libs.lib import Timestamp +from pandas.compat import StringIO from .common import ParserTests from .header import HeaderTests @@ -100,3 +102,51 @@ def read_table(self, *args, **kwds): kwds = kwds.copy() kwds['engine'] = self.engine return read_table(*args, **kwds) + + +class TestUnsortedUsecols(object): + def test_override__set_noconvert_columns(self): + # GH 17351 - usecols needs to be sorted in _setnoconvert_columns + # based on the test_usecols_with_parse_dates test from 
usecols.py + from pandas.io.parsers import CParserWrapper, TextFileReader + + s = """a,b,c,d,e + 0,1,20140101,0900,4 + 0,1,20140102,1000,4""" + + parse_dates = [[1, 2]] + cols = { + 'a': [0, 0], + 'c_d': [ + Timestamp('2014-01-01 09:00:00'), + Timestamp('2014-01-02 10:00:00') + ] + } + expected = DataFrame(cols, columns=['c_d', 'a']) + + class MyTextFileReader(TextFileReader): + def __init__(self): + self._currow = 0 + self.squeeze = False + + class MyCParserWrapper(CParserWrapper): + def _set_noconvert_columns(self): + if self.usecols_dtype == 'integer': + # self.usecols is a set, which is documented as unordered + # but in practice, a CPython set of integers is sorted. + # In other implementations this assumption does not hold. + # The following code simulates a different order, which + # before GH 17351 would cause the wrong columns to be + # converted via the parse_dates parameter + self.usecols = list(self.usecols) + self.usecols.reverse() + return CParserWrapper._set_noconvert_columns(self) + + parser = MyTextFileReader() + parser.options = {'usecols': [0, 2, 3], + 'parse_dates': parse_dates, + 'delimiter': ','} + parser._engine = MyCParserWrapper(StringIO(s), **parser.options) + df = parser.read() + + tm.assert_frame_equal(df, expected) From 1c01a2b195147c1bf527170d5ad984f3b07f91ec Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 7 Sep 2017 17:46:12 -0700 Subject: [PATCH 21/26] Replace * imports with explicit imports; remove unused declared constants (#17470) --- pandas/_libs/src/skiplist.pyx | 1 - pandas/_libs/window.pyx | 38 ++++++----------------------------- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/src/skiplist.pyx b/pandas/_libs/src/skiplist.pyx index 559b529822a69..1524dca38d0e0 100644 --- a/pandas/_libs/src/skiplist.pyx +++ b/pandas/_libs/src/skiplist.pyx @@ -15,7 +15,6 @@ cdef double Log2(double x): return log(x) / log(2.) 
cimport numpy as np -from numpy cimport * import numpy as np from random import random diff --git a/pandas/_libs/window.pyx b/pandas/_libs/window.pyx index 9fb3d0662eb4f..b6bd6f92f6199 100644 --- a/pandas/_libs/window.pyx +++ b/pandas/_libs/window.pyx @@ -1,55 +1,29 @@ # cython: profile=False # cython: boundscheck=False, wraparound=False, cdivision=True -from numpy cimport * +from cython cimport Py_ssize_t + cimport numpy as np import numpy as np cimport cython -import_array() +np.import_array() cimport util from libc.stdlib cimport malloc, free -from numpy cimport NPY_INT8 as NPY_int8 -from numpy cimport NPY_INT16 as NPY_int16 -from numpy cimport NPY_INT32 as NPY_int32 -from numpy cimport NPY_INT64 as NPY_int64 -from numpy cimport NPY_FLOAT16 as NPY_float16 -from numpy cimport NPY_FLOAT32 as NPY_float32 -from numpy cimport NPY_FLOAT64 as NPY_float64 - -from numpy cimport (int8_t, int16_t, int32_t, int64_t, uint8_t, uint16_t, - uint32_t, uint64_t, float16_t, float32_t, float64_t) - -int8 = np.dtype(np.int8) -int16 = np.dtype(np.int16) -int32 = np.dtype(np.int32) -int64 = np.dtype(np.int64) -float16 = np.dtype(np.float16) -float32 = np.dtype(np.float32) -float64 = np.dtype(np.float64) - -cdef np.int8_t MINint8 = np.iinfo(np.int8).min -cdef np.int16_t MINint16 = np.iinfo(np.int16).min -cdef np.int32_t MINint32 = np.iinfo(np.int32).min -cdef np.int64_t MINint64 = np.iinfo(np.int64).min -cdef np.float16_t MINfloat16 = np.NINF + +from numpy cimport ndarray, double_t, int64_t, float64_t + cdef np.float32_t MINfloat32 = np.NINF cdef np.float64_t MINfloat64 = np.NINF -cdef np.int8_t MAXint8 = np.iinfo(np.int8).max -cdef np.int16_t MAXint16 = np.iinfo(np.int16).max -cdef np.int32_t MAXint32 = np.iinfo(np.int32).max -cdef np.int64_t MAXint64 = np.iinfo(np.int64).max -cdef np.float16_t MAXfloat16 = np.inf cdef np.float32_t MAXfloat32 = np.inf cdef np.float64_t MAXfloat64 = np.inf cdef double NaN = np.NaN -cdef double nan = NaN cdef inline int int_max(int a, int b): return a 
if a >= b else b cdef inline int int_min(int a, int b): return a if a <= b else b From e0e837aa5f3320ab1467bf8aecce5ea906eb8dbb Mon Sep 17 00:00:00 2001 From: Sam Foo Date: Thu, 7 Sep 2017 20:47:52 -0400 Subject: [PATCH 22/26] Removed Timedelta.is_populated and fixed spelling errors (#17469) --- doc/source/api.rst | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d34cec86638fb..c32a541d19605 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1704,7 +1704,7 @@ Methods Timestamp.floor Timestamp.freq Timestamp.freqstr - Timestamp.from_ordinal + Timestamp.fromordinal Timestamp.fromtimestamp Timestamp.isocalendar Timestamp.isoformat @@ -1769,9 +1769,7 @@ Properties Timedelta.asm8 Timedelta.components Timedelta.days - Timedelta.delta Timedelta.freq - Timedelta.is_populated Timedelta.max Timedelta.microseconds Timedelta.min @@ -1789,10 +1787,9 @@ Methods Timedelta.floor Timedelta.isoformat Timedelta.round - Timdelta.to_pytimedelta + Timedelta.to_pytimedelta Timedelta.to_timedelta64 Timedelta.total_seconds - Timedelta.view Window ------ From 72da8589a6b6bc63a1c8e7a7916336c45a449a7f Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Thu, 7 Sep 2017 19:50:10 -0700 Subject: [PATCH 23/26] PERF: Implement get_freq_code in cython frequencies (#17422) --- asv_bench/benchmarks/period.py | 29 ++++ pandas/_libs/tslibs/frequencies.pyx | 201 ++++++++++++++++++++++++++++ pandas/tseries/frequencies.py | 79 +---------- setup.py | 4 + 4 files changed, 235 insertions(+), 78 deletions(-) create mode 100644 pandas/_libs/tslibs/frequencies.pyx diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index f9837191a7bae..78d66295f28cc 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -2,6 +2,35 @@ from pandas import Series, Period, PeriodIndex, date_range +class PeriodProperties(object): + def setup(self): + self.per = Period('2012-06-01', freq='M') + + def 
time_year(self): + self.per.year + + def time_month(self): + self.per.month + + def time_quarter(self): + self.per.quarter + + def time_day(self): + self.per.day + + def time_hour(self): + self.per.hour + + def time_minute(self): + self.per.second + + def time_second(self): + self.per.second + + def time_leap_year(self): + self.per.is_leapyear + + class Constructor(object): goal_time = 0.2 diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx new file mode 100644 index 0000000000000..35429e8ae87f0 --- /dev/null +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +# cython: profile=False +import re + +cimport cython + +import numpy as np +cimport numpy as np +np.import_array() + +from util cimport is_integer_object + + +cpdef get_freq_code(freqstr): + """ + Return freq str or tuple to freq code and stride (mult) + + Parameters + ---------- + freqstr : str or tuple + + Returns + ------- + return : tuple of base frequency code and stride (mult) + + Example + ------- + >>> get_freq_code('3D') + (6000, 3) + + >>> get_freq_code('D') + (6000, 1) + + >>> get_freq_code(('D', 3)) + (6000, 3) + """ + if getattr(freqstr, '_typ', None) == 'dateoffset': + freqstr = (freqstr.rule_code, freqstr.n) + + if isinstance(freqstr, tuple): + if (is_integer_object(freqstr[0]) and + is_integer_object(freqstr[1])): + # e.g., freqstr = (2000, 1) + return freqstr + else: + # e.g., freqstr = ('T', 5) + try: + code = _period_str_to_code(freqstr[0]) + stride = freqstr[1] + except: + if is_integer_object(freqstr[1]): + raise + code = _period_str_to_code(freqstr[1]) + stride = freqstr[0] + return code, stride + + if is_integer_object(freqstr): + return (freqstr, 1) + + base, stride = _base_and_stride(freqstr) + code = _period_str_to_code(base) + + return code, stride + + +# hack to handle WOM-1MON +opattern = re.compile( + r'([\-]?\d*|[\-]?\d*\.\d*)\s*([A-Za-z]+([\-][\dA-Za-z\-]+)?)' +) + + +cpdef _base_and_stride(freqstr): + """ + 
Return base freq and stride info from string representation + + Examples + -------- + _freq_and_stride('5Min') -> 'Min', 5 + """ + groups = opattern.match(freqstr) + + if not groups: + raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) + + stride = groups.group(1) + + if len(stride): + stride = int(stride) + else: + stride = 1 + + base = groups.group(2) + + return (base, stride) + + +# --------------------------------------------------------------------- +# Period codes + +# period frequency constants corresponding to scikits timeseries +# originals +_period_code_map = { + # Annual freqs with various fiscal year ends. + # eg, 2005 for A-FEB runs Mar 1, 2004 to Feb 28, 2005 + "A-DEC": 1000, # Annual - December year end + "A-JAN": 1001, # Annual - January year end + "A-FEB": 1002, # Annual - February year end + "A-MAR": 1003, # Annual - March year end + "A-APR": 1004, # Annual - April year end + "A-MAY": 1005, # Annual - May year end + "A-JUN": 1006, # Annual - June year end + "A-JUL": 1007, # Annual - July year end + "A-AUG": 1008, # Annual - August year end + "A-SEP": 1009, # Annual - September year end + "A-OCT": 1010, # Annual - October year end + "A-NOV": 1011, # Annual - November year end + + # Quarterly frequencies with various fiscal year ends. 
+ # eg, Q42005 for Q-OCT runs Aug 1, 2005 to Oct 31, 2005 + "Q-DEC": 2000, # Quarterly - December year end + "Q-JAN": 2001, # Quarterly - January year end + "Q-FEB": 2002, # Quarterly - February year end + "Q-MAR": 2003, # Quarterly - March year end + "Q-APR": 2004, # Quarterly - April year end + "Q-MAY": 2005, # Quarterly - May year end + "Q-JUN": 2006, # Quarterly - June year end + "Q-JUL": 2007, # Quarterly - July year end + "Q-AUG": 2008, # Quarterly - August year end + "Q-SEP": 2009, # Quarterly - September year end + "Q-OCT": 2010, # Quarterly - October year end + "Q-NOV": 2011, # Quarterly - November year end + + "M": 3000, # Monthly + + "W-SUN": 4000, # Weekly - Sunday end of week + "W-MON": 4001, # Weekly - Monday end of week + "W-TUE": 4002, # Weekly - Tuesday end of week + "W-WED": 4003, # Weekly - Wednesday end of week + "W-THU": 4004, # Weekly - Thursday end of week + "W-FRI": 4005, # Weekly - Friday end of week + "W-SAT": 4006, # Weekly - Saturday end of week + + "B": 5000, # Business days + "D": 6000, # Daily + "H": 7000, # Hourly + "T": 8000, # Minutely + "S": 9000, # Secondly + "L": 10000, # Millisecondly + "U": 11000, # Microsecondly + "N": 12000, # Nanosecondly +} + +# Yearly aliases; careful not to put these in _reverse_period_code_map +_period_code_map.update({'Y' + key[1:]: _period_code_map[key] + for key in _period_code_map + if key.startswith('A-')}) + +_period_code_map.update({ + "Q": 2000, # Quarterly - December year end (default quarterly) + "A": 1000, # Annual + "W": 4000, # Weekly + "C": 5000, # Custom Business Day + }) + +_dont_uppercase = set(('MS', 'ms')) + +_lite_rule_alias = { + 'W': 'W-SUN', + 'Q': 'Q-DEC', + + 'A': 'A-DEC', # YearEnd(month=12), + 'Y': 'A-DEC', + 'AS': 'AS-JAN', # YearBegin(month=1), + 'YS': 'AS-JAN', + 'BA': 'BA-DEC', # BYearEnd(month=12), + 'BY': 'BA-DEC', + 'BAS': 'BAS-JAN', # BYearBegin(month=1), + 'BYS': 'BAS-JAN', + + 'Min': 'T', + 'min': 'T', + 'ms': 'L', + 'us': 'U', + 'ns': 'N'} + +_INVALID_FREQ_ERROR = 
"Invalid frequency: {0}" + + +cpdef _period_str_to_code(freqstr): + freqstr = _lite_rule_alias.get(freqstr, freqstr) + + if freqstr not in _dont_uppercase: + lower = freqstr.lower() + freqstr = _lite_rule_alias.get(lower, freqstr) + + if freqstr not in _dont_uppercase: + freqstr = freqstr.upper() + try: + return _period_code_map[freqstr] + except KeyError: + raise ValueError(_INVALID_FREQ_ERROR.format(freqstr)) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 7f34bcaf52926..6644a33245a84 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.common import ( - is_integer, is_period_arraylike, is_timedelta64_dtype, is_datetime64_dtype) @@ -21,6 +20,7 @@ from pandas._libs import lib, tslib from pandas._libs.tslib import Timedelta +from pandas._libs.tslibs.frequencies import get_freq_code, _base_and_stride from pytz import AmbiguousTimeError @@ -298,58 +298,6 @@ def get_freq(freq): return freq -def get_freq_code(freqstr): - """ - Return freq str or tuple to freq code and stride (mult) - - Parameters - ---------- - freqstr : str or tuple - - Returns - ------- - return : tuple of base frequency code and stride (mult) - - Example - ------- - >>> get_freq_code('3D') - (6000, 3) - - >>> get_freq_code('D') - (6000, 1) - - >>> get_freq_code(('D', 3)) - (6000, 3) - """ - if isinstance(freqstr, DateOffset): - freqstr = (freqstr.rule_code, freqstr.n) - - if isinstance(freqstr, tuple): - if (is_integer(freqstr[0]) and - is_integer(freqstr[1])): - # e.g., freqstr = (2000, 1) - return freqstr - else: - # e.g., freqstr = ('T', 5) - try: - code = _period_str_to_code(freqstr[0]) - stride = freqstr[1] - except: - if is_integer(freqstr[1]): - raise - code = _period_str_to_code(freqstr[1]) - stride = freqstr[0] - return code, stride - - if is_integer(freqstr): - return (freqstr, 1) - - base, stride = _base_and_stride(freqstr) - code = 
_period_str_to_code(base) - - return code, stride - - def _get_freq_str(base, mult=1): code = _reverse_period_code_map.get(base) if mult == 1: @@ -577,31 +525,6 @@ def to_offset(freq): ) -def _base_and_stride(freqstr): - """ - Return base freq and stride info from string representation - - Examples - -------- - _freq_and_stride('5Min') -> 'Min', 5 - """ - groups = opattern.match(freqstr) - - if not groups: - raise ValueError("Could not evaluate {freq}".format(freq=freqstr)) - - stride = groups.group(1) - - if len(stride): - stride = int(stride) - else: - stride = 1 - - base = groups.group(2) - - return (base, stride) - - def get_base_alias(freqstr): """ Returns the base frequency alias, e.g., '5D' -> 'D' diff --git a/setup.py b/setup.py index 404e12b89ab75..e18aa62226e75 100755 --- a/setup.py +++ b/setup.py @@ -342,6 +342,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/sparse.pyx', 'pandas/_libs/parsers.pyx', 'pandas/_libs/tslibs/timezones.pyx', + 'pandas/_libs/tslibs/frequencies.pyx', 'pandas/io/sas/sas.pyx'] def initialize_options(self): @@ -496,6 +497,8 @@ def pxd(name): 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, + '_libs.tslibs.frequencies': {'pyxfile': '_libs/tslibs/frequencies', + 'pxdfiles': ['_libs/src/util']}, '_libs.index': {'pyxfile': '_libs/index', 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c'], @@ -658,6 +661,7 @@ def pxd(name): 'pandas.io.formats', 'pandas.io.clipboard', 'pandas._libs', + 'pandas._libs.tslibs', 'pandas.plotting', 'pandas.stats', 'pandas.types', From 87482c2e82f14a226221a8580eaf89ef97e9b833 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 10 Sep 2017 09:16:32 -0700 Subject: [PATCH 24/26] whitespace cleanup --- pandas/_libs/tslibs/frequencies.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/tslibs/frequencies.pyx b/pandas/_libs/tslibs/frequencies.pyx 
index a86e25184432c..f7889d76abbc7 100644 --- a/pandas/_libs/tslibs/frequencies.pyx +++ b/pandas/_libs/tslibs/frequencies.pyx @@ -150,7 +150,6 @@ _period_code_map = { "N": 12000, # Nanosecondly } - _reverse_period_code_map = { _period_code_map[key]: key for key in _period_code_map} From 7985ee24d4da39b9778dbc210f2a7700404330c4 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 10 Sep 2017 09:17:45 -0700 Subject: [PATCH 25/26] Remove extraneous depends from setup --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 3816b43f98cbf..2f285cf2bfbbc 100755 --- a/setup.py +++ b/setup.py @@ -482,8 +482,7 @@ def pxd(name): 'pandas/_libs/src/period_helper.c']}, '_libs.tslibs.timezones': {'pyxfile': '_libs/tslibs/timezones'}, '_libs.period': {'pyxfile': '_libs/period', - 'depends': tseries_depends + [ - 'pandas/_libs/tslibs/timezones'], + 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c', 'pandas/_libs/src/period_helper.c']}, From fb55a8c67e19868820f55f0e1d956419ef0ef101 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 10 Sep 2017 17:30:08 -0700 Subject: [PATCH 26/26] Remove accidentally re-introduced dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 2f285cf2bfbbc..434ca64473916 100755 --- a/setup.py +++ b/setup.py @@ -475,7 +475,7 @@ def pxd(name): 'depends': (['pandas/_libs/src/klib/khash_python.h'] + _pxi_dep['hashtable'])}, '_libs.tslib': {'pyxfile': '_libs/tslib', - 'pxdfiles': ['_libs/src/util', '_libs/lib'], + 'pxdfiles': ['_libs/src/util'], 'depends': tseries_depends, 'sources': ['pandas/_libs/src/datetime/np_datetime.c', 'pandas/_libs/src/datetime/np_datetime_strings.c',