diff --git a/doc/source/api.rst b/doc/source/api.rst index 4ffeb5035912f..28d4567027572 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -218,7 +218,6 @@ Top-level dealing with datetimelike to_timedelta date_range bdate_range - cdate_range period_range timedelta_range infer_freq diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 7399deb1319d8..d2d5ee344591a 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -76,21 +76,21 @@ Overview Following table shows the type of time-related classes pandas can handle and how to create them. -================= =============================== ================================================== +================= =============================== =================================================================== Class Remarks How to create -================= =============================== ================================================== -``Timestamp`` Represents a single time stamp ``to_datetime``, ``Timestamp`` -``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``DatetimeIndex`` +================= =============================== =================================================================== +``Timestamp`` Represents a single timestamp ``to_datetime``, ``Timestamp`` +``DatetimeIndex`` Index of ``Timestamp`` ``to_datetime``, ``date_range``, ``bdate_range``, ``DatetimeIndex`` ``Period`` Represents a single time span ``Period`` ``PeriodIndex`` Index of ``Period`` ``period_range``, ``PeriodIndex`` -================= =============================== ================================================== +================= =============================== =================================================================== .. _timeseries.representation: -Time Stamps vs. Time Spans --------------------------- +Timestamps vs. 
Time Spans +------------------------- -Time-stamped data is the most basic type of timeseries data that associates +Timestamped data is the most basic type of time series data that associates values with points in time. For pandas objects it means using the points in time. @@ -149,10 +149,10 @@ future releases. Converting to Timestamps ------------------------ -To convert a Series or list-like object of date-like objects e.g. strings, +To convert a ``Series`` or list-like object of date-like objects e.g. strings, epochs, or a mixture, you can use the ``to_datetime`` function. When passed -a Series, this returns a Series (with the same index), while a list-like -is converted to a DatetimeIndex: +a ``Series``, this returns a ``Series`` (with the same index), while a list-like +is converted to a ``DatetimeIndex``: .. ipython:: python @@ -175,11 +175,9 @@ you can pass the ``dayfirst`` flag: can't be parsed with the day being first it will be parsed as if ``dayfirst`` were False. -If you pass a single string to ``to_datetime``, it returns single ``Timestamp``. - -Also, ``Timestamp`` can accept the string input. -Note that ``Timestamp`` doesn't accept string parsing option like ``dayfirst`` -or ``format``, use ``to_datetime`` if these are required. +If you pass a single string to ``to_datetime``, it returns a single ``Timestamp``. +``Timestamp`` can also accept string input, but it doesn't accept string parsing +options like ``dayfirst`` or ``format``, so use ``to_datetime`` if these are required. .. ipython:: python @@ -191,9 +189,7 @@ Providing a Format Argument ~~~~~~~~~~~~~~~~~~~~~~~~~~~ In addition to the required datetime string, a ``format`` argument can be passed to ensure specific parsing. -It will potentially speed up the conversion considerably. - -For example: +This could also potentially speed up the conversion considerably. .. 
ipython:: python @@ -203,7 +199,7 @@ For example: For more information on how to specify the ``format`` options, see https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior. -Assembling datetime from multiple DataFrame columns +Assembling Datetime from Multiple DataFrame Columns ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 0.18.1 @@ -238,28 +234,24 @@ Invalid Data In version 0.17.0, the default for ``to_datetime`` is now ``errors='raise'``, rather than ``errors='ignore'``. This means that invalid parsing will raise rather that return the original input as in previous versions. -Pass ``errors='coerce'`` to convert invalid data to ``NaT`` (not a time): - -Raise when unparseable, this is the default +The default behavior, ``errors='raise'``, is to raise when unparseable: .. code-block:: ipython In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise') ValueError: Unknown string format -Return the original input when unparseable +Pass ``errors='ignore'`` to return the original input when unparseable: -.. code-block:: ipython +.. ipython:: python - In [4]: pd.to_datetime(['2009/07/31', 'asd'], errors='ignore') - Out[4]: array(['2009/07/31', 'asd'], dtype=object) + pd.to_datetime(['2009/07/31', 'asd'], errors='ignore') -Return NaT for input when unparseable +Pass ``errors='coerce'`` to convert unparseable data to ``NaT`` (not a time): -.. code-block:: ipython +.. ipython:: python - In [6]: pd.to_datetime(['2009/07/31', 'asd'], errors='coerce') - Out[6]: DatetimeIndex(['2009-07-31', 'NaT'], dtype='datetime64[ns]', freq=None) + pd.to_datetime(['2009/07/31', 'asd'], errors='coerce') .. _timeseries.converting.epoch: @@ -267,12 +259,11 @@ Return NaT for input when unparseable Epoch Timestamps ~~~~~~~~~~~~~~~~ -It's also possible to convert integer or float epoch times. The default unit -for these is nanoseconds (since these are how ``Timestamp`` s are stored). 
However, -often epochs are stored in another ``unit`` which can be specified. These are computed -from the starting point specified by the :ref:`Origin Parameter `. - -Typical epoch stored units +pandas supports converting integer or float epoch times to ``Timestamp`` and +``DatetimeIndex``. The default unit is nanoseconds, since that is how ``Timestamp`` +objects are stored internally. However, epochs are often stored in another ``unit`` +which can be specified. These are computed from the starting point specified by the +``origin`` parameter. .. ipython:: python @@ -299,6 +290,10 @@ Typical epoch stored units pd.to_datetime([1490195805.433, 1490195805.433502912], unit='s') pd.to_datetime(1490195805433502912, unit='ns') +.. seealso:: + + :ref:`timeseries.origin` + .. _timeseries.converting.epoch_inverse: From Timestamps to Epoch @@ -319,15 +314,13 @@ We convert the ``DatetimeIndex`` to an ``int64`` array, then divide by the conve .. _timeseries.origin: -Using the Origin Parameter -~~~~~~~~~~~~~~~~~~~~~~~~~~ +Using the ``origin`` Parameter +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. versionadded:: 0.20.0 Using the ``origin`` parameter, one can specify an alternative starting point for creation -of a ``DatetimeIndex``. - -Start with 1960-01-01 as the starting date +of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: .. ipython:: python @@ -345,8 +338,8 @@ Commonly called 'unix epoch' or POSIX time. Generating Ranges of Timestamps ------------------------------- -To generate an index with time stamps, you can use either the DatetimeIndex or -Index constructor and pass in a list of datetime objects: +To generate an index with timestamps, you can use either the ``DatetimeIndex`` or +``Index`` constructor and pass in a list of datetime objects: .. 
ipython:: python @@ -360,37 +353,36 @@ Index constructor and pass in a list of datetime objects: index = pd.Index(dates) index -Practically, this becomes very cumbersome because we often need a very long +In practice this becomes very cumbersome because we often need a very long index with a large number of timestamps. If we need timestamps on a regular -frequency, we can use the pandas functions ``date_range`` and ``bdate_range`` -to create timestamp indexes. +frequency, we can use the :func:`date_range` and :func:`bdate_range` functions +to create a ``DatetimeIndex``. The default frequency for ``date_range`` is a +**calendar day** while the default for ``bdate_range`` is a **business day**: .. ipython:: python - index = pd.date_range('2000-1-1', periods=1000, freq='M') + start = datetime(2011, 1, 1) + end = datetime(2012, 1, 1) + + index = pd.date_range(start, end) index - index = pd.bdate_range('2012-1-1', periods=250) + index = pd.bdate_range(start, end) index -Convenience functions like ``date_range`` and ``bdate_range`` utilize a -variety of frequency aliases. The default frequency for ``date_range`` is a -**calendar day** while the default for ``bdate_range`` is a **business day** +Convenience functions like ``date_range`` and ``bdate_range`` can utilize a +variety of :ref:`frequency aliases <timeseries.offset_aliases>`: .. ipython:: python - start = datetime(2011, 1, 1) - end = datetime(2012, 1, 1) - - rng = pd.date_range(start, end) - rng + pd.date_range(start, periods=1000, freq='M') - rng = pd.bdate_range(start, end) - rng + pd.bdate_range(start, periods=250, freq='BQS') ``date_range`` and ``bdate_range`` make it easy to generate a range of dates -using various combinations of parameters like ``start``, ``end``, -``periods``, and ``freq``: +using various combinations of parameters like ``start``, ``end``, ``periods``, +and ``freq``. The start and end dates are strictly inclusive, so dates outside +of those specified will not be generated: .. 
ipython:: python @@ -402,15 +394,45 @@ using various combinations of parameters like ``start``, ``end``, pd.bdate_range(start=start, periods=20) -The start and end dates are strictly inclusive. So it will not generate any -dates outside of those dates if specified. +.. _timeseries.custom-freq-ranges: + +Custom Frequency Ranges +~~~~~~~~~~~~~~~~~~~~~~~ + +.. warning:: + + This functionality was originally exclusive to ``cdate_range``, which is + deprecated as of version 0.21.0 in favor of ``bdate_range``. Note that + ``cdate_range`` only utilizes the ``weekmask`` and ``holidays`` parameters + when custom business day, 'C', is passed as the frequency string. Support has + been expanded with ``bdate_range`` to work with any custom frequency string. + +.. versionadded:: 0.21.0 + +``bdate_range`` can also generate a range of custom frequency dates by using +the ``weekmask`` and ``holidays`` parameters. These parameters will only be +used if a custom frequency string is passed. + +.. ipython:: python + + weekmask = 'Mon Wed Fri' + + holidays = [datetime(2011, 1, 5), datetime(2011, 3, 14)] + + pd.bdate_range(start, end, freq='C', weekmask=weekmask, holidays=holidays) + + pd.bdate_range(start, end, freq='CBMS', weekmask=weekmask) + +.. seealso:: + + :ref:`timeseries.custombusinessdays` .. _timeseries.timestamp-limits: -Timestamp limitations +Timestamp Limitations --------------------- -Since pandas represents timestamps in nanosecond resolution, the timespan that +Since pandas represents timestamps in nanosecond resolution, the time span that can be represented using a 64-bit integer is limited to approximately 584 years: .. ipython:: python @@ -418,7 +440,9 @@ can be represented using a 64-bit integer is limited to approximately 584 years: pd.Timestamp.min pd.Timestamp.max -See :ref:`here ` for ways to represent data outside these bound. +.. seealso:: + + :ref:`timeseries.oob` .. 
_timeseries.datetimeindex: @@ -426,20 +450,20 @@ Indexing -------- One of the main uses for ``DatetimeIndex`` is as an index for pandas objects. -The ``DatetimeIndex`` class contains many timeseries related optimizations: +The ``DatetimeIndex`` class contains many time series related optimizations: - A large range of dates for various offsets are pre-computed and cached under the hood in order to make generating subsequent date ranges very fast (just have to grab a slice) - Fast shifting using the ``shift`` and ``tshift`` method on pandas objects - - Unioning of overlapping DatetimeIndex objects with the same frequency is + - Unioning of overlapping ``DatetimeIndex`` objects with the same frequency is very fast (important for fast data alignment) - Quick access to date fields via properties such as ``year``, ``month``, etc. - Regularization functions like ``snap`` and very fast ``asof`` logic -DatetimeIndex objects has all the basic functionality of regular Index objects -and a smorgasbord of advanced timeseries-specific methods for easy frequency -processing. +``DatetimeIndex`` objects have all the basic functionality of regular ``Index`` +objects, and a smorgasbord of advanced time series specific methods for easy +frequency processing. .. seealso:: :ref:`Reindexing methods ` @@ -447,8 +471,7 @@ processing. .. note:: While pandas does not force you to have a sorted date index, some of these - methods may have unexpected or incorrect behavior if the dates are - unsorted. So please be careful. + methods may have unexpected or incorrect behavior if the dates are unsorted. ``DatetimeIndex`` can be used like a regular index and offers all of its intelligent functionality like selection, slicing, etc. @@ -466,7 +489,7 @@ intelligent functionality like selection, slicing, etc. 
Partial String Indexing ~~~~~~~~~~~~~~~~~~~~~~~ -You can pass in dates and strings that parse to dates as indexing parameters: +Dates and strings that parse to timestamps can be passed as indexing parameters: .. ipython:: python @@ -485,9 +508,9 @@ the year or year and month as strings: ts['2011-6'] -This type of slicing will work on a DataFrame with a ``DateTimeIndex`` as well. Since the +This type of slicing will work on a ``DataFrame`` with a ``DatetimeIndex`` as well. Since the partial string selection is a form of label slicing, the endpoints **will be** included. This -would include matching times on an included date. Here's an example: +would include matching times on an included date: .. ipython:: python @@ -523,7 +546,7 @@ We are stopping on the included end-point as it is part of the index .. versionadded:: 0.18.0 -DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiIndex``. For example: +``DatetimeIndex`` partial string indexing also works on a ``DataFrame`` with a ``MultiIndex``: .. ipython:: python @@ -541,14 +564,14 @@ DatetimeIndex Partial String Indexing also works on DataFrames with a ``MultiInd .. _timeseries.slice_vs_exact_match: -Slice vs. exact match +Slice vs. Exact Match ~~~~~~~~~~~~~~~~~~~~~ .. versionchanged:: 0.20.0 -The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of an index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. +The same string used as an indexing parameter can be treated either as a slice or as an exact match depending on the resolution of the index. If the string is less accurate than the index, it will be treated as a slice, otherwise as an exact match. -For example, let us consider ``Series`` object which index has minute resolution. +Consider a ``Series`` object with a minute resolution index: .. 
ipython:: python @@ -593,7 +616,7 @@ If the timestamp string is treated as a slice, it can be used to index ``DataFra .. warning:: - However if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics `. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2012-12-31 23:59'`` has the same resolution as index and there is no column with such name: + However, if the string is treated as an exact match, the selection in ``DataFrame``'s ``[]`` will be column-wise and not row-wise, see :ref:`Indexing Basics <indexing.basics>`. For example ``dft_minute['2011-12-31 23:59']`` will raise ``KeyError`` as ``'2011-12-31 23:59'`` has the same resolution as the index and there is no column with such name: To *always* have unambiguous selection, whether the row is treated as a slice or a single selection, use ``.loc``. @@ -616,7 +639,7 @@ Note also that ``DatetimeIndex`` resolution cannot be less precise than day. Exact Indexing ~~~~~~~~~~~~~~ -As discussed in previous section, indexing a ``DateTimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. +As discussed in the previous section, indexing a ``DatetimeIndex`` with a partial string depends on the "accuracy" of the period, in other words how specific the interval is in relation to the resolution of the index. In contrast, indexing with ``Timestamp`` or ``datetime`` objects is exact, because the objects have exact meaning. These also follow the semantics of *including both endpoints*. These ``Timestamp`` and ``datetime`` objects have exact ``hours, minutes,`` and ``seconds``, even though they were not explicitly specified (they are ``0``). 
@@ -640,8 +663,8 @@ A ``truncate`` convenience function is provided that is equivalent to slicing: ts.truncate(before='10/31/2011', after='12/31/2011') -Even complicated fancy indexing that breaks the DatetimeIndex's frequency -regularity will result in a ``DatetimeIndex`` (but frequency is lost): +Even complicated fancy indexing that breaks the ``DatetimeIndex`` frequency +regularity will result in a ``DatetimeIndex``, although frequency is lost: .. ipython:: python @@ -652,7 +675,7 @@ regularity will result in a ``DatetimeIndex`` (but frequency is lost): Time/Date Components -------------------- -There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DateTimeIndex``. +There are several time/date properties that one can access from ``Timestamp`` or a collection of timestamps like a ``DatetimeIndex``. .. csv-table:: :header: "Property", "Description" @@ -688,10 +711,10 @@ Furthermore, if you have a ``Series`` with datetimelike values, then you can acc .. _timeseries.offsets: -DateOffset objects +DateOffset Objects ------------------ -In the preceding examples, we created DatetimeIndex objects at various +In the preceding examples, we created ``DatetimeIndex`` objects at various frequencies by passing in :ref:`frequency strings ` like 'M', 'W', and 'BM to the ``freq`` keyword. Under the hood, these frequency strings are being translated into an instance of pandas ``DateOffset``, @@ -704,7 +727,7 @@ which represents a regular frequency increment. 
Specific offset logic like DateOffset, "Generic offset class, defaults to 1 calendar day" BDay, "business day (weekday)" - CDay, "custom business day (experimental)" + CDay, "custom business day" Week, "one week, optionally anchored on a day of the week" WeekOfMonth, "the x-th day of the y-th week of each month" LastWeekOfMonth, "the x-th day of the last week of each month" @@ -805,7 +828,7 @@ These operations (``apply``, ``rollforward`` and ``rollback``) preserves time (h hour.apply(pd.Timestamp('2014-01-01 23:00')) -Parametric offsets +Parametric Offsets ~~~~~~~~~~~~~~~~~~ Some of the offsets can be "parameterized" when created to result in different @@ -840,7 +863,7 @@ Another example is parameterizing ``YearEnd`` with the specific ending month: .. _timeseries.offsetseries: -Using offsets with ``Series`` / ``DatetimeIndex`` +Using Offsets with ``Series`` / ``DatetimeIndex`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Offsets can be used with either a ``Series`` or ``DatetimeIndex`` to @@ -1091,7 +1114,7 @@ frequencies. We will refer to these aliases as *offset aliases*. :widths: 15, 100 "B", "business day frequency" - "C", "custom business day frequency (experimental)" + "C", "custom business day frequency" "D", "calendar day frequency" "W", "weekly frequency" "M", "month end frequency" @@ -1326,10 +1349,10 @@ or calendars with additional rules. .. _timeseries.advanced_datetime: -Time series-related instance methods +Time Series-Related Instance Methods ------------------------------------ -Shifting / lagging +Shifting / Lagging ~~~~~~~~~~~~~~~~~~ One may want to *shift* or *lag* the values in a time series back and forward in @@ -1360,7 +1383,7 @@ all the dates in the index by a specified number of offsets: Note that with ``tshift``, the leading entry is no longer NaN because the data is not being realigned. -Frequency conversion +Frequency Conversion ~~~~~~~~~~~~~~~~~~~~ The primary function for changing frequencies is the ``asfreq`` function. 
@@ -1381,13 +1404,13 @@ method for any gaps that may appear after the frequency conversion ts.asfreq(BDay(), method='pad') -Filling forward / backward +Filling Forward / Backward ~~~~~~~~~~~~~~~~~~~~~~~~~~ Related to ``asfreq`` and ``reindex`` is the ``fillna`` function documented in the :ref:`missing data section `. -Converting to Python datetimes +Converting to Python Datetimes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``DatetimeIndex`` can be converted to an array of Python native datetime.datetime objects using the @@ -1471,10 +1494,10 @@ labels. ts.resample('5Min', label='left', loffset='1s').mean() The ``axis`` parameter can be set to 0 or 1 and allows you to resample the -specified axis for a DataFrame. +specified axis for a ``DataFrame``. ``kind`` can be set to 'timestamp' or 'period' to convert the resulting index -to/from time-stamp and time-span representations. By default ``resample`` +to/from timestamp and time span representations. By default ``resample`` retains the input representation. ``convention`` can be set to 'start' or 'end' when resampling period data @@ -1482,8 +1505,8 @@ retains the input representation. frequency periods. -Up Sampling -~~~~~~~~~~~ +Upsampling +~~~~~~~~~~ For upsampling, you can specify a way to upsample and the ``limit`` parameter to interpolate over the gaps that are created: @@ -1559,13 +1582,13 @@ We can select a specific column or columns using standard getitem. r[['A','B']].mean() -You can pass a list or dict of functions to do aggregation with, outputting a DataFrame: +You can pass a list or dict of functions to do aggregation with, outputting a ``DataFrame``: .. ipython:: python r['A'].agg([np.sum, np.mean, np.std]) -On a resampled DataFrame, you can pass a list of functions to apply to each +On a resampled ``DataFrame``, you can pass a list of functions to apply to each column, which produces an aggregated result with a hierarchical index: .. 
ipython:: python @@ -1573,7 +1596,7 @@ column, which produces an aggregated result with a hierarchical index: r.agg([np.sum, np.mean]) By passing a dict to ``aggregate`` you can apply a different aggregation to the -columns of a DataFrame: +columns of a ``DataFrame``: .. ipython:: python :okexcept: @@ -1890,7 +1913,7 @@ frequencies ``Q-JAN`` through ``Q-DEC``. .. _timeseries.interchange: -Converting between Representations +Converting Between Representations ---------------------------------- Timestamped data can be converted to PeriodIndex-ed data using ``to_period`` @@ -1934,7 +1957,7 @@ the quarter end: .. _timeseries.oob: -Representing out-of-bounds spans +Representing Out-of-Bounds Spans -------------------------------- If you have data that is outside of the ``Timestamp`` bounds, see :ref:`Timestamp limitations `, @@ -2031,7 +2054,7 @@ which gives you more control over which time zone is used: rng_dateutil.tz == tz_dateutil Timestamps, like Python's ``datetime.datetime`` object can be either time zone -naive or time zone aware. Naive time series and DatetimeIndex objects can be +naive or time zone aware. Naive time series and ``DatetimeIndex`` objects can be *localized* using ``tz_localize``: .. ipython:: python @@ -2099,8 +2122,8 @@ Localization of ``Timestamp`` functions just like ``DatetimeIndex`` and ``Series rng[5].tz_localize('Asia/Shanghai') -Operations between Series in different time zones will yield UTC -Series, aligning the data on the UTC timestamps: +Operations between ``Series`` in different time zones will yield UTC +``Series``, aligning the data on the UTC timestamps: .. ipython:: python @@ -2180,7 +2203,7 @@ constructor as well as ``tz_localize``. .. _timeseries.timezone_series: -TZ aware Dtypes +TZ Aware Dtypes ~~~~~~~~~~~~~~~ .. 
versionadded:: 0.17.0 diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index c8a0a6bff5cc7..d69a5c22acc03 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -488,7 +488,7 @@ Additionally, DataFrames with datetime columns that were parsed by :func:`read_s Consistency of Range Functions ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`cdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range`. (:issue:`17471`). +In previous versions, there were some inconsistencies between the various range functions: :func:`date_range`, :func:`bdate_range`, :func:`period_range`, :func:`timedelta_range`, and :func:`interval_range` (:issue:`17471`). One of the inconsistent behaviors occurred when the ``start``, ``end`` and ``period`` parameters were all specified, potentially leading to ambiguous ranges. When all three parameters were passed, ``interval_range`` ignored the ``period`` parameter, ``period_range`` ignored the ``end`` parameter, and the other range functions raised. To promote consistency among the range functions, and avoid potentially ambiguous ranges, ``interval_range`` and ``period_range`` will now raise when all three parameters are passed. @@ -571,8 +571,9 @@ Deprecations - :func:`SeriesGroupBy.nth` has deprecated ``True`` in favor of ``'all'`` for its kwarg ``dropna`` (:issue:`11038`). - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) +- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`) -.. 
_whatsnew_0210.deprecations.argmin_min +.. _whatsnew_0210.deprecations.argmin_min: Series.argmax and Series.argmin ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -738,9 +739,9 @@ Numeric Categorical ^^^^^^^^^^^ -- Bug in :func:`Series.isin` when called with a categorical (:issue`16639`) +- Bug in :func:`Series.isin` when called with a categorical (:issue:`16639`) - Bug in the categorical constructor with empty values and categories causing the ``.categories`` to be an empty ``Float64Index`` rather than an empty ``Index`` with object dtype (:issue:`17248`) -- Bug in categorical operations with :ref:`Series.cat ' not preserving the original Series' name (:issue:`17509`) +- Bug in categorical operations with :ref:`Series.cat <categorical.cat>` not preserving the original Series' name (:issue:`17509`) PyPy ^^^^ diff --git a/pandas/core/api.py b/pandas/core/api.py index a012ccce83965..2f818a400162b 100644 --- a/pandas/core/api.py +++ b/pandas/core/api.py @@ -16,8 +16,7 @@ PeriodIndex, NaT) from pandas.core.indexes.period import Period, period_range, pnow from pandas.core.indexes.timedeltas import Timedelta, timedelta_range -from pandas.core.indexes.datetimes import (Timestamp, date_range, bdate_range, - cdate_range) +from pandas.core.indexes.datetimes import Timestamp, date_range, bdate_range from pandas.core.indexes.interval import Interval, interval_range from pandas.core.series import Series diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9127864eab8a1..1419da3fa8861 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -17,6 +17,7 @@ is_period_dtype, is_bool_dtype, is_string_dtype, + is_string_like, is_list_like, is_scalar, pandas_dtype, @@ -37,7 +38,8 @@ Resolution) from pandas.core.indexes.datetimelike import ( DatelikeOps, TimelikeOps, DatetimeIndexOpsMixin) -from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay +from pandas.tseries.offsets import ( + DateOffset, generate_range, Tick, CDay, 
prefix_mapping) from pandas.core.tools.datetimes import ( parse_time_string, normalize_date, to_time) from pandas.core.tools.timedeltas import to_timedelta @@ -2049,7 +2051,8 @@ def date_range(start=None, end=None, periods=None, freq='D', tz=None, def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, - normalize=True, name=None, closed=None, **kwargs): + normalize=True, name=None, weekmask=None, holidays=None, + closed=None, **kwargs): """ Return a fixed frequency DatetimeIndex, with business day as the default frequency @@ -2071,6 +2074,20 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, Normalize start/end dates to midnight before generating date range name : string, default None Name of the resulting DatetimeIndex + weekmask : string or None, default None + Weekmask of valid business days, passed to ``numpy.busdaycalendar``, + only used when custom frequency strings are passed. The default + value None is equivalent to 'Mon Tue Wed Thu Fri' + + .. versionadded:: 0.21.0 + + holidays : list-like or None, default None + Dates to exclude from the set of valid business days, passed to + ``numpy.busdaycalendar``, only used when custom frequency strings + are passed + + .. 
versionadded:: 0.21.0 + closed : string, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) @@ -2088,6 +2105,18 @@ def bdate_range(start=None, end=None, periods=None, freq='B', tz=None, rng : DatetimeIndex """ + if is_string_like(freq) and freq.startswith('C'): + try: + weekmask = weekmask or 'Mon Tue Wed Thu Fri' + freq = prefix_mapping[freq](holidays=holidays, weekmask=weekmask) + except (KeyError, TypeError): + msg = 'invalid custom frequency string: {freq}'.format(freq=freq) + raise ValueError(msg) + elif holidays or weekmask: + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency {freq}').format(freq=freq) + raise ValueError(msg) + return DatetimeIndex(start=start, end=end, periods=periods, freq=freq, tz=tz, normalize=normalize, name=name, closed=closed, **kwargs) @@ -2099,6 +2128,8 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, Return a fixed frequency DatetimeIndex, with CustomBusinessDay as the default frequency + .. 
deprecated:: 0.21.0 + Parameters ---------- start : string or datetime-like, default None @@ -2137,6 +2168,9 @@ def cdate_range(start=None, end=None, periods=None, freq='C', tz=None, ------- rng : DatetimeIndex """ + warnings.warn("cdate_range is deprecated and will be removed in a future " + "version, instead use pd.bdate_range(..., freq='{freq}')" + .format(freq=freq), FutureWarning, stacklevel=2) if freq == 'C': holidays = kwargs.pop('holidays', []) diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index c593290410b96..fad455d6391c3 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -63,7 +63,7 @@ class TestPDApi(Base): # top-level functions funcs = ['bdate_range', 'concat', 'crosstab', 'cut', 'date_range', 'interval_range', 'eval', - 'factorize', 'get_dummies', 'cdate_range', + 'factorize', 'get_dummies', 'infer_freq', 'isna', 'isnull', 'lreshape', 'melt', 'notna', 'notnull', 'offsets', 'merge', 'merge_ordered', 'merge_asof', @@ -240,3 +240,13 @@ def test_deprecation_access_func(self): [c1, c2], sort_categories=True, ignore_order=True) + + +class TestCDateRange(object): + + def test_deprecation_cdaterange(self): + # GH17596 + from pandas.core.indexes.datetimes import cdate_range + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + cdate_range('2017-01-01', '2017-12-31') diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index c373942cb4c63..3b40ef092f364 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -1,6 +1,5 @@ """ -test date_range, bdate_range, cdate_range -construction from the convenience range functions +test date_range, bdate_range construction from the convenience range functions """ import pytest @@ -12,10 +11,9 @@ import pandas as pd import pandas.util.testing as tm from pandas import compat -from pandas.core.indexes.datetimes import 
bdate_range, cdate_range -from pandas import date_range, offsets, DatetimeIndex, Timestamp -from pandas.tseries.offsets import (generate_range, CDay, BDay, - DateOffset, MonthEnd) +from pandas import date_range, bdate_range, offsets, DatetimeIndex, Timestamp +from pandas.tseries.offsets import (generate_range, CDay, BDay, DateOffset, + MonthEnd, prefix_mapping) from pandas.tests.series.common import TestData @@ -241,9 +239,6 @@ def test_precision_finer_than_offset(self): class TestBusinessDateRange(object): - def setup_method(self, method): - self.rng = bdate_range(START, END) - def test_constructor(self): bdate_range(START, END, freq=BDay()) bdate_range(START, periods=20, freq=BDay()) @@ -258,33 +253,31 @@ def test_constructor(self): def test_naive_aware_conflicts(self): naive = bdate_range(START, END, freq=BDay(), tz=None) - aware = bdate_range(START, END, freq=BDay(), - tz="Asia/Hong_Kong") - tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", - naive.join, aware) - tm.assert_raises_regex(TypeError, "tz-naive.*tz-aware", - aware.join, naive) + aware = bdate_range(START, END, freq=BDay(), tz="Asia/Hong_Kong") + + msg = 'tz-naive.*tz-aware' + with tm.assert_raises_regex(TypeError, msg): + naive.join(aware) + + with tm.assert_raises_regex(TypeError, msg): + aware.join(naive) def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=BDay()) DatetimeIndex._cached_range(START, periods=20, offset=BDay()) DatetimeIndex._cached_range(end=START, periods=20, offset=BDay()) - tm.assert_raises_regex(TypeError, "offset", - DatetimeIndex._cached_range, - START, END) + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - tm.assert_raises_regex(TypeError, "specify period", - DatetimeIndex._cached_range, START, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=BDay()) - tm.assert_raises_regex(TypeError, "specify period", - 
DatetimeIndex._cached_range, end=END, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=BDay()) - tm.assert_raises_regex(TypeError, "start or end", - DatetimeIndex._cached_range, periods=20, - offset=BDay()) + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=BDay()) def test_cached_range_bug(self): rng = date_range('2010-09-01 05:00:00', periods=50, @@ -300,8 +293,9 @@ def test_timezone_comparaison_bug(self): def test_timezone_comparaison_assert(self): start = Timestamp('20130220 10:00', tz='US/Eastern') - pytest.raises(AssertionError, date_range, start, periods=2, - tz='Europe/Berlin') + msg = 'Inferred time zone not equal to passed time zone' + with tm.assert_raises_regex(AssertionError, msg): + date_range(start, periods=2, tz='Europe/Berlin') def test_misc(self): end = datetime(2009, 5, 13) @@ -315,14 +309,17 @@ def test_misc(self): def test_date_parse_failure(self): badly_formed_date = '2007/100/1' - pytest.raises(ValueError, Timestamp, badly_formed_date) + with pytest.raises(ValueError): + Timestamp(badly_formed_date) + + with pytest.raises(ValueError): + bdate_range(start=badly_formed_date, periods=10) - pytest.raises(ValueError, bdate_range, start=badly_formed_date, - periods=10) - pytest.raises(ValueError, bdate_range, end=badly_formed_date, - periods=10) - pytest.raises(ValueError, bdate_range, badly_formed_date, - badly_formed_date) + with pytest.raises(ValueError): + bdate_range(end=badly_formed_date, periods=10) + + with pytest.raises(ValueError): + bdate_range(badly_formed_date, badly_formed_date) def test_daterange_bug_456(self): # GH #456 @@ -334,8 +331,9 @@ def test_daterange_bug_456(self): assert isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): - pytest.raises(ValueError, date_range, '1/1/2000', '1/1/2001', - freq=MonthEnd(0)) + msg = 'Offset <0 \* MonthEnds> did not increment date' + with 
tm.assert_raises_regex(ValueError, msg): + date_range('1/1/2000', '1/1/2001', freq=MonthEnd(0)) def test_range_bug(self): # GH #770 @@ -343,8 +341,8 @@ def test_range_bug(self): result = date_range("2011-1-1", "2012-1-31", freq=offset) start = datetime(2011, 1, 1) - exp_values = [start + i * offset for i in range(5)] - tm.assert_index_equal(result, DatetimeIndex(exp_values)) + expected = DatetimeIndex([start + i * offset for i in range(5)]) + tm.assert_index_equal(result, expected) def test_range_tz_pytz(self): # see gh-2906 @@ -525,20 +523,18 @@ def test_freq_divides_end_in_nanos(self): class TestCustomDateRange(object): - def setup_method(self, method): - self.rng = cdate_range(START, END) def test_constructor(self): - cdate_range(START, END, freq=CDay()) - cdate_range(START, periods=20, freq=CDay()) - cdate_range(end=START, periods=20, freq=CDay()) + bdate_range(START, END, freq=CDay()) + bdate_range(START, periods=20, freq=CDay()) + bdate_range(end=START, periods=20, freq=CDay()) msg = 'periods must be a number, got C' with tm.assert_raises_regex(TypeError, msg): date_range('2011-1-1', '2012-1-1', 'C') with tm.assert_raises_regex(TypeError, msg): - cdate_range('2011-1-1', '2012-1-1', 'C') + bdate_range('2011-1-1', '2012-1-1', 'C') def test_cached_range(self): DatetimeIndex._cached_range(START, END, offset=CDay()) @@ -547,66 +543,93 @@ def test_cached_range(self): DatetimeIndex._cached_range(end=START, periods=20, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, START, END) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "offset"): + DatetimeIndex._cached_range(START, END) - pytest.raises(Exception, DatetimeIndex._cached_range, START, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(START, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, end=END, - freq=CDay()) + # with pytest.raises(TypeError): + with 
tm.assert_raises_regex(TypeError, "specify period"): + DatetimeIndex._cached_range(end=END, offset=CDay()) - pytest.raises(Exception, DatetimeIndex._cached_range, periods=20, - freq=CDay()) + # with pytest.raises(TypeError): + with tm.assert_raises_regex(TypeError, "start or end"): + DatetimeIndex._cached_range(periods=20, offset=CDay()) def test_misc(self): end = datetime(2009, 5, 13) - dr = cdate_range(end=end, periods=20) + dr = bdate_range(end=end, periods=20, freq='C') firstDate = end - 19 * CDay() assert len(dr) == 20 assert dr[0] == firstDate assert dr[-1] == end - def test_date_parse_failure(self): - badly_formed_date = '2007/100/1' - - pytest.raises(ValueError, Timestamp, badly_formed_date) - - pytest.raises(ValueError, cdate_range, start=badly_formed_date, - periods=10) - pytest.raises(ValueError, cdate_range, end=badly_formed_date, - periods=10) - pytest.raises(ValueError, cdate_range, badly_formed_date, - badly_formed_date) - def test_daterange_bug_456(self): # GH #456 - rng1 = cdate_range('12/5/2011', '12/5/2011') - rng2 = cdate_range('12/2/2011', '12/5/2011') + rng1 = bdate_range('12/5/2011', '12/5/2011', freq='C') + rng2 = bdate_range('12/2/2011', '12/5/2011', freq='C') rng2.offset = CDay() result = rng1.union(rng2) assert isinstance(result, DatetimeIndex) def test_cdaterange(self): - rng = cdate_range('2013-05-01', periods=3) - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C') + expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03']) + tm.assert_index_equal(result, expected) def test_cdaterange_weekmask(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu') - xp = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu') + expected = DatetimeIndex(['2013-05-01', '2013-05-02', 
'2013-05-05']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu') def test_cdaterange_holidays(self): - rng = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, holidays=['2013-05-01']) def test_cdaterange_weekmask_and_holidays(self): - rng = cdate_range('2013-05-01', periods=3, - weekmask='Sun Mon Tue Wed Thu', - holidays=['2013-05-01']) - xp = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) - tm.assert_index_equal(xp, rng) + result = bdate_range('2013-05-01', periods=3, freq='C', + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + expected = DatetimeIndex(['2013-05-02', '2013-05-05', '2013-05-06']) + tm.assert_index_equal(result, expected) + + # raise with non-custom freq + msg = ('a custom frequency string is required when holidays or ' + 'weekmask are passed, got frequency B') + with tm.assert_raises_regex(ValueError, msg): + bdate_range('2013-05-01', periods=3, + weekmask='Sun Mon Tue Wed Thu', + holidays=['2013-05-01']) + + @pytest.mark.parametrize('freq', [freq for freq in prefix_mapping + if freq.startswith('C')]) + def test_all_custom_freq(self, freq): + # should not raise + bdate_range(START, END, freq=freq, weekmask='Mon Wed Fri', + holidays=['2009-03-14']) + 
+ bad_freq = freq + 'FOO' + msg = 'invalid custom frequency string: {freq}' + with tm.assert_raises_regex(ValueError, msg.format(freq=bad_freq)): + bdate_range(START, END, freq=bad_freq) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 86e65feec04f3..7cb051d351444 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -10,7 +10,6 @@ import pandas._libs.tslib as tslib import pandas.util.testing as tm from pandas.errors import PerformanceWarning -from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, PeriodIndex, Series, Timestamp, Timedelta, date_range, TimedeltaIndex, _np_version_under1p10, Index, datetime, Float64Index, offsets, bdate_range) @@ -1208,7 +1207,7 @@ def test_identical(self): class TestCustomDatetimeIndex(object): def setup_method(self, method): - self.rng = cdate_range(START, END) + self.rng = bdate_range(START, END, freq='C') def test_comparison(self): d = self.rng[10] @@ -1277,10 +1276,11 @@ def test_summary(self): self.rng[2:2].summary() def test_summary_pytz(self): - cdate_range('1/1/2005', '1/1/2009', tz=pytz.utc).summary() + bdate_range('1/1/2005', '1/1/2009', freq='C', tz=pytz.utc).summary() def test_summary_dateutil(self): - cdate_range('1/1/2005', '1/1/2009', tz=dateutil.tz.tzutc()).summary() + bdate_range('1/1/2005', '1/1/2009', freq='C', + tz=dateutil.tz.tzutc()).summary() def test_equals(self): assert not self.rng.equals(list(self.rng)) diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index 4ffd2e1cd1e61..ff436e0501849 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -4,7 +4,6 @@ import pandas as pd import pandas.util.testing as tm -from pandas.core.indexes.datetimes import cdate_range from pandas import (DatetimeIndex, date_range, Series, bdate_range, DataFrame, Int64Index, 
Index, to_datetime) from pandas.tseries.offsets import Minute, BMonthEnd, MonthEnd @@ -345,7 +344,7 @@ def test_month_range_union_tz_dateutil(self): class TestCustomDatetimeIndex(object): def setup_method(self, method): - self.rng = cdate_range(START, END) + self.rng = bdate_range(START, END, freq='C') def test_union(self): # overlapping @@ -412,7 +411,7 @@ def test_outer_join(self): def test_intersection_bug(self): # GH #771 - a = cdate_range('11/30/2011', '12/31/2011') - b = cdate_range('12/10/2011', '12/20/2011') + a = bdate_range('11/30/2011', '12/31/2011', freq='C') + b = bdate_range('12/10/2011', '12/20/2011', freq='C') result = a.intersection(b) tm.assert_index_equal(result, b) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ea37434e3a8d9..3a2a613986dca 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -2987,6 +2987,7 @@ def generate_range(start=None, end=None, periods=None, CustomBusinessHour, # 'CBH' MonthEnd, # 'M' MonthBegin, # 'MS' + Nano, # 'N' SemiMonthEnd, # 'SM' SemiMonthBegin, # 'SMS' Week, # 'W' @@ -3002,5 +3003,3 @@ def generate_range(start=None, end=None, periods=None, FY5253, FY5253Quarter, ]) - -prefix_mapping['N'] = Nano