Skip to content

DEPR: Deprecate range-based PeriodIndex construction #24354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Dec 28, 2018
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion asv_bench/benchmarks/join_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def setup(self, axis):
self.empty_right = [df, DataFrame()]

def time_concat_series(self, axis):
concat(self.series, axis=axis)
concat(self.series, axis=axis, sort=False)

def time_concat_small_frames(self, axis):
concat(self.small_frames, axis=axis)
Expand Down
12 changes: 6 additions & 6 deletions asv_bench/benchmarks/panel_ctor.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import warnings
from datetime import datetime, timedelta

from pandas import DataFrame, Panel, DatetimeIndex, date_range
from pandas import DataFrame, Panel, date_range


class DifferentIndexes(object):
Expand All @@ -23,9 +23,9 @@ def time_from_dict(self):
class SameIndexes(object):

def setup(self):
idx = DatetimeIndex(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
idx = date_range(start=datetime(1990, 1, 1),
end=datetime(2012, 1, 1),
freq='D')
df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx)
self.data_frames = dict(enumerate([df] * 100))

Expand All @@ -40,10 +40,10 @@ def setup(self):
start = datetime(1990, 1, 1)
end = datetime(2012, 1, 1)
df1 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
end += timedelta(days=1)
df2 = DataFrame({'a': 0, 'b': 1, 'c': 2},
index=DatetimeIndex(start=start, end=end, freq='D'))
index=date_range(start=start, end=end, freq='D'))
dfs = [df1] * 50 + [df2] * 50
self.data_frames = dict(enumerate(dfs))

Expand Down
6 changes: 3 additions & 3 deletions asv_bench/benchmarks/period.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pandas import (DataFrame, Series, Period, PeriodIndex, date_range,
period_range)
from pandas import (
DataFrame, Period, PeriodIndex, Series, date_range, period_range)


class PeriodProperties(object):
Expand Down Expand Up @@ -94,7 +94,7 @@ def time_value_counts(self, typ):
class Indexing(object):

def setup(self):
self.index = PeriodIndex(start='1985', periods=1000, freq='D')
self.index = period_range(start='1985', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.period = self.index[500]

Expand Down
4 changes: 2 additions & 2 deletions asv_bench/benchmarks/reindex.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
import numpy as np
import pandas.util.testing as tm
from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index,
from pandas import (DataFrame, Series, MultiIndex, Index,
date_range)
from .pandas_vb_common import lib


class Reindex(object):

def setup(self):
rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min')
rng = date_range(start='1/1/1970', periods=10000, freq='1min')
self.df = DataFrame(np.random.rand(10000, 10), index=rng,
columns=range(10))
self.df['foo'] = 'bar'
Expand Down
9 changes: 5 additions & 4 deletions asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

import numpy as np
from pandas import Series, timedelta_range, to_timedelta, Timestamp, \
Timedelta, TimedeltaIndex, DataFrame

from pandas import (
DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta)


class TimedeltaConstructor(object):
Expand Down Expand Up @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series):
class TimedeltaIndexing(object):

def setup(self):
self.index = TimedeltaIndex(start='1985', periods=1000, freq='D')
self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D')
self.index = timedelta_range(start='1985', periods=1000, freq='D')
self.index2 = timedelta_range(start='1986', periods=1000, freq='D')
self.series = Series(range(1000), index=self.index)
self.timedelta = self.index[500]

Expand Down
7 changes: 4 additions & 3 deletions asv_bench/benchmarks/timestamp.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import datetime

from pandas import Timestamp
import pytz
import dateutil
import pytz

from pandas import Timestamp


class TimestampConstruction(object):
Expand Down Expand Up @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq):
self.ts.dayofweek

def time_weekday_name(self, tz, freq):
self.ts.weekday_name
self.ts.day_name

def time_dayofyear(self, tz, freq):
self.ts.dayofyear
Expand Down
4 changes: 2 additions & 2 deletions doc/source/whatsnew/v0.21.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -654,7 +654,7 @@ Previous Behavior:

.. code-block:: ipython

In [1]: pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10)
In [1]: pi = pd.period_range(start='2000-01-01', freq='D', periods=10)

In [2]: s = pd.Series(np.arange(10), index=pi)

Expand All @@ -674,7 +674,7 @@ New Behavior:

.. ipython:: python

pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10)
pi = pd.period_range(start='2000-01-01', freq='D', periods=10)

s = pd.Series(np.arange(10), index=pi)

Expand Down
7 changes: 5 additions & 2 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,7 @@ changes were made:
* The ``out`` and ``mode`` parameters are no longer accepted (previously, this raised if they were specified).
* Passing a scalar for ``indices`` is no longer allowed.

- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``.
- ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray.
- Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed.
- ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`).
Expand Down Expand Up @@ -1104,6 +1104,7 @@ Other API Changes
- The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`)
- :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`).
- :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` and ``Index.tolist``, respectively (:issue:`8826`)
- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`).

.. _whatsnew_0240.deprecations:

Expand Down Expand Up @@ -1144,7 +1145,7 @@ Deprecations
- Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`)
- :func:`pandas.api.types.is_period` is deprecated in favor of `pandas.api.types.is_period_dtype` (:issue:`23917`)
- :func:`pandas.api.types.is_datetimetz` is deprecated in favor of `pandas.api.types.is_datetime64tz` (:issue:`23917`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`)
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).
- In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`).
- :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`)
Expand Down Expand Up @@ -1316,6 +1317,7 @@ Datetimelike
- Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` were not filled by valid values from ``other`` (:issue:`24147`)
- Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`)
- Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`)
- Bug in :func:`period_range` ignoring the frequency of ``start`` and ``end`` when those are provided as :class:`Period` objects (:issue:`20535`).
- Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`)
- Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`)
- Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`)
Expand Down Expand Up @@ -1615,6 +1617,7 @@ Sparse
- Bug in :meth:`SparseArray.unique` not returning the unique values (:issue:`19595`)
- Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`)
- Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`)
- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`)

Style
^^^^^
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,19 +66,19 @@ def _get_series_result_type(result, objs=None):
return appropriate class of Series concat
input is either dict or array-like
"""
from pandas import SparseSeries, SparseDataFrame, DataFrame

# concat Series with axis 1
if isinstance(result, dict):
# concat Series with axis 1
if all(is_sparse(c) for c in compat.itervalues(result)):
from pandas.core.sparse.api import SparseDataFrame
if all(isinstance(c, (SparseSeries, SparseDataFrame))
for c in compat.itervalues(result)):
return SparseDataFrame
else:
from pandas.core.frame import DataFrame
return DataFrame

# otherwise it is a SingleBlockManager (axis = 0)
if result._block.is_sparse:
from pandas.core.sparse.api import SparseSeries
return SparseSeries
else:
return objs[0]._constructor
Expand Down
26 changes: 21 additions & 5 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,27 @@ class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index):
start : starting value, datetime-like, optional
If data is None, start is used as the start point in generating regular
timestamp data.

.. deprecated:: 0.24.0

periods : int, optional, > 0
Number of periods to generate, if generating index. Takes precedence
over end argument
end : end time, datetime-like, optional

.. deprecated:: 0.24.0

end : end time, datetime-like, optional
If periods is none, generated index will extend to first conforming
time on or just past end argument

.. deprecated:: 0.24.0

closed : string or None, default None
Make the interval closed with respect to the given frequency to
the 'left', 'right', or both sides (None)

.. deprecated:: 0.24.0

tz : pytz.timezone or dateutil.tz.tzfile
ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise'
When clocks moved backward due to DST, ambiguous times may arise.
Expand Down Expand Up @@ -166,12 +178,16 @@ class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index):
To learn more about the frequency strings, please see `this link
<http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases>`__.

Creating a DatetimeIndex based on `start`, `periods`, and `end` has
been deprecated in favor of :func:`date_range`.

See Also
---------
Index : The base pandas Index type.
TimedeltaIndex : Index of timedelta64 data.
PeriodIndex : Index of Period data.
pandas.to_datetime : Convert argument to datetime.
to_datetime : Convert argument to datetime.
date_range : Create a fixed-frequency DatetimeIndex.
"""
_typ = 'datetimeindex'
_join_precedence = 10
Expand Down Expand Up @@ -223,13 +239,13 @@ def __new__(cls, data=None,
verify_integrity = True

if data is None:
result = cls._generate_range(start, end, periods,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, if you passed a bad set of arguments (e.g. forgot freq), then we would warn and then raise an exception.

freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
warnings.warn("Creating a DatetimeIndex by passing range "
"endpoints is deprecated. Use "
"`pandas.date_range` instead.",
FutureWarning, stacklevel=2)
result = cls._generate_range(start, end, periods,
freq=freq, tz=tz, normalize=normalize,
closed=closed, ambiguous=ambiguous)
result.name = name
return result

Expand Down
65 changes: 55 additions & 10 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,21 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index,
start : starting value, period-like, optional
If data is None, used as the start point in generating regular
period data.

.. deprecated:: 0.24.0

periods : int, optional, > 0
Number of periods to generate, if generating index. Takes precedence
over end argument

.. deprecated:: 0.24.0

end : end value, period-like, optional
If periods is none, generated index will extend to first conforming
period on or just past end argument

.. deprecated:: 0.24.0

year : int, array, or Series, default None
month : int, array, or Series, default None
quarter : int, array, or Series, default None
Expand Down Expand Up @@ -138,18 +147,22 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index,
strftime
to_timestamp

Notes
-----
Creating a PeriodIndex based on `start`, `periods`, and `end` has
been deprecated in favor of :func:`period_range`.

Examples
--------
>>> idx = pd.PeriodIndex(year=year_arr, quarter=q_arr)

>>> idx2 = pd.PeriodIndex(start='2000', end='2010', freq='A')

See Also
---------
Index : The base pandas Index type.
Period : Represents a period of time.
DatetimeIndex : Index with datetime64 data.
TimedeltaIndex : Index of timedelta64 data.
period_range : Create a fixed-frequency PeriodIndex.
"""
_typ = 'periodindex'
_attributes = ['name', 'freq']
Expand Down Expand Up @@ -181,8 +194,32 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,

if data is None and ordinal is None:
# range-based.
data, freq = PeriodArray._generate_range(start, end, periods,
freq, fields)
data, freq2 = PeriodArray._generate_range(start, end, periods,
freq, fields)
# PeriodArray._generate_range validates that fields is
# empty when really using the range-based constructor.
if not fields:
msg = ("Creating a PeriodIndex by passing range "
"endpoints is deprecated. Use "
"`pandas.period_range` instead.")
# period_range differs from PeriodIndex for cases like
# start="2000", periods=4
# PeriodIndex interprets that as A-DEC freq.
# period_range interprets it as 'D' freq.
cond = (
freq is None and (
(start and not isinstance(start, Period)) or
(end and not isinstance(end, Period))
)
)
if cond:
msg += (
" Note that the default `freq` may differ. Pass "
"'freq=\"{}\"' to ensure the same output."
).format(freq2.freqstr)
warnings.warn(msg, FutureWarning, stacklevel=2)
freq = freq2

data = PeriodArray(data, freq=freq)
else:
freq = validate_dtype_freq(dtype, freq)
Expand Down Expand Up @@ -983,7 +1020,7 @@ def base(self):
PeriodIndex._add_datetimelike_methods()


def period_range(start=None, end=None, periods=None, freq='D', name=None):
def period_range(start=None, end=None, periods=None, freq=None, name=None):
"""
Return a fixed frequency PeriodIndex, with day (calendar) as the default
frequency
Expand All @@ -996,8 +1033,11 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None):
Right bound for generating periods
periods : integer, default None
Number of periods to generate
freq : string or DateOffset, default 'D'
Frequency alias
freq : string or DateOffset, optional
Frequency alias. By default the freq is taken from `start` or `end`
if those are Period objects. Otherwise, the default is ``"D"`` for
daily frequency.

name : string, default None
Name of the resulting PeriodIndex

Expand Down Expand Up @@ -1034,6 +1074,11 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None):
if com.count_not_none(start, end, periods) != 2:
raise ValueError('Of the three parameters: start, end, and periods, '
'exactly two must be specified')

return PeriodIndex(start=start, end=end, periods=periods,
freq=freq, name=name)
if freq is None and (not isinstance(start, Period)
and not isinstance(end, Period)):
freq = 'D'

data, freq = PeriodArray._generate_range(start, end, periods, freq,
fields={})
data = PeriodArray(data, freq=freq)
return PeriodIndex(data, name=name)
Loading