From 959452c65a533a87c39995e4809a27dd8fd0ce79 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 10:41:35 -0600 Subject: [PATCH 1/8] DEPR: Deprecate range-based PeriodIndex construction Closes https://github.com/pandas-dev/pandas/issues/20535 --- doc/source/whatsnew/v0.24.0.rst | 3 +- pandas/core/indexes/datetimes.py | 26 ++++++-- pandas/core/indexes/period.py | 40 +++++++++-- pandas/core/indexes/timedeltas.py | 19 +++++- .../tests/indexes/period/test_arithmetic.py | 28 ++++---- pandas/tests/indexes/period/test_asfreq.py | 20 +++--- .../tests/indexes/period/test_construction.py | 66 ++++++++++++------- pandas/tests/indexes/period/test_indexing.py | 9 +-- .../indexes/period/test_partial_slicing.py | 11 ++-- .../indexes/period/test_scalar_compat.py | 6 +- pandas/tests/indexes/period/test_tools.py | 16 ++--- pandas/util/testing.py | 5 +- 12 files changed, 163 insertions(+), 86 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fe5e4a57c557a..a351cdf8d9817 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1142,7 +1142,7 @@ Deprecations - Timezone converting a tz-aware ``datetime.datetime`` or :class:`Timestamp` with :class:`Timestamp` and the ``tz`` argument is now deprecated. 
Instead, use :meth:`Timestamp.tz_convert` (:issue:`23579`) - :func:`pandas.api.types.is_period` is deprecated in favor of `pandas.api.types.is_period_dtype` (:issue:`23917`) - :func:`pandas.api.types.is_datetimetz` is deprecated in favor of `pandas.api.types.is_datetime64tz` (:issue:`23917`) -- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`) +- Creating a :class:`TimedeltaIndex`, :class:`DatetimeIndex`, or :class:`PeriodIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range`, :func:`date_range`, or :func:`period_range` (:issue:`23919`) - Passing a string alias like ``'datetime64[ns, UTC]'`` as the ``unit`` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`). - In :meth:`Series.where` with Categorical data, providing an ``other`` that is not present in the categories is deprecated. Convert the categorical to a different dtype or add the ``other`` to the categories first (:issue:`24077`). - :meth:`Series.clip_lower`, :meth:`Series.clip_upper`, :meth:`DataFrame.clip_lower` and :meth:`DataFrame.clip_upper` are deprecated and will be removed in a future version. 
Use ``Series.clip(lower=threshold)``, ``Series.clip(upper=threshold)`` and the equivalent ``DataFrame`` methods (:issue:`24203`) @@ -1313,6 +1313,7 @@ Datetimelike - Bug in :meth:`Series.combine_first` not properly aligning categoricals, so that missing values in ``self`` where not filled by valid values from ``other`` (:issue:`24147`) - Bug in :func:`DataFrame.combine` with datetimelike values raising a TypeError (:issue:`23079`) - Bug in :func:`date_range` with frequency of ``Day`` or higher where dates sufficiently far in the future could wrap around to the past instead of raising ``OutOfBoundsDatetime`` (:issue:`14187`) +- Bug in :func:`period_range` ignoring the frequency of ``start`` and ``end`` when those are provided as :class:`Period` objects (:issue:`20535`). - Bug in :class:`PeriodIndex` with attribute ``freq.n`` greater than 1 where adding a :class:`DateOffset` object would return incorrect results (:issue:`23215`) - Bug in :class:`Series` that interpreted string indices as lists of characters when setting datetimelike values (:issue:`23451`) - Bug in :class:`Timestamp` constructor which would drop the frequency of an input :class:`Timestamp` (:issue:`22311`) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 1c966ab58e8c4..b6305cd29e622 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -81,15 +81,27 @@ class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index): start : starting value, datetime-like, optional If data is None, start is used as the start point in generating regular timestamp data. + + .. deprecated:: 0.24.0 + periods : int, optional, > 0 Number of periods to generate, if generating index. Takes precedence over end argument - end : end time, datetime-like, optional + + .. deprecated:: 0.24.0 + + end : end time, datetime-like, optional If periods is none, generated index will extend to first conforming time on or just past end argument + + .. 
deprecated:: 0.24.0 + closed : string or None, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) + + .. deprecated:: 0.24.0 + tz : pytz.timezone or dateutil.tz.tzfile ambiguous : 'infer', bool-ndarray, 'NaT', default 'raise' When clocks moved backward due to DST, ambiguous times may arise. @@ -166,12 +178,16 @@ class DatetimeIndex(DatetimeArray, DatetimeIndexOpsMixin, Int64Index): To learn more about the frequency strings, please see `this link `__. + Creating a DatetimeIndex based on `start`, `periods`, and `end` has + been deprecated in favor of :func:`date_range`. + See Also --------- Index : The base pandas Index type. TimedeltaIndex : Index of timedelta64 data. PeriodIndex : Index of Period data. - pandas.to_datetime : Convert argument to datetime. + to_datetime : Convert argument to datetime. + date_range : Create a fixed-frequency DatetimeIndex. """ _typ = 'datetimeindex' _join_precedence = 10 @@ -223,13 +239,13 @@ def __new__(cls, data=None, verify_integrity = True if data is None: + result = cls._generate_range(start, end, periods, + freq=freq, tz=tz, normalize=normalize, + closed=closed, ambiguous=ambiguous) warnings.warn("Creating a DatetimeIndex by passing range " "endpoints is deprecated. Use " "`pandas.date_range` instead.", FutureWarning, stacklevel=2) - result = cls._generate_range(start, end, periods, - freq=freq, tz=tz, normalize=normalize, - closed=closed, ambiguous=ambiguous) result.name = name return result diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7ece1eaf547c8..7d9faf2236162 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -92,12 +92,21 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, start : starting value, period-like, optional If data is None, used as the start point in generating regular period data. + + ..
deprecated:: 0.24.0 + periods : int, optional, > 0 Number of periods to generate, if generating index. Takes precedence over end argument + + .. deprecated:: 0.24.0 + end : end value, period-like, optional If periods is none, generated index will extend to first conforming period on or just past end argument + + .. deprecated:: 0.24.0 + year : int, array, or Series, default None month : int, array, or Series, default None quarter : int, array, or Series, default None @@ -138,18 +147,22 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index, strftime to_timestamp + Notes + ----- + Creating a PeriodIndex based on `start`, `periods`, and `end` has + been deprecated in favor of :func:`period_range`. + Examples -------- >>> idx = pd.PeriodIndex(year=year_arr, quarter=q_arr) - >>> idx2 = pd.PeriodIndex(start='2000', end='2010', freq='A') - See Also --------- Index : The base pandas Index type. Period : Represents a period of time. DatetimeIndex : Index with datetime64 data. TimedeltaIndex : Index of timedelta64 data. + period_range : Create a fixed-frequency PeriodIndex. """ _typ = 'periodindex' _attributes = ['name', 'freq'] @@ -183,6 +196,14 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, # range-based. data, freq = PeriodArray._generate_range(start, end, periods, freq, fields) + # PeriodArray._generate range does validate that fields is + # empty when really using the range-based constructor. + if not fields: + warnings.warn("Creating a PeriodIndex by passing range " + "endpoints is deprecated. 
Use " + "`pandas.period_range` instead.", + FutureWarning, stacklevel=2) + data = PeriodArray(data, freq=freq) else: freq = validate_dtype_freq(dtype, freq) @@ -983,7 +1004,7 @@ def base(self): PeriodIndex._add_datetimelike_methods() -def period_range(start=None, end=None, periods=None, freq='D', name=None): +def period_range(start=None, end=None, periods=None, freq=None, name=None): """ Return a fixed frequency PeriodIndex, with day (calendar) as the default frequency @@ -996,8 +1017,11 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): Right bound for generating periods periods : integer, default None Number of periods to generate - freq : string or DateOffset, default 'D' - Frequency alias + freq : string or DateOffset, optional + Frequency alias. By default the freq is taken from `start` or `end` + if those are Period objects. Otherwise, the default is ``"D"`` for + daily frequency. + name : string, default None Name of the resulting PeriodIndex @@ -1035,5 +1059,7 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None): raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') - return PeriodIndex(start=start, end=end, periods=periods, - freq=freq, name=name) + data, freq = PeriodArray._generate_range(start, end, periods, freq, + fields={}) + data = PeriodArray(data, freq=freq) + return PeriodIndex(data, name=name) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 5d52696992c30..b111653eb27b0 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -68,15 +68,27 @@ class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, start : starting value, timedelta-like, optional If data is None, start is used as the start point in generating regular timedelta data. + + .. deprecated:: 0.24.0 + periods : int, optional, > 0 Number of periods to generate, if generating index. 
Takes precedence over end argument - end : end time, timedelta-like, optional + + .. deprecated:: 0.24.0 + + end : end time, timedelta-like, optional If periods is none, generated index will extend to first conforming time on or just past end argument + + .. deprecated:: 0.24.0 + closed : string or None, default None Make the interval closed with respect to the given frequency to the 'left', 'right', or both sides (None) + + .. deprecated:: 0.24.0 + name : object Name to be stored in the index @@ -104,12 +116,15 @@ class TimedeltaIndex(TimedeltaArray, DatetimeIndexOpsMixin, Timedelta : Represents a duration between two dates or times. DatetimeIndex : Index of datetime64 data. PeriodIndex : Index of Period data. + timedelta_range : Create a fixed-frequency TimedeltaIndex. Notes ----- - To learn more about the frequency strings, please see `this link `__. + + Creating a TimedeltaIndex based on `start`, `periods`, and `end` has + been deprecated in favor of :func:`timedelta_range`. """ _typ = 'timedeltaindex' diff --git a/pandas/tests/indexes/period/test_arithmetic.py b/pandas/tests/indexes/period/test_arithmetic.py index 2c04f22f8b450..67b642e013880 100644 --- a/pandas/tests/indexes/period/test_arithmetic.py +++ b/pandas/tests/indexes/period/test_arithmetic.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import PeriodIndex +from pandas import PeriodIndex, period_range import pandas.util.testing as tm @@ -26,36 +26,36 @@ def test_pi_shift_ndarray(self): tm.assert_index_equal(result, expected) def test_shift(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='A', start='1/1/2002', end='12/1/2010') + pi1 = period_range(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='A', start='1/1/2002', end='12/1/2010') tm.assert_index_equal(pi1.shift(0), pi1) assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') - pi2
= PeriodIndex(freq='A', start='1/1/2000', end='12/1/2008') + pi1 = period_range(freq='A', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='A', start='1/1/2000', end='12/1/2008') assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='2/1/2001', end='1/1/2010') + pi1 = period_range(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='M', start='2/1/2001', end='1/1/2010') assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) - pi1 = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='M', start='12/1/2000', end='11/1/2009') + pi1 = period_range(freq='M', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='M', start='12/1/2000', end='11/1/2009') assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='1/2/2001', end='12/2/2009') + pi1 = period_range(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='D', start='1/2/2001', end='12/2/2009') assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(1), pi2) - pi1 = PeriodIndex(freq='D', start='1/1/2001', end='12/1/2009') - pi2 = PeriodIndex(freq='D', start='12/31/2000', end='11/30/2009') + pi1 = period_range(freq='D', start='1/1/2001', end='12/1/2009') + pi2 = period_range(freq='D', start='12/31/2000', end='11/30/2009') assert len(pi1) == len(pi2) tm.assert_index_equal(pi1.shift(-1), pi2) @@ -100,7 +100,7 @@ def test_shift_gh8083(self): def test_shift_periods(self): # GH #22458 : argument 'n' was deprecated in favor of 'periods' - idx = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + idx = period_range(freq='A', start='1/1/2001', end='12/1/2009') tm.assert_index_equal(idx.shift(periods=0), idx) tm.assert_index_equal(idx.shift(0), idx) with 
tm.assert_produces_warning(FutureWarning, diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index fed6bbce6259f..2dd49e7e0845e 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -2,20 +2,20 @@ import pytest import pandas as pd -from pandas import DataFrame, PeriodIndex, Series +from pandas import DataFrame, PeriodIndex, Series, period_range from pandas.util import testing as tm class TestPeriodIndex(object): def test_asfreq(self): - pi1 = PeriodIndex(freq='A', start='1/1/2001', end='1/1/2001') - pi2 = PeriodIndex(freq='Q', start='1/1/2001', end='1/1/2001') - pi3 = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2001') - pi4 = PeriodIndex(freq='D', start='1/1/2001', end='1/1/2001') - pi5 = PeriodIndex(freq='H', start='1/1/2001', end='1/1/2001 00:00') - pi6 = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 00:00') - pi7 = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') + pi1 = period_range(freq='A', start='1/1/2001', end='1/1/2001') + pi2 = period_range(freq='Q', start='1/1/2001', end='1/1/2001') + pi3 = period_range(freq='M', start='1/1/2001', end='1/1/2001') + pi4 = period_range(freq='D', start='1/1/2001', end='1/1/2001') + pi5 = period_range(freq='H', start='1/1/2001', end='1/1/2001 00:00') + pi6 = period_range(freq='Min', start='1/1/2001', end='1/1/2001 00:00') + pi7 = period_range(freq='S', start='1/1/2001', end='1/1/2001 00:00:00') assert pi1.asfreq('Q', 'S') == pi2 assert pi1.asfreq('Q', 's') == pi2 @@ -70,7 +70,7 @@ def test_asfreq(self): pytest.raises(ValueError, pi7.asfreq, 'T', 'foo') result1 = pi1.asfreq('3M') result2 = pi1.asfreq('M') - expected = PeriodIndex(freq='M', start='2001-12', end='2001-12') + expected = period_range(freq='M', start='2001-12', end='2001-12') tm.assert_numpy_array_equal(result1.asi8, expected.asi8) assert result1.freqstr == '3M' tm.assert_numpy_array_equal(result2.asi8, expected.asi8) @@ -126,7 
+126,7 @@ def test_asfreq_combined_pi(self): assert result.freq == exp.freq def test_asfreq_ts(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/31/2010') + index = period_range(freq='A', start='1/1/2001', end='12/31/2010') ts = Series(np.random.randn(len(index)), index=index) df = DataFrame(np.random.randn(len(index), 3), index=index) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 1ebc0ecb2fc02..e9786e8ba25ac 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -40,8 +40,12 @@ def test_construction_base_constructor(self): def test_constructor_use_start_freq(self): # GH #1118 p = Period('4/2/2012', freq='B') - index = PeriodIndex(start=p, periods=10) - expected = PeriodIndex(start='4/2/2012', periods=10, freq='B') + with tm.assert_produces_warning(FutureWarning): + index = PeriodIndex(start=p, periods=10) + expected = period_range(start='4/2/2012', periods=10, freq='B') + tm.assert_index_equal(index, expected) + + index = period_range(start=p, periods=10) tm.assert_index_equal(index, expected) def test_constructor_field_arrays(self): @@ -93,7 +97,7 @@ def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000, dtype=np.int64).repeat(4) quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) - pindex = PeriodIndex(year=years, quarter=quarters) + pindex = period_range(year=years, quarter=quarters) tm.assert_index_equal(pindex.year, pd.Index(years)) tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) @@ -320,19 +324,28 @@ def test_constructor_year_and_quarter(self): def test_constructor_freq_mult(self): # GH #7811 for func in [PeriodIndex, period_range]: - # must be the same, but for sure... 
- pidx = func(start='2014-01', freq='2M', periods=4) + + if func is PeriodIndex: + warning = FutureWarning + else: + warning = None + + with tm.assert_produces_warning(warning): + # must be the same, but for sure... + pidx = func(start='2014-01', freq='2M', periods=4) expected = PeriodIndex(['2014-01', '2014-03', '2014-05', '2014-07'], freq='2M') tm.assert_index_equal(pidx, expected) - pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') + with tm.assert_produces_warning(warning): + pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') expected = PeriodIndex(['2014-01-02', '2014-01-05', '2014-01-08', '2014-01-11', '2014-01-14'], freq='3D') tm.assert_index_equal(pidx, expected) - pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) + with tm.assert_produces_warning(warning): + pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', '2014-01-01 17:00'], freq='4H') tm.assert_index_equal(pidx, expected) @@ -354,7 +367,7 @@ def test_constructor_freq_mult(self): @pytest.mark.parametrize('mult', [1, 2, 3, 4, 5]) def test_constructor_freq_mult_dti_compat(self, mult, freq): freqstr = str(mult) + freq - pidx = PeriodIndex(start='2014-04-01', freq=freqstr, periods=10) + pidx = period_range(start='2014-04-01', freq=freqstr, periods=10) expected = date_range(start='2014-04-01', freq=freqstr, periods=10).to_period(freqstr) tm.assert_index_equal(pidx, expected) @@ -364,63 +377,68 @@ def test_constructor_freq_combined(self): pidx = PeriodIndex(['2016-01-01', '2016-01-02'], freq=freq) expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 00:00'], freq='25H') - for freq, func in zip(['1D1H', '1H1D'], [PeriodIndex, period_range]): - pidx = func(start='2016-01-01', periods=2, freq=freq) + for freq in ['1D1H', '1H1D']: + pidx = period_range(start='2016-01-01', periods=2, freq=freq) expected = PeriodIndex(['2016-01-01 00:00', '2016-01-02 01:00'], freq='25H') tm.assert_index_equal(pidx, expected) + 
def test_constructor_range_based_deprecated(self): + with tm.assert_produces_warning(FutureWarning): + pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + assert len(pi) == 9 + def test_constructor(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='A', start='1/1/2001', end='12/1/2009') assert len(pi) == 9 - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009') assert len(pi) == 4 * 9 - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='M', start='1/1/2001', end='12/1/2009') assert len(pi) == 12 * 9 - pi = PeriodIndex(freq='D', start='1/1/2001', end='12/31/2009') + pi = period_range(freq='D', start='1/1/2001', end='12/31/2009') assert len(pi) == 365 * 9 + 2 - pi = PeriodIndex(freq='B', start='1/1/2001', end='12/31/2009') + pi = period_range(freq='B', start='1/1/2001', end='12/31/2009') assert len(pi) == 261 * 9 - pi = PeriodIndex(freq='H', start='1/1/2001', end='12/31/2001 23:00') + pi = period_range(freq='H', start='1/1/2001', end='12/31/2001 23:00') assert len(pi) == 365 * 24 - pi = PeriodIndex(freq='Min', start='1/1/2001', end='1/1/2001 23:59') + pi = period_range(freq='Min', start='1/1/2001', end='1/1/2001 23:59') assert len(pi) == 24 * 60 - pi = PeriodIndex(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') + pi = period_range(freq='S', start='1/1/2001', end='1/1/2001 23:59:59') assert len(pi) == 24 * 60 * 60 start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) + i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) + i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) + i2 = 
period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) + i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) + i1 = period_range(start=start, end=end_intv) # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) @@ -453,7 +471,7 @@ def test_constructor_error(self): @pytest.mark.parametrize('freq', ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']) def test_recreate_from_data(self, freq): - org = PeriodIndex(start='2001/04/01', freq=freq, periods=1) + org = period_range(start='2001/04/01', freq=freq, periods=1) idx = PeriodIndex(org.values, freq=freq) tm.assert_index_equal(idx, org) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 8e7d719dd4c84..47c2edfd13395 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -147,7 +147,8 @@ def test_getitem_seconds(self): # GH#6716 didx = pd.date_range(start='2013/01/01 09:00:00', freq='S', periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + pidx = period_range(start='2013/01/01 09:00:00', freq='S', + periods=4000) for idx in [didx, pidx]: # getitem against index should raise ValueError @@ -171,7 +172,7 @@ def test_getitem_day(self): # GH#6716 # Confirm DatetimeIndex and PeriodIndex works identically didx = pd.date_range(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + pidx = period_range(start='2013/01/01', freq='D', periods=400) for idx in [didx, pidx]: # getitem against index should raise ValueError @@ -281,8 +282,8 @@ def test_take(self): assert result.freq == 'D' def 
test_take_misc(self): - index = PeriodIndex(start='1/1/10', end='12/31/12', freq='D', - name='idx') + index = period_range(start='1/1/10', end='12/31/12', freq='D', + name='idx') expected = PeriodIndex([datetime(2010, 1, 6), datetime(2010, 1, 7), datetime(2010, 1, 9), datetime(2010, 1, 13)], freq='D', name='idx') diff --git a/pandas/tests/indexes/period/test_partial_slicing.py b/pandas/tests/indexes/period/test_partial_slicing.py index 014a92a7aa39d..0a1e7225463be 100644 --- a/pandas/tests/indexes/period/test_partial_slicing.py +++ b/pandas/tests/indexes/period/test_partial_slicing.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Period, PeriodIndex, Series, period_range +from pandas import DataFrame, Period, Series, period_range from pandas.util import testing as tm @@ -52,7 +52,7 @@ def test_slice_keep_name(self): assert idx.name == idx[1:].name def test_pindex_slice_index(self): - pi = PeriodIndex(start='1/1/10', end='12/31/12', freq='M') + pi = period_range(start='1/1/10', end='12/31/12', freq='M') s = Series(np.random.rand(len(pi)), index=pi) res = s['2010'] exp = s[0:12] @@ -64,7 +64,7 @@ def test_pindex_slice_index(self): def test_range_slice_day(self): # GH#6716 didx = pd.date_range(start='2013/01/01', freq='D', periods=400) - pidx = PeriodIndex(start='2013/01/01', freq='D', periods=400) + pidx = period_range(start='2013/01/01', freq='D', periods=400) for idx in [didx, pidx]: # slices against index should raise IndexError @@ -90,7 +90,8 @@ def test_range_slice_seconds(self): # GH#6716 didx = pd.date_range(start='2013/01/01 09:00:00', freq='S', periods=4000) - pidx = PeriodIndex(start='2013/01/01 09:00:00', freq='S', periods=4000) + pidx = period_range(start='2013/01/01 09:00:00', freq='S', + periods=4000) for idx in [didx, pidx]: # slices against index should raise IndexError @@ -114,7 +115,7 @@ def test_range_slice_seconds(self): def test_range_slice_outofbounds(self): # GH#5407 didx = pd.date_range(start='2013/10/01', 
freq='D', periods=10) - pidx = PeriodIndex(start='2013/10/01', freq='D', periods=10) + pidx = period_range(start='2013/10/01', freq='D', periods=10) for idx in [didx, pidx]: df = DataFrame(dict(units=[100 + i for i in range(10)]), index=idx) diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index 01fb1e3dc7e02..b140a1f3c5b8b 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -1,18 +1,18 @@ # -*- coding: utf-8 -*- """Tests for PeriodIndex behaving like a vectorized Period scalar""" -from pandas import PeriodIndex, Timedelta, date_range +from pandas import Timedelta, date_range, period_range import pandas.util.testing as tm class TestPeriodIndexOps(object): def test_start_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + index = period_range(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') tm.assert_index_equal(index.start_time, expected_index) def test_end_time(self): - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + index = period_range(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') expected_index += Timedelta(1, 'D') - Timedelta(1, 'ns') tm.assert_index_equal(index.end_time, expected_index) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 15dd4cd520cf8..2eb1bda1d5126 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -20,7 +20,7 @@ class TestPeriodRepresentation(object): """ def _check_freq(self, freq, base_date): - rng = PeriodIndex(start=base_date, periods=10, freq=freq) + rng = period_range(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) tm.assert_numpy_array_equal(rng.asi8, exp) @@ -56,7 +56,7 @@ 
def test_negone_ordinals(self): class TestPeriodIndex(object): def test_to_timestamp(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + index = period_range(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index, name='foo') exp_index = date_range('1/1/2001', end='12/31/2009', freq='A-DEC') @@ -91,7 +91,7 @@ def _get_with_delta(delta, freq='A-DEC'): exp_index = exp_index + Timedelta(1, 's') - Timedelta(1, 'ns') tm.assert_index_equal(result.index, exp_index) - index = PeriodIndex(freq='H', start='1/1/2001', end='1/2/2001') + index = period_range(freq='H', start='1/1/2001', end='1/2/2001') series = Series(1, index=index, name='foo') exp_index = date_range('1/1/2001 00:59:59', end='1/2/2001 00:59:59', @@ -195,8 +195,8 @@ def test_period_dt64_round_trip(self): def test_combine_first(self): # GH#3367 didx = pd.date_range(start='1950-01-31', end='1950-07-31', freq='M') - pidx = pd.PeriodIndex(start=pd.Period('1950-1'), - end=pd.Period('1950-7'), freq='M') + pidx = pd.period_range(start=pd.Period('1950-1'), + end=pd.Period('1950-7'), freq='M') # check to be consistent with DatetimeIndex for idx in [didx, pidx]: a = pd.Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx) @@ -229,7 +229,7 @@ def test_searchsorted(self, freq): class TestPeriodIndexConversion(object): def test_tolist(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + index = period_range(freq='A', start='1/1/2001', end='12/1/2009') rs = index.tolist() for x in rs: assert isinstance(x, Period) @@ -264,8 +264,8 @@ def test_to_timestamp_pi_nat(self): result.to_period(freq='-2A') def test_to_timestamp_preserve_name(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009', - name='foo') + index = period_range(freq='A', start='1/1/2001', end='12/1/2009', + name='foo') assert index.name == 'foo' conv = index.to_timestamp('D') diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 8a408f5613a01..a140a918ac3dd 100644 
--- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -33,8 +33,7 @@ import pandas as pd from pandas import ( Categorical, CategoricalIndex, DataFrame, DatetimeIndex, Index, - IntervalIndex, MultiIndex, Panel, PeriodIndex, RangeIndex, Series, - bdate_range) + IntervalIndex, MultiIndex, Panel, RangeIndex, Series, bdate_range) from pandas.core.algorithms import take_1d from pandas.core.arrays import ( DatetimeArrayMixin as DatetimeArray, ExtensionArray, IntervalArray, @@ -1962,7 +1961,7 @@ def makeTimedeltaIndex(k=10, freq='D', name=None, **kwargs): def makePeriodIndex(k=10, name=None, **kwargs): dt = datetime(2000, 1, 1) - dr = PeriodIndex(start=dt, periods=k, freq='B', name=name, **kwargs) + dr = pd.period_range(start=dt, periods=k, freq='B', name=name, **kwargs) return dr From 5473bf1d43acbb7dd33a56d083e8530a71f2d3c7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 11:57:46 -0600 Subject: [PATCH 2/8] Try to match behavior --- pandas/core/indexes/period.py | 31 +++++++++++++++---- .../tests/indexes/period/test_construction.py | 7 +++++ 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 7d9faf2236162..bae1999a9eb3e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -194,15 +194,31 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if data is None and ordinal is None: # range-based. - data, freq = PeriodArray._generate_range(start, end, periods, - freq, fields) + data, freq2 = PeriodArray._generate_range(start, end, periods, + freq, fields) # PeriodArray._generate range does validate that fields is # empty when really using the range-based constructor. if not fields: - warnings.warn("Creating a PeriodIndex by passing range " - "endpoints is deprecated. Use " - "`pandas.period_range` instead.", - FutureWarning, stacklevel=2) + msg = ("Creating a PeriodIndex by passing range " + "endpoints is deprecated. 
Use " + "`pandas.period_range` instead.") + # period_range differs from PeriodIndex for cases like + # start="2000", periods=4 + # PeriodIndex interprets that as A-DEC freq. + # period_range interprets it as 'D' freq. + cond = ( + freq is None and ( + (start and not isinstance(start, Period)) or + (end and not isinstance(end, Period)) + ) + ) + if cond: + msg += ( + " Note that the default `freq` may differ. Pass " + "'freq=\"{}\"' to ensure the same output." + ).format(freq2.freqstr) + warnings.warn(msg, FutureWarning, stacklevel=2) + freq = freq2 data = PeriodArray(data, freq=freq) else: @@ -1058,6 +1074,9 @@ def period_range(start=None, end=None, periods=None, freq=None, name=None): if com.count_not_none(start, end, periods) != 2: raise ValueError('Of the three parameters: start, end, and periods, ' 'exactly two must be specified') + if freq is None and (not isinstance(start, Period) + and not isinstance(end, Period)): + freq = 'D' data, freq = PeriodArray._generate_range(start, end, periods, freq, fields={}) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index e9786e8ba25ac..09b0b2e6b61ff 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -388,6 +388,13 @@ def test_constructor_range_based_deprecated(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert len(pi) == 9 + def test_constructor_range_based_deprecated_different_freq(self): + with tm.assert_produces_warning(FutureWarning) as m: + PeriodIndex(start='2000', periods=2) + + warning, = m + assert 'freq="A-DEC"' in str(warning.message) + def test_constructor(self): pi = period_range(freq='A', start='1/1/2001', end='12/1/2009') assert len(pi) == 9 From 6b8bc99705ba3734ce7b9c0f25fc9019908f4af3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 12:50:13 -0600 Subject: [PATCH 3/8] revert one --- 
pandas/tests/indexes/period/test_construction.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 09b0b2e6b61ff..767ba130a23ad 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -97,7 +97,7 @@ def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000, dtype=np.int64).repeat(4) quarters = np.tile(np.array([1, 2, 3, 4], dtype=np.int64), 40) - pindex = period_range(year=years, quarter=quarters) + pindex = PeriodIndex(year=years, quarter=quarters) tm.assert_index_equal(pindex.year, pd.Index(years)) tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) From b636edf94edd1632b40fb64d61b17bb598d277a9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 13:34:38 -0600 Subject: [PATCH 4/8] some more --- asv_bench/benchmarks/period.py | 6 +-- doc/source/whatsnew/v0.21.0.rst | 4 +- pandas/core/resample.py | 12 ++--- pandas/plotting/_converter.py | 4 +- pandas/tests/frame/test_join.py | 4 +- pandas/tests/frame/test_period.py | 2 +- pandas/tests/indexes/period/test_period.py | 55 +++++++++++----------- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/indexes/test_base.py | 2 +- pandas/tests/resample/test_period_index.py | 6 +-- pandas/tests/tseries/test_frequencies.py | 3 +- 11 files changed, 50 insertions(+), 50 deletions(-) diff --git a/asv_bench/benchmarks/period.py b/asv_bench/benchmarks/period.py index 1af1ba1fb7b0b..8f341c8b415fe 100644 --- a/asv_bench/benchmarks/period.py +++ b/asv_bench/benchmarks/period.py @@ -1,5 +1,5 @@ -from pandas import (DataFrame, Series, Period, PeriodIndex, date_range, - period_range) +from pandas import ( + DataFrame, Period, PeriodIndex, Series, date_range, period_range) class PeriodProperties(object): @@ -94,7 +94,7 @@ def time_value_counts(self, typ): class Indexing(object): def setup(self): - 
self.index = PeriodIndex(start='1985', periods=1000, freq='D') + self.index = period_range(start='1985', periods=1000, freq='D') self.series = Series(range(1000), index=self.index) self.period = self.index[500] diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 47cd17efe3f75..c62a2ee6ac92d 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -654,7 +654,7 @@ Previous Behavior: .. code-block:: ipython - In [1]: pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10) + In [1]: pi = pd.period_range(start='2000-01-01', freq='D', periods=10) In [2]: s = pd.Series(np.arange(10), index=pi) @@ -674,7 +674,7 @@ New Behavior: .. ipython:: python - pi = pd.PeriodIndex(start='2000-01-01', freq='D', periods=10) + pi = pd.period_range(start='2000-01-01', freq='D', periods=10) s = pd.Series(np.arange(10), index=pi) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 3a5409ff15d1f..d36a3fd6f61bb 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1491,10 +1491,10 @@ def _get_time_period_bins(self, ax): binner = labels = PeriodIndex(data=[], freq=freq, name=ax.name) return binner, [], labels - labels = binner = PeriodIndex(start=ax[0], - end=ax[-1], - freq=freq, - name=ax.name) + labels = binner = pd.period_range(start=ax[0], + end=ax[-1], + freq=freq, + name=ax.name) end_stamps = (labels + freq).asfreq(freq, 's').to_timestamp() if ax.tzinfo: @@ -1543,8 +1543,8 @@ def _get_period_bins(self, ax): bin_shift = start_offset.n % freq_mult start = p_start - labels = binner = PeriodIndex(start=start, end=end, - freq=self.freq, name=ax.name) + labels = binner = pd.period_range(start=start, end=end, + freq=self.freq, name=ax.name) i8 = memb.asi8 diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 3a3ebe7c56f67..8cab00fba3aa8 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -22,7 +22,7 @@ import pandas.core.common as com 
from pandas.core.index import Index from pandas.core.indexes.datetimes import date_range -from pandas.core.indexes.period import Period, PeriodIndex +from pandas.core.indexes.period import Period, PeriodIndex, period_range import pandas.core.tools.datetimes as tools import pandas.tseries.frequencies as frequencies @@ -630,7 +630,7 @@ def _daily_finder(vmin, vmax, freq): (vmin, vmax) = (Period(ordinal=int(vmin), freq=freq), Period(ordinal=int(vmax), freq=freq)) span = vmax.ordinal - vmin.ordinal + 1 - dates_ = PeriodIndex(start=vmin, end=vmax, freq=freq) + dates_ = period_range(start=vmin, end=vmax, freq=freq) # Initialize the output info = np.zeros(span, dtype=[('val', np.int64), ('maj', bool), diff --git a/pandas/tests/frame/test_join.py b/pandas/tests/frame/test_join.py index 1c7f3ed834289..f33e05fd910fc 100644 --- a/pandas/tests/frame/test_join.py +++ b/pandas/tests/frame/test_join.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from pandas import DataFrame, Index, PeriodIndex +from pandas import DataFrame, Index, period_range from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -13,7 +13,7 @@ def frame_with_period_index(): return DataFrame( data=np.arange(20).reshape(4, 5), columns=list('abcde'), - index=PeriodIndex(start='2000', freq='A', periods=4)) + index=period_range(start='2000', freq='A', periods=4)) @pytest.fixture diff --git a/pandas/tests/frame/test_period.py b/pandas/tests/frame/test_period.py index 2a8add1a5de92..231b643a867ad 100644 --- a/pandas/tests/frame/test_period.py +++ b/pandas/tests/frame/test_period.py @@ -44,7 +44,7 @@ def test_frame_setitem(self): def test_frame_to_time_stamp(self): K = 5 - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + index = period_range(freq='A', start='1/1/2001', end='12/1/2009') df = DataFrame(randn(len(index), K), index=index) df['mix'] = 'a' diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 
5d78333016f74..f4ecf893438e0 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -98,7 +98,7 @@ def test_hash_error(self): hash(index) def test_make_time_series(self): - index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + index = period_range(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index) assert isinstance(series, Series) @@ -165,50 +165,50 @@ def test_values(self): tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='A', start='1/1/2001', end='12/1/2009') assert len(pi) == 9 - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='Q', start='1/1/2001', end='12/1/2009') assert len(pi) == 4 * 9 - pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') + pi = period_range(freq='M', start='1/1/2001', end='12/1/2009') assert len(pi) == 12 * 9 start = Period('02-Apr-2005', 'B') - i1 = PeriodIndex(start=start, periods=20) + i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) + i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period('2006-12-31', '1w') - i2 = PeriodIndex(end=end_intv, periods=10) + i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period('2006-12-31', ('w', 1)) - i2 = PeriodIndex(end=end_intv, periods=10) + i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq try: - PeriodIndex(start=start, end=end_intv) + period_range(start=start, end=end_intv) raise AssertionError('Cannot allow mixed freq for start and end') except 
ValueError: pass end_intv = Period('2005-05-01', 'B') - i1 = PeriodIndex(start=start, end=end_intv) + i1 = period_range(start=start, end=end_intv) try: - PeriodIndex(start=start) + period_range(start=start) raise AssertionError( 'Must specify periods if missing start or end') except ValueError: @@ -233,33 +233,33 @@ def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear - pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2005') + pi = period_range(freq='A', start='1/1/2001', end='12/1/2005') self._check_all_fields(pi) - pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2002') + pi = period_range(freq='Q', start='1/1/2001', end='12/1/2002') self._check_all_fields(pi) - pi = PeriodIndex(freq='M', start='1/1/2001', end='1/1/2002') + pi = period_range(freq='M', start='1/1/2001', end='1/1/2002') self._check_all_fields(pi) - pi = PeriodIndex(freq='D', start='12/1/2001', end='6/1/2001') + pi = period_range(freq='D', start='12/1/2001', end='6/1/2001') self._check_all_fields(pi) - pi = PeriodIndex(freq='B', start='12/1/2001', end='6/1/2001') + pi = period_range(freq='B', start='12/1/2001', end='6/1/2001') self._check_all_fields(pi) - pi = PeriodIndex(freq='H', start='12/31/2001', end='1/1/2002 23:00') + pi = period_range(freq='H', start='12/31/2001', end='1/1/2002 23:00') self._check_all_fields(pi) - pi = PeriodIndex(freq='Min', start='12/31/2001', end='1/1/2002 00:20') + pi = period_range(freq='Min', start='12/31/2001', end='1/1/2002 00:20') self._check_all_fields(pi) - pi = PeriodIndex(freq='S', start='12/31/2001 00:00:00', - end='12/31/2001 00:05:00') + pi = period_range(freq='S', start='12/31/2001 00:00:00', + end='12/31/2001 00:05:00') self._check_all_fields(pi) end_intv = Period('2006-12-31', 'W') - i1 = PeriodIndex(end=end_intv, periods=10) + i1 = period_range(end=end_intv, periods=10) self._check_all_fields(i1) def _check_all_fields(self, periodindex): @@ -325,8 +325,8 @@ def 
test_factorize(self): tm.assert_index_equal(idx, exp_idx) def test_is_(self): - create_index = lambda: PeriodIndex(freq='A', start='1/1/2001', - end='12/1/2009') + create_index = lambda: period_range(freq='A', start='1/1/2001', + end='12/1/2009') index = create_index() assert index.is_(index) assert not index.is_(create_index()) @@ -371,13 +371,13 @@ def test_periods_number_check(self): def test_start_time(self): # GH 17157 - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + index = period_range(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') tm.assert_index_equal(index.start_time, expected_index) def test_end_time(self): # GH 17157 - index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') + index = period_range(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='M') expected_index = expected_index.shift(1, freq='D').shift(-1, freq='ns') tm.assert_index_equal(index.end_time, expected_index) @@ -457,7 +457,8 @@ def test_numpy_repeat(self): np.repeat(index, 2, axis=1) def test_pindex_multiples(self): - pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') + with tm.assert_produces_warning(FutureWarning): + pi = PeriodIndex(start='1/1/11', end='12/31/11', freq='2M') expected = PeriodIndex(['2011-01', '2011-03', '2011-05', '2011-07', '2011-09', '2011-11'], freq='2M') tm.assert_index_equal(pi, expected) @@ -475,7 +476,7 @@ def test_pindex_multiples(self): assert pi.freqstr == '2M' def test_iteration(self): - index = PeriodIndex(start='1/1/10', periods=4, freq='B') + index = period_range(start='1/1/10', periods=4, freq='B') result = list(index) assert isinstance(result[0], Period) diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 2eb1bda1d5126..641400ebec925 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ 
b/pandas/tests/indexes/period/test_tools.py @@ -297,7 +297,7 @@ def test_to_timestamp_pi_mult(self): tm.assert_index_equal(result, expected) def test_to_timestamp_pi_combined(self): - idx = PeriodIndex(start='2011', periods=2, freq='1D1H', name='idx') + idx = period_range(start='2011', periods=2, freq='1D1H', name='idx') result = idx.to_timestamp() expected = DatetimeIndex(['2011-01-01 00:00', '2011-01-02 01:00'], diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 2580a47e8fdd3..8d6207eac39b7 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1794,7 +1794,7 @@ def test_str_attribute(self, method): @pytest.mark.parametrize("index", [ Index(range(5)), tm.makeDateIndex(10), MultiIndex.from_tuples([('foo', '1'), ('bar', '3')]), - PeriodIndex(start='2000', end='2010', freq='A')]) + period_range(start='2000', end='2010', freq='A')]) def test_str_attribute_raises(self, index): with pytest.raises(AttributeError, match='only use .str accessor'): index.str.repeat(2) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 0b393437a3072..60cf613a5f2c6 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -660,14 +660,14 @@ def test_loffset_returns_datetimeindex(self, frame, kind, agg_arg): @pytest.mark.parametrize('kind', [None, 'period']) def test_upsampling_ohlc(self, freq, period_mult, kind): # GH 13083 - pi = PeriodIndex(start='2000', freq='D', periods=10) + pi = period_range(start='2000', freq='D', periods=10) s = Series(range(len(pi)), index=pi) expected = s.to_timestamp().resample(freq).ohlc().to_period(freq) # timestamp-based resampling doesn't include all sub-periods # of the last original period, so extend accordingly: - new_index = PeriodIndex(start='2000', freq=freq, - periods=period_mult * len(pi)) + new_index = period_range(start='2000', freq=freq, + periods=period_mult * len(pi)) 
expected = expected.reindex(new_index) result = s.resample(freq, kind=kind).ohlc() assert_frame_equal(result, expected) diff --git a/pandas/tests/tseries/test_frequencies.py b/pandas/tests/tseries/test_frequencies.py index d2ca70795be80..c0a3e998dc2e0 100644 --- a/pandas/tests/tseries/test_frequencies.py +++ b/pandas/tests/tseries/test_frequencies.py @@ -13,7 +13,6 @@ from pandas import ( DatetimeIndex, Index, Series, Timedelta, Timestamp, date_range, period_range) -from pandas.core.indexes.period import PeriodIndex from pandas.core.tools.datetimes import to_datetime import pandas.util.testing as tm @@ -468,7 +467,7 @@ def test_frequency_misc(self): class TestFrequencyInference(object): def test_raise_if_period_index(self): - index = PeriodIndex(start="1/1/1990", periods=20, freq="M") + index = period_range(start="1/1/1990", periods=20, freq="M") pytest.raises(TypeError, frequencies.infer_freq, index) def test_raise_if_too_few(self): From f566b46390e8f87f0819801aeb9be52047f10e8f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 16:39:04 -0600 Subject: [PATCH 5/8] Fixed warnings in asv files --- asv_bench/benchmarks/join_merge.py | 2 +- asv_bench/benchmarks/panel_ctor.py | 12 ++++++------ asv_bench/benchmarks/reindex.py | 4 ++-- asv_bench/benchmarks/timedelta.py | 9 +++++---- asv_bench/benchmarks/timestamp.py | 7 ++++--- 5 files changed, 18 insertions(+), 16 deletions(-) diff --git a/asv_bench/benchmarks/join_merge.py b/asv_bench/benchmarks/join_merge.py index 88a59fea375ea..a1cdb00260fc4 100644 --- a/asv_bench/benchmarks/join_merge.py +++ b/asv_bench/benchmarks/join_merge.py @@ -50,7 +50,7 @@ def setup(self, axis): self.empty_right = [df, DataFrame()] def time_concat_series(self, axis): - concat(self.series, axis=axis) + concat(self.series, axis=axis, sort=False) def time_concat_small_frames(self, axis): concat(self.small_frames, axis=axis) diff --git a/asv_bench/benchmarks/panel_ctor.py b/asv_bench/benchmarks/panel_ctor.py index 
47b3ad612f9b1..627705284481b 100644 --- a/asv_bench/benchmarks/panel_ctor.py +++ b/asv_bench/benchmarks/panel_ctor.py @@ -1,7 +1,7 @@ import warnings from datetime import datetime, timedelta -from pandas import DataFrame, Panel, DatetimeIndex, date_range +from pandas import DataFrame, Panel, date_range class DifferentIndexes(object): @@ -23,9 +23,9 @@ def time_from_dict(self): class SameIndexes(object): def setup(self): - idx = DatetimeIndex(start=datetime(1990, 1, 1), - end=datetime(2012, 1, 1), - freq='D') + idx = date_range(start=datetime(1990, 1, 1), + end=datetime(2012, 1, 1), + freq='D') df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) self.data_frames = dict(enumerate([df] * 100)) @@ -40,10 +40,10 @@ def setup(self): start = datetime(1990, 1, 1) end = datetime(2012, 1, 1) df1 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + index=date_range(start=start, end=end, freq='D')) end += timedelta(days=1) df2 = DataFrame({'a': 0, 'b': 1, 'c': 2}, - index=DatetimeIndex(start=start, end=end, freq='D')) + index=date_range(start=start, end=end, freq='D')) dfs = [df1] * 50 + [df2] * 50 self.data_frames = dict(enumerate(dfs)) diff --git a/asv_bench/benchmarks/reindex.py b/asv_bench/benchmarks/reindex.py index 576dc495eb984..fb47fa81d8dfd 100644 --- a/asv_bench/benchmarks/reindex.py +++ b/asv_bench/benchmarks/reindex.py @@ -1,6 +1,6 @@ import numpy as np import pandas.util.testing as tm -from pandas import (DataFrame, Series, DatetimeIndex, MultiIndex, Index, +from pandas import (DataFrame, Series, MultiIndex, Index, date_range) from .pandas_vb_common import lib @@ -8,7 +8,7 @@ class Reindex(object): def setup(self): - rng = DatetimeIndex(start='1/1/1970', periods=10000, freq='1min') + rng = date_range(start='1/1/1970', periods=10000, freq='1min') self.df = DataFrame(np.random.rand(10000, 10), index=rng, columns=range(10)) self.df['foo'] = 'bar' diff --git a/asv_bench/benchmarks/timedelta.py 
b/asv_bench/benchmarks/timedelta.py index 7ee73fb7ac7b6..0cfbbd536bc8b 100644 --- a/asv_bench/benchmarks/timedelta.py +++ b/asv_bench/benchmarks/timedelta.py @@ -1,8 +1,9 @@ import datetime import numpy as np -from pandas import Series, timedelta_range, to_timedelta, Timestamp, \ - Timedelta, TimedeltaIndex, DataFrame + +from pandas import ( + DataFrame, Series, Timedelta, Timestamp, timedelta_range, to_timedelta) class TimedeltaConstructor(object): @@ -122,8 +123,8 @@ def time_timedelta_nanoseconds(self, series): class TimedeltaIndexing(object): def setup(self): - self.index = TimedeltaIndex(start='1985', periods=1000, freq='D') - self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D') + self.index = timedelta_range(start='1985', periods=1000, freq='D') + self.index2 = timedelta_range(start='1986', periods=1000, freq='D') self.series = Series(range(1000), index=self.index) self.timedelta = self.index[500] diff --git a/asv_bench/benchmarks/timestamp.py b/asv_bench/benchmarks/timestamp.py index 64f46fe378e53..4c1d6e8533408 100644 --- a/asv_bench/benchmarks/timestamp.py +++ b/asv_bench/benchmarks/timestamp.py @@ -1,8 +1,9 @@ import datetime -from pandas import Timestamp -import pytz import dateutil +import pytz + +from pandas import Timestamp class TimestampConstruction(object): @@ -46,7 +47,7 @@ def time_dayofweek(self, tz, freq): self.ts.dayofweek def time_weekday_name(self, tz, freq): - self.ts.weekday_name + self.ts.day_name def time_dayofyear(self, tz, freq): self.ts.dayofyear From eb219acf481d5c0a9f4d7fe745a886d81e57ccc4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 19 Dec 2018 21:21:12 -0600 Subject: [PATCH 6/8] avoid series constructor --- pandas/core/reshape/reshape.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8319a8cc5417c..9a9693bfd4c9e 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -853,6 +853,7 
@@ def check_len(item, name): def _get_dummies_1d(data, prefix, prefix_sep='_', dummy_na=False, sparse=False, drop_first=False, dtype=None): + from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling codes, levels = _factorize_from_iterable(Series(data)) @@ -909,7 +910,7 @@ def _make_col_name(prefix, prefix_sep, level): index = None if sparse: - sparse_series = {} + sparse_series = [] N = len(data) sp_indices = [[] for _ in range(len(dummy_cols))] mask = codes != -1 @@ -928,10 +929,9 @@ def _make_col_name(prefix, prefix_sep, level): sarr = SparseArray(np.ones(len(ixs), dtype=dtype), sparse_index=IntIndex(N, ixs), fill_value=0, dtype=dtype) - sparse_series[col] = Series(data=sarr, index=index) + sparse_series.append(Series(data=sarr, index=index, name=col)) - out = DataFrame(sparse_series, index=index, columns=dummy_cols, - dtype=dtype) + out = concat(sparse_series, axis=1, copy=False) return out else: From 2173f1a44a0d5e292062dfd02dfb572171f2a95e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 20 Dec 2018 12:07:54 -0600 Subject: [PATCH 7/8] Squashed commit of the following: commit 6a65cbc6c3b640bf1ec489d587d65c772160093b Author: Tom Augspurger Date: Thu Dec 20 07:21:34 2018 -0600 SparseSeries unstack commit b253674bbdf2771221af82b5637fee63efa7f8d0 Author: Tom Augspurger Date: Thu Dec 20 06:40:44 2018 -0600 BUG: Fix concat(Series[sparse], axis=1) * Preserve sparsity * Preserve fill value commit ae026b227b56f1ad73fcccc6a20b62a7cc069c13 Author: Tom Augspurger Date: Wed Dec 19 21:21:12 2018 -0600 avoid series constructor (cherry picked from commit eb219acf481d5c0a9f4d7fe745a886d81e57ccc4) commit aa08a6de4d32d2338a6850118fb6764e6deab1e1 Author: Tom Augspurger Date: Wed Dec 19 16:39:04 2018 -0600 Fixed warnings in asv files (cherry picked from commit f566b46390e8f87f0819801aeb9be52047f10e8f) --- doc/source/whatsnew/v0.24.0.rst | 4 +++- pandas/core/dtypes/concat.py | 8 ++++---- pandas/core/reshape/reshape.py | 15 ++++++++++++--- 
pandas/tests/sparse/test_reshape.py | 6 +++++- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index b285427b2398d..ce77b1e4210d8 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -647,7 +647,7 @@ changes were made: * The ``out`` and ``mode`` parameters are now longer accepted (previously, this raised if they were specified). * Passing a scalar for ``indices`` is no longer allowed. -- The result of concatenating a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. +- The result of :func:`concat` with a mix of sparse and dense Series is a Series with sparse values, rather than a ``SparseSeries``. - ``SparseDataFrame.combine`` and ``DataFrame.combine_first`` no longer supports combining a sparse column with a dense column while preserving the sparse subtype. The result will be an object-dtype SparseArray. - Setting :attr:`SparseArray.fill_value` to a fill value with a different dtype is now allowed. - ``DataFrame[column]`` is now a :class:`Series` with sparse values, rather than a :class:`SparseSeries`, when slicing a single column with sparse values (:issue:`23559`). @@ -1104,6 +1104,7 @@ Other API Changes - The order of the arguments of :func:`DataFrame.to_html` and :func:`DataFrame.to_string` is rearranged to be consistent with each other. (:issue:`23614`) - :meth:`CategoricalIndex.reindex` now raises a ``ValueError`` if the target index is non-unique and not equal to the current index. It previously only raised if the target index was not of a categorical dtype (:issue:`23963`). - :func:`Series.to_list` and :func:`Index.to_list` are now aliases of ``Series.tolist`` respectively ``Index.tolist`` (:issue:`8826`) +- The result of ``SparseSeries.unstack`` is now a :class:`DataFrame` with sparse values, rather than a :class:`SparseDataFrame` (:issue:`24372`). ..
_whatsnew_0240.deprecations: @@ -1616,6 +1617,7 @@ Sparse - Bug in :meth:`SparseArary.unique` not returning the unique values (:issue:`19595`) - Bug in :meth:`SparseArray.nonzero` and :meth:`SparseDataFrame.dropna` returning shifted/incorrect results (:issue:`21172`) - Bug in :meth:`DataFrame.apply` where dtypes would lose sparseness (:issue:`23744`) +- Bug in :func:`concat` when concatenating a list of :class:`Series` with all-sparse values changing the ``fill_value`` and converting to a dense Series (:issue:`24371`) Style ^^^^^ diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 0df0c01dbd47a..a90cfa4e4c906 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -66,19 +66,19 @@ def _get_series_result_type(result, objs=None): return appropriate class of Series concat input is either dict or array-like """ + from pandas import SparseSeries, SparseDataFrame, DataFrame + # concat Series with axis 1 if isinstance(result, dict): # concat Series with axis 1 - if all(is_sparse(c) for c in compat.itervalues(result)): - from pandas.core.sparse.api import SparseDataFrame + if all(isinstance(c, (SparseSeries, SparseDataFrame)) + for c in compat.itervalues(result)): return SparseDataFrame else: - from pandas.core.frame import DataFrame return DataFrame # otherwise it is a SingleBlockManager (axis = 0) if result._block.is_sparse: - from pandas.core.sparse.api import SparseSeries return SparseSeries else: return objs[0]._constructor diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 9a9693bfd4c9e..713a4b19c1fd5 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -11,8 +11,8 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( - ensure_platform_int, is_bool_dtype, is_extension_array_dtype, is_list_like, - is_object_dtype, needs_i8_conversion) + ensure_platform_int, is_bool_dtype, is_extension_array_dtype, + is_integer_dtype, 
is_list_like, is_object_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import notna from pandas import compat @@ -910,6 +910,14 @@ def _make_col_name(prefix, prefix_sep, level): index = None if sparse: + + if is_integer_dtype(dtype): + fill_value = 0 + elif dtype == bool: + fill_value = False + else: + fill_value = 0.0 + sparse_series = [] N = len(data) sp_indices = [[] for _ in range(len(dummy_cols))] @@ -927,7 +935,8 @@ def _make_col_name(prefix, prefix_sep, level): dummy_cols = dummy_cols[1:] for col, ixs in zip(dummy_cols, sp_indices): sarr = SparseArray(np.ones(len(ixs), dtype=dtype), - sparse_index=IntIndex(N, ixs), fill_value=0, + sparse_index=IntIndex(N, ixs), + fill_value=fill_value, dtype=dtype) sparse_series.append(Series(data=sarr, index=index, name=col)) diff --git a/pandas/tests/sparse/test_reshape.py b/pandas/tests/sparse/test_reshape.py index b492c47375bcf..d4ba672607982 100644 --- a/pandas/tests/sparse/test_reshape.py +++ b/pandas/tests/sparse/test_reshape.py @@ -35,4 +35,8 @@ def test_sparse_frame_unstack(sparse_df): def test_sparse_series_unstack(sparse_df, multi_index3): frame = pd.SparseSeries(np.ones(3), index=multi_index3).unstack() - tm.assert_sp_frame_equal(frame, sparse_df) + + arr = np.array([1, np.nan, np.nan]) + arrays = {i: pd.SparseArray(np.roll(arr, i)) for i in range(3)} + expected = pd.DataFrame(arrays) + tm.assert_frame_equal(frame, expected) From 4d0fce26a87f7c6d74b77743fbcf96dd9c3cc338 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 28 Dec 2018 07:35:10 -0600 Subject: [PATCH 8/8] parametrize --- .../tests/indexes/period/test_construction.py | 47 +++++++++---------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 767ba130a23ad..916260c4cee7e 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -321,34 +321,31 @@ def 
test_constructor_year_and_quarter(self): p = PeriodIndex(lops) tm.assert_index_equal(p, idx) - def test_constructor_freq_mult(self): + @pytest.mark.parametrize('func, warning', [ + (PeriodIndex, FutureWarning), + (period_range, None) + ]) + def test_constructor_freq_mult(self, func, warning): # GH #7811 - for func in [PeriodIndex, period_range]: - - if func is PeriodIndex: - warning = FutureWarning - else: - warning = None - - with tm.assert_produces_warning(warning): - # must be the same, but for sure... - pidx = func(start='2014-01', freq='2M', periods=4) - expected = PeriodIndex(['2014-01', '2014-03', - '2014-05', '2014-07'], freq='2M') - tm.assert_index_equal(pidx, expected) + with tm.assert_produces_warning(warning): + # must be the same, but for sure... + pidx = func(start='2014-01', freq='2M', periods=4) + expected = PeriodIndex(['2014-01', '2014-03', + '2014-05', '2014-07'], freq='2M') + tm.assert_index_equal(pidx, expected) - with tm.assert_produces_warning(warning): - pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') - expected = PeriodIndex(['2014-01-02', '2014-01-05', - '2014-01-08', '2014-01-11', - '2014-01-14'], freq='3D') - tm.assert_index_equal(pidx, expected) + with tm.assert_produces_warning(warning): + pidx = func(start='2014-01-02', end='2014-01-15', freq='3D') + expected = PeriodIndex(['2014-01-02', '2014-01-05', + '2014-01-08', '2014-01-11', + '2014-01-14'], freq='3D') + tm.assert_index_equal(pidx, expected) - with tm.assert_produces_warning(warning): - pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) - expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', - '2014-01-01 17:00'], freq='4H') - tm.assert_index_equal(pidx, expected) + with tm.assert_produces_warning(warning): + pidx = func(end='2014-01-01 17:00', freq='4H', periods=3) + expected = PeriodIndex(['2014-01-01 09:00', '2014-01-01 13:00', + '2014-01-01 17:00'], freq='4H') + tm.assert_index_equal(pidx, expected) msg = ('Frequency must be positive, because 
it' ' represents span: -1M')