Skip to content

Commit 06a4f36

Browse files
committed
ENH: PeriodIndex now has period dtype
1 parent 6b7857b commit 06a4f36

17 files changed

+563
-85
lines changed

doc/source/timeseries.rst

+31
Original file line numberDiff line numberDiff line change
@@ -1594,6 +1594,8 @@ objects:
15941594
idx
15951595
idx + MonthEnd(3)
15961596
1597+
``PeriodIndex`` has its own dtype named ``period``; refer to :ref:`Period Dtypes <timeseries.period_dtype>`.
1598+
15971599
PeriodIndex Partial String Indexing
15981600
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
15991601

@@ -2047,3 +2049,32 @@ a convert on an aware stamp.
20472049
.. ipython:: python
20482050
20492051
pd.Series(s_aware.values).dt.tz_localize('UTC').dt.tz_convert('US/Eastern')
2052+
2053+
.. _timeseries.period_dtype:
2054+
2055+
Period Dtypes
2056+
~~~~~~~~~~~~~
2057+
2058+
.. versionadded:: 0.19.0
2059+
2060+
``PeriodIndex`` has its own ``period`` dtype. ``period`` dtype is a
2061+
pandas extension dtype, like the timezone-aware dtype ``datetime64[ns, tz]``.
2062+
2063+
``period`` dtype is represented as ``period[freq]``, where ``freq`` is one of the :ref:`frequency strings <timeseries.offset_aliases>`.
2064+
2065+
.. ipython:: python
2066+
2067+
pi = pd.period_range('2016-01-01', periods=3, freq='M')
2068+
pi
2069+
2070+
``period`` dtype can be used in ``.astype(...)``. It allows changing the ``freq`` of a ``PeriodIndex``, like ``.asfreq()``, and converting a ``DatetimeIndex`` to a ``PeriodIndex``, like ``to_period()``.
2071+
2072+
.. ipython:: python
2073+
2074+
# change monthly freq to daily freq
2075+
pi.astype('period[D]')
2076+
2077+
# convert to PeriodIndex
2078+
dti = pd.date_range('2011-01-01', freq='M', periods=3)
2079+
dti
2080+
dti.astype('period[M]')

doc/source/whatsnew/v0.19.0.txt

+32
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,38 @@ Furthermore:
628628
- Passing duplicated ``percentiles`` will now raise a ``ValueError``.
629629
- Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`)
630630

631+
.. _whatsnew_0190.api.perioddtype:
632+
633+
``PeriodIndex`` now has ``period`` dtype
634+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
635+
636+
``PeriodIndex`` now has its own ``period`` dtype. ``period`` dtype is a
637+
pandas extension dtype that, like ``"category"``, extends the NumPy dtypes. (:issue:`13941`)
638+
639+
Previous Behavior:
640+
641+
.. code-block:: ipython
642+
643+
In [1]: pi = pd.PeriodIndex(['2016-08-01'], freq='D')
644+
In [2]: pi
645+
Out[2]: PeriodIndex(['2016-08-01'], dtype='int64', freq='D')
646+
In [3]: pd.api.types.is_integer_dtype(pi)
647+
Out[3]: True
648+
In [4]: pi.dtype
649+
Out[4]: dtype('int64')
650+
651+
New Behavior:
652+
653+
.. ipython:: python
654+
655+
pi = pd.PeriodIndex(['2016-08-01'], freq='D')
656+
pi
657+
pd.api.types.is_integer_dtype(pi)
658+
pd.api.types.is_period_dtype(pi)
659+
660+
pi.dtype
661+
type(pi.dtype)
662+
631663
.. _whatsnew_0190.api.periodnat:
632664

633665
``Period('NaT')`` now returns ``pd.NaT``

pandas/api/tests/test_api.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -151,8 +151,9 @@ class TestTypes(Base, tm.TestCase):
151151
'is_floating_dtype', 'is_int64_dtype', 'is_integer',
152152
'is_integer_dtype', 'is_number', 'is_numeric_dtype',
153153
'is_object_dtype', 'is_scalar', 'is_sparse',
154-
'is_string_dtype', 'is_timedelta64_dtype',
155-
'is_timedelta64_ns_dtype',
154+
'is_string_dtype',
155+
'is_timedelta64_dtype', 'is_timedelta64_ns_dtype',
156+
'is_period', 'is_period_dtype',
156157
'is_re', 'is_re_compilable',
157158
'is_dict_like', 'is_iterator',
158159
'is_list_like', 'is_hashable',

pandas/core/algorithms.py

+13-19
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,14 @@
88

99
from pandas import compat, lib, tslib, _np_version_under1p8
1010
from pandas.types.cast import _maybe_promote
11-
from pandas.types.generic import (ABCSeries, ABCIndex, ABCPeriodIndex,
12-
ABCDatetimeIndex)
11+
from pandas.types.generic import ABCSeries, ABCIndex
1312
from pandas.types.common import (is_integer_dtype,
1413
is_int64_dtype,
1514
is_categorical_dtype,
1615
is_extension_type,
1716
is_datetimetz,
17+
is_period_dtype,
1818
is_period_arraylike,
19-
is_datetime_or_timedelta_dtype,
2019
is_float_dtype,
2120
needs_i8_conversion,
2221
is_categorical,
@@ -395,20 +394,21 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
395394

396395
def _value_counts_arraylike(values, dropna=True):
397396
is_datetimetz_type = is_datetimetz(values)
398-
is_period = (isinstance(values, ABCPeriodIndex) or
399-
is_period_arraylike(values))
397+
is_period_type = (is_period_dtype(values) or
398+
is_period_arraylike(values))
400399

401400
orig = values
402401

403402
from pandas.core.series import Series
404403
values = Series(values).values
405404
dtype = values.dtype
406405

407-
if is_datetime_or_timedelta_dtype(dtype) or is_period:
408-
from pandas.tseries.index import DatetimeIndex
406+
if needs_i8_conversion(dtype) or is_period_type:
407+
409408
from pandas.tseries.period import PeriodIndex
410409

411-
if is_period:
410+
if is_period_type:
411+
# values may be an object
412412
values = PeriodIndex(values)
413413
freq = values.freq
414414

@@ -424,12 +424,9 @@ def _value_counts_arraylike(values, dropna=True):
424424

425425
# dtype handling
426426
if is_datetimetz_type:
427-
if isinstance(orig, ABCDatetimeIndex):
428-
tz = orig.tz
429-
else:
430-
tz = orig.dt.tz
431-
keys = DatetimeIndex._simple_new(keys, tz=tz)
432-
if is_period:
427+
from pandas.tseries.index import DatetimeIndex
428+
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
429+
if is_period_type:
433430
keys = PeriodIndex._simple_new(keys, freq=freq)
434431

435432
elif is_integer_dtype(dtype):
@@ -472,11 +469,8 @@ def duplicated(values, keep='first'):
472469
dtype = values.dtype
473470

474471
# no need to revert to original type
475-
if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
476-
if isinstance(values, (ABCSeries, ABCIndex)):
477-
values = values.values.view(np.int64)
478-
else:
479-
values = values.view(np.int64)
472+
if needs_i8_conversion(dtype):
473+
values = values.view(np.int64)
480474
elif is_period_arraylike(values):
481475
from pandas.tseries.period import PeriodIndex
482476
values = PeriodIndex(values).asi8

pandas/tests/indexes/common.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -149,10 +149,7 @@ def test_dtype_str(self):
149149
for idx in self.indices.values():
150150
dtype = idx.dtype_str
151151
self.assertIsInstance(dtype, compat.string_types)
152-
if isinstance(idx, PeriodIndex):
153-
self.assertEqual(dtype, 'period')
154-
else:
155-
self.assertEqual(dtype, str(idx.dtype))
152+
self.assertEqual(dtype, str(idx.dtype))
156153

157154
def test_repr_max_seq_item_setting(self):
158155
# GH10182

pandas/tests/indexes/test_base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,8 @@ def test_constructor_from_series(self):
149149

150150
expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'),
151151
Timestamp('20130101')])
152-
s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp(
153-
'20130101')])
152+
s = Series([Timestamp('20110101'), Timestamp('20120101'),
153+
Timestamp('20130101')])
154154
result = Index(s)
155155
self.assert_index_equal(result, expected)
156156
result = DatetimeIndex(s)

pandas/tests/test_categorical.py

+18-18
Original file line numberDiff line numberDiff line change
@@ -2276,57 +2276,57 @@ def test_categorical_repr_period(self):
22762276
idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5)
22772277
c = pd.Categorical(idx)
22782278
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
2279-
Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2280-
2011-01-01 13:00]"""
2279+
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2280+
2011-01-01 13:00]"""
22812281

22822282
self.assertEqual(repr(c), exp)
22832283

22842284
c = pd.Categorical(idx.append(idx), categories=idx)
22852285
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
2286-
Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2287-
2011-01-01 13:00]"""
2286+
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2287+
2011-01-01 13:00]"""
22882288

22892289
self.assertEqual(repr(c), exp)
22902290

22912291
idx = pd.period_range('2011-01', freq='M', periods=5)
22922292
c = pd.Categorical(idx)
22932293
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
2294-
Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
2294+
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
22952295

22962296
self.assertEqual(repr(c), exp)
22972297

22982298
c = pd.Categorical(idx.append(idx), categories=idx)
22992299
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
2300-
Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
2300+
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
23012301

23022302
self.assertEqual(repr(c), exp)
23032303

23042304
def test_categorical_repr_period_ordered(self):
23052305
idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5)
23062306
c = pd.Categorical(idx, ordered=True)
23072307
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
2308-
Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2309-
2011-01-01 13:00]"""
2308+
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2309+
2011-01-01 13:00]"""
23102310

23112311
self.assertEqual(repr(c), exp)
23122312

23132313
c = pd.Categorical(idx.append(idx), categories=idx, ordered=True)
23142314
exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
2315-
Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2316-
2011-01-01 13:00]"""
2315+
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2316+
2011-01-01 13:00]"""
23172317

23182318
self.assertEqual(repr(c), exp)
23192319

23202320
idx = pd.period_range('2011-01', freq='M', periods=5)
23212321
c = pd.Categorical(idx, ordered=True)
23222322
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
2323-
Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
2323+
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
23242324

23252325
self.assertEqual(repr(c), exp)
23262326

23272327
c = pd.Categorical(idx.append(idx), categories=idx, ordered=True)
23282328
exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
2329-
Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
2329+
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
23302330

23312331
self.assertEqual(repr(c), exp)
23322332

@@ -2515,8 +2515,8 @@ def test_categorical_series_repr_period(self):
25152515
3 2011-01-01 12:00
25162516
4 2011-01-01 13:00
25172517
dtype: category
2518-
Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2519-
2011-01-01 13:00]"""
2518+
Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
2519+
2011-01-01 13:00]"""
25202520

25212521
self.assertEqual(repr(s), exp)
25222522

@@ -2528,7 +2528,7 @@ def test_categorical_series_repr_period(self):
25282528
3 2011-04
25292529
4 2011-05
25302530
dtype: category
2531-
Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
2531+
Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
25322532

25332533
self.assertEqual(repr(s), exp)
25342534

@@ -2541,8 +2541,8 @@ def test_categorical_series_repr_period_ordered(self):
25412541
3 2011-01-01 12:00
25422542
4 2011-01-01 13:00
25432543
dtype: category
2544-
Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2545-
2011-01-01 13:00]"""
2544+
Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
2545+
2011-01-01 13:00]"""
25462546

25472547
self.assertEqual(repr(s), exp)
25482548

@@ -2554,7 +2554,7 @@ def test_categorical_series_repr_period_ordered(self):
25542554
3 2011-04
25552555
4 2011-05
25562556
dtype: category
2557-
Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
2557+
Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
25582558

25592559
self.assertEqual(repr(s), exp)
25602560

pandas/tests/types/test_cast.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
_maybe_convert_scalar,
1919
_find_common_type)
2020
from pandas.types.dtypes import (CategoricalDtype,
21-
DatetimeTZDtype)
21+
DatetimeTZDtype, PeriodDtype)
2222
from pandas.util import testing as tm
2323

2424
_multiprocess_can_split_ = True
@@ -241,12 +241,13 @@ def test_numpy_dtypes(self):
241241
# empty
242242
_find_common_type([])
243243

244-
def test_pandas_dtypes(self):
244+
def test_categorical_dtype(self):
245245
dtype = CategoricalDtype()
246246
self.assertEqual(_find_common_type([dtype]), 'category')
247247
self.assertEqual(_find_common_type([dtype, dtype]), 'category')
248248
self.assertEqual(_find_common_type([np.object, dtype]), np.object)
249249

250+
def test_datetimetz_dtype(self):
250251
dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
251252
self.assertEqual(_find_common_type([dtype, dtype]),
252253
'datetime64[ns, US/Eastern]')
@@ -256,6 +257,16 @@ def test_pandas_dtypes(self):
256257
self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
257258
self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
258259

260+
def test_period_dtype(self):
261+
dtype = PeriodDtype(freq='D')
262+
self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')
263+
264+
for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
265+
PeriodDtype(freq='2D'), PeriodDtype(freq='H'),
266+
np.dtype('datetime64[ns]'), np.object, np.int64]:
267+
self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
268+
self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
269+
259270

260271
if __name__ == '__main__':
261272
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],

pandas/tests/types/test_common.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import nose
44
import numpy as np
55

6-
from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype
7-
from pandas.types.common import pandas_dtype
6+
from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype
7+
from pandas.types.common import pandas_dtype, is_dtype_equal
88

99
_multiprocess_can_split_ = True
1010

@@ -17,6 +17,23 @@ def test_pandas_dtype():
1717
for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
1818
assert pandas_dtype(dtype) == np.dtype(dtype)
1919

20+
21+
def test_dtype_equal():
22+
assert is_dtype_equal(np.int64, np.int64)
23+
assert not is_dtype_equal(np.int64, np.float64)
24+
25+
p1 = PeriodDtype('D')
26+
p2 = PeriodDtype('D')
27+
assert is_dtype_equal(p1, p2)
28+
assert not is_dtype_equal(np.int64, p1)
29+
30+
p3 = PeriodDtype('2D')
31+
assert not is_dtype_equal(p1, p3)
32+
33+
assert not DatetimeTZDtype.is_dtype(np.int64)
34+
assert not PeriodDtype.is_dtype(np.int64)
35+
36+
2037
if __name__ == '__main__':
2138
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
2239
exit=False)

0 commit comments

Comments
 (0)