pandas-dev · sinhrks · Aug 6, 2016 · Aug 15, 2016 · Aug 16, 2016 · jreback
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
@@ -1594,6 +1594,47 @@ objects:
    idx
    idx + MonthEnd(3)
 
+``PeriodIndex`` has its own dtype named ``period``; refer to :ref:`Period Dtypes <timeseries.period_dtype>` for details.
+
+.. _timeseries.period_dtype:
+
+Period Dtypes
+~~~~~~~~~~~~~
+
+.. versionadded:: 0.19.0
+
+``PeriodIndex`` has a custom ``period`` dtype. This is a pandas extension
+dtype similar to the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``).
+
+The ``period`` dtype holds the ``freq`` attribute and is represented with
+``period[freq]`` like ``period[D]`` or ``period[M]``, using :ref:`frequency strings <timeseries.offset_aliases>`.
+
+.. ipython:: python
+
+   pi = pd.period_range('2016-01-01', periods=3, freq='M')
+   pi
+   pi.dtype
+
+The ``period`` dtype can be used in ``.astype(...)``. It allows one to change the
+``freq`` of a ``PeriodIndex`` in the same way as ``.asfreq()``, and to convert a
+``DatetimeIndex`` to a ``PeriodIndex`` in the same way as ``.to_period()``:
+
+.. ipython:: python
+
+   # change monthly freq to daily freq
+   pi.astype('period[D]')
+
+   # convert to DatetimeIndex
+   pi.astype('datetime64[ns]')
+
+   # convert to PeriodIndex
+   dti = pd.date_range('2011-01-01', freq='M', periods=3)
+   dti
+   dti.astype('period[M]')
+
+
 PeriodIndex Partial String Indexing
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -628,6 +628,41 @@ Furthermore:
 - Passing duplicated ``percentiles`` will now raise a ``ValueError``.
 - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`)
 
+.. _whatsnew_0190.api.perioddtype:
+
+``PeriodIndex`` now has ``period`` dtype
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a
+pandas extension dtype like ``category`` or the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``) (:issue:`13941`).
+As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [1]: pi = pd.PeriodIndex(['2016-08-01'], freq='D')
+
+   In [2]: pi
+   Out[2]: PeriodIndex(['2016-08-01'], dtype='int64', freq='D')
+
+   In [3]: pd.api.types.is_integer_dtype(pi)
+   Out[3]: True
+
+   In [4]: pi.dtype
+   Out[4]: dtype('int64')
+
+New Behavior:
+
+.. ipython:: python
+
+   pi = pd.PeriodIndex(['2016-08-01'], freq='D')
+   pi
+   pd.api.types.is_integer_dtype(pi)
+   pd.api.types.is_period_dtype(pi)
+   pi.dtype
+   type(pi.dtype)
+
 .. _whatsnew_0190.api.periodnat:
 
 ``Period('NaT')`` now returns ``pd.NaT``

diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py
@@ -151,8 +151,9 @@ class TestTypes(Base, tm.TestCase):
                'is_floating_dtype', 'is_int64_dtype', 'is_integer',
                'is_integer_dtype', 'is_number', 'is_numeric_dtype',
                'is_object_dtype', 'is_scalar', 'is_sparse',
-               'is_string_dtype', 'is_timedelta64_dtype',
-               'is_timedelta64_ns_dtype',
+               'is_string_dtype',
+               'is_timedelta64_dtype', 'is_timedelta64_ns_dtype',
+               'is_period', 'is_period_dtype',
                'is_re', 'is_re_compilable',
                'is_dict_like', 'is_iterator',
                'is_list_like', 'is_hashable',

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -8,15 +8,14 @@
 
 from pandas import compat, lib, tslib, _np_version_under1p8
 from pandas.types.cast import _maybe_promote
-from pandas.types.generic import (ABCSeries, ABCIndex, ABCPeriodIndex,
-                                  ABCDatetimeIndex)
+from pandas.types.generic import ABCSeries, ABCIndex
 from pandas.types.common import (is_integer_dtype,
                                  is_int64_dtype,
                                  is_categorical_dtype,
                                  is_extension_type,
                                  is_datetimetz,
+                                 is_period_dtype,
                                  is_period_arraylike,
-                                 is_datetime_or_timedelta_dtype,
                                  is_float_dtype,
                                  needs_i8_conversion,
                                  is_categorical,
@@ -395,20 +394,22 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
 
 def _value_counts_arraylike(values, dropna=True):
     is_datetimetz_type = is_datetimetz(values)
-    is_period = (isinstance(values, ABCPeriodIndex) or
-                 is_period_arraylike(values))
+    is_period_type = (is_period_dtype(values) or
+                      is_period_arraylike(values))
 
     orig = values
 
     from pandas.core.series import Series
     values = Series(values).values
     dtype = values.dtype
 
-    if is_datetime_or_timedelta_dtype(dtype) or is_period:
+    if needs_i8_conversion(dtype) or is_period_type:
+
         from pandas.tseries.index import DatetimeIndex
         from pandas.tseries.period import PeriodIndex
 
-        if is_period:
+        if is_period_type:
+            # values may be an object
             values = PeriodIndex(values)
             freq = values.freq
 
@@ -424,12 +425,8 @@ def _value_counts_arraylike(values, dropna=True):
 
         # dtype handling
         if is_datetimetz_type:
-            if isinstance(orig, ABCDatetimeIndex):
-                tz = orig.tz
-            else:
-                tz = orig.dt.tz
-            keys = DatetimeIndex._simple_new(keys, tz=tz)
-        if is_period:
+            keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
+        if is_period_type:
             keys = PeriodIndex._simple_new(keys, freq=freq)
 
     elif is_integer_dtype(dtype):
@@ -472,11 +469,8 @@ def duplicated(values, keep='first'):
     dtype = values.dtype
 
     # no need to revert to original type
-    if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype):
-        if isinstance(values, (ABCSeries, ABCIndex)):
-            values = values.values.view(np.int64)
-        else:
-            values = values.view(np.int64)
+    if needs_i8_conversion(dtype):
+        values = values.view(np.int64)
     elif is_period_arraylike(values):
         from pandas.tseries.period import PeriodIndex
         values = PeriodIndex(values).asi8

diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py
@@ -149,10 +149,7 @@ def test_dtype_str(self):
         for idx in self.indices.values():
             dtype = idx.dtype_str
             self.assertIsInstance(dtype, compat.string_types)
-            if isinstance(idx, PeriodIndex):
-                self.assertEqual(dtype, 'period')
-            else:
-                self.assertEqual(dtype, str(idx.dtype))
+            self.assertEqual(dtype, str(idx.dtype))
 
     def test_repr_max_seq_item_setting(self):
         # GH10182

diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py
@@ -149,8 +149,8 @@ def test_constructor_from_series(self):
 
         expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'),
                                   Timestamp('20130101')])
-        s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp(
-            '20130101')])
+        s = Series([Timestamp('20110101'), Timestamp('20120101'),
+                    Timestamp('20130101')])
         result = Index(s)
         self.assert_index_equal(result, expected)
         result = DatetimeIndex(s)

diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py
@@ -790,8 +790,6 @@ def test_astype_raises(self):
         self.assertRaises(ValueError, idx.astype, float)
         self.assertRaises(ValueError, idx.astype, 'timedelta64')
         self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]')
-        self.assertRaises(ValueError, idx.astype, 'datetime64')
-        self.assertRaises(ValueError, idx.astype, 'datetime64[ns]')
 
     def test_shift(self):
 

diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py
@@ -2276,57 +2276,57 @@ def test_categorical_repr_period(self):
         idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5)
         c = pd.Categorical(idx)
         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
-Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(c), exp)
 
         c = pd.Categorical(idx.append(idx), categories=idx)
         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
-Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(c), exp)
 
         idx = pd.period_range('2011-01', freq='M', periods=5)
         c = pd.Categorical(idx)
         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
-Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
+Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
 
         self.assertEqual(repr(c), exp)
 
         c = pd.Categorical(idx.append(idx), categories=idx)
         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
-Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
+Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
 
         self.assertEqual(repr(c), exp)
 
     def test_categorical_repr_period_ordered(self):
         idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5)
         c = pd.Categorical(idx, ordered=True)
         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
-Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(c), exp)
 
         c = pd.Categorical(idx.append(idx), categories=idx, ordered=True)
         exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00]
-Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(c), exp)
 
         idx = pd.period_range('2011-01', freq='M', periods=5)
         c = pd.Categorical(idx, ordered=True)
         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
-Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
+Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
 
         self.assertEqual(repr(c), exp)
 
         c = pd.Categorical(idx.append(idx), categories=idx, ordered=True)
         exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05]
-Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
+Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
 
         self.assertEqual(repr(c), exp)
 
@@ -2515,8 +2515,8 @@ def test_categorical_series_repr_period(self):
 3   2011-01-01 12:00
 4   2011-01-01 13:00
 dtype: category
-Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00,
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(s), exp)
 
@@ -2528,7 +2528,7 @@ def test_categorical_series_repr_period(self):
 3   2011-04
 4   2011-05
 dtype: category
-Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
+Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]"""
 
         self.assertEqual(repr(s), exp)
 
@@ -2541,8 +2541,8 @@ def test_categorical_series_repr_period_ordered(self):
 3   2011-01-01 12:00
 4   2011-01-01 13:00
 dtype: category
-Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
-                         2011-01-01 13:00]"""
+Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 <
+                            2011-01-01 13:00]"""
 
         self.assertEqual(repr(s), exp)
 
@@ -2554,7 +2554,7 @@ def test_categorical_series_repr_period_ordered(self):
 3   2011-04
 4   2011-05
 dtype: category
-Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
+Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]"""
 
         self.assertEqual(repr(s), exp)
 

diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py
@@ -18,7 +18,7 @@
                                _maybe_convert_scalar,
                                _find_common_type)
 from pandas.types.dtypes import (CategoricalDtype,
-                                 DatetimeTZDtype)
+                                 DatetimeTZDtype, PeriodDtype)
 from pandas.util import testing as tm
 
 _multiprocess_can_split_ = True
@@ -241,12 +241,13 @@ def test_numpy_dtypes(self):
             # empty
             _find_common_type([])
 
-    def test_pandas_dtypes(self):
+    def test_categorical_dtype(self):
         dtype = CategoricalDtype()
         self.assertEqual(_find_common_type([dtype]), 'category')
         self.assertEqual(_find_common_type([dtype, dtype]), 'category')
         self.assertEqual(_find_common_type([np.object, dtype]), np.object)
 
+    def test_datetimetz_dtype(self):
         dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
         self.assertEqual(_find_common_type([dtype, dtype]),
                          'datetime64[ns, US/Eastern]')
@@ -256,6 +257,16 @@ def test_pandas_dtypes(self):
             self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
             self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
 
+    def test_period_dtype(self):
+        dtype = PeriodDtype(freq='D')
+        self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]')
+
+        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
+                       PeriodDtype(freq='2D'), PeriodDtype(freq='H'),
+                       np.dtype('datetime64[ns]'), np.object, np.int64]:
+            self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
+            self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],