diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index b8f747757987c..a35b8d561a5a7 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -1594,6 +1594,47 @@ objects: idx idx + MonthEnd(3) +``PeriodIndex`` has its own dtype named ``period``, refer to :ref:`Period Dtypes <timeseries.period_dtype>`. + +.. _timeseries.period_dtype: + +Period Dtypes +~~~~~~~~~~~~~ + +.. versionadded:: 0.19.0 + +``PeriodIndex`` has a custom ``period`` dtype. This is a pandas extension +dtype similar to the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``). + +.. _timeseries.timezone_series: + +The ``period`` dtype holds the ``freq`` attribute and is represented with +``period[freq]`` like ``period[D]`` or ``period[M]``, using :ref:`frequency strings <timeseries.offset_aliases>`. + +.. ipython:: python + + pi = pd.period_range('2016-01-01', periods=3, freq='M') + pi + pi.dtype + +The ``period`` dtype can be used in ``.astype(...)``. It allows one to change the +``freq`` of a ``PeriodIndex`` like ``.asfreq()`` and convert a +``DatetimeIndex`` to ``PeriodIndex`` like ``to_period()``: + +.. ipython:: python + + # change monthly freq to daily freq + pi.astype('period[D]') + + # convert to DatetimeIndex + pi.astype('datetime64[ns]') + + # convert to PeriodIndex + dti = pd.date_range('2011-01-01', freq='M', periods=3) + dti + dti.astype('period[M]') + + PeriodIndex Partial String Indexing ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt index 0ee56f865f8c8..50b6ecfacd5c2 100644 --- a/doc/source/whatsnew/v0.19.0.txt +++ b/doc/source/whatsnew/v0.19.0.txt @@ -628,6 +628,41 @@ Furthermore: - Passing duplicated ``percentiles`` will now raise a ``ValueError``. - Bug in ``.describe()`` on a DataFrame with a mixed-dtype column index, which would previously raise a ``TypeError`` (:issue:`13288`) +.. 
_whatsnew_0190.api.perioddtype: + +``PeriodIndex`` now has ``period`` dtype +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a +pandas extension dtype like ``category`` or :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``) (:issue:`13941`). +As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype: + +Previous Behavior: + +.. code-block:: ipython + + In [1]: pi = pd.PeriodIndex(['2016-08-01'], freq='D') + + In [2]: pi + Out[2]: PeriodIndex(['2016-08-01'], dtype='int64', freq='D') + + In [3]: pd.api.types.is_integer_dtype(pi) + Out[3]: True + + In [4]: pi.dtype + Out[4]: dtype('int64') + +New Behavior: + +.. ipython:: python + + pi = pd.PeriodIndex(['2016-08-01'], freq='D') + pi + pd.api.types.is_integer_dtype(pi) + pd.api.types.is_period_dtype(pi) + pi.dtype + type(pi.dtype) + .. _whatsnew_0190.api.periodnat: ``Period('NaT')`` now returns ``pd.NaT`` diff --git a/pandas/api/tests/test_api.py b/pandas/api/tests/test_api.py index b1bbf18df3e06..b706d789931b0 100644 --- a/pandas/api/tests/test_api.py +++ b/pandas/api/tests/test_api.py @@ -151,8 +151,9 @@ class TestTypes(Base, tm.TestCase): 'is_floating_dtype', 'is_int64_dtype', 'is_integer', 'is_integer_dtype', 'is_number', 'is_numeric_dtype', 'is_object_dtype', 'is_scalar', 'is_sparse', - 'is_string_dtype', 'is_timedelta64_dtype', - 'is_timedelta64_ns_dtype', + 'is_string_dtype', + 'is_timedelta64_dtype', 'is_timedelta64_ns_dtype', + 'is_period', 'is_period_dtype', 'is_re', 'is_re_compilable', 'is_dict_like', 'is_iterator', 'is_list_like', 'is_hashable', diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7920f05b5e7a1..7a4f05f31b725 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,15 +8,14 @@ from pandas import compat, lib, tslib, _np_version_under1p8 from pandas.types.cast import _maybe_promote -from pandas.types.generic import (ABCSeries, ABCIndex, ABCPeriodIndex, - 
ABCDatetimeIndex) +from pandas.types.generic import ABCSeries, ABCIndex from pandas.types.common import (is_integer_dtype, is_int64_dtype, is_categorical_dtype, is_extension_type, is_datetimetz, + is_period_dtype, is_period_arraylike, - is_datetime_or_timedelta_dtype, is_float_dtype, needs_i8_conversion, is_categorical, @@ -395,8 +394,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False, def _value_counts_arraylike(values, dropna=True): is_datetimetz_type = is_datetimetz(values) - is_period = (isinstance(values, ABCPeriodIndex) or - is_period_arraylike(values)) + is_period_type = (is_period_dtype(values) or + is_period_arraylike(values)) orig = values @@ -404,11 +403,13 @@ def _value_counts_arraylike(values, dropna=True): values = Series(values).values dtype = values.dtype - if is_datetime_or_timedelta_dtype(dtype) or is_period: + if needs_i8_conversion(dtype) or is_period_type: + from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex - if is_period: + if is_period_type: + # values may be an object values = PeriodIndex(values) freq = values.freq @@ -424,12 +425,8 @@ def _value_counts_arraylike(values, dropna=True): # dtype handling if is_datetimetz_type: - if isinstance(orig, ABCDatetimeIndex): - tz = orig.tz - else: - tz = orig.dt.tz - keys = DatetimeIndex._simple_new(keys, tz=tz) - if is_period: + keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz) + if is_period_type: keys = PeriodIndex._simple_new(keys, freq=freq) elif is_integer_dtype(dtype): @@ -472,11 +469,8 @@ def duplicated(values, keep='first'): dtype = values.dtype # no need to revert to original type - if is_datetime_or_timedelta_dtype(dtype) or is_datetimetz(dtype): - if isinstance(values, (ABCSeries, ABCIndex)): - values = values.values.view(np.int64) - else: - values = values.view(np.int64) + if needs_i8_conversion(dtype): + values = values.view(np.int64) elif is_period_arraylike(values): from pandas.tseries.period import PeriodIndex 
values = PeriodIndex(values).asi8 diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 92560363be8fe..ce9d8eb6122ab 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -149,10 +149,7 @@ def test_dtype_str(self): for idx in self.indices.values(): dtype = idx.dtype_str self.assertIsInstance(dtype, compat.string_types) - if isinstance(idx, PeriodIndex): - self.assertEqual(dtype, 'period') - else: - self.assertEqual(dtype, str(idx.dtype)) + self.assertEqual(dtype, str(idx.dtype)) def test_repr_max_seq_item_setting(self): # GH10182 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 3c9040021fdbf..135f530899956 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -149,8 +149,8 @@ def test_constructor_from_series(self): expected = DatetimeIndex([Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')]) - s = Series([Timestamp('20110101'), Timestamp('20120101'), Timestamp( - '20130101')]) + s = Series([Timestamp('20110101'), Timestamp('20120101'), + Timestamp('20130101')]) result = Index(s) self.assert_index_equal(result, expected) result = DatetimeIndex(s) diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index 9371bef8b8f2e..d10f7ec26f560 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ b/pandas/tests/indexes/test_datetimelike.py @@ -790,8 +790,6 @@ def test_astype_raises(self): self.assertRaises(ValueError, idx.astype, float) self.assertRaises(ValueError, idx.astype, 'timedelta64') self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]') - self.assertRaises(ValueError, idx.astype, 'datetime64') - self.assertRaises(ValueError, idx.astype, 'datetime64[ns]') def test_shift(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 0e37f5bf17405..b630e0914259e 100644 --- a/pandas/tests/test_categorical.py +++ 
b/pandas/tests/test_categorical.py @@ -2276,28 +2276,28 @@ def test_categorical_repr_period(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" self.assertEqual(repr(c), exp) idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" self.assertEqual(repr(c), exp) @@ -2305,28 +2305,28 @@ def test_categorical_repr_period_ordered(self): idx = pd.period_range('2011-01-01 09:00', freq='H', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 
13:00] -Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00, 2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, 2011-01-01 13:00] -Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" self.assertEqual(repr(c), exp) idx = pd.period_range('2011-01', freq='M', periods=5) c = pd.Categorical(idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" self.assertEqual(repr(c), exp) c = pd.Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[2011-01, 2011-02, 2011-03, 2011-04, 2011-05, 2011-01, 2011-02, 2011-03, 2011-04, 2011-05] -Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" self.assertEqual(repr(c), exp) @@ -2515,8 +2515,8 @@ def test_categorical_series_repr_period(self): 3 2011-01-01 12:00 4 2011-01-01 13:00 dtype: category -Categories (5, period): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00, 2011-01-01 10:00, 2011-01-01 11:00, 2011-01-01 12:00, + 2011-01-01 13:00]""" self.assertEqual(repr(s), exp) @@ -2528,7 +2528,7 @@ def test_categorical_series_repr_period(self): 3 
2011-04 4 2011-05 dtype: category -Categories (5, period): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" +Categories (5, period[M]): [2011-01, 2011-02, 2011-03, 2011-04, 2011-05]""" self.assertEqual(repr(s), exp) @@ -2541,8 +2541,8 @@ def test_categorical_series_repr_period_ordered(self): 3 2011-01-01 12:00 4 2011-01-01 13:00 dtype: category -Categories (5, period): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < - 2011-01-01 13:00]""" +Categories (5, period[H]): [2011-01-01 09:00 < 2011-01-01 10:00 < 2011-01-01 11:00 < 2011-01-01 12:00 < + 2011-01-01 13:00]""" self.assertEqual(repr(s), exp) @@ -2554,7 +2554,7 @@ def test_categorical_series_repr_period_ordered(self): 3 2011-04 4 2011-05 dtype: category -Categories (5, period): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" +Categories (5, period[M]): [2011-01 < 2011-02 < 2011-03 < 2011-04 < 2011-05]""" self.assertEqual(repr(s), exp) diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py index 46f37bf0ef8c2..2b4998fd64f4a 100644 --- a/pandas/tests/types/test_cast.py +++ b/pandas/tests/types/test_cast.py @@ -18,7 +18,7 @@ _maybe_convert_scalar, _find_common_type) from pandas.types.dtypes import (CategoricalDtype, - DatetimeTZDtype) + DatetimeTZDtype, PeriodDtype) from pandas.util import testing as tm _multiprocess_can_split_ = True @@ -241,12 +241,13 @@ def test_numpy_dtypes(self): # empty _find_common_type([]) - def test_pandas_dtypes(self): + def test_categorical_dtype(self): dtype = CategoricalDtype() self.assertEqual(_find_common_type([dtype]), 'category') self.assertEqual(_find_common_type([dtype, dtype]), 'category') self.assertEqual(_find_common_type([np.object, dtype]), np.object) + def test_datetimetz_dtype(self): dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern') self.assertEqual(_find_common_type([dtype, dtype]), 'datetime64[ns, US/Eastern]') @@ -256,6 +257,16 @@ def test_pandas_dtypes(self): self.assertEqual(_find_common_type([dtype, dtype2]), 
np.object) self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + def test_period_dtype(self): + dtype = PeriodDtype(freq='D') + self.assertEqual(_find_common_type([dtype, dtype]), 'period[D]') + + for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'), + PeriodDtype(freq='2D'), PeriodDtype(freq='H'), + np.dtype('datetime64[ns]'), np.object, np.int64]: + self.assertEqual(_find_common_type([dtype, dtype2]), np.object) + self.assertEqual(_find_common_type([dtype2, dtype]), np.object) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/types/test_common.py b/pandas/tests/types/test_common.py index 0a586410ad5a0..4d6f50862c562 100644 --- a/pandas/tests/types/test_common.py +++ b/pandas/tests/types/test_common.py @@ -3,19 +3,59 @@ import nose import numpy as np -from pandas.types.dtypes import DatetimeTZDtype, CategoricalDtype -from pandas.types.common import pandas_dtype +from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype +from pandas.types.common import pandas_dtype, is_dtype_equal + +import pandas.util.testing as tm _multiprocess_can_split_ = True -def test_pandas_dtype(): +class TestPandasDtype(tm.TestCase): + + def test_numpy_dtype(self): + for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: + self.assertEqual(pandas_dtype(dtype), np.dtype(dtype)) + + def test_numpy_string_dtype(self): + # do not parse freq-like string as period dtype + self.assertEqual(pandas_dtype('U'), np.dtype('U')) + self.assertEqual(pandas_dtype('S'), np.dtype('S')) + + def test_datetimetz_dtype(self): + for dtype in ['datetime64[ns, US/Eastern]', + 'datetime64[ns, Asia/Tokyo]', + 'datetime64[ns, UTC]']: + self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype)) + self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype)) + self.assertEqual(pandas_dtype(dtype), dtype) + + def test_categorical_dtype(self): + self.assertEqual(pandas_dtype('category'), 
CategoricalDtype()) + + def test_period_dtype(self): + for dtype in ['period[D]', 'period[3M]', 'period[U]', + 'Period[D]', 'Period[3M]', 'Period[U]']: + self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype)) + self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype)) + self.assertEqual(pandas_dtype(dtype), dtype) + + +def test_dtype_equal(): + assert is_dtype_equal(np.int64, np.int64) + assert not is_dtype_equal(np.int64, np.float64) + + p1 = PeriodDtype('D') + p2 = PeriodDtype('D') + assert is_dtype_equal(p1, p2) + assert not is_dtype_equal(np.int64, p1) + + p3 = PeriodDtype('2D') + assert not is_dtype_equal(p1, p3) + + assert not DatetimeTZDtype.is_dtype(np.int64) + assert not PeriodDtype.is_dtype(np.int64) - assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype( - 'datetime64[ns, US/Eastern]') - assert pandas_dtype('category') == CategoricalDtype() - for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']: - assert pandas_dtype(dtype) == np.dtype(dtype) if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tests/types/test_dtypes.py b/pandas/tests/types/test_dtypes.py index 1743e80ae01a9..dd1a8dbd5c53a 100644 --- a/pandas/tests/types/test_dtypes.py +++ b/pandas/tests/types/test_dtypes.py @@ -3,14 +3,15 @@ import nose import numpy as np +import pandas as pd from pandas import Series, Categorical, date_range -from pandas.types.dtypes import CategoricalDtype -from pandas.types.common import (is_categorical_dtype, - is_categorical, DatetimeTZDtype, +from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype +from pandas.types.common import (is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, + is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, - is_datetime64_dtype, + is_datetime64_dtype, is_string_dtype, _coerce_to_dtype) import pandas.util.testing as tm @@ -24,6 +25,7 @@ def test_hash(self): def 
test_equality_invalid(self): self.assertRaises(self.dtype == 'foo') + self.assertFalse(is_dtype_equal(self.dtype, np.int64)) def test_numpy_informed(self): @@ -206,6 +208,148 @@ def test_parser(self): DatetimeTZDtype('ns', tz), ) + def test_empty(self): + dt = DatetimeTZDtype() + with tm.assertRaises(AttributeError): + str(dt) + + +class TestPeriodDtype(Base, tm.TestCase): + + def setUp(self): + self.dtype = PeriodDtype('D') + + def test_construction(self): + with tm.assertRaises(ValueError): + PeriodDtype('xx') + + for s in ['period[D]', 'Period[D]', 'D']: + dt = PeriodDtype(s) + self.assertEqual(dt.freq, pd.tseries.offsets.Day()) + self.assertTrue(is_period_dtype(dt)) + + for s in ['period[3D]', 'Period[3D]', '3D']: + dt = PeriodDtype(s) + self.assertEqual(dt.freq, pd.tseries.offsets.Day(3)) + self.assertTrue(is_period_dtype(dt)) + + for s in ['period[26H]', 'Period[26H]', '26H', + 'period[1D2H]', 'Period[1D2H]', '1D2H']: + dt = PeriodDtype(s) + self.assertEqual(dt.freq, pd.tseries.offsets.Hour(26)) + self.assertTrue(is_period_dtype(dt)) + + def test_subclass(self): + a = PeriodDtype('period[D]') + b = PeriodDtype('period[3D]') + + self.assertTrue(issubclass(type(a), type(a))) + self.assertTrue(issubclass(type(a), type(b))) + + def test_identity(self): + self.assertEqual(PeriodDtype('period[D]'), + PeriodDtype('period[D]')) + self.assertIs(PeriodDtype('period[D]'), + PeriodDtype('period[D]')) + + self.assertEqual(PeriodDtype('period[3D]'), + PeriodDtype('period[3D]')) + self.assertIs(PeriodDtype('period[3D]'), + PeriodDtype('period[3D]')) + + self.assertEqual(PeriodDtype('period[1S1U]'), + PeriodDtype('period[1000001U]')) + self.assertIs(PeriodDtype('period[1S1U]'), + PeriodDtype('period[1000001U]')) + + def test_coerce_to_dtype(self): + self.assertEqual(_coerce_to_dtype('period[D]'), + PeriodDtype('period[D]')) + self.assertEqual(_coerce_to_dtype('period[3M]'), + PeriodDtype('period[3M]')) + + def test_compat(self): + 
self.assertFalse(is_datetime64_ns_dtype(self.dtype)) + self.assertFalse(is_datetime64_ns_dtype('period[D]')) + self.assertFalse(is_datetime64_dtype(self.dtype)) + self.assertFalse(is_datetime64_dtype('period[D]')) + + def test_construction_from_string(self): + result = PeriodDtype('period[D]') + self.assertTrue(is_dtype_equal(self.dtype, result)) + result = PeriodDtype.construct_from_string('period[D]') + self.assertTrue(is_dtype_equal(self.dtype, result)) + with tm.assertRaises(TypeError): + PeriodDtype.construct_from_string('foo') + with tm.assertRaises(TypeError): + PeriodDtype.construct_from_string('period[foo]') + with tm.assertRaises(TypeError): + PeriodDtype.construct_from_string('foo[D]') + + with tm.assertRaises(TypeError): + PeriodDtype.construct_from_string('datetime64[ns]') + with tm.assertRaises(TypeError): + PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]') + + def test_is_dtype(self): + self.assertTrue(PeriodDtype.is_dtype(self.dtype)) + self.assertTrue(PeriodDtype.is_dtype('period[D]')) + self.assertTrue(PeriodDtype.is_dtype('period[3D]')) + self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('3D'))) + self.assertTrue(PeriodDtype.is_dtype('period[U]')) + self.assertTrue(PeriodDtype.is_dtype('period[S]')) + self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('U'))) + self.assertTrue(PeriodDtype.is_dtype(PeriodDtype('S'))) + + self.assertFalse(PeriodDtype.is_dtype('D')) + self.assertFalse(PeriodDtype.is_dtype('3D')) + self.assertFalse(PeriodDtype.is_dtype('U')) + self.assertFalse(PeriodDtype.is_dtype('S')) + self.assertFalse(PeriodDtype.is_dtype('foo')) + self.assertFalse(PeriodDtype.is_dtype(np.object_)) + self.assertFalse(PeriodDtype.is_dtype(np.int64)) + self.assertFalse(PeriodDtype.is_dtype(np.float64)) + + def test_equality(self): + self.assertTrue(is_dtype_equal(self.dtype, 'period[D]')) + self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) + self.assertTrue(is_dtype_equal(self.dtype, PeriodDtype('D'))) + 
self.assertTrue(is_dtype_equal(PeriodDtype('D'), PeriodDtype('D'))) + + self.assertFalse(is_dtype_equal(self.dtype, 'D')) + self.assertFalse(is_dtype_equal(PeriodDtype('D'), PeriodDtype('2D'))) + + def test_basic(self): + self.assertTrue(is_period_dtype(self.dtype)) + + pidx = pd.period_range('2013-01-01 09:00', periods=5, freq='H') + + self.assertTrue(is_period_dtype(pidx.dtype)) + self.assertTrue(is_period_dtype(pidx)) + self.assertTrue(is_period(pidx)) + + s = Series(pidx, name='A') + # dtypes + # series results in object dtype currently, + # is_period checks period_arraylike + self.assertFalse(is_period_dtype(s.dtype)) + self.assertFalse(is_period_dtype(s)) + self.assertTrue(is_period(s)) + + self.assertFalse(is_period_dtype(np.dtype('float64'))) + self.assertFalse(is_period_dtype(1.0)) + self.assertFalse(is_period(np.dtype('float64'))) + self.assertFalse(is_period(1.0)) + + def test_empty(self): + dt = PeriodDtype() + with tm.assertRaises(AttributeError): + str(dt) + + def test_not_string(self): + # though PeriodDtype has object kind, it cannot be string + self.assertFalse(is_string_dtype(PeriodDtype('D'))) + if __name__ == '__main__': nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 01728889a8595..8f50ddc0f9e41 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -13,10 +13,12 @@ is_integer, is_float, is_integer_dtype, is_datetime64_ns_dtype, + is_period_dtype, is_bool_dtype, is_string_dtype, is_list_like, is_scalar, + pandas_dtype, _ensure_int64) from pandas.types.generic import ABCSeries from pandas.types.dtypes import DatetimeTZDtype @@ -802,8 +804,7 @@ def to_datetime(self, dayfirst=False): @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): - dtype = np.dtype(dtype) - + dtype = pandas_dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): @@ -817,6 +818,8 @@ def astype(self, dtype, 
copy=True): return self elif is_string_dtype(dtype): return Index(self.format(), name=self.name, dtype=object) + elif is_period_dtype(dtype): + return self.to_period(freq=dtype.freq) raise ValueError('Cannot cast DatetimeIndex to dtype %s' % dtype) def _get_time_micros(self): diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index af46162038fef..486cf52f188a9 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -10,11 +10,15 @@ is_integer_dtype, is_float_dtype, is_scalar, + is_datetime64_dtype, + is_datetime64tz_dtype, is_timedelta64_dtype, + is_period_dtype, is_bool_dtype, + pandas_dtype, _ensure_int64, _ensure_object) - +from pandas.types.dtypes import PeriodDtype from pandas.types.generic import ABCSeries import pandas.tseries.frequencies as frequencies @@ -123,7 +127,6 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): ---------- data : array-like (1-dimensional), optional Optional period-like data to construct index with - dtype : NumPy dtype (default: i8) copy : bool Make a copy of input ndarray freq : string or period object, optional @@ -146,6 +149,7 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): second : int, array, or Series, default None tz : object, default None Timezone for converting datetime64 data to Periods + dtype : str or PeriodDtype, default None Examples -------- @@ -175,7 +179,8 @@ class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index): __ge__ = _period_index_cmp('__ge__') def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, - periods=None, copy=False, name=None, tz=None, **kwargs): + periods=None, copy=False, name=None, tz=None, dtype=None, + **kwargs): if periods is not None: if is_float(periods): @@ -187,6 +192,16 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if name is None and hasattr(data, 'name'): name = data.name + if dtype is not None: + dtype = pandas_dtype(dtype) + if not 
is_period_dtype(dtype): + raise ValueError('dtype must be PeriodDtype') + if freq is None: + freq = dtype.freq + elif freq != dtype.freq: + msg = 'specified freq and dtype are different' + raise IncompatibleFrequency(msg) + if data is None: if ordinal is not None: data = np.asarray(ordinal, dtype=np.int64) @@ -372,6 +387,11 @@ def _to_embed(self, keep_tz=False): def _formatter_func(self): return lambda x: "'%s'" % x + @property + def _int64index(self): + # do not cache, same as .asi8 + return Int64Index(self.asi8, name=self.name, fastpath=True) + def asof_locs(self, where, mask): """ where : array of timestamps @@ -393,13 +413,19 @@ def asof_locs(self, where, mask): return result @Appender(_index_shared_docs['astype']) - def astype(self, dtype, copy=True): - dtype = np.dtype(dtype) + def astype(self, dtype, copy=True, how='start'): + dtype = pandas_dtype(dtype) if is_object_dtype(dtype): return self.asobject elif is_integer_dtype(dtype): return Index(self.values.astype('i8', copy=copy), name=self.name, dtype='i8') + elif is_datetime64_dtype(dtype): + return self.to_timestamp(how=how) + elif is_datetime64tz_dtype(dtype): + return self.to_timestamp(how=how).tz_localize(dtype.tz) + elif is_period_dtype(dtype): + return self.asfreq(freq=dtype.freq) raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype) @Substitution(klass='PeriodIndex', value='key') @@ -650,9 +676,8 @@ def shift(self, n): return PeriodIndex(data=values, name=self.name, freq=self.freq) @cache_readonly - def dtype_str(self): - """ return the dtype str of the underlying data """ - return self.inferred_type + def dtype(self): + return PeriodDtype.construct_from_string(self.freq) @property def inferred_type(self): @@ -738,7 +763,10 @@ def get_loc(self, key, method=None, tolerance=None): try: ordinal = tslib.iNaT if key is tslib.NaT else key.ordinal - return Index.get_loc(self, ordinal, method, tolerance) + if tolerance is not None: + tolerance = self._convert_tolerance(tolerance) + return 
self._int64index.get_loc(ordinal, method, tolerance) + except KeyError: raise KeyError(key) diff --git a/pandas/tseries/tests/test_base.py b/pandas/tseries/tests/test_base.py index 800f9470f9845..45a5feec7c949 100644 --- a/pandas/tseries/tests/test_base.py +++ b/pandas/tseries/tests/test_base.py @@ -1767,35 +1767,40 @@ def test_representation(self): idx7 = pd.period_range('2013Q1', periods=1, freq="Q") idx8 = pd.period_range('2013Q1', periods=2, freq="Q") idx9 = pd.period_range('2013Q1', periods=3, freq="Q") + idx10 = PeriodIndex(['2011-01-01', '2011-02-01'], freq='3D') - exp1 = """PeriodIndex([], dtype='int64', freq='D')""" + exp1 = """PeriodIndex([], dtype='period[D]', freq='D')""" - exp2 = """PeriodIndex(['2011-01-01'], dtype='int64', freq='D')""" + exp2 = """PeriodIndex(['2011-01-01'], dtype='period[D]', freq='D')""" - exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='int64', " + exp3 = ("PeriodIndex(['2011-01-01', '2011-01-02'], dtype='period[D]', " "freq='D')") exp4 = ("PeriodIndex(['2011-01-01', '2011-01-02', '2011-01-03'], " - "dtype='int64', freq='D')") + "dtype='period[D]', freq='D')") - exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='int64', " + exp5 = ("PeriodIndex(['2011', '2012', '2013'], dtype='period[A-DEC]', " "freq='A-DEC')") exp6 = ("PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], " - "dtype='int64', freq='H')") + "dtype='period[H]', freq='H')") - exp7 = """PeriodIndex(['2013Q1'], dtype='int64', freq='Q-DEC')""" - - exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='int64', " + exp7 = ("PeriodIndex(['2013Q1'], dtype='period[Q-DEC]', " "freq='Q-DEC')") - exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], dtype='int64', " + exp8 = ("PeriodIndex(['2013Q1', '2013Q2'], dtype='period[Q-DEC]', " "freq='Q-DEC')") + exp9 = ("PeriodIndex(['2013Q1', '2013Q2', '2013Q3'], " + "dtype='period[Q-DEC]', freq='Q-DEC')") + + exp10 = ("PeriodIndex(['2011-01-01', '2011-02-01'], " + "dtype='period[3D]', freq='3D')") + for idx, expected in 
zip([idx1, idx2, idx3, idx4, idx5, - idx6, idx7, idx8, idx9], + idx6, idx7, idx8, idx9, idx10], [exp1, exp2, exp3, exp4, exp5, - exp6, exp7, exp8, exp9]): + exp6, exp7, exp8, exp9, exp10]): for func in ['__repr__', '__unicode__', '__str__']: result = getattr(idx, func)() self.assertEqual(result, expected) @@ -1805,11 +1810,11 @@ def test_representation_to_series(self): idx1 = PeriodIndex([], freq='D') idx2 = PeriodIndex(['2011-01-01'], freq='D') idx3 = PeriodIndex(['2011-01-01', '2011-01-02'], freq='D') - idx4 = PeriodIndex( - ['2011-01-01', '2011-01-02', '2011-01-03'], freq='D') + idx4 = PeriodIndex(['2011-01-01', '2011-01-02', + '2011-01-03'], freq='D') idx5 = PeriodIndex(['2011', '2012', '2013'], freq='A') - idx6 = PeriodIndex( - ['2011-01-01 09:00', '2012-02-01 10:00', 'NaT'], freq='H') + idx6 = PeriodIndex(['2011-01-01 09:00', '2012-02-01 10:00', + 'NaT'], freq='H') idx7 = pd.period_range('2013Q1', periods=1, freq="Q") idx8 = pd.period_range('2013Q1', periods=2, freq="Q") diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 8baac297fe57b..ad6486809171e 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1783,6 +1783,35 @@ def test_constructor_datetime64arr(self): self.assertRaises(ValueError, PeriodIndex, vals, freq='D') + def test_constructor_dtype(self): + # passing a dtype with a tz should localize + idx = PeriodIndex(['2013-01', '2013-03'], dtype='period[M]') + exp = PeriodIndex(['2013-01', '2013-03'], freq='M') + tm.assert_index_equal(idx, exp) + self.assertEqual(idx.dtype, 'period[M]') + + idx = PeriodIndex(['2013-01-05', '2013-03-05'], dtype='period[3D]') + exp = PeriodIndex(['2013-01-05', '2013-03-05'], freq='3D') + tm.assert_index_equal(idx, exp) + self.assertEqual(idx.dtype, 'period[3D]') + + # if we already have a freq and its not the same, then asfreq + # (not changed) + idx = PeriodIndex(['2013-01-01', '2013-01-02'], freq='D') + + res = PeriodIndex(idx, 
dtype='period[M]') + exp = PeriodIndex(['2013-01', '2013-01'], freq='M') + tm.assert_index_equal(res, exp) + self.assertEqual(res.dtype, 'period[M]') + + res = PeriodIndex(idx, freq='M') + tm.assert_index_equal(res, exp) + self.assertEqual(res.dtype, 'period[M]') + + msg = 'specified freq and dtype are different' + with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg): + PeriodIndex(['2011-01'], freq='M', dtype='period[D]') + def test_constructor_empty(self): idx = pd.PeriodIndex([], freq='M') tm.assertIsInstance(idx, PeriodIndex) @@ -1970,6 +1999,15 @@ def test_constructor_freq_combined(self): freq='25H') tm.assert_index_equal(pidx, expected) + def test_dtype_str(self): + pi = pd.PeriodIndex([], freq='M') + self.assertEqual(pi.dtype_str, 'period[M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + + pi = pd.PeriodIndex([], freq='3M') + self.assertEqual(pi.dtype_str, 'period[3M]') + self.assertEqual(pi.dtype_str, str(pi.dtype)) + def test_view_asi8(self): idx = pd.PeriodIndex([], freq='M') @@ -2314,6 +2352,17 @@ def test_to_timestamp_pi_combined(self): ['2011-01-02 00:00', '2011-01-03 01:00'], name='idx') self.assert_index_equal(result, expected) + def test_to_timestamp_to_period_astype(self): + idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') + + res = idx.astype('period[M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') + tm.assert_index_equal(res, exp) + + res = idx.astype('period[3M]') + exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') + self.assert_index_equal(res, exp) + def test_start_time(self): index = PeriodIndex(freq='M', start='2016-01-01', end='2016-05-31') expected_index = date_range('2016-01-01', end='2016-05-31', freq='MS') @@ -3013,6 +3062,16 @@ def test_range_slice_outofbounds(self): tm.assert_frame_equal(df['2013-06':'2013-09'], empty) tm.assert_frame_equal(df['2013-11':'2013-12'], empty) + def test_astype_asfreq(self): + pi1 = PeriodIndex(['2011-01-01', 
'2011-02-01', '2011-03-01'], freq='D') + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + tm.assert_index_equal(pi1.asfreq('M'), exp) + tm.assert_index_equal(pi1.astype('period[M]'), exp) + + exp = PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='3M') + tm.assert_index_equal(pi1.asfreq('3M'), exp) + tm.assert_index_equal(pi1.astype('period[3M]'), exp) + def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2012-03', '2012-04'], freq='D') @@ -3037,6 +3096,25 @@ def test_period_dt64_round_trip(self): pi = dti.to_period(freq='H') tm.assert_index_equal(pi.to_timestamp(), dti) + def test_period_astype_to_timestamp(self): + pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M') + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01']) + tm.assert_index_equal(pi.astype('datetime64[ns]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31']) + tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp) + + exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]') + tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp) + + exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'], + tz='US/Eastern') + res = pi.astype('datetime64[ns, US/Eastern]', how='end') + tm.assert_index_equal(res, exp) + def test_to_period_quarterly(self): # make sure we can make the round trip for month in MONTHS: diff --git a/pandas/types/api.py b/pandas/types/api.py index 2d68e041f632e..096dc2f84aa67 100644 --- a/pandas/types/api.py +++ b/pandas/types/api.py @@ -18,6 +18,8 @@ is_datetime64_ns_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, + is_period, + is_period_dtype, # string-like is_string_dtype, diff --git a/pandas/types/common.py b/pandas/types/common.py index 39db0be3e416e..2e7a67112e6db 100644 --- a/pandas/types/common.py +++ b/pandas/types/common.py @@ -5,6 +5,7 @@ from pandas 
import lib, algos from .dtypes import (CategoricalDtype, CategoricalDtypeType, DatetimeTZDtype, DatetimeTZDtypeType, + PeriodDtype, PeriodDtypeType, ExtensionDtype) from .generic import (ABCCategorical, ABCPeriodIndex, ABCDatetimeIndex, ABCSeries, @@ -63,6 +64,11 @@ def is_datetimetz(array): is_datetime64tz_dtype(array)) +def is_period(array): + """ return if we are a period array """ + return isinstance(array, ABCPeriodIndex) or is_period_arraylike(array) + + def is_datetime64_dtype(arr_or_dtype): try: tipo = _get_dtype_type(arr_or_dtype) @@ -80,13 +86,17 @@ def is_timedelta64_dtype(arr_or_dtype): return issubclass(tipo, np.timedelta64) +def is_period_dtype(arr_or_dtype): + return PeriodDtype.is_dtype(arr_or_dtype) + + def is_categorical_dtype(arr_or_dtype): return CategoricalDtype.is_dtype(arr_or_dtype) def is_string_dtype(arr_or_dtype): dtype = _get_dtype(arr_or_dtype) - return dtype.kind in ('O', 'S', 'U') + return dtype.kind in ('O', 'S', 'U') and not is_period_dtype(dtype) def is_period_arraylike(arr): @@ -231,7 +241,7 @@ def is_object(x): def needs_i8_conversion(arr_or_dtype): return (is_datetime_or_timedelta_dtype(arr_or_dtype) or is_datetime64tz_dtype(arr_or_dtype) or - isinstance(arr_or_dtype, ABCPeriodIndex)) + is_period_dtype(arr_or_dtype)) def is_numeric_dtype(arr_or_dtype): @@ -290,6 +300,8 @@ def _coerce_to_dtype(dtype): dtype = CategoricalDtype() elif is_datetime64tz_dtype(dtype): dtype = DatetimeTZDtype(dtype) + elif is_period_dtype(dtype): + dtype = PeriodDtype(dtype) else: dtype = np.dtype(dtype) return dtype @@ -304,11 +316,15 @@ def _get_dtype(arr_or_dtype): return arr_or_dtype elif isinstance(arr_or_dtype, DatetimeTZDtype): return arr_or_dtype + elif isinstance(arr_or_dtype, PeriodDtype): + return arr_or_dtype elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtype.construct_from_string(arr_or_dtype) elif is_datetime64tz_dtype(arr_or_dtype): return 
DatetimeTZDtype.construct_from_string(arr_or_dtype) + elif is_period_dtype(arr_or_dtype): + return PeriodDtype.construct_from_string(arr_or_dtype) if hasattr(arr_or_dtype, 'dtype'): arr_or_dtype = arr_or_dtype.dtype @@ -324,11 +340,15 @@ def _get_dtype_type(arr_or_dtype): return CategoricalDtypeType elif isinstance(arr_or_dtype, DatetimeTZDtype): return DatetimeTZDtypeType + elif isinstance(arr_or_dtype, PeriodDtype): + return PeriodDtypeType elif isinstance(arr_or_dtype, string_types): if is_categorical_dtype(arr_or_dtype): return CategoricalDtypeType elif is_datetime64tz_dtype(arr_or_dtype): return DatetimeTZDtypeType + elif is_period_dtype(arr_or_dtype): + return PeriodDtypeType return _get_dtype_type(np.dtype(arr_or_dtype)) try: return arr_or_dtype.dtype.type @@ -404,6 +424,8 @@ def pandas_dtype(dtype): """ if isinstance(dtype, DatetimeTZDtype): return dtype + elif isinstance(dtype, PeriodDtype): + return dtype elif isinstance(dtype, CategoricalDtype): return dtype elif isinstance(dtype, string_types): @@ -412,6 +434,13 @@ def pandas_dtype(dtype): except TypeError: pass + if dtype.startswith('period[') or dtype.startswith('Period['): + # do not parse string like U as period[U] + try: + return PeriodDtype.construct_from_string(dtype) + except TypeError: + pass + try: return CategoricalDtype.construct_from_string(dtype) except TypeError: diff --git a/pandas/types/dtypes.py b/pandas/types/dtypes.py index 140d494c3e1b2..5b6d7905d4095 100644 --- a/pandas/types/dtypes.py +++ b/pandas/types/dtypes.py @@ -244,6 +244,124 @@ def __eq__(self, other): if isinstance(other, compat.string_types): return other == self.name - return isinstance(other, DatetimeTZDtype) and \ - self.unit == other.unit and \ - str(self.tz) == str(other.tz) + return (isinstance(other, DatetimeTZDtype) and + self.unit == other.unit and + str(self.tz) == str(other.tz)) + + +class PeriodDtypeType(type): + """ + the type of PeriodDtype, this metaclass determines subclass ability + """ + pass + + +class 
class PeriodDtype(ExtensionDtype):
    """
    A Period duck-typed class, suitable for holding a period with freq dtype.

    THIS IS NOT A REAL NUMPY DTYPE. It advertises itself as an object
    dtype (``kind`` is ``'O'`` and ``base`` is ``np.dtype('O')``) and
    carries the frequency in ``freq``.

    Instances built from a frequency are cached per ``freq.freqstr``, so
    constructing the dtype twice with the same frequency returns the same
    object.

    Parameters
    ----------
    freq : PeriodDtype, DateOffset, str or None
        A frequency string such as ``'M'``, a dtype string such as
        ``'period[M]'`` / ``'Period[M]'``, an offset object, or an existing
        ``PeriodDtype``.  ``None`` builds an empty instance (used for
        pickle compatibility).
    """
    __metaclass__ = PeriodDtypeType
    type = PeriodDtypeType
    kind = 'O'
    str = '|O08'
    base = np.dtype('O')
    num = 102
    _metadata = ['freq']
    # The named group ``freq`` is read back in ``_parse_dtype_strict``.
    _match = re.compile(r"(P|p)eriod\[(?P<freq>.+)\]")
    _cache = {}

    def __new__(cls, freq=None):
        """
        Parameters
        ----------
        freq : frequency
        """

        if isinstance(freq, PeriodDtype):
            return freq

        elif freq is None:
            # empty constructor for pickle compat
            return object.__new__(cls)

        from pandas.tseries.offsets import DateOffset
        if not isinstance(freq, DateOffset):
            freq = cls._parse_dtype_strict(freq)

        try:
            return cls._cache[freq.freqstr]
        except KeyError:
            # first time this frequency is seen: build and remember it
            u = object.__new__(cls)
            u.freq = freq
            cls._cache[freq.freqstr] = u
            return u

    @classmethod
    def _parse_dtype_strict(cls, freq):
        """
        Convert ``freq`` (a frequency string or a ``period[...]`` dtype
        string) to an offset, raising ``ValueError`` if that is not possible.
        """
        if isinstance(freq, compat.string_types):
            if freq.startswith(('period[', 'Period[')):
                # strip the ``period[...]`` wrapper, keep the inner freq
                m = cls._match.search(freq)
                if m is not None:
                    freq = m.group('freq')
            from pandas.tseries.frequencies import to_offset
            freq = to_offset(freq)
            if freq is not None:
                return freq

        raise ValueError("could not construct PeriodDtype")

    @classmethod
    def construct_from_string(cls, string):
        """
        attempt to construct this type from a string, raise a TypeError
        if it's not possible
        """
        from pandas.tseries.offsets import DateOffset
        if isinstance(string, (compat.string_types, DateOffset)):
            # avoid tuple to be regarded as freq
            try:
                return cls(freq=string)
            except ValueError:
                pass
        raise TypeError("could not construct PeriodDtype")

    def __unicode__(self):
        return "period[{freq}]".format(freq=self.freq.freqstr)

    @property
    def name(self):
        return str(self)

    def __hash__(self):
        # make myself hashable
        return hash(str(self))

    def __eq__(self, other):
        if isinstance(other, compat.string_types):
            # NOTE: ``str.title()`` re-cases every word, including the
            # text inside the brackets, not only the leading ``p``.
            return other == self.name or other == self.name.title()

        return isinstance(other, PeriodDtype) and self.freq == other.freq

    @classmethod
    def is_dtype(cls, dtype):
        """
        Return a boolean if the passed type is an actual dtype that we
        can match (via string or type)
        """

        if isinstance(dtype, compat.string_types):
            # PeriodDtype can be instantiated from a freq string like "U",
            # but doesn't regard a bare freq string like "U" as a dtype.
            if not dtype.startswith(('period[', 'Period[')):
                return False
            try:
                return cls._parse_dtype_strict(dtype) is not None
            except ValueError:
                return False
        return super(PeriodDtype, cls).is_dtype(dtype)