Skip to content

Commit a968782

Browse files
committed
doc / astype updates
1 parent 13d9592 commit a968782

File tree

9 files changed

+93
-55
lines changed

9 files changed

+93
-55
lines changed

doc/source/timeseries.rst

+8-3
Original file line numberDiff line numberDiff line change
@@ -1604,18 +1604,20 @@ Period Dtypes
16041604
.. versionadded:: 0.19.0
16051605

16061606
``PeriodIndex`` has a custom ``period`` dtype. This is a pandas extension
1607-
dtype similar to the timezone aware dtype (``datetime64[ns, tz]``).
1607+
dtype similar to the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``).
1608+
1609+
.. _timeseries.timezone_series:
16081610

16091611
The ``period`` dtype holds the ``freq`` attribute and is represented with
1610-
``period[freq]``, using :ref:`frequency strings <timeseries.offset_aliases>`.
1612+
``period[freq]`` (for example ``period[D]`` or ``period[M]``), using :ref:`frequency strings <timeseries.offset_aliases>`.
16111613

16121614
.. ipython:: python
16131615
16141616
pi = pd.period_range('2016-01-01', periods=3, freq='M')
16151617
pi
16161618
pi.dtype
16171619
1618-
The ``period`` dtype can be used in ``.astype(...)``. It allows to change the
1620+
The ``period`` dtype can be used in ``.astype(...)``. It allows one to change the
16191621
``freq`` of a ``PeriodIndex`` like ``.asfreq()`` and convert a
16201622
``DatetimeIndex`` to ``PeriodIndex`` like ``to_period()``. A ``PeriodIndex`` can also be converted to a ``DatetimeIndex`` like ``to_timestamp()``:
16211623

@@ -1624,6 +1626,9 @@ The ``period`` dtype can be used in ``.astype(...)``. It allows to change the
16241626
# change monthly freq to daily freq
16251627
pi.astype('period[D]')
16261628
1629+
# convert to DatetimeIndex
1630+
pi.astype('datetime64[ns]')
1631+
16271632
# convert to PeriodIndex
16281633
dti = pd.date_range('2011-01-01', freq='M', periods=3)
16291634
dti

doc/source/whatsnew/v0.19.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -634,8 +634,8 @@ Furthermore:
634634
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
635635

636636
``PeriodIndex`` now has its own ``period`` dtype. The ``period`` dtype is a
637-
pandas extension dtype which extends NumPy dtype like ``"category"`` (:issue:`13941`).
638-
As a consequence of this change, `PeriodIndex` no longer has an integer dtype:
637+
pandas extension dtype like ``category`` or the :ref:`timezone aware dtype <timeseries.timezone_series>` (``datetime64[ns, tz]``) (:issue:`13941`).
638+
As a consequence of this change, ``PeriodIndex`` no longer has an integer dtype:
639639

640640
Previous Behavior:
641641

pandas/core/algorithms.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,7 @@ def _value_counts_arraylike(values, dropna=True):
405405

406406
if needs_i8_conversion(dtype) or is_period_type:
407407

408+
from pandas.tseries.index import DatetimeIndex
408409
from pandas.tseries.period import PeriodIndex
409410

410411
if is_period_type:
@@ -424,7 +425,6 @@ def _value_counts_arraylike(values, dropna=True):
424425

425426
# dtype handling
426427
if is_datetimetz_type:
427-
from pandas.tseries.index import DatetimeIndex
428428
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
429429
if is_period_type:
430430
keys = PeriodIndex._simple_new(keys, freq=freq)

pandas/tests/indexes/test_datetimelike.py

-2
Original file line numberDiff line numberDiff line change
@@ -790,8 +790,6 @@ def test_astype_raises(self):
790790
self.assertRaises(ValueError, idx.astype, float)
791791
self.assertRaises(ValueError, idx.astype, 'timedelta64')
792792
self.assertRaises(ValueError, idx.astype, 'timedelta64[ns]')
793-
self.assertRaises(ValueError, idx.astype, 'datetime64')
794-
self.assertRaises(ValueError, idx.astype, 'datetime64[ns]')
795793

796794
def test_shift(self):
797795

pandas/tests/types/test_common.py

+29-6
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,39 @@
66
from pandas.types.dtypes import DatetimeTZDtype, PeriodDtype, CategoricalDtype
77
from pandas.types.common import pandas_dtype, is_dtype_equal
88

9+
import pandas.util.testing as tm
10+
911
_multiprocess_can_split_ = True
1012

1113

12-
def test_pandas_dtype():
14+
class TestPandasDtype(tm.TestCase):
15+
16+
def test_numpy_dtype(self):
17+
for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
18+
self.assertEqual(pandas_dtype(dtype), np.dtype(dtype))
19+
20+
def test_numpy_string_dtype(self):
21+
# do not parse freq-like string as period dtype
22+
self.assertEqual(pandas_dtype('U'), np.dtype('U'))
23+
self.assertEqual(pandas_dtype('S'), np.dtype('S'))
24+
25+
def test_datetimetz_dtype(self):
26+
for dtype in ['datetime64[ns, US/Eastern]',
27+
'datetime64[ns, Asia/Tokyo]',
28+
'datetime64[ns, UTC]']:
29+
self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype))
30+
self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype))
31+
self.assertEqual(pandas_dtype(dtype), dtype)
32+
33+
def test_categorical_dtype(self):
34+
self.assertEqual(pandas_dtype('category'), CategoricalDtype())
1335

14-
assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype(
15-
'datetime64[ns, US/Eastern]')
16-
assert pandas_dtype('category') == CategoricalDtype()
17-
for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
18-
assert pandas_dtype(dtype) == np.dtype(dtype)
36+
def test_period_dtype(self):
37+
for dtype in ['period[D]', 'period[3M]', 'period[U]',
38+
'Period[D]', 'Period[3M]', 'Period[U]']:
39+
self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype))
40+
self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype))
41+
self.assertEqual(pandas_dtype(dtype), dtype)
1942

2043

2144
def test_dtype_equal():

pandas/tests/types/test_dtypes.py

+6-20
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
is_period_dtype, is_period,
1313
is_dtype_equal, is_datetime64_ns_dtype,
1414
is_datetime64_dtype, is_string_dtype,
15-
_coerce_to_dtype, pandas_dtype)
15+
_coerce_to_dtype)
1616
import pandas.util.testing as tm
1717

1818
_multiprocess_can_split_ = True
@@ -268,25 +268,6 @@ def test_coerce_to_dtype(self):
268268
self.assertEqual(_coerce_to_dtype('period[3M]'),
269269
PeriodDtype('period[3M]'))
270270

271-
def test_pandas_dtype(self):
272-
self.assertEqual(pandas_dtype('period[D]'),
273-
PeriodDtype('period[D]'))
274-
self.assertEqual(pandas_dtype('period[3M]'),
275-
PeriodDtype('period[3M]'))
276-
self.assertEqual(pandas_dtype('period[U]'),
277-
PeriodDtype('period[U]'))
278-
# capital P
279-
self.assertEqual(pandas_dtype('Period[D]'),
280-
PeriodDtype('period[D]'))
281-
self.assertEqual(pandas_dtype('Period[3M]'),
282-
PeriodDtype('period[3M]'))
283-
self.assertEqual(pandas_dtype('Period[U]'),
284-
PeriodDtype('period[U]'))
285-
286-
# do not parse freq-like string as period dtype
287-
self.assertEqual(pandas_dtype('U'), np.dtype('U'))
288-
self.assertEqual(pandas_dtype('S'), np.dtype('S'))
289-
290271
def test_compat(self):
291272
self.assertFalse(is_datetime64_ns_dtype(self.dtype))
292273
self.assertFalse(is_datetime64_ns_dtype('period[D]'))
@@ -305,6 +286,11 @@ def test_construction_from_string(self):
305286
with tm.assertRaises(TypeError):
306287
PeriodDtype.construct_from_string('foo[D]')
307288

289+
with tm.assertRaises(TypeError):
290+
PeriodDtype.construct_from_string('datetime64[ns]')
291+
with tm.assertRaises(TypeError):
292+
PeriodDtype.construct_from_string('datetime64[ns, US/Eastern]')
293+
308294
def test_is_dtype(self):
309295
self.assertTrue(PeriodDtype.is_dtype(self.dtype))
310296
self.assertTrue(PeriodDtype.is_dtype('period[D]'))

pandas/tseries/period.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
is_integer_dtype,
1111
is_float_dtype,
1212
is_scalar,
13+
is_datetime64_dtype,
14+
is_datetime64tz_dtype,
1315
is_timedelta64_dtype,
1416
is_period_dtype,
1517
is_bool_dtype,
@@ -411,13 +413,17 @@ def asof_locs(self, where, mask):
411413
return result
412414

413415
@Appender(_index_shared_docs['astype'])
414-
def astype(self, dtype, copy=True):
416+
def astype(self, dtype, copy=True, how='start'):
415417
dtype = pandas_dtype(dtype)
416418
if is_object_dtype(dtype):
417419
return self.asobject
418420
elif is_integer_dtype(dtype):
419421
return Index(self.values.astype('i8', copy=copy), name=self.name,
420422
dtype='i8')
423+
elif is_datetime64_dtype(dtype):
424+
return self.to_timestamp(how=how)
425+
elif is_datetime64tz_dtype(dtype):
426+
return self.to_timestamp(how=how).tz_localize(dtype.tz)
421427
elif is_period_dtype(dtype):
422428
return self.asfreq(freq=dtype.freq)
423429
raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)

pandas/tseries/tests/test_period.py

+19
Original file line numberDiff line numberDiff line change
@@ -3096,6 +3096,25 @@ def test_period_dt64_round_trip(self):
30963096
pi = dti.to_period(freq='H')
30973097
tm.assert_index_equal(pi.to_timestamp(), dti)
30983098

3099+
def test_period_astype_to_timestamp(self):
3100+
pi = pd.PeriodIndex(['2011-01', '2011-02', '2011-03'], freq='M')
3101+
3102+
exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'])
3103+
tm.assert_index_equal(pi.astype('datetime64[ns]'), exp)
3104+
3105+
exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'])
3106+
tm.assert_index_equal(pi.astype('datetime64[ns]', how='end'), exp)
3107+
3108+
exp = pd.DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01'],
3109+
tz='US/Eastern')
3110+
res = pi.astype('datetime64[ns, US/Eastern]')
3111+
tm.assert_index_equal(pi.astype('datetime64[ns, US/Eastern]'), exp)
3112+
3113+
exp = pd.DatetimeIndex(['2011-01-31', '2011-02-28', '2011-03-31'],
3114+
tz='US/Eastern')
3115+
res = pi.astype('datetime64[ns, US/Eastern]', how='end')
3116+
tm.assert_index_equal(res, exp)
3117+
30993118
def test_to_period_quarterly(self):
31003119
# make sure we can make the round trip
31013120
for month in MONTHS:

pandas/types/dtypes.py

+21-20
Original file line numberDiff line numberDiff line change
@@ -286,10 +286,8 @@ def __new__(cls, freq=None):
286286
# empty constructor for pickle compat
287287
return object.__new__(cls)
288288

289-
from pandas._period import Period
290-
try:
291-
freq = Period._maybe_convert_freq(freq)
292-
except ValueError:
289+
from pandas.tseries.offsets import DateOffset
290+
if not isinstance(freq, DateOffset):
293291
freq = cls._parse_dtype_strict(freq)
294292

295293
try:
@@ -302,16 +300,16 @@ def __new__(cls, freq=None):
302300

303301
@classmethod
304302
def _parse_dtype_strict(cls, freq):
305-
try:
306-
m = cls._match.search(freq)
307-
if m is not None:
308-
from pandas._period import Period
309-
freq = m.group('freq')
310-
freq = Period._maybe_convert_freq(freq)
311-
if freq is not None:
312-
return freq
313-
except:
314-
pass
303+
if isinstance(freq, compat.string_types):
304+
if freq.startswith('period[') or freq.startswith('Period['):
305+
m = cls._match.search(freq)
306+
if m is not None:
307+
freq = m.group('freq')
308+
from pandas.tseries.frequencies import to_offset
309+
freq = to_offset(freq)
310+
if freq is not None:
311+
return freq
312+
315313
raise ValueError("could not construct PeriodDtype")
316314

317315
@classmethod
@@ -342,7 +340,7 @@ def __hash__(self):
342340

343341
def __eq__(self, other):
344342
if isinstance(other, compat.string_types):
345-
return other == self.name
343+
return other == self.name or other == self.name.title()
346344

347345
return isinstance(other, PeriodDtype) and self.freq == other.freq
348346

@@ -356,11 +354,14 @@ def is_dtype(cls, dtype):
356354
if isinstance(dtype, compat.string_types):
357355
# PeriodDtype can be instantiated from freq string like "U",
358356
# but doesn't regard freq str like "U" as dtype.
359-
try:
360-
if cls._parse_dtype_strict(dtype) is not None:
361-
return True
362-
else:
357+
if dtype.startswith('period[') or dtype.startswith('Period['):
358+
try:
359+
if cls._parse_dtype_strict(dtype) is not None:
360+
return True
361+
else:
362+
return False
363+
except ValueError:
363364
return False
364-
except ValueError:
365+
else:
365366
return False
366367
return super(PeriodDtype, cls).is_dtype(dtype)

0 commit comments

Comments
 (0)