Skip to content

Commit 5990de6

Browse files
committed
ENH: PeriodIndex now accepts pd.NaT
1 parent 07761c5 commit 5990de6

File tree

4 files changed

+121
-25
lines changed

4 files changed

+121
-25
lines changed

doc/source/whatsnew/v0.18.2.txt

+1
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ Other API changes
288288
- ``Float64Index.astype(int)`` will now raise ``ValueError`` if ``Float64Index`` contains ``NaN`` values (:issue:`13149`)
289289
- ``TimedeltaIndex.astype(int)`` and ``DatetimeIndex.astype(int)`` will now return ``Int64Index`` instead of ``np.array`` (:issue:`13209`)
290290
- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
291+
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
291292

292293
.. _whatsnew_0182.deprecations:
293294

pandas/src/period.pyx

+32-5
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ cimport cython
2424
from datetime cimport *
2525
cimport util
2626
cimport lib
27+
from lib cimport is_null_datetimelike
2728
import lib
2829
from pandas import tslib
2930
from tslib import Timedelta, Timestamp, iNaT, NaT
@@ -458,13 +459,39 @@ def extract_ordinals(ndarray[object] values, freq):
458459

459460
for i in range(n):
460461
p = values[i]
461-
ordinals[i] = p.ordinal
462-
if p.freqstr != freqstr:
463-
msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr)
464-
raise IncompatibleFrequency(msg)
462+
463+
if is_null_datetimelike(p):
464+
ordinals[i] = tslib.iNaT
465+
else:
466+
try:
467+
ordinals[i] = p.ordinal
468+
469+
if p.freqstr != freqstr:
470+
msg = _DIFFERENT_FREQ_INDEX.format(freqstr, p.freqstr)
471+
raise IncompatibleFrequency(msg)
472+
473+
except AttributeError:
474+
p = Period(p, freq=freq)
475+
ordinals[i] = p.ordinal
465476

466477
return ordinals
467478

479+
480+
def extract_freq(ndarray[object] values):
481+
cdef:
482+
Py_ssize_t i, n = len(values)
483+
object p
484+
485+
for i in range(n):
486+
p = values[i]
487+
try:
488+
return p.freq
489+
except AttributeError:
490+
pass
491+
492+
raise ValueError('freq not specified and cannot be inferred')
493+
494+
468495
cpdef resolution(ndarray[int64_t] stamps, tz=None):
469496
cdef:
470497
Py_ssize_t i, n = len(stamps)
@@ -719,7 +746,7 @@ cdef class Period(object):
719746
converted = other.asfreq(freq)
720747
ordinal = converted.ordinal
721748

722-
elif lib.is_null_datetimelike(value) or value in tslib._nat_strings:
749+
elif is_null_datetimelike(value) or value in tslib._nat_strings:
723750
ordinal = tslib.iNaT
724751
if freq is None:
725752
raise ValueError("If value is NaT, freq cannot be None "

pandas/tseries/period.py

+10-20
Original file line numberDiff line numberDiff line change
@@ -40,14 +40,6 @@ def f(self):
4040
return property(f)
4141

4242

43-
def _get_ordinals(data, freq):
44-
f = lambda x: Period(x, freq=freq).ordinal
45-
if isinstance(data[0], Period):
46-
return period.extract_ordinals(data, freq)
47-
else:
48-
return lib.map_infer(data, f)
49-
50-
5143
def dt64arr_to_periodarr(data, freq, tz):
5244
if data.dtype != np.dtype('M8[ns]'):
5345
raise ValueError('Wrong dtype: %s' % data.dtype)
@@ -235,14 +227,9 @@ def _from_arraylike(cls, data, freq, tz):
235227
except (TypeError, ValueError):
236228
data = com._ensure_object(data)
237229

238-
if freq is None and len(data) > 0:
239-
freq = getattr(data[0], 'freq', None)
240-
241230
if freq is None:
242-
raise ValueError('freq not specified and cannot be '
243-
'inferred from first element')
244-
245-
data = _get_ordinals(data, freq)
231+
freq = period.extract_freq(data)
232+
data = period.extract_ordinals(data, freq)
246233
else:
247234
if isinstance(data, PeriodIndex):
248235
if freq is None or freq == data.freq:
@@ -254,12 +241,15 @@ def _from_arraylike(cls, data, freq, tz):
254241
data = period.period_asfreq_arr(data.values,
255242
base1, base2, 1)
256243
else:
257-
if freq is None and len(data) > 0:
258-
freq = getattr(data[0], 'freq', None)
244+
245+
if freq is None and com.is_object_dtype(data):
246+
# must contain Period instance and thus extract ordinals
247+
freq = period.extract_freq(data)
248+
data = period.extract_ordinals(data, freq)
259249

260250
if freq is None:
261-
raise ValueError('freq not specified and cannot be '
262-
'inferred from first element')
251+
msg = 'freq not specified and cannot be inferred'
252+
raise ValueError(msg)
263253

264254
if data.dtype != np.int64:
265255
if np.issubdtype(data.dtype, np.datetime64):
@@ -269,7 +259,7 @@ def _from_arraylike(cls, data, freq, tz):
269259
data = com._ensure_int64(data)
270260
except (TypeError, ValueError):
271261
data = com._ensure_object(data)
272-
data = _get_ordinals(data, freq)
262+
data = period.extract_ordinals(data, freq)
273263

274264
return data, freq
275265

pandas/tseries/tests/test_period.py

+78
Original file line numberDiff line numberDiff line change
@@ -1742,6 +1742,84 @@ def test_constructor_datetime64arr(self):
17421742

17431743
self.assertRaises(ValueError, PeriodIndex, vals, freq='D')
17441744

1745+
def test_constructor_empty(self):
1746+
idx = pd.PeriodIndex([], freq='M')
1747+
tm.assertIsInstance(idx, PeriodIndex)
1748+
self.assertEqual(len(idx), 0)
1749+
self.assertEqual(idx.freq, 'M')
1750+
1751+
with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
1752+
pd.PeriodIndex([])
1753+
1754+
def test_constructor_pi_nat(self):
1755+
idx = PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
1756+
Period('2011-01', freq='M')])
1757+
exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
1758+
tm.assert_index_equal(idx, exp)
1759+
1760+
idx = PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
1761+
Period('2011-01', freq='M')]))
1762+
tm.assert_index_equal(idx, exp)
1763+
1764+
idx = PeriodIndex([pd.NaT, pd.NaT, Period('2011-01', freq='M'),
1765+
Period('2011-01', freq='M')])
1766+
exp = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
1767+
tm.assert_index_equal(idx, exp)
1768+
1769+
idx = PeriodIndex(np.array([pd.NaT, pd.NaT,
1770+
Period('2011-01', freq='M'),
1771+
Period('2011-01', freq='M')]))
1772+
tm.assert_index_equal(idx, exp)
1773+
1774+
idx = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'], freq='M')
1775+
tm.assert_index_equal(idx, exp)
1776+
1777+
with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
1778+
PeriodIndex([pd.NaT, pd.NaT])
1779+
1780+
with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
1781+
PeriodIndex(np.array([pd.NaT, pd.NaT]))
1782+
1783+
with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
1784+
PeriodIndex(['NaT', 'NaT'])
1785+
1786+
with tm.assertRaisesRegexp(ValueError, 'freq not specified'):
1787+
PeriodIndex(np.array(['NaT', 'NaT']))
1788+
1789+
def test_constructor_incompat_freq(self):
1790+
msg = "Input has different freq=D from PeriodIndex\\(freq=M\\)"
1791+
1792+
with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
1793+
PeriodIndex([Period('2011-01', freq='M'), pd.NaT,
1794+
Period('2011-01', freq='D')])
1795+
1796+
with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
1797+
PeriodIndex(np.array([Period('2011-01', freq='M'), pd.NaT,
1798+
Period('2011-01', freq='D')]))
1799+
1800+
# first element is pd.NaT
1801+
with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
1802+
PeriodIndex([pd.NaT, Period('2011-01', freq='M'),
1803+
Period('2011-01', freq='D')])
1804+
1805+
with tm.assertRaisesRegexp(period.IncompatibleFrequency, msg):
1806+
PeriodIndex(np.array([pd.NaT, Period('2011-01', freq='M'),
1807+
Period('2011-01', freq='D')]))
1808+
1809+
def test_constructor_mixed(self):
1810+
idx = PeriodIndex(['2011-01', pd.NaT, Period('2011-01', freq='M')])
1811+
exp = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
1812+
tm.assert_index_equal(idx, exp)
1813+
1814+
idx = PeriodIndex(['NaT', pd.NaT, Period('2011-01', freq='M')])
1815+
exp = PeriodIndex(['NaT', 'NaT', '2011-01'], freq='M')
1816+
tm.assert_index_equal(idx, exp)
1817+
1818+
idx = PeriodIndex([Period('2011-01-01', freq='D'), pd.NaT,
1819+
'2012-01-01'])
1820+
exp = PeriodIndex(['2011-01-01', 'NaT', '2012-01-01'], freq='D')
1821+
tm.assert_index_equal(idx, exp)
1822+
17451823
def test_constructor_simple_new(self):
17461824
idx = period_range('2007-01', name='p', periods=2, freq='M')
17471825
result = idx._simple_new(idx, 'p', freq=idx.freq)

0 commit comments

Comments
 (0)