Skip to content

Commit 506520b

Browse files
sinhrksjreback
authored andcommitted
API: Index doesn't result in PeriodIndex if Period contains NaT
Author: sinhrks <[email protected]> Closes #13664 from sinhrks/period_infer2 and squashes the following commits: b208a9e [sinhrks] API: Index doesn't result in PeriodIndex if Period contains NaT
1 parent fafef5d commit 506520b

File tree

8 files changed

+153
-50
lines changed

8 files changed

+153
-50
lines changed

doc/source/whatsnew/v0.19.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ API changes
269269
- ``.filter()`` enforces mutual exclusion of the keyword arguments. (:issue:`12399`)
270270
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
271271
- ``__setitem__`` will no longer apply a callable rhs as a function instead of storing it. Call ``where`` directly to get the previous behavior. (:issue:`13299`)
272+
- Passing ``Period`` objects with multiple frequencies to a normal ``Index`` now returns an ``Index`` with ``object`` dtype (:issue:`13664`)
273+
- ``PeriodIndex.fillna`` with a ``Period`` that has a different freq now coerces to ``object`` dtype (:issue:`13664`)
272274

273275

274276
.. _whatsnew_0190.api.tolist:
@@ -645,7 +647,6 @@ Bug Fixes
645647
- Bug in ``.unstack`` with ``Categorical`` dtype resets ``.ordered`` to ``True`` (:issue:`13249`)
646648
- Clean some compile time warnings in datetime parsing (:issue:`13607`)
647649

648-
649650
- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
650651
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
651652
- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)

pandas/core/ops.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
is_list_like,
3232
_ensure_object)
3333
from pandas.types.cast import _maybe_upcast_putmask
34-
from pandas.types.generic import ABCSeries, ABCIndex
34+
from pandas.types.generic import ABCSeries, ABCIndex, ABCPeriodIndex
3535

3636
# -----------------------------------------------------------------------------
3737
# Functions that add arithmetic methods to objects, given arithmetic factory
@@ -773,6 +773,15 @@ def wrapper(self, other, axis=None):
773773
if (not lib.isscalar(lib.item_from_zerodim(other)) and
774774
len(self) != len(other)):
775775
raise ValueError('Lengths must match to compare')
776+
777+
if isinstance(other, ABCPeriodIndex):
778+
# temp workaround until fixing GH 13637
779+
# tested in test_nat_comparisons
780+
# (pandas.tests.series.test_operators.TestSeriesOperators)
781+
return self._constructor(na_op(self.values,
782+
other.asobject.values),
783+
index=self.index)
784+
776785
return self._constructor(na_op(self.values, np.asarray(other)),
777786
index=self.index).__finalize__(self)
778787
elif isinstance(other, pd.Categorical):

pandas/indexes/base.py

+17-14
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,8 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
224224
pass
225225

226226
# maybe coerce to a sub-class
227-
from pandas.tseries.period import PeriodIndex
227+
from pandas.tseries.period import (PeriodIndex,
228+
IncompatibleFrequency)
228229
if isinstance(data, PeriodIndex):
229230
return PeriodIndex(data, copy=copy, name=name, **kwargs)
230231
if issubclass(data.dtype.type, np.integer):
@@ -265,13 +266,15 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
265266
return DatetimeIndex(subarr, copy=copy, name=name,
266267
**kwargs)
267268

268-
elif (inferred.startswith('timedelta') or
269-
lib.is_timedelta_array(subarr)):
269+
elif inferred.startswith('timedelta'):
270270
from pandas.tseries.tdi import TimedeltaIndex
271271
return TimedeltaIndex(subarr, copy=copy, name=name,
272272
**kwargs)
273273
elif inferred == 'period':
274-
return PeriodIndex(subarr, name=name, **kwargs)
274+
try:
275+
return PeriodIndex(subarr, name=name, **kwargs)
276+
except IncompatibleFrequency:
277+
pass
275278
return cls._simple_new(subarr, name)
276279

277280
elif hasattr(data, '__array__'):
@@ -866,6 +869,16 @@ def _convert_can_do_setop(self, other):
866869
result_name = self.name if self.name == other.name else None
867870
return other, result_name
868871

872+
def _convert_for_op(self, value):
873+
""" Convert value to be insertable to ndarray """
874+
return value
875+
876+
def _assert_can_do_op(self, value):
877+
""" Check value is valid for scalar op """
878+
if not lib.isscalar(value):
879+
msg = "'value' must be a scalar, passed: {0}"
880+
raise TypeError(msg.format(type(value).__name__))
881+
869882
@property
870883
def nlevels(self):
871884
return 1
@@ -1508,16 +1521,6 @@ def hasnans(self):
15081521
else:
15091522
return False
15101523

1511-
def _convert_for_op(self, value):
1512-
""" Convert value to be insertable to ndarray """
1513-
return value
1514-
1515-
def _assert_can_do_op(self, value):
1516-
""" Check value is valid for scalar op """
1517-
if not is_scalar(value):
1518-
msg = "'value' must be a scalar, passed: {0}"
1519-
raise TypeError(msg.format(type(value).__name__))
1520-
15211524
def putmask(self, mask, value):
15221525
"""
15231526
return a new Index of the values set with the mask

pandas/src/inference.pyx

+27-7
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,7 @@ cdef inline bint is_null_datetimelike(v):
270270

271271

272272
cdef inline bint is_null_datetime64(v):
273-
# determine if we have a null for a datetime (or integer versions)x,
273+
# determine if we have a null for a datetime (or integer versions),
274274
# excluding np.timedelta64('nat')
275275
if util._checknull(v):
276276
return True
@@ -282,7 +282,7 @@ cdef inline bint is_null_datetime64(v):
282282

283283

284284
cdef inline bint is_null_timedelta64(v):
285-
# determine if we have a null for a timedelta (or integer versions)x,
285+
# determine if we have a null for a timedelta (or integer versions),
286286
# excluding np.datetime64('nat')
287287
if util._checknull(v):
288288
return True
@@ -293,6 +293,16 @@ cdef inline bint is_null_timedelta64(v):
293293
return False
294294

295295

296+
cdef inline bint is_null_period(v):
297+
# determine if we have a null for a Period (or integer versions),
298+
# excluding np.datetime64('nat') and np.timedelta64('nat')
299+
if util._checknull(v):
300+
return True
301+
elif v is NaT:
302+
return True
303+
return False
304+
305+
296306
cdef inline bint is_datetime(object o):
297307
return PyDateTime_Check(o)
298308

@@ -531,6 +541,7 @@ def is_timedelta_array(ndarray values):
531541
return False
532542
return null_count != n
533543

544+
534545
def is_timedelta64_array(ndarray values):
535546
cdef Py_ssize_t i, null_count = 0, n = len(values)
536547
cdef object v
@@ -546,6 +557,7 @@ def is_timedelta64_array(ndarray values):
546557
return False
547558
return null_count != n
548559

560+
549561
def is_timedelta_or_timedelta64_array(ndarray values):
550562
""" infer with timedeltas and/or nat/none """
551563
cdef Py_ssize_t i, null_count = 0, n = len(values)
@@ -562,6 +574,7 @@ def is_timedelta_or_timedelta64_array(ndarray values):
562574
return False
563575
return null_count != n
564576

577+
565578
def is_date_array(ndarray[object] values):
566579
cdef Py_ssize_t i, n = len(values)
567580
if n == 0:
@@ -571,6 +584,7 @@ def is_date_array(ndarray[object] values):
571584
return False
572585
return True
573586

587+
574588
def is_time_array(ndarray[object] values):
575589
cdef Py_ssize_t i, n = len(values)
576590
if n == 0:
@@ -582,15 +596,21 @@ def is_time_array(ndarray[object] values):
582596

583597

584598
def is_period_array(ndarray[object] values):
585-
cdef Py_ssize_t i, n = len(values)
586-
from pandas.tseries.period import Period
587-
599+
cdef Py_ssize_t i, null_count = 0, n = len(values)
600+
cdef object v
588601
if n == 0:
589602
return False
603+
604+
# return False for all nulls
590605
for i in range(n):
591-
if not isinstance(values[i], Period):
606+
v = values[i]
607+
if is_null_period(v):
608+
# we are a regular null
609+
if util._checknull(v):
610+
null_count += 1
611+
elif not is_period(v):
592612
return False
593-
return True
613+
return null_count != n
594614

595615

596616
cdef extern from "parse_helper.h":

pandas/tests/indexes/test_datetimelike.py

+62-22
Original file line numberDiff line numberDiff line change
@@ -119,10 +119,10 @@ def test_pickle_compat_construction(self):
119119
def test_construction_index_with_mixed_timezones(self):
120120
# GH 11488
121121
# no tz results in DatetimeIndex
122-
result = Index(
123-
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
124-
exp = DatetimeIndex(
125-
[Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
122+
result = Index([Timestamp('2011-01-01'),
123+
Timestamp('2011-01-02')], name='idx')
124+
exp = DatetimeIndex([Timestamp('2011-01-01'),
125+
Timestamp('2011-01-02')], name='idx')
126126
self.assert_index_equal(result, exp, exact=True)
127127
self.assertTrue(isinstance(result, DatetimeIndex))
128128
self.assertIsNone(result.tz)
@@ -295,9 +295,9 @@ def test_construction_dti_with_mixed_timezones(self):
295295
Timestamp('2011-01-02 10:00',
296296
tz='Asia/Tokyo')],
297297
name='idx')
298-
exp = DatetimeIndex(
299-
[Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')
300-
], tz='Asia/Tokyo', name='idx')
298+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'),
299+
Timestamp('2011-01-02 10:00')],
300+
tz='Asia/Tokyo', name='idx')
301301
self.assert_index_equal(result, exp, exact=True)
302302
self.assertTrue(isinstance(result, DatetimeIndex))
303303

@@ -338,6 +338,17 @@ def test_construction_dti_with_mixed_timezones(self):
338338
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
339339
tz='US/Eastern', name='idx')
340340

341+
def test_construction_base_constructor(self):
342+
arr = [pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]
343+
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
344+
tm.assert_index_equal(pd.Index(np.array(arr)),
345+
pd.DatetimeIndex(np.array(arr)))
346+
347+
arr = [np.nan, pd.NaT, pd.Timestamp('2011-01-03')]
348+
tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
349+
tm.assert_index_equal(pd.Index(np.array(arr)),
350+
pd.DatetimeIndex(np.array(arr)))
351+
341352
def test_astype(self):
342353
# GH 13149, GH 13209
343354
idx = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
@@ -699,12 +710,11 @@ def test_fillna_datetime64(self):
699710
pd.Timestamp('2011-01-01 11:00')], dtype=object)
700711
self.assert_index_equal(idx.fillna('x'), exp)
701712

702-
idx = pd.DatetimeIndex(
703-
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], tz=tz)
713+
idx = pd.DatetimeIndex(['2011-01-01 09:00', pd.NaT,
714+
'2011-01-01 11:00'], tz=tz)
704715

705-
exp = pd.DatetimeIndex(
706-
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
707-
], tz=tz)
716+
exp = pd.DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
717+
'2011-01-01 11:00'], tz=tz)
708718
self.assert_index_equal(
709719
idx.fillna(pd.Timestamp('2011-01-01 10:00', tz=tz)), exp)
710720

@@ -734,6 +744,26 @@ def setUp(self):
734744
def create_index(self):
735745
return period_range('20130101', periods=5, freq='D')
736746

747+
def test_construction_base_constructor(self):
748+
# GH 13664
749+
arr = [pd.Period('2011-01', freq='M'), pd.NaT,
750+
pd.Period('2011-03', freq='M')]
751+
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
752+
tm.assert_index_equal(pd.Index(np.array(arr)),
753+
pd.PeriodIndex(np.array(arr)))
754+
755+
arr = [np.nan, pd.NaT, pd.Period('2011-03', freq='M')]
756+
tm.assert_index_equal(pd.Index(arr), pd.PeriodIndex(arr))
757+
tm.assert_index_equal(pd.Index(np.array(arr)),
758+
pd.PeriodIndex(np.array(arr)))
759+
760+
arr = [pd.Period('2011-01', freq='M'), pd.NaT,
761+
pd.Period('2011-03', freq='D')]
762+
tm.assert_index_equal(pd.Index(arr), pd.Index(arr, dtype=object))
763+
764+
tm.assert_index_equal(pd.Index(np.array(arr)),
765+
pd.Index(np.array(arr), dtype=object))
766+
737767
def test_astype(self):
738768
# GH 13149, GH 13209
739769
idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq='D')
@@ -874,7 +904,6 @@ def test_repeat(self):
874904
self.assertEqual(res.freqstr, 'D')
875905

876906
def test_period_index_indexer(self):
877-
878907
# GH4125
879908
idx = pd.period_range('2002-01', '2003-12', freq='M')
880909
df = pd.DataFrame(pd.np.random.randn(24, 10), index=idx)
@@ -886,23 +915,23 @@ def test_period_index_indexer(self):
886915

887916
def test_fillna_period(self):
888917
# GH 11343
889-
idx = pd.PeriodIndex(
890-
['2011-01-01 09:00', pd.NaT, '2011-01-01 11:00'], freq='H')
918+
idx = pd.PeriodIndex(['2011-01-01 09:00', pd.NaT,
919+
'2011-01-01 11:00'], freq='H')
891920

892-
exp = pd.PeriodIndex(
893-
['2011-01-01 09:00', '2011-01-01 10:00', '2011-01-01 11:00'
894-
], freq='H')
921+
exp = pd.PeriodIndex(['2011-01-01 09:00', '2011-01-01 10:00',
922+
'2011-01-01 11:00'], freq='H')
895923
self.assert_index_equal(
896924
idx.fillna(pd.Period('2011-01-01 10:00', freq='H')), exp)
897925

898926
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'), 'x',
899927
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
900928
self.assert_index_equal(idx.fillna('x'), exp)
901929

902-
with tm.assertRaisesRegexp(
903-
ValueError,
904-
'Input has different freq=D from PeriodIndex\\(freq=H\\)'):
905-
idx.fillna(pd.Period('2011-01-01', freq='D'))
930+
exp = pd.Index([pd.Period('2011-01-01 09:00', freq='H'),
931+
pd.Period('2011-01-01', freq='D'),
932+
pd.Period('2011-01-01 11:00', freq='H')], dtype=object)
933+
self.assert_index_equal(idx.fillna(pd.Period('2011-01-01', freq='D')),
934+
exp)
906935

907936
def test_no_millisecond_field(self):
908937
with self.assertRaises(AttributeError):
@@ -923,6 +952,17 @@ def setUp(self):
923952
def create_index(self):
924953
return pd.to_timedelta(range(5), unit='d') + pd.offsets.Hour(1)
925954

955+
def test_construction_base_constructor(self):
956+
arr = [pd.Timedelta('1 days'), pd.NaT, pd.Timedelta('3 days')]
957+
tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
958+
tm.assert_index_equal(pd.Index(np.array(arr)),
959+
pd.TimedeltaIndex(np.array(arr)))
960+
961+
arr = [np.nan, pd.NaT, pd.Timedelta('1 days')]
962+
tm.assert_index_equal(pd.Index(arr), pd.TimedeltaIndex(arr))
963+
tm.assert_index_equal(pd.Index(np.array(arr)),
964+
pd.TimedeltaIndex(np.array(arr)))
965+
926966
def test_shift(self):
927967
# test shift for TimedeltaIndex
928968
# err8083

pandas/tests/types/test_inference.py

+27
Original file line numberDiff line numberDiff line change
@@ -431,6 +431,33 @@ def test_infer_dtype_timedelta(self):
431431
dtype=object)
432432
self.assertEqual(lib.infer_dtype(arr), 'mixed')
433433

434+
def test_infer_dtype_period(self):
435+
# GH 13664
436+
arr = np.array([pd.Period('2011-01', freq='D'),
437+
pd.Period('2011-02', freq='D')])
438+
self.assertEqual(pd.lib.infer_dtype(arr), 'period')
439+
440+
arr = np.array([pd.Period('2011-01', freq='D'),
441+
pd.Period('2011-02', freq='M')])
442+
self.assertEqual(pd.lib.infer_dtype(arr), 'period')
443+
444+
# starts with nan
445+
for n in [pd.NaT, np.nan]:
446+
arr = np.array([n, pd.Period('2011-01', freq='D')])
447+
self.assertEqual(pd.lib.infer_dtype(arr), 'period')
448+
449+
arr = np.array([n, pd.Period('2011-01', freq='D'), n])
450+
self.assertEqual(pd.lib.infer_dtype(arr), 'period')
451+
452+
# different type of nat
453+
arr = np.array([np.datetime64('nat'), pd.Period('2011-01', freq='M')],
454+
dtype=object)
455+
self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
456+
457+
arr = np.array([pd.Period('2011-01', freq='M'), np.datetime64('nat')],
458+
dtype=object)
459+
self.assertEqual(pd.lib.infer_dtype(arr), 'mixed')
460+
434461
def test_infer_dtype_all_nan_nat_like(self):
435462
arr = np.array([np.nan, np.nan])
436463
self.assertEqual(lib.infer_dtype(arr), 'floating')

pandas/tseries/base.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -800,12 +800,15 @@ def _ensure_datetimelike_to_i8(other):
800800
if lib.isscalar(other) and isnull(other):
801801
other = tslib.iNaT
802802
elif isinstance(other, ABCIndexClass):
803-
804803
# convert tz if needed
805804
if getattr(other, 'tz', None) is not None:
806805
other = other.tz_localize(None).asi8
807806
else:
808807
other = other.asi8
809808
else:
810-
other = np.array(other, copy=False).view('i8')
809+
try:
810+
other = np.array(other, copy=False).view('i8')
811+
except TypeError:
812+
# period array cannot be coerced to int
813+
other = Index(other).asi8
811814
return other

0 commit comments

Comments
 (0)