Skip to content

clean up PeriodIndex constructor #13277

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -585,6 +585,7 @@ Bug Fixes
- Bug in ``DataFrame.sort_values()`` when sorting by multiple columns where one column is of type ``int64`` and contains ``NaT`` (:issue:`14922`)
- Bug in ``DataFrame.reindex()`` in which ``method`` was ignored when passing ``columns`` (:issue:`14992`)
- Bug in ``pd.to_numeric()`` in which float and unsigned integer elements were being improperly casted (:issue:`14941`, :issue:`15005`)
- Cleaned up ``PeriodIndex`` constructor, including raising on floats more consistently (:issue:`13277`)
- Bug in ``pd.read_csv()`` in which the ``dialect`` parameter was not being verified before processing (:issue:`14898`)
- Bug in ``pd.read_fwf`` where the skiprows parameter was not being respected during column width inference (:issue:`11256`)
- Bug in ``pd.read_csv()`` in which missing data was being improperly handled with ``usecols`` (:issue:`6710`)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,8 +471,8 @@ def _value_counts_arraylike(values, dropna=True):
# dtype handling
if is_datetimetz_type:
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
if is_period_type:
keys = PeriodIndex._simple_new(keys, freq=freq)
elif is_period_type:
keys = PeriodIndex._from_ordinals(keys, freq=freq)

elif is_signed_integer_dtype(dtype):
values = _ensure_int64(values)
Expand Down
5 changes: 5 additions & 0 deletions pandas/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,11 @@ def _new_Index(cls, d):
""" This is called upon unpickling, rather than the default which doesn't
have arguments and breaks __new__
"""
# required for backward compat, because PI can't be instantiated with
# ordinals through __new__ GH #13277
if issubclass(cls, ABCPeriodIndex):
from pandas.tseries.period import _new_PeriodIndex
return _new_PeriodIndex(cls, **d)
return cls.__new__(cls, **d)


Expand Down
2 changes: 1 addition & 1 deletion pandas/io/packers.py
Original file line number Diff line number Diff line change
Expand Up @@ -573,7 +573,7 @@ def decode(obj):
elif typ == u'period_index':
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
d = dict(name=obj[u'name'], freq=obj[u'freq'])
return globals()[obj[u'klass']](data, **d)
return globals()[obj[u'klass']]._from_ordinals(data, **d)
elif typ == u'datetime_index':
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
Expand Down
9 changes: 6 additions & 3 deletions pandas/tests/indexes/period/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_constructor_fromarraylike(self):

self.assertRaises(ValueError, PeriodIndex, idx._values)
self.assertRaises(ValueError, PeriodIndex, list(idx._values))
self.assertRaises(ValueError, PeriodIndex,
self.assertRaises(TypeError, PeriodIndex,
data=Period('2007', freq='A'))

result = PeriodIndex(iter(idx))
Expand Down Expand Up @@ -285,12 +285,15 @@ def test_constructor_simple_new_empty(self):
result = idx._simple_new(idx, name='p', freq='M')
tm.assert_index_equal(result, idx)

def test_constructor_simple_new_floats(self):
def test_constructor_floats(self):
# GH13079
for floats in [[1.1], np.array([1.1])]:
for floats in [[1.1, 2.1], np.array([1.1, 2.1])]:
with self.assertRaises(TypeError):
pd.PeriodIndex._simple_new(floats, freq='M')

with self.assertRaises(TypeError):
pd.PeriodIndex(floats, freq='M')

def test_constructor_nat(self):
self.assertRaises(ValueError, period_range, start='NaT',
end='2011-01-01', freq='M')
Expand Down
155 changes: 75 additions & 80 deletions pandas/tseries/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
is_period_dtype,
is_bool_dtype,
pandas_dtype,
_ensure_int64,
_ensure_object)
from pandas.types.dtypes import PeriodDtype
from pandas.types.generic import ABCSeries
Expand Down Expand Up @@ -114,6 +113,13 @@ def wrapper(self, other):
return wrapper


def _new_PeriodIndex(cls, **d):
# GH13277 for unpickling
if d['data'].dtype == 'int64':
values = d.pop('data')
return cls._from_ordinals(values=values, **d)


class PeriodIndex(DatelikeOps, DatetimeIndexOpsMixin, Int64Index):
"""
Immutable ndarray holding ordinal values indicating regular periods in
Expand Down Expand Up @@ -209,17 +215,56 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
msg = 'specified freq and dtype are different'
raise IncompatibleFrequency(msg)

# coerce freq to freq object, otherwise it can be coerced elementwise
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add under _simple_new and _from_ordinals pointers to each other and an expl of when to use which for construction (or can do in a block above them both).

# which is slow
if freq:
freq = Period._maybe_convert_freq(freq)

if data is None:
if ordinal is not None:
data = np.asarray(ordinal, dtype=np.int64)
else:
data, freq = cls._generate_range(start, end, periods,
freq, kwargs)
else:
ordinal, freq = cls._from_arraylike(data, freq, tz)
data = np.array(ordinal, dtype=np.int64, copy=copy)
return cls._from_ordinals(data, name=name, freq=freq)

return cls._simple_new(data, name=name, freq=freq)
if isinstance(data, PeriodIndex):
if freq is None or freq == data.freq: # no freq change
freq = data.freq
data = data._values
else:
base1, _ = _gfc(data.freq)
base2, _ = _gfc(freq)
data = period.period_asfreq_arr(data._values,
base1, base2, 1)
return cls._simple_new(data, name=name, freq=freq)

# not array / index
if not isinstance(data, (np.ndarray, PeriodIndex,
DatetimeIndex, Int64Index)):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am ok with a .from_ordinals

if is_scalar(data) or isinstance(data, Period):
cls._scalar_data_error(data)

# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)

data = np.asarray(data)

# datetime other than period
if is_datetime64_dtype(data.dtype):
data = dt64arr_to_periodarr(data, freq, tz)
return cls._from_ordinals(data, name=name, freq=freq)

# check not floats
if infer_dtype(data) == 'floating' and len(data) > 0:
raise TypeError("PeriodIndex can't take floats")

# anything else, likely an array of strings or periods
data = _ensure_object(data)
freq = freq or period.extract_freq(data)
data = period.extract_ordinals(data, freq)
return cls._from_ordinals(data, name=name, freq=freq)

@classmethod
def _generate_range(cls, start, end, periods, freq, fields):
Expand All @@ -240,85 +285,34 @@ def _generate_range(cls, start, end, periods, freq, fields):

return subarr, freq

@classmethod
def _from_arraylike(cls, data, freq, tz):
if freq is not None:
freq = Period._maybe_convert_freq(freq)

if not isinstance(data, (np.ndarray, PeriodIndex,
DatetimeIndex, Int64Index)):
if is_scalar(data) or isinstance(data, Period):
raise ValueError('PeriodIndex() must be called with a '
'collection of some kind, %s was passed'
% repr(data))

# other iterable of some kind
if not isinstance(data, (list, tuple)):
data = list(data)

try:
data = _ensure_int64(data)
if freq is None:
raise ValueError('freq not specified')
data = np.array([Period(x, freq=freq) for x in data],
dtype=np.int64)
except (TypeError, ValueError):
data = _ensure_object(data)

if freq is None:
freq = period.extract_freq(data)
data = period.extract_ordinals(data, freq)
else:
if isinstance(data, PeriodIndex):
if freq is None or freq == data.freq:
freq = data.freq
data = data._values
else:
base1, _ = _gfc(data.freq)
base2, _ = _gfc(freq)
data = period.period_asfreq_arr(data._values,
base1, base2, 1)
else:
if is_object_dtype(data):
inferred = infer_dtype(data)
if inferred == 'integer':
data = data.astype(np.int64)

if freq is None and is_object_dtype(data):
# must contain Period instance and thus extract ordinals
freq = period.extract_freq(data)
data = period.extract_ordinals(data, freq)

if freq is None:
msg = 'freq not specified and cannot be inferred'
raise ValueError(msg)

if data.dtype != np.int64:
if np.issubdtype(data.dtype, np.datetime64):
data = dt64arr_to_periodarr(data, freq, tz)
else:
data = _ensure_object(data)
data = period.extract_ordinals(data, freq)

return data, freq

@classmethod
def _simple_new(cls, values, name=None, freq=None, **kwargs):

"""
Values can be any type that can be coerced to Periods.
Ordinals in an ndarray are fastpath-ed to `_from_ordinals`
"""
if not is_integer_dtype(values):
values = np.array(values, copy=False)
if (len(values) > 0 and is_float_dtype(values)):
if len(values) > 0 and is_float_dtype(values):
raise TypeError("PeriodIndex can't take floats")
else:
return cls(values, name=name, freq=freq, **kwargs)
return cls(values, name=name, freq=freq, **kwargs)

return cls._from_ordinals(values, name, freq, **kwargs)

@classmethod
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
"""
Values should be int ordinals
`__new__` & `_simple_new` cooerce to ordinals and call this method
"""

values = np.array(values, dtype='int64', copy=False)

result = object.__new__(cls)
result._data = values
result.name = name
if freq is None:
raise ValueError('freq is not specified')
raise ValueError('freq is not specified and cannot be inferred')
result.freq = Period._maybe_convert_freq(freq)
result._reset_identity()
return result
Expand All @@ -327,13 +321,13 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
""" we always want to return a PeriodIndex """
return self._shallow_copy(values=values, **kwargs)

def _shallow_copy(self, values=None, **kwargs):
if kwargs.get('freq') is None:
# freq must be provided
kwargs['freq'] = self.freq
def _shallow_copy(self, values=None, freq=None, **kwargs):
if freq is None:
freq = self.freq
if values is None:
values = self._values
return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs)
return super(PeriodIndex, self)._shallow_copy(values=values,
freq=freq, **kwargs)

def _coerce_scalar_to_index(self, item):
"""
Expand Down Expand Up @@ -413,7 +407,7 @@ def __array_wrap__(self, result, context=None):
return result
# the result is object dtype array of Period
# cannot pass _simple_new as it is
return PeriodIndex(result, freq=self.freq, name=self.name)
return self._shallow_copy(result, freq=self.freq, name=self.name)

@property
def _box_func(self):
Expand Down Expand Up @@ -708,7 +702,7 @@ def shift(self, n):
values = self._values + n * self.freq.n
if self.hasnans:
values[self._isnan] = tslib.iNaT
return PeriodIndex(data=values, name=self.name, freq=self.freq)
return self._shallow_copy(values=values)

@cache_readonly
def dtype(self):
Expand Down Expand Up @@ -945,7 +939,8 @@ def _wrap_union_result(self, other, result):

def _apply_meta(self, rawarr):
if not isinstance(rawarr, PeriodIndex):
rawarr = PeriodIndex(rawarr, freq=self.freq)
rawarr = PeriodIndex._from_ordinals(rawarr, freq=self.freq,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

test to hit this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes I only saw it because it broke a test. But not really sure whether we need the function

name=self.name)
return rawarr

def _format_native_types(self, na_rep=u('NaT'), date_format=None,
Expand Down
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ parentdir_prefix = pandas-

[flake8]
ignore = E731,E402
max-line-length = 79

[yapf]
based_on_style = pep8
Expand Down