Skip to content

Commit fa0fa9d

Browse files
committed
clean up PeriodIndex constructor
1 parent 211ecd5 commit fa0fa9d

File tree

6 files changed, with 63 additions (+63) and 77 deletions (-77)

6 files changed, with 63 additions (+63) and 77 deletions (-77)

pandas/core/algorithms.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -471,8 +471,8 @@ def _value_counts_arraylike(values, dropna=True):
471471
# dtype handling
472472
if is_datetimetz_type:
473473
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
474-
if is_period_type:
475-
keys = PeriodIndex._simple_new(keys, freq=freq)
474+
elif is_period_type:
475+
keys = PeriodIndex._from_ordinals(keys, freq=freq)
476476

477477
elif is_signed_integer_dtype(dtype):
478478
values = _ensure_int64(values)

pandas/indexes/base.py

+5
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ def _new_Index(cls, d):
8888
""" This is called upon unpickling, rather than the default which doesn't
8989
have arguments and breaks __new__
9090
"""
91+
from pandas.types.generic import ABCPeriodIndex
92+
if issubclass(cls, ABCPeriodIndex):
93+
if d['data'].dtype == 'int64':
94+
values = d.pop('data')
95+
return cls._from_ordinals(values=values, **d)
9196
return cls.__new__(cls, **d)
9297

9398

pandas/io/packers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -573,7 +573,7 @@ def decode(obj):
573573
elif typ == u'period_index':
574574
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
575575
d = dict(name=obj[u'name'], freq=obj[u'freq'])
576-
return globals()[obj[u'klass']](data, **d)
576+
return globals()[obj[u'klass']]._from_ordinals(data, **d)
577577
elif typ == u'datetime_index':
578578
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
579579
d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)

pandas/tests/indexes/period/test_construction.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -285,12 +285,15 @@ def test_constructor_simple_new_empty(self):
285285
result = idx._simple_new(idx, name='p', freq='M')
286286
tm.assert_index_equal(result, idx)
287287

288-
def test_constructor_simple_new_floats(self):
288+
def test_constructor_floats(self):
289289
# GH13079
290-
for floats in [[1.1], np.array([1.1])]:
290+
for floats in [[1.1, 2.1], np.array([1.1, 2.1])]:
291291
with self.assertRaises(TypeError):
292292
pd.PeriodIndex._simple_new(floats, freq='M')
293293

294+
with self.assertRaises(TypeError):
295+
pd.PeriodIndex(floats, freq='M')
296+
294297
def test_constructor_nat(self):
295298
self.assertRaises(ValueError, period_range, start='NaT',
296299
end='2011-01-01', freq='M')

pandas/tseries/period.py

+49-72
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
is_period_dtype,
1818
is_bool_dtype,
1919
pandas_dtype,
20-
_ensure_int64,
2120
_ensure_object)
2221
from pandas.types.dtypes import PeriodDtype
2322
from pandas.types.generic import ABCSeries
@@ -215,11 +214,46 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
215214
else:
216215
data, freq = cls._generate_range(start, end, periods,
217216
freq, kwargs)
218-
else:
219-
ordinal, freq = cls._from_arraylike(data, freq, tz)
220-
data = np.array(ordinal, dtype=np.int64, copy=copy)
217+
return cls._from_ordinals(data, name=name, freq=freq)
218+
219+
if isinstance(data, PeriodIndex):
220+
if freq is None or freq == data.freq:
221+
freq = data.freq
222+
data = data._values
223+
else:
224+
base1, _ = _gfc(data.freq)
225+
base2, _ = _gfc(freq)
226+
data = period.period_asfreq_arr(data._values,
227+
base1, base2, 1)
228+
return cls._simple_new(data, name=name, freq=freq)
221229

222-
return cls._simple_new(data, name=name, freq=freq)
230+
if not isinstance(data, (np.ndarray, PeriodIndex,
231+
DatetimeIndex, Int64Index)):
232+
if is_scalar(data) or isinstance(data, Period):
233+
raise ValueError('PeriodIndex() must be called with a '
234+
'collection of some kind, %s was passed'
235+
% repr(data))
236+
237+
# other iterable of some kind
238+
if not isinstance(data, (list, tuple)):
239+
data = list(data)
240+
241+
data = np.asarray(data)
242+
243+
if np.issubdtype(data.dtype, np.datetime64):
244+
data = dt64arr_to_periodarr(data, freq, tz)
245+
return cls._from_ordinals(data, name=name, freq=freq)
246+
247+
inferred_dtype = infer_dtype(data)
248+
249+
if inferred_dtype == 'floating' and len(data) > 0:
250+
raise TypeError("PeriodIndex can't take floats")
251+
252+
else:
253+
data = _ensure_object(data)
254+
freq = freq or period.extract_freq(data)
255+
data = period.extract_ordinals(data, freq)
256+
return cls._from_ordinals(data, name=name, freq=freq)
223257

224258
@classmethod
225259
def _generate_range(cls, start, end, periods, freq, fields):
@@ -240,68 +274,6 @@ def _generate_range(cls, start, end, periods, freq, fields):
240274

241275
return subarr, freq
242276

243-
@classmethod
244-
def _from_arraylike(cls, data, freq, tz):
245-
if freq is not None:
246-
freq = Period._maybe_convert_freq(freq)
247-
248-
if not isinstance(data, (np.ndarray, PeriodIndex,
249-
DatetimeIndex, Int64Index)):
250-
if is_scalar(data) or isinstance(data, Period):
251-
raise ValueError('PeriodIndex() must be called with a '
252-
'collection of some kind, %s was passed'
253-
% repr(data))
254-
255-
# other iterable of some kind
256-
if not isinstance(data, (list, tuple)):
257-
data = list(data)
258-
259-
try:
260-
data = _ensure_int64(data)
261-
if freq is None:
262-
raise ValueError('freq not specified')
263-
data = np.array([Period(x, freq=freq) for x in data],
264-
dtype=np.int64)
265-
except (TypeError, ValueError):
266-
data = _ensure_object(data)
267-
268-
if freq is None:
269-
freq = period.extract_freq(data)
270-
data = period.extract_ordinals(data, freq)
271-
else:
272-
if isinstance(data, PeriodIndex):
273-
if freq is None or freq == data.freq:
274-
freq = data.freq
275-
data = data._values
276-
else:
277-
base1, _ = _gfc(data.freq)
278-
base2, _ = _gfc(freq)
279-
data = period.period_asfreq_arr(data._values,
280-
base1, base2, 1)
281-
else:
282-
if is_object_dtype(data):
283-
inferred = infer_dtype(data)
284-
if inferred == 'integer':
285-
data = data.astype(np.int64)
286-
287-
if freq is None and is_object_dtype(data):
288-
# must contain Period instance and thus extract ordinals
289-
freq = period.extract_freq(data)
290-
data = period.extract_ordinals(data, freq)
291-
292-
if freq is None:
293-
msg = 'freq not specified and cannot be inferred'
294-
raise ValueError(msg)
295-
296-
if data.dtype != np.int64:
297-
if np.issubdtype(data.dtype, np.datetime64):
298-
data = dt64arr_to_periodarr(data, freq, tz)
299-
else:
300-
data = _ensure_object(data)
301-
data = period.extract_ordinals(data, freq)
302-
303-
return data, freq
304-
305277
@classmethod
306278
def _simple_new(cls, values, name=None, freq=None, **kwargs):
307279

@@ -312,13 +284,18 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
312284
else:
313285
return cls(values, name=name, freq=freq, **kwargs)
314286

287+
return cls._from_ordinals(values, name, freq, **kwargs)
288+
289+
@classmethod
290+
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
291+
315292
values = np.array(values, dtype='int64', copy=False)
316293

317294
result = object.__new__(cls)
318295
result._data = values
319296
result.name = name
320297
if freq is None:
321-
raise ValueError('freq is not specified')
298+
raise ValueError('freq is not specified and cannot be inferred')
322299
result.freq = Period._maybe_convert_freq(freq)
323300
result._reset_identity()
324301
return result
@@ -329,7 +306,6 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
329306

330307
def _shallow_copy(self, values=None, **kwargs):
331308
if kwargs.get('freq') is None:
332-
# freq must be provided
333309
kwargs['freq'] = self.freq
334310
if values is None:
335311
values = self._values
@@ -413,7 +389,7 @@ def __array_wrap__(self, result, context=None):
413389
return result
414390
# the result is object dtype array of Period
415391
# cannot pass _simple_new as it is
416-
return PeriodIndex(result, freq=self.freq, name=self.name)
392+
return self._shallow_copy(result, freq=self.freq, name=self.name)
417393

418394
@property
419395
def _box_func(self):
@@ -708,7 +684,7 @@ def shift(self, n):
708684
values = self._values + n * self.freq.n
709685
if self.hasnans:
710686
values[self._isnan] = tslib.iNaT
711-
return PeriodIndex(data=values, name=self.name, freq=self.freq)
687+
return self._shallow_copy(values=values)
712688

713689
@cache_readonly
714690
def dtype(self):
@@ -945,7 +921,8 @@ def _wrap_union_result(self, other, result):
945921

946922
def _apply_meta(self, rawarr):
947923
if not isinstance(rawarr, PeriodIndex):
948-
rawarr = PeriodIndex(rawarr, freq=self.freq)
924+
rawarr = PeriodIndex._from_ordinals(rawarr, freq=self.freq,
925+
name=self.name)
949926
return rawarr
950927

951928
def _format_native_types(self, na_rep=u('NaT'), date_format=None,

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ parentdir_prefix = pandas-
1313

1414
[flake8]
1515
ignore = E731,E402
16+
max-line-length = 79
1617

1718
[yapf]
1819
based_on_style = pep8

0 commit comments

Comments
 (0)