Skip to content

Commit 776f406

Browse files
committed
clean up PeriodIndex constructor
1 parent 8a78a2d commit 776f406

File tree

6 files changed

+63
-77
lines changed

6 files changed

+63
-77
lines changed

pandas/core/algorithms.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ def _value_counts_arraylike(values, dropna=True):
474474
# dtype handling
475475
if is_datetimetz_type:
476476
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
477-
if is_period_type:
478-
keys = PeriodIndex._simple_new(keys, freq=freq)
477+
elif is_period_type:
478+
keys = PeriodIndex._from_ordinals(keys, freq=freq)
479479

480480
elif is_integer_dtype(dtype):
481481
values = _ensure_int64(values)

pandas/indexes/base.py

+5
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ def _new_Index(cls, d):
8686
""" This is called upon unpickling, rather than the default which doesn't
8787
have arguments and breaks __new__
8888
"""
89+
from pandas.types.generic import ABCPeriodIndex
90+
if issubclass(cls, ABCPeriodIndex):
91+
if d['data'].dtype == 'int64':
92+
values = d.pop('data')
93+
return cls._from_ordinals(values=values, **d)
8994
return cls.__new__(cls, **d)
9095

9196

pandas/io/packers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ def decode(obj):
571571
elif typ == u'period_index':
572572
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
573573
d = dict(name=obj[u'name'], freq=obj[u'freq'])
574-
return globals()[obj[u'klass']](data, **d)
574+
return globals()[obj[u'klass']]._from_ordinals(data, **d)
575575
elif typ == u'datetime_index':
576576
data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
577577
d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)

pandas/tseries/period.py

+49-72
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
is_period_dtype,
1818
is_bool_dtype,
1919
pandas_dtype,
20-
_ensure_int64,
2120
_ensure_object)
2221
from pandas.types.dtypes import PeriodDtype
2322
from pandas.types.generic import ABCSeries
@@ -211,11 +210,46 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
211210
else:
212211
data, freq = cls._generate_range(start, end, periods,
213212
freq, kwargs)
214-
else:
215-
ordinal, freq = cls._from_arraylike(data, freq, tz)
216-
data = np.array(ordinal, dtype=np.int64, copy=copy)
213+
return cls._from_ordinals(data, name=name, freq=freq)
214+
215+
if isinstance(data, PeriodIndex):
216+
if freq is None or freq == data.freq:
217+
freq = data.freq
218+
data = data._values
219+
else:
220+
base1, _ = _gfc(data.freq)
221+
base2, _ = _gfc(freq)
222+
data = period.period_asfreq_arr(data._values,
223+
base1, base2, 1)
224+
return cls._simple_new(data, name=name, freq=freq)
217225

218-
return cls._simple_new(data, name=name, freq=freq)
226+
if not isinstance(data, (np.ndarray, PeriodIndex,
227+
DatetimeIndex, Int64Index)):
228+
if is_scalar(data) or isinstance(data, Period):
229+
raise ValueError('PeriodIndex() must be called with a '
230+
'collection of some kind, %s was passed'
231+
% repr(data))
232+
233+
# other iterable of some kind
234+
if not isinstance(data, (list, tuple)):
235+
data = list(data)
236+
237+
data = np.asarray(data)
238+
239+
if np.issubdtype(data.dtype, np.datetime64):
240+
data = dt64arr_to_periodarr(data, freq, tz)
241+
return cls._from_ordinals(data, name=name, freq=freq)
242+
243+
inferred_dtype = infer_dtype(data)
244+
245+
if inferred_dtype == 'floating' and len(data) > 0:
246+
raise TypeError("PeriodIndex can't take floats")
247+
248+
else:
249+
data = _ensure_object(data)
250+
freq = freq or period.extract_freq(data)
251+
data = period.extract_ordinals(data, freq)
252+
return cls._from_ordinals(data, name=name, freq=freq)
219253

220254
@classmethod
221255
def _generate_range(cls, start, end, periods, freq, fields):
@@ -236,68 +270,6 @@ def _generate_range(cls, start, end, periods, freq, fields):
236270

237271
return subarr, freq
238272

239-
@classmethod
240-
def _from_arraylike(cls, data, freq, tz):
241-
if freq is not None:
242-
freq = Period._maybe_convert_freq(freq)
243-
244-
if not isinstance(data, (np.ndarray, PeriodIndex,
245-
DatetimeIndex, Int64Index)):
246-
if is_scalar(data) or isinstance(data, Period):
247-
raise ValueError('PeriodIndex() must be called with a '
248-
'collection of some kind, %s was passed'
249-
% repr(data))
250-
251-
# other iterable of some kind
252-
if not isinstance(data, (list, tuple)):
253-
data = list(data)
254-
255-
try:
256-
data = _ensure_int64(data)
257-
if freq is None:
258-
raise ValueError('freq not specified')
259-
data = np.array([Period(x, freq=freq) for x in data],
260-
dtype=np.int64)
261-
except (TypeError, ValueError):
262-
data = _ensure_object(data)
263-
264-
if freq is None:
265-
freq = period.extract_freq(data)
266-
data = period.extract_ordinals(data, freq)
267-
else:
268-
if isinstance(data, PeriodIndex):
269-
if freq is None or freq == data.freq:
270-
freq = data.freq
271-
data = data._values
272-
else:
273-
base1, _ = _gfc(data.freq)
274-
base2, _ = _gfc(freq)
275-
data = period.period_asfreq_arr(data._values,
276-
base1, base2, 1)
277-
else:
278-
if is_object_dtype(data):
279-
inferred = infer_dtype(data)
280-
if inferred == 'integer':
281-
data = data.astype(np.int64)
282-
283-
if freq is None and is_object_dtype(data):
284-
# must contain Period instance and thus extract ordinals
285-
freq = period.extract_freq(data)
286-
data = period.extract_ordinals(data, freq)
287-
288-
if freq is None:
289-
msg = 'freq not specified and cannot be inferred'
290-
raise ValueError(msg)
291-
292-
if data.dtype != np.int64:
293-
if np.issubdtype(data.dtype, np.datetime64):
294-
data = dt64arr_to_periodarr(data, freq, tz)
295-
else:
296-
data = _ensure_object(data)
297-
data = period.extract_ordinals(data, freq)
298-
299-
return data, freq
300-
301273
@classmethod
302274
def _simple_new(cls, values, name=None, freq=None, **kwargs):
303275

@@ -308,13 +280,18 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
308280
else:
309281
return cls(values, name=name, freq=freq, **kwargs)
310282

283+
return cls._from_ordinals(values, name, freq, **kwargs)
284+
285+
@classmethod
286+
def _from_ordinals(cls, values, name=None, freq=None, **kwargs):
287+
311288
values = np.array(values, dtype='int64', copy=False)
312289

313290
result = object.__new__(cls)
314291
result._data = values
315292
result.name = name
316293
if freq is None:
317-
raise ValueError('freq is not specified')
294+
raise ValueError('freq is not specified and cannot be inferred')
318295
result.freq = Period._maybe_convert_freq(freq)
319296
result._reset_identity()
320297
return result
@@ -325,7 +302,6 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
325302

326303
def _shallow_copy(self, values=None, **kwargs):
327304
if kwargs.get('freq') is None:
328-
# freq must be provided
329305
kwargs['freq'] = self.freq
330306
if values is None:
331307
values = self._values
@@ -409,7 +385,7 @@ def __array_wrap__(self, result, context=None):
409385
return result
410386
# the result is object dtype array of Period
411387
# cannot pass _simple_new as it is
412-
return PeriodIndex(result, freq=self.freq, name=self.name)
388+
return self._shallow_copy(result, freq=self.freq, name=self.name)
413389

414390
@property
415391
def _box_func(self):
@@ -704,7 +680,7 @@ def shift(self, n):
704680
values = self._values + n * self.freq.n
705681
if self.hasnans:
706682
values[self._isnan] = tslib.iNaT
707-
return PeriodIndex(data=values, name=self.name, freq=self.freq)
683+
return self._shallow_copy(values=values)
708684

709685
@cache_readonly
710686
def dtype(self):
@@ -940,7 +916,8 @@ def _wrap_union_result(self, other, result):
940916

941917
def _apply_meta(self, rawarr):
942918
if not isinstance(rawarr, PeriodIndex):
943-
rawarr = PeriodIndex(rawarr, freq=self.freq)
919+
rawarr = PeriodIndex._from_ordinals(rawarr, freq=self.freq,
920+
name=self.name)
944921
return rawarr
945922

946923
def _format_native_types(self, na_rep=u('NaT'), date_format=None,

pandas/tseries/tests/test_period.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1918,12 +1918,15 @@ def test_constructor_simple_new_empty(self):
19181918
result = idx._simple_new(idx, name='p', freq='M')
19191919
tm.assert_index_equal(result, idx)
19201920

1921-
def test_constructor_simple_new_floats(self):
1921+
def test_constructor_floats(self):
19221922
# GH13079
1923-
for floats in [[1.1], np.array([1.1])]:
1923+
for floats in [[1.1, 2.1], np.array([1.1, 2.1])]:
19241924
with self.assertRaises(TypeError):
19251925
pd.PeriodIndex._simple_new(floats, freq='M')
19261926

1927+
with self.assertRaises(TypeError):
1928+
pd.PeriodIndex(floats, freq='M')
1929+
19271930
def test_shallow_copy_empty(self):
19281931

19291932
# GH13067

setup.cfg

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ parentdir_prefix = pandas-
1313

1414
[flake8]
1515
ignore = E731
16+
max-line-length = 79
1617

1718
[yapf]
1819
based_on_style = pep8

0 commit comments

Comments
 (0)