Skip to content

Commit e6695fa

Browse files
committed
clean up PeriodIndex constructor
1 parent ae2ca83 commit e6695fa

File tree

6 files changed

+86
-110
lines changed

6 files changed

+86
-110
lines changed

pandas/core/algorithms.py

+7-6
Original file line numberDiff line numberDiff line change
@@ -304,17 +304,18 @@ def _value_counts_arraylike(values, dropna=True):
304304

305305
orig = values
306306

307+
if is_period:
308+
from pandas.tseries.period import PeriodIndex
309+
index = PeriodIndex(values)
310+
freq = index.freq
311+
values = index.values
312+
307313
from pandas.core.series import Series
308314
values = Series(values).values
309315
dtype = values.dtype
310316

311317
if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
312318
from pandas.tseries.index import DatetimeIndex
313-
from pandas.tseries.period import PeriodIndex
314-
315-
if is_period:
316-
values = PeriodIndex(values)
317-
freq = values.freq
318319

319320
values = values.view(np.int64)
320321
keys, counts = htable.value_count_scalar64(values, dropna)
@@ -333,7 +334,7 @@ def _value_counts_arraylike(values, dropna=True):
333334
else:
334335
tz = orig.dt.tz
335336
keys = DatetimeIndex._simple_new(keys, tz=tz)
336-
if is_period:
337+
elif is_period:
337338
keys = PeriodIndex._simple_new(keys, freq=freq)
338339

339340
elif com.is_integer_dtype(dtype):

pandas/indexes/base.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def _new_Index(cls, d):
6767
""" This is called upon unpickling, rather than the default which doesn't
6868
have arguments and breaks __new__
6969
"""
70+
if issubclass(cls, ABCPeriodIndex):
71+
return cls._simple_new(**d)
7072
return cls.__new__(cls, **d)
7173

7274

@@ -371,7 +373,7 @@ def _shallow_copy_with_infer(self, values=None, **kwargs):
371373
attributes['copy'] = False
372374
if self._infer_as_myclass:
373375
try:
374-
return self._constructor(values, **attributes)
376+
return self._shallow_copy(values, **attributes)
375377
except (TypeError, ValueError):
376378
pass
377379
return Index(values, **attributes)
@@ -1944,9 +1946,9 @@ def symmetric_difference(self, other, result_name=None):
19441946
union(other.difference(self))))
19451947
attribs = self._get_attributes_dict()
19461948
attribs['name'] = result_name
1947-
if 'freq' in attribs:
1948-
attribs['freq'] = None
1949-
return self._shallow_copy_with_infer(the_diff, **attribs)
1949+
# use constructor rather than _shallow_copy, because we have objects
1950+
# rather than .values
1951+
return self._constructor(the_diff, **attribs)
19501952

19511953
sym_diff = deprecate('sym_diff', symmetric_difference)
19521954

@@ -2419,10 +2421,10 @@ def _reindex_non_unique(self, target):
24192421
missing = com._ensure_platform_int(missing)
24202422
missing_labels = target.take(missing)
24212423
missing_indexer = _ensure_int64(l[~check])
2422-
cur_labels = self.take(indexer[check])._values
2424+
cur_labels = self.take(indexer[check])
24232425
cur_indexer = _ensure_int64(l[check])
24242426

2425-
new_labels = np.empty(tuple([len(indexer)]), dtype=object)
2427+
new_labels = np.empty(tuple([len(indexer)]), dtype=target.dtype)
24262428
new_labels[cur_indexer] = cur_labels
24272429
new_labels[missing_indexer] = missing_labels
24282430

@@ -2446,7 +2448,7 @@ def _reindex_non_unique(self, target):
24462448
new_indexer = np.arange(len(self.take(indexer)))
24472449
new_indexer[~check] = -1
24482450

2449-
new_index = self._shallow_copy_with_infer(new_labels, freq=None)
2451+
new_index = self._shallow_copy_with_infer(new_labels)
24502452
return new_index, indexer, new_indexer
24512453

24522454
def join(self, other, how='left', level=None, return_indexers=False):

pandas/tests/indexes/test_datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -867,7 +867,7 @@ def test_repeat(self):
867867
# GH10183
868868
idx = pd.period_range('2000-01-01', periods=3, freq='D')
869869
res = idx.repeat(3)
870-
exp = PeriodIndex(idx.values.repeat(3), freq='D')
870+
exp = PeriodIndex._simple_new(idx.values.repeat(3), freq='D')
871871
self.assert_index_equal(res, exp)
872872
self.assertEqual(res.freqstr, 'D')
873873

pandas/tseries/index.py

+6
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,12 @@ def _simple_new(cls, values, name=None, freq=None, tz=None,
598598
result._reset_identity()
599599
return result
600600

601+
def _shallow_copy_with_infer(self, values=None, **kwargs):
602+
# re-infer freq as it may have changed (and always possible to infer
603+
# from values for DatetimeIndex)
604+
kwargs['freq'] = None
605+
return super(DatetimeIndex, self)._shallow_copy_with_infer(values, **kwargs)
606+
601607
@property
602608
def tzinfo(self):
603609
"""

pandas/tseries/period.py

+56-92
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,13 @@ def f(self):
4141

4242

4343
def _get_ordinals(data, freq):
44-
f = lambda x: Period(x, freq=freq).ordinal
45-
if isinstance(data[0], Period):
44+
if len(data) == 0:
45+
return np.array([], dtype='int64')
46+
elif isinstance(data[0], Period):
4647
return period.extract_ordinals(data, freq)
47-
else:
48-
return lib.map_infer(data, f)
48+
if com.is_float_dtype(data):
49+
raise ValueError("PeriodIndex can't be constructed from floats")
50+
return lib.map_infer(data, lambda x: Period(x, freq=freq).ordinal)
4951

5052

5153
def dt64arr_to_periodarr(data, freq, tz):
@@ -188,9 +190,43 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
188190
else:
189191
data, freq = cls._generate_range(start, end, periods,
190192
freq, kwargs)
191-
else:
192-
ordinal, freq = cls._from_arraylike(data, freq, tz)
193-
data = np.array(ordinal, dtype=np.int64, copy=False)
193+
return cls._simple_new(data, name=name, freq=freq)
194+
195+
if isinstance(data, PeriodIndex):
196+
if freq is None or freq == data.freq:
197+
freq = data.freq
198+
data = data.values
199+
else:
200+
base1, _ = _gfc(data.freq)
201+
base2, _ = _gfc(freq)
202+
data = period.period_asfreq_arr(
203+
data.values, base1, base2, 1)
204+
return cls._simple_new(data, name=name, freq=freq)
205+
206+
if not isinstance(data, (np.ndarray, PeriodIndex,
207+
DatetimeIndex, Int64Index)):
208+
if lib.isscalar(data):
209+
raise ValueError('PeriodIndex() must be called with a '
210+
'collection of some kind, %r was passed'
211+
% data)
212+
213+
# other iterable of some kind
214+
if not isinstance(data, (list, tuple)):
215+
data = list(data)
216+
217+
data = np.array(data, copy=False)
218+
219+
if freq is None and len(data) > 0:
220+
freq = getattr(data[0], 'freq', None)
221+
if freq is None:
222+
raise ValueError('freq not specified and cannot be '
223+
'inferred from first element')
224+
225+
if np.issubdtype(data.dtype, np.datetime64):
226+
data = dt64arr_to_periodarr(data, freq, tz)
227+
return cls._simple_new(data, name=name, freq=freq)
228+
229+
data = _get_ordinals(data, freq)
194230

195231
return cls._simple_new(data, name=name, freq=freq)
196232

@@ -211,96 +247,23 @@ def _generate_range(cls, start, end, periods, freq, fields):
211247
return subarr, freq
212248

213249
@classmethod
214-
def _from_arraylike(cls, data, freq, tz):
215-
if not isinstance(data, (np.ndarray, PeriodIndex,
216-
DatetimeIndex, Int64Index)):
217-
if lib.isscalar(data) or isinstance(data, Period):
218-
raise ValueError('PeriodIndex() must be called with a '
219-
'collection of some kind, %s was passed'
220-
% repr(data))
221-
222-
# other iterable of some kind
223-
if not isinstance(data, (list, tuple)):
224-
data = list(data)
225-
226-
try:
227-
data = com._ensure_int64(data)
228-
if freq is None:
229-
raise ValueError('freq not specified')
230-
data = np.array([Period(x, freq=freq).ordinal for x in data],
231-
dtype=np.int64)
232-
except (TypeError, ValueError):
233-
data = com._ensure_object(data)
234-
235-
if freq is None and len(data) > 0:
236-
freq = getattr(data[0], 'freq', None)
237-
238-
if freq is None:
239-
raise ValueError('freq not specified and cannot be '
240-
'inferred from first element')
241-
242-
data = _get_ordinals(data, freq)
243-
else:
244-
if isinstance(data, PeriodIndex):
245-
if freq is None or freq == data.freq:
246-
freq = data.freq
247-
data = data.values
248-
else:
249-
base1, _ = _gfc(data.freq)
250-
base2, _ = _gfc(freq)
251-
data = period.period_asfreq_arr(data.values,
252-
base1, base2, 1)
253-
else:
254-
if freq is None and len(data) > 0:
255-
freq = getattr(data[0], 'freq', None)
256-
257-
if freq is None:
258-
raise ValueError('freq not specified and cannot be '
259-
'inferred from first element')
260-
261-
if data.dtype != np.int64:
262-
if np.issubdtype(data.dtype, np.datetime64):
263-
data = dt64arr_to_periodarr(data, freq, tz)
264-
else:
265-
try:
266-
data = com._ensure_int64(data)
267-
except (TypeError, ValueError):
268-
data = com._ensure_object(data)
269-
data = _get_ordinals(data, freq)
270-
271-
return data, freq
272-
273-
@classmethod
274-
def _simple_new(cls, values, name=None, freq=None, **kwargs):
250+
def _simple_new(cls, data, name=None, freq=None, **kwargs):
251+
""" PeriodIndex from array-like of either ints or Periods """
275252

276-
if not com.is_integer_dtype(values):
277-
values = np.array(values, copy=False)
278-
if (len(values) > 0 and com.is_float_dtype(values)):
279-
raise TypeError("PeriodIndex can't take floats")
280-
else:
281-
return PeriodIndex(values, name=name, freq=freq, **kwargs)
253+
if not com.is_integer_dtype(data):
254+
return PeriodIndex(data, name=name, freq=freq, **kwargs)
282255

283-
values = np.array(values, dtype='int64', copy=False)
256+
data = np.array(data, dtype='int64', copy=False)
284257

285258
result = object.__new__(cls)
286-
result._data = values
259+
result._data = data
287260
result.name = name
288261
if freq is None:
289262
raise ValueError('freq is not specified')
290263
result.freq = Period._maybe_convert_freq(freq)
291264
result._reset_identity()
292265
return result
293266

294-
def _shallow_copy_with_infer(self, values=None, **kwargs):
295-
""" we always want to return a PeriodIndex """
296-
return self._shallow_copy(values=values, **kwargs)
297-
298-
def _shallow_copy(self, values=None, **kwargs):
299-
if kwargs.get('freq') is None:
300-
# freq must be provided
301-
kwargs['freq'] = self.freq
302-
return super(PeriodIndex, self)._shallow_copy(values=values, **kwargs)
303-
304267
def _coerce_scalar_to_index(self, item):
305268
"""
306269
we need to coerce a scalar to a compat for our index type
@@ -348,7 +311,7 @@ def __array_wrap__(self, result, context=None):
348311

349312
if com.is_bool_dtype(result):
350313
return result
351-
return PeriodIndex(result, freq=self.freq, name=self.name)
314+
return self._shallow_copy(result)
352315

353316
@property
354317
def _box_func(self):
@@ -646,7 +609,7 @@ def shift(self, n):
646609
values = self.values + n * self.freq.n
647610
if self.hasnans:
648611
values[self._isnan] = tslib.iNaT
649-
return PeriodIndex(data=values, name=self.name, freq=self.freq)
612+
return self._shallow_copy(values)
650613

651614
@cache_readonly
652615
def dtype_str(self):
@@ -851,7 +814,7 @@ def _wrap_union_result(self, other, result):
851814

852815
def _apply_meta(self, rawarr):
853816
if not isinstance(rawarr, PeriodIndex):
854-
rawarr = PeriodIndex(rawarr, freq=self.freq)
817+
rawarr = self._shallow_copy(rawarr)
855818
return rawarr
856819

857820
def __getitem__(self, key):
@@ -869,9 +832,9 @@ def __getitem__(self, key):
869832
# values = np.asarray(list(values), dtype=object)
870833
# return values.reshape(result.shape)
871834

872-
return PeriodIndex(result, name=self.name, freq=self.freq)
835+
return self._shallow_copy(result)
873836

874-
return PeriodIndex(result, name=self.name, freq=self.freq)
837+
return self._shallow_copy(result)
875838

876839
def _format_native_types(self, na_rep=u('NaT'), date_format=None,
877840
**kwargs):
@@ -921,7 +884,7 @@ def append(self, other):
921884
to_concat = [x.asobject.values for x in to_concat]
922885
else:
923886
cat_values = np.concatenate([x.values for x in to_concat])
924-
return PeriodIndex(cat_values, freq=self.freq, name=name)
887+
return self._shallow_copy(cat_values)
925888

926889
to_concat = [x.values if isinstance(x, Index) else x
927890
for x in to_concat]
@@ -1143,3 +1106,4 @@ def period_range(start=None, end=None, periods=None, freq='D', name=None):
11431106
"""
11441107
return PeriodIndex(start=start, end=end, periods=periods,
11451108
freq=freq, name=name)
1109+

pandas/tseries/tests/test_period.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1769,12 +1769,15 @@ def test_constructor_simple_new_empty(self):
17691769
result = idx._simple_new(idx, name='p', freq='M')
17701770
assert_index_equal(result, idx)
17711771

1772-
def test_constructor_simple_new_floats(self):
1772+
def test_constructor_floats(self):
17731773
# GH13079
1774-
for floats in [[1.1], np.array([1.1])]:
1775-
with self.assertRaises(TypeError):
1774+
for floats in [[1.1, 2.1], np.array([1.1, 2.1])]:
1775+
with self.assertRaises(ValueError):
17761776
pd.PeriodIndex._simple_new(floats, freq='M')
17771777

1778+
with self.assertRaises(ValueError):
1779+
pd.PeriodIndex(floats, freq='M')
1780+
17781781
def test_shallow_copy_empty(self):
17791782

17801783
# GH13067
@@ -3279,7 +3282,7 @@ def test_factorize(self):
32793282
def test_recreate_from_data(self):
32803283
for o in ['M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H']:
32813284
org = PeriodIndex(start='2001/04/01', freq=o, periods=1)
3282-
idx = PeriodIndex(org.values, freq=o)
3285+
idx = PeriodIndex(org, freq=o)
32833286
tm.assert_index_equal(idx, org)
32843287

32853288
def test_combine_first(self):

0 commit comments

Comments
 (0)