Skip to content

Commit 8d8ef2f

Browse files
committed
ENH: Add PeriodBlock
1 parent 6fa2b03 commit 8d8ef2f

36 files changed

+1043
-320
lines changed

pandas/core/algorithms.py

+21-30
Original file line numberDiff line numberDiff line change
@@ -285,18 +285,18 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
285285
note: an array of Periods will ignore sort as it returns an always sorted
286286
PeriodIndex
287287
"""
288-
from pandas import Index, Series, DatetimeIndex
288+
from pandas import Index, Series, DatetimeIndex, PeriodIndex
289289

290290
vals = np.asarray(values)
291291

292-
# localize to UTC
293292
is_datetimetz_type = is_datetimetz(values)
294-
if is_datetimetz_type:
295-
values = DatetimeIndex(values)
296-
vals = values.asi8
293+
is_period_type = is_period_dtype(values)
294+
if is_datetimetz_type or is_period_type:
295+
vals = values.view('i8')
297296

298297
is_datetime = is_datetime64_dtype(vals)
299298
is_timedelta = is_timedelta64_dtype(vals)
299+
300300
(hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)
301301

302302
table = hash_klass(size_hint or len(vals))
@@ -318,6 +318,8 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
318318
uniques = uniques.astype('M8[ns]')
319319
elif is_timedelta:
320320
uniques = uniques.astype('m8[ns]')
321+
elif is_period_type:
322+
uniques = PeriodIndex(uniques, freq=values.freq)
321323
if isinstance(values, Index):
322324
uniques = values._shallow_copy(uniques, name=None)
323325
elif isinstance(values, Series):
@@ -362,7 +364,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
362364
raise TypeError("bins argument only works with numeric data.")
363365
values = cat.codes
364366

365-
if is_extension_type(values) and not is_datetimetz(values):
367+
if (is_extension_type(values) and
368+
not (is_datetimetz(values) or is_period(values))):
366369
# handle Categorical and sparse,
367370
# datetime tz can be handled in ndarray path
368371
result = Series(values).values.value_counts(dropna=dropna)
@@ -394,25 +397,14 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
394397

395398
def _value_counts_arraylike(values, dropna=True):
396399
is_datetimetz_type = is_datetimetz(values)
397-
is_period_type = (is_period_dtype(values) or
398-
is_period_arraylike(values))
399-
400+
is_period_type = is_period_dtype(values)
400401
orig = values
401402

402403
from pandas.core.series import Series
403-
values = Series(values).values
404+
values = Series(values)._values
404405
dtype = values.dtype
405406

406-
if needs_i8_conversion(dtype) or is_period_type:
407-
408-
from pandas.tseries.index import DatetimeIndex
409-
from pandas.tseries.period import PeriodIndex
410-
411-
if is_period_type:
412-
# values may be an object
413-
values = PeriodIndex(values)
414-
freq = values.freq
415-
407+
if needs_i8_conversion(dtype):
416408
values = values.view(np.int64)
417409
keys, counts = htable.value_count_int64(values, dropna)
418410

@@ -421,13 +413,14 @@ def _value_counts_arraylike(values, dropna=True):
421413
keys, counts = keys[msk], counts[msk]
422414

423415
# convert the keys back to the dtype we came in with
424-
keys = keys.astype(dtype)
425-
426-
# dtype handling
427416
if is_datetimetz_type:
417+
from pandas.tseries.index import DatetimeIndex
428418
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
429-
if is_period_type:
430-
keys = PeriodIndex._simple_new(keys, freq=freq)
419+
elif is_period_type:
420+
from pandas.tseries.period import PeriodIndex
421+
keys = PeriodIndex._simple_new(keys, freq=orig.dtype.freq)
422+
else:
423+
keys = keys.astype(dtype)
431424

432425
elif is_integer_dtype(dtype):
433426
values = _ensure_int64(values)
@@ -471,9 +464,6 @@ def duplicated(values, keep='first'):
471464
# no need to revert to original type
472465
if needs_i8_conversion(dtype):
473466
values = values.view(np.int64)
474-
elif is_period_arraylike(values):
475-
from pandas.tseries.period import PeriodIndex
476-
values = PeriodIndex(values).asi8
477467
elif is_categorical_dtype(dtype):
478468
values = values.values.codes
479469
elif isinstance(values, (ABCSeries, ABCIndex)):
@@ -1010,8 +1000,9 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
10101000
if is_categorical(arr):
10111001
return arr.take_nd(indexer, fill_value=fill_value,
10121002
allow_fill=allow_fill)
1013-
elif is_datetimetz(arr):
1014-
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
1003+
elif is_extension_type(arr):
1004+
return arr.take(indexer, fill_value=fill_value,
1005+
allow_fill=allow_fill)
10151006

10161007
if indexer is None:
10171008
indexer = np.arange(arr.shape[axis], dtype=np.int64)

pandas/core/frame.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -263,8 +263,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
263263
if isinstance(data, BlockManager):
264264
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
265265
dtype=dtype, copy=copy)
266+
266267
elif isinstance(data, dict):
267268
mgr = self._init_dict(data, index, columns, dtype=dtype)
269+
268270
elif isinstance(data, ma.MaskedArray):
269271
import numpy.ma.mrecords as mrecords
270272
# masked recarray
@@ -2952,7 +2954,7 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0,
29522954

29532955
def _maybe_casted_values(index, labels=None):
29542956
if isinstance(index, PeriodIndex):
2955-
values = index.asobject.values
2957+
values = index
29562958
elif isinstance(index, DatetimeIndex) and index.tz is not None:
29572959
values = index
29582960
else:

0 commit comments

Comments
 (0)