Skip to content

(WIP) ENH: Add PeriodBlock #13755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 15 additions & 26 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
is_categorical_dtype,
is_extension_type,
is_datetimetz,
is_period,
is_period_dtype,
is_period_arraylike,
is_float_dtype,
needs_i8_conversion,
is_categorical,
Expand Down Expand Up @@ -410,7 +410,8 @@ def value_counts(values, sort=True, ascending=False, normalize=False,
raise TypeError("bins argument only works with numeric data.")
values = cat.codes

if is_extension_type(values) and not is_datetimetz(values):
if (is_extension_type(values) and
not (is_datetimetz(values) or is_period(values))):
# handle Categorical and sparse,
# datetime tz can be handled in ndarray path
result = Series(values).values.value_counts(dropna=dropna)
Expand Down Expand Up @@ -442,25 +443,14 @@ def value_counts(values, sort=True, ascending=False, normalize=False,

def _value_counts_arraylike(values, dropna=True):
is_datetimetz_type = is_datetimetz(values)
is_period_type = (is_period_dtype(values) or
is_period_arraylike(values))

is_period_type = is_period_dtype(values)
orig = values

from pandas.core.series import Series
values = Series(values).values
values = Series(values)._values
dtype = values.dtype

if needs_i8_conversion(dtype) or is_period_type:

from pandas.tseries.index import DatetimeIndex
from pandas.tseries.period import PeriodIndex

if is_period_type:
# values may be an object
values = PeriodIndex(values)
freq = values.freq

if needs_i8_conversion(dtype):
values = values.view(np.int64)
keys, counts = htable.value_count_int64(values, dropna)

Expand All @@ -469,13 +459,14 @@ def _value_counts_arraylike(values, dropna=True):
keys, counts = keys[msk], counts[msk]

# convert the keys back to the dtype we came in
keys = keys.astype(dtype)

# dtype handling
if is_datetimetz_type:
from pandas.tseries.index import DatetimeIndex
keys = DatetimeIndex._simple_new(keys, tz=orig.dtype.tz)
if is_period_type:
keys = PeriodIndex._simple_new(keys, freq=freq)
elif is_period_type:
from pandas.tseries.period import PeriodIndex
keys = PeriodIndex._simple_new(keys, freq=orig.dtype.freq)
else:
keys = keys.astype(dtype)

elif is_integer_dtype(dtype):
values = _ensure_int64(values)
Expand Down Expand Up @@ -522,9 +513,6 @@ def duplicated(values, keep='first'):
# no need to revert to original type
if needs_i8_conversion(dtype):
values = values.view(np.int64)
elif is_period_arraylike(values):
from pandas.tseries.period import PeriodIndex
values = PeriodIndex(values).asi8
elif is_categorical_dtype(dtype):
values = values.values.codes
elif isinstance(values, (ABCSeries, ABCIndex)):
Expand Down Expand Up @@ -1243,8 +1231,9 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,
if is_categorical(arr):
return arr.take_nd(indexer, fill_value=fill_value,
allow_fill=allow_fill)
elif is_datetimetz(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_extension_type(arr):
return arr.take(indexer, fill_value=fill_value,
allow_fill=allow_fill)

if indexer is None:
indexer = np.arange(arr.shape[axis], dtype=np.int64)
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
is_datetimetz,
is_datetime64_dtype,
is_datetime64tz_dtype,
is_period_dtype,
is_bool_dtype,
is_integer_dtype,
is_float_dtype,
Expand Down Expand Up @@ -263,8 +264,10 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
if isinstance(data, BlockManager):
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
dtype=dtype, copy=copy)

elif isinstance(data, dict):
mgr = self._init_dict(data, index, columns, dtype=dtype)

elif isinstance(data, ma.MaskedArray):
import numpy.ma.mrecords as mrecords
# masked recarray
Expand Down Expand Up @@ -2946,7 +2949,7 @@ def reset_index(self, level=None, drop=False, inplace=False, col_level=0,

def _maybe_casted_values(index, labels=None):
if isinstance(index, PeriodIndex):
values = index.asobject.values
values = index
elif isinstance(index, DatetimeIndex) and index.tz is not None:
values = index
else:
Expand Down Expand Up @@ -3706,6 +3709,11 @@ def combine(self, other, func, fill_value=None, overwrite=True):
# see if we need to be represented as i8 (datetimelike)
# try to keep us at this dtype
needs_i8_conversion_i = needs_i8_conversion(new_dtype)

if is_period_dtype(new_dtype):
# temporary: skip the i8 conversion path for PeriodDtype
needs_i8_conversion_i = False

if needs_i8_conversion_i:
arr = func(series, otherSeries, True)
else:
Expand Down Expand Up @@ -3756,6 +3764,7 @@ def combine_first(self, other):
"""

def combiner(x, y, needs_i8_conversion=False):
# ToDo:
x_values = x.values if hasattr(x, 'values') else x
y_values = y.values if hasattr(y, 'values') else y
if needs_i8_conversion:
Expand Down
Loading