Sync Fork from Upstream Repo #91

Merged: 13 commits, Mar 14, 2020

14 changes: 4 additions & 10 deletions pandas/_libs/tslib.pyx
@@ -14,7 +14,7 @@ PyDateTime_IMPORT


 cimport numpy as cnp
-from numpy cimport float64_t, int64_t, ndarray
+from numpy cimport float64_t, int64_t, ndarray, uint8_t
 import numpy as np
 cnp.import_array()

@@ -351,7 +351,6 @@ def format_array_from_datetime(

 def array_with_unit_to_datetime(
     ndarray values,
-    ndarray mask,
     object unit,
     str errors='coerce'
 ):
@@ -373,8 +372,6 @@ def array_with_unit_to_datetime(
     ----------
     values : ndarray of object
         Date-like objects to convert.
-    mask : boolean ndarray
-        Not-a-time mask for non-nullable integer types conversion, can be None.
     unit : object
         Time unit to use during conversion.
     errors : str, default 'raise'
@@ -395,6 +392,7 @@ def array_with_unit_to_datetime(
         bint need_to_iterate = True
         ndarray[int64_t] iresult
         ndarray[object] oresult
+        ndarray mask
         object tz = None

     assert is_ignore or is_coerce or is_raise
@@ -404,9 +402,6 @@ def array_with_unit_to_datetime(
             result = values.astype('M8[ns]')
         else:
             result, tz = array_to_datetime(values.astype(object), errors=errors)
-            if mask is not None:
-                iresult = result.view('i8')
-                iresult[mask] = NPY_NAT
         return result, tz

     m = cast_from_unit(None, unit)
@@ -419,9 +414,8 @@ def array_with_unit_to_datetime(
     if values.dtype.kind == "i":
         # Note: this condition makes the casting="same_kind" redundant
         iresult = values.astype('i8', casting='same_kind', copy=False)
-        # If no mask, fill mask by comparing to NPY_NAT constant
-        if mask is None:
-            mask = iresult == NPY_NAT
+        # fill by comparing to NPY_NAT constant
+        mask = iresult == NPY_NAT
         iresult[mask] = 0
         fvalues = iresult.astype('f8') * m
         need_to_iterate = False
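
The net effect in tslib.pyx is that the caller no longer supplies a not-a-time mask; integer input is always masked by comparing against the NPY_NAT sentinel. A minimal sketch of the user-visible behavior through the public pd.to_datetime (values chosen for illustration; assumes a pandas build containing this change):

    import numpy as np
    import pandas as pd

    # NPY_NAT is the minimum int64; integer entries equal to it are treated
    # as missing, mirroring the `mask = iresult == NPY_NAT` line above
    vals = np.array([1, np.iinfo(np.int64).min], dtype="i8")
    print(pd.to_datetime(vals, unit="s"))
    # DatetimeIndex(['1970-01-01 00:00:01', 'NaT'], dtype='datetime64[ns]', freq=None)
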
13 changes: 12 additions & 1 deletion pandas/_libs/tslibs/offsets.pyx
@@ -114,7 +114,18 @@ def apply_index_wraps(func):
     # Note: normally we would use `@functools.wraps(func)`, but this does
     # not play nicely with cython class methods
     def wrapper(self, other):
-        result = func(self, other)
+
+        is_index = getattr(other, "_typ", "") == "datetimeindex"
+
+        # operate on DatetimeArray
+        arr = other._data if is_index else other
+
+        result = func(self, arr)
+
+        if is_index:
+            # Wrap DatetimeArray result back to DatetimeIndex
+            result = type(other)._simple_new(result, name=other.name)
+
         if self.normalize:
             result = result.to_period('D').to_timestamp()
         return result
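
apply_index_wraps now unwraps a DatetimeIndex to its underlying DatetimeArray before calling the wrapped function, then rebuilds the index, so the index name survives the round trip. A quick illustrative check (the offset choice is arbitrary; assumes a pandas version with this wrapper):

    import pandas as pd

    idx = pd.date_range("2020-01-01", periods=3, freq="D", name="ts")
    shifted = idx + pd.offsets.MonthEnd()

    # _simple_new(result, name=other.name) re-attaches the name
    print(type(shifted).__name__, shifted.name)  # DatetimeIndex ts
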
7 changes: 7 additions & 0 deletions pandas/core/arrays/base.py
@@ -407,6 +407,13 @@ def shape(self) -> Tuple[int, ...]:
         """
         return (len(self),)

+    @property
+    def size(self) -> int:
+        """
+        The number of elements in the array.
+        """
+        return np.prod(self.shape)
+
     @property
     def ndim(self) -> int:
         """
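
Because shape is (len(self),) for these one-dimensional arrays, size is just the product of the shape entries. A small sketch with a nullable integer array (any ExtensionArray subclass behaves the same):

    import pandas as pd

    arr = pd.array([1, 2, None], dtype="Int64")
    print(arr.shape)  # (3,)
    print(arr.size)   # 3, i.e. np.prod((3,))
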
3 changes: 3 additions & 0 deletions pandas/core/arrays/integer.py
@@ -13,6 +13,7 @@
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
     is_bool_dtype,
+    is_datetime64_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         if is_float_dtype(dtype):
             # In astype, we consider dtype=float to also mean na_value=np.nan
             kwargs = dict(na_value=np.nan)
+        elif is_datetime64_dtype(dtype):
+            kwargs = dict(na_value=np.datetime64("NaT"))
         else:
             kwargs = {}

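
With the new datetime64 branch, casting a nullable integer array passes na_value=np.datetime64("NaT") down to the underlying conversion, so missing entries become NaT rather than raising. A hedged example, assuming the integers are epoch nanoseconds:

    import pandas as pd

    arr = pd.array([1_577_836_800_000_000_000, None], dtype="Int64")
    print(arr.astype("datetime64[ns]"))
    # ['2020-01-01T00:00:00.000000000' 'NaT']
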
34 changes: 19 additions & 15 deletions pandas/core/frame.py
@@ -7808,6 +7808,8 @@ def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):

+        assert filter_type is None or filter_type == "bool", filter_type
+
         dtype_is_dt = self.dtypes.apply(
             lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
         )
@@ -7835,7 +7837,7 @@ def f(x):
             return op(x, axis=axis, skipna=skipna, **kwds)

         def _get_data(axis_matters):
-            if filter_type is None or filter_type == "numeric":
+            if filter_type is None:
                 data = self._get_numeric_data()
             elif filter_type == "bool":
                 if axis_matters:
@@ -7882,15 +7884,11 @@ def blk_func(values):
                 return out

         if numeric_only is None:
-            values = self.values
+            data = self
+            values = data.values
             try:
                 result = f(values)

-                if filter_type == "bool" and is_object_dtype(values) and axis is None:
-                    # work around https://github.com/numpy/numpy/issues/10489
-                    # TODO: combine with hasattr(result, 'dtype') further down
-                    #  hard since we don't have `values` down there.
-                    result = np.bool_(result)
             except TypeError:
                 # e.g. in nanops trying to convert strs to float

@@ -7916,30 +7914,36 @@ def blk_func(values):

                 # TODO: why doesnt axis matter here?
                 data = _get_data(axis_matters=False)
-                with np.errstate(all="ignore"):
-                    result = f(data.values)
                 labels = data._get_agg_axis(axis)

+                values = data.values
+                with np.errstate(all="ignore"):
+                    result = f(values)
         else:
             if numeric_only:
                 data = _get_data(axis_matters=True)
-                labels = data._get_agg_axis(axis)

                 values = data.values
+                labels = data._get_agg_axis(axis)
             else:
-                values = self.values
+                data = self
+                values = data.values
             result = f(values)

-        if hasattr(result, "dtype") and is_object_dtype(result.dtype):
+        if filter_type == "bool" and is_object_dtype(values) and axis is None:
+            # work around https://github.com/numpy/numpy/issues/10489
+            # TODO: can we de-duplicate parts of this with the next blocK?
+            result = np.bool_(result)
+        elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
             try:
-                if filter_type is None or filter_type == "numeric":
+                if filter_type is None:
                     result = result.astype(np.float64)
                 elif filter_type == "bool" and notna(result).all():
                     result = result.astype(np.bool_)
             except (ValueError, TypeError):

                 # try to coerce to the original dtypes item by item if we can
                 if axis == 0:
-                    result = coerce_to_dtypes(result, self.dtypes)
+                    result = coerce_to_dtypes(result, data.dtypes)

         if constructor is not None:
             result = self._constructor_sliced(result, index=labels)
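
The relocated np.bool_ workaround matters for boolean reductions over both axes of object-dtype data, where NumPy can return a non-boolean scalar (numpy issue 10489). An illustrative check, not taken from the PR itself:

    import pandas as pd

    df = pd.DataFrame({"a": [True, True], "b": ["x", "y"]})  # "b" is object dtype
    result = df.all(axis=None)  # reduces over both axes, filter_type="bool"
    print(result, type(result))  # True <class 'numpy.bool_'>
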
76 changes: 12 additions & 64 deletions pandas/core/generic.py
@@ -30,7 +30,7 @@

 from pandas._config import config

-from pandas._libs import Timestamp, iNaT, lib
+from pandas._libs import Timestamp, lib
 from pandas._typing import (
     Axis,
     FilePathOrBuffer,
@@ -4558,6 +4558,10 @@ def filter(
         >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
         ...                   index=['mouse', 'rabbit'],
         ...                   columns=['one', 'two', 'three'])
+        >>> df
+                one  two  three
+        mouse     1    2      3
+        rabbit    4    5      6

         >>> # select columns by name
         >>> df.filter(items=['one', 'three'])
@@ -10102,8 +10106,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="minimum",
             accum_func=np.minimum.accumulate,
             accum_func_name="min",
-            mask_a=np.inf,
-            mask_b=np.nan,
             examples=_cummin_examples,
         )
         cls.cumsum = _make_cum_function(
@@ -10115,8 +10117,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="sum",
             accum_func=np.cumsum,
             accum_func_name="sum",
-            mask_a=0.0,
-            mask_b=np.nan,
             examples=_cumsum_examples,
         )
         cls.cumprod = _make_cum_function(
@@ -10128,8 +10128,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="product",
             accum_func=np.cumprod,
             accum_func_name="prod",
-            mask_a=1.0,
-            mask_b=np.nan,
             examples=_cumprod_examples,
         )
         cls.cummax = _make_cum_function(
@@ -10141,8 +10139,6 @@ def mad(self, axis=None, skipna=None, level=None):
             desc="maximum",
             accum_func=np.maximum.accumulate,
             accum_func_name="max",
-            mask_a=-np.inf,
-            mask_b=np.nan,
             examples=_cummax_examples,
         )

@@ -11182,8 +11178,6 @@ def _make_cum_function(
     desc: str,
     accum_func: Callable,
     accum_func_name: str,
-    mask_a: float,
-    mask_b: float,
     examples: str,
 ) -> Callable:
     @Substitution(
@@ -11205,61 +11199,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
         if axis == 1:
             return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T

-        def na_accum_func(blk_values):
-            # We will be applying this function to block values
-            if blk_values.dtype.kind in ["m", "M"]:
-                # GH#30460, GH#29058
-                # numpy 1.18 started sorting NaTs at the end instead of beginning,
-                # so we need to work around to maintain backwards-consistency.
-                orig_dtype = blk_values.dtype
-
-                # We need to define mask before masking NaTs
-                mask = isna(blk_values)
-
-                if accum_func == np.minimum.accumulate:
-                    # Note: the accum_func comparison fails as an "is" comparison
-                    y = blk_values.view("i8")
-                    y[mask] = np.iinfo(np.int64).max
-                    changed = True
-                else:
-                    y = blk_values
-                    changed = False
-
-                result = accum_func(y.view("i8"), axis)
-                if skipna:
-                    np.putmask(result, mask, iNaT)
-                elif accum_func == np.minimum.accumulate:
-                    # Restore NaTs that we masked previously
-                    nz = (~np.asarray(mask)).nonzero()[0]
-                    if len(nz):
-                        # everything up to the first non-na entry stays NaT
-                        result[: nz[0]] = iNaT
-
-                if changed:
-                    # restore NaT elements
-                    y[mask] = iNaT  # TODO: could try/finally for this?
-
-                if isinstance(blk_values, np.ndarray):
-                    result = result.view(orig_dtype)
-                else:
-                    # DatetimeArray
-                    result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
-
-            elif skipna and not issubclass(
-                blk_values.dtype.type, (np.integer, np.bool_)
-            ):
-                vals = blk_values.copy().T
-                mask = isna(vals)
-                np.putmask(vals, mask, mask_a)
-                result = accum_func(vals, axis)
-                np.putmask(result, mask, mask_b)
-            else:
-                result = accum_func(blk_values.T, axis)
+        def block_accum_func(blk_values):
+            values = blk_values.T if hasattr(blk_values, "T") else blk_values
+
+            result = nanops.na_accum_func(values, accum_func, skipna=skipna)

-            # transpose back for ndarray, not for EA
-            return result.T if hasattr(result, "T") else result
+            result = result.T if hasattr(result, "T") else result
+            return result

-        result = self._data.apply(na_accum_func)
+        result = self._data.apply(block_accum_func)

         d = self._construct_axes_dict()
         d["copy"] = False
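
The mask_a/mask_b constants disappear because NaN/NaT handling for all four accumulators now lives in nanops.na_accum_func. A sketch of the behavior that moved, using cummin over datetimes with a missing value (output shown as a comment):

    import pandas as pd

    s = pd.Series(pd.to_datetime(["2020-01-02", None, "2020-01-01"]))
    print(s.cummin())
    # 0   2020-01-02
    # 1          NaT        <- skipna=True leaves the missing slot as NaT
    # 2   2020-01-01        <- running minimum ignores the NaT
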
4 changes: 1 addition & 3 deletions pandas/core/indexes/base.py
@@ -3281,13 +3281,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
         target = _ensure_has_len(target)  # target may be an iterator

         if not isinstance(target, Index) and len(target) == 0:
-            attrs = self._get_attributes_dict()
-            attrs.pop("freq", None)  # don't preserve freq
             if isinstance(self, ABCRangeIndex):
                 values = range(0)
             else:
                 values = self._data[:0]  # appropriately-dtyped empty array
-            target = self._simple_new(values, **attrs)
+            target = self._simple_new(values, name=self.name)
         else:
             target = ensure_index(target)

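
Dropping the attrs dict means only the name is propagated when reindexing against an empty target. A minimal illustration (Index.reindex returns the new index plus an indexer):

    import pandas as pd

    idx = pd.Index([10, 20, 30], name="when")
    target, indexer = idx.reindex([])
    print(target.name, len(target))  # when 0
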
14 changes: 7 additions & 7 deletions pandas/core/indexes/category.py
@@ -396,7 +396,7 @@ def _engine(self):
     def unique(self, level=None):
         if level is not None:
             self._validate_index_level(level)
-        result = self.values.unique()
+        result = self._values.unique()
         # Use _simple_new instead of _shallow_copy to ensure we keep dtype
         #  of result, not self.
         return type(self)._simple_new(result, name=self.name)
@@ -423,7 +423,7 @@ def where(self, cond, other=None):
         # 3. Rebuild CategoricalIndex.
         if other is None:
             other = self._na_value
-        values = np.where(cond, self.values, other)
+        values = np.where(cond, self._values, other)
         cat = Categorical(values, dtype=self.dtype)
         return type(self)._simple_new(cat, name=self.name)

@@ -532,13 +532,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
                 "method='nearest' not implemented yet for CategoricalIndex"
             )

-        if isinstance(target, CategoricalIndex) and self.values.is_dtype_equal(target):
-            if self.values.equals(target.values):
+        if isinstance(target, CategoricalIndex) and self._values.is_dtype_equal(target):
+            if self._values.equals(target._values):
                 # we have the same codes
                 codes = target.codes
             else:
                 codes = _recode_for_categories(
-                    target.codes, target.categories, self.values.categories
+                    target.codes, target.categories, self._values.categories
                 )
         else:
             if isinstance(target, CategoricalIndex):
@@ -560,7 +560,7 @@ def get_indexer_non_unique(self, target):
             target = target.codes
             indexer, missing = self._engine.get_indexer_non_unique(target)
             return ensure_platform_int(indexer), missing
-        target = target.values
+        target = target._values

         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
@@ -679,7 +679,7 @@ def map(self, mapper):
         >>> idx.map({'a': 'first', 'b': 'second'})
         Index(['first', 'second', nan], dtype='object')
         """
-        return self._shallow_copy_with_infer(self.values.map(mapper))
+        return self._shallow_copy_with_infer(self._values.map(mapper))

     def delete(self, loc):
         """
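
These category.py changes consistently reach for ._values, which on a CategoricalIndex is the underlying Categorical itself, so unique and map keep categorical semantics instead of going through a materialized ndarray. A small illustration:

    import pandas as pd

    ci = pd.CategoricalIndex(["a", "b", "a"], name="letters")

    print(ci.unique())               # CategoricalIndex(['a', 'b'], ...)
    print(ci.map({"a": 1, "b": 2}))  # Index([1, 2, 1], ...)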