
Commit 5d63ed2

Merge pull request #91 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 220ce00 + 3b66021 commit 5d63ed2


41 files changed, +297 -270 lines

pandas/_libs/tslib.pyx (+4, -10)

@@ -14,7 +14,7 @@ PyDateTime_IMPORT
 
 
 cimport numpy as cnp
-from numpy cimport float64_t, int64_t, ndarray
+from numpy cimport float64_t, int64_t, ndarray, uint8_t
 import numpy as np
 cnp.import_array()
 
@@ -351,7 +351,6 @@ def format_array_from_datetime(
 
 def array_with_unit_to_datetime(
     ndarray values,
-    ndarray mask,
     object unit,
     str errors='coerce'
 ):
@@ -373,8 +372,6 @@ def array_with_unit_to_datetime(
     ----------
     values : ndarray of object
         Date-like objects to convert.
-    mask : boolean ndarray
-        Not-a-time mask for non-nullable integer types conversion, can be None.
     unit : object
         Time unit to use during conversion.
     errors : str, default 'raise'
@@ -395,6 +392,7 @@ def array_with_unit_to_datetime(
         bint need_to_iterate = True
         ndarray[int64_t] iresult
         ndarray[object] oresult
+        ndarray mask
         object tz = None
 
     assert is_ignore or is_coerce or is_raise
@@ -404,9 +402,6 @@ def array_with_unit_to_datetime(
             result = values.astype('M8[ns]')
         else:
             result, tz = array_to_datetime(values.astype(object), errors=errors)
-            if mask is not None:
-                iresult = result.view('i8')
-                iresult[mask] = NPY_NAT
         return result, tz
 
     m = cast_from_unit(None, unit)
@@ -419,9 +414,8 @@ def array_with_unit_to_datetime(
         if values.dtype.kind == "i":
             # Note: this condition makes the casting="same_kind" redundant
             iresult = values.astype('i8', casting='same_kind', copy=False)
-            # If no mask, fill mask by comparing to NPY_NAT constant
-            if mask is None:
-                mask = iresult == NPY_NAT
+            # fill by comparing to NPY_NAT constant
+            mask = iresult == NPY_NAT
             iresult[mask] = 0
             fvalues = iresult.astype('f8') * m
             need_to_iterate = False
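
Note (illustrative, not part of the diff): with the mask argument gone, the NaT
mask for integer input is now derived inside the function by comparing against
the NPY_NAT sentinel. A minimal NumPy-only sketch of that derivation, assuming
NPY_NAT is the int64 minimum pandas uses for NaT:

>>> import numpy as np
>>> NPY_NAT = np.iinfo(np.int64).min        # NaT sentinel (assumed here)
>>> iresult = np.array([10, NPY_NAT, 30], dtype="i8")
>>> mask = iresult == NPY_NAT               # derived internally, no longer passed in
>>> iresult[mask] = 0                       # zero out NaT slots before unit scaling
>>> mask
array([False,  True, False])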

pandas/_libs/tslibs/offsets.pyx (+12, -1)

@@ -114,7 +114,18 @@ def apply_index_wraps(func):
     # Note: normally we would use `@functools.wraps(func)`, but this does
     # not play nicely with cython class methods
     def wrapper(self, other):
-        result = func(self, other)
+
+        is_index = getattr(other, "_typ", "") == "datetimeindex"
+
+        # operate on DatetimeArray
+        arr = other._data if is_index else other
+
+        result = func(self, arr)
+
+        if is_index:
+            # Wrap DatetimeArray result back to DatetimeIndex
+            result = type(other)._simple_new(result, name=other.name)
+
         if self.normalize:
             result = result.to_period('D').to_timestamp()
         return result
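
Note (illustrative): the wrapper now unwraps a DatetimeIndex to its underlying
DatetimeArray before calling the wrapped function and re-wraps the result, so
vectorized offset application should still round-trip the index type and name.
A rough check, assuming a vectorized offset such as MonthEnd:

>>> import pandas as pd
>>> idx = pd.DatetimeIndex(["2020-01-31", "2020-02-29"], name="d")
>>> res = idx + pd.offsets.MonthEnd(1)      # goes through apply_index_wraps
>>> type(res).__name__, res.name
('DatetimeIndex', 'd')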

pandas/core/arrays/base.py (+7)

@@ -407,6 +407,13 @@ def shape(self) -> Tuple[int, ...]:
         """
         return (len(self),)
 
+    @property
+    def size(self) -> int:
+        """
+        The number of elements in the array.
+        """
+        return np.prod(self.shape)
+
     @property
     def ndim(self) -> int:
         """

pandas/core/arrays/integer.py (+3)

@@ -13,6 +13,7 @@
 from pandas.core.dtypes.cast import astype_nansafe
 from pandas.core.dtypes.common import (
     is_bool_dtype,
+    is_datetime64_dtype,
     is_float,
     is_float_dtype,
     is_integer,
@@ -469,6 +470,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
         if is_float_dtype(dtype):
             # In astype, we consider dtype=float to also mean na_value=np.nan
             kwargs = dict(na_value=np.nan)
+        elif is_datetime64_dtype(dtype):
+            kwargs = dict(na_value=np.datetime64("NaT"))
         else:
             kwargs = {}
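
Note (illustrative): a sketch of the behavior this enables — casting a nullable
integer array to datetime64 maps pd.NA to NaT, with values interpreted as
nanoseconds since the epoch by the underlying cast:

>>> import numpy as np
>>> import pandas as pd
>>> arr = pd.array([1, None], dtype="Int64")
>>> out = arr.astype("datetime64[ns]")      # pd.NA becomes np.datetime64("NaT")
>>> np.isnat(out[1])
True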

pandas/core/frame.py (+19, -15)

@@ -7808,6 +7808,8 @@ def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):
 
+        assert filter_type is None or filter_type == "bool", filter_type
+
         dtype_is_dt = self.dtypes.apply(
             lambda x: is_datetime64_any_dtype(x) or is_period_dtype(x)
         )
@@ -7835,7 +7837,7 @@ def f(x):
             return op(x, axis=axis, skipna=skipna, **kwds)
 
         def _get_data(axis_matters):
-            if filter_type is None or filter_type == "numeric":
+            if filter_type is None:
                 data = self._get_numeric_data()
             elif filter_type == "bool":
                 if axis_matters:
@@ -7882,15 +7884,11 @@ def blk_func(values):
                 return out
 
         if numeric_only is None:
-            values = self.values
+            data = self
+            values = data.values
             try:
                 result = f(values)
 
-                if filter_type == "bool" and is_object_dtype(values) and axis is None:
-                    # work around https://github.com/numpy/numpy/issues/10489
-                    # TODO: combine with hasattr(result, 'dtype') further down
-                    # hard since we don't have `values` down there.
-                    result = np.bool_(result)
             except TypeError:
                 # e.g. in nanops trying to convert strs to float
@@ -7916,30 +7914,36 @@ def blk_func(values):
 
                 # TODO: why doesnt axis matter here?
                 data = _get_data(axis_matters=False)
-                with np.errstate(all="ignore"):
-                    result = f(data.values)
                 labels = data._get_agg_axis(axis)
+
+                values = data.values
+                with np.errstate(all="ignore"):
+                    result = f(values)
         else:
             if numeric_only:
                 data = _get_data(axis_matters=True)
+                labels = data._get_agg_axis(axis)
 
                 values = data.values
-                labels = data._get_agg_axis(axis)
             else:
-                values = self.values
+                data = self
+                values = data.values
             result = f(values)
 
-        if hasattr(result, "dtype") and is_object_dtype(result.dtype):
+        if filter_type == "bool" and is_object_dtype(values) and axis is None:
+            # work around https://github.com/numpy/numpy/issues/10489
+            # TODO: can we de-duplicate parts of this with the next blocK?
+            result = np.bool_(result)
+        elif hasattr(result, "dtype") and is_object_dtype(result.dtype):
             try:
-                if filter_type is None or filter_type == "numeric":
+                if filter_type is None:
                     result = result.astype(np.float64)
                 elif filter_type == "bool" and notna(result).all():
                     result = result.astype(np.bool_)
             except (ValueError, TypeError):
-
                 # try to coerce to the original dtypes item by item if we can
                 if axis == 0:
-                    result = coerce_to_dtypes(result, self.dtypes)
+                    result = coerce_to_dtypes(result, data.dtypes)
 
         if constructor is not None:
             result = self._constructor_sliced(result, index=labels)
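
Note (illustrative): the relocated workaround covers full (axis=None) boolean
reductions over object-dtype frames, which should come back as a plain boolean
scalar. A rough sketch of that path:

>>> import pandas as pd
>>> df = pd.DataFrame({"a": [True, True], "b": ["x", "y"]}, dtype=object)
>>> df.all(axis=None)                       # object-dtype, axis=None reduction
True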

pandas/core/generic.py (+12, -64)

@@ -30,7 +30,7 @@
 
 from pandas._config import config
 
-from pandas._libs import Timestamp, iNaT, lib
+from pandas._libs import Timestamp, lib
 from pandas._typing import (
     Axis,
     FilePathOrBuffer,
@@ -4558,6 +4558,10 @@ def filter(
         >>> df = pd.DataFrame(np.array(([1, 2, 3], [4, 5, 6])),
         ...                   index=['mouse', 'rabbit'],
         ...                   columns=['one', 'two', 'three'])
+        >>> df
+                one  two  three
+        mouse     1    2      3
+        rabbit    4    5      6
 
         >>> # select columns by name
         >>> df.filter(items=['one', 'three'])
@@ -10102,8 +10106,6 @@ def mad(self, axis=None, skipna=None, level=None):
            desc="minimum",
            accum_func=np.minimum.accumulate,
            accum_func_name="min",
-           mask_a=np.inf,
-           mask_b=np.nan,
            examples=_cummin_examples,
        )
        cls.cumsum = _make_cum_function(
@@ -10115,8 +10117,6 @@ def mad(self, axis=None, skipna=None, level=None):
            desc="sum",
            accum_func=np.cumsum,
            accum_func_name="sum",
-           mask_a=0.0,
-           mask_b=np.nan,
            examples=_cumsum_examples,
        )
        cls.cumprod = _make_cum_function(
@@ -10128,8 +10128,6 @@ def mad(self, axis=None, skipna=None, level=None):
            desc="product",
            accum_func=np.cumprod,
            accum_func_name="prod",
-           mask_a=1.0,
-           mask_b=np.nan,
            examples=_cumprod_examples,
        )
        cls.cummax = _make_cum_function(
@@ -10141,8 +10139,6 @@ def mad(self, axis=None, skipna=None, level=None):
            desc="maximum",
            accum_func=np.maximum.accumulate,
            accum_func_name="max",
-           mask_a=-np.inf,
-           mask_b=np.nan,
            examples=_cummax_examples,
        )
 
@@ -11182,8 +11178,6 @@ def _make_cum_function(
     desc: str,
     accum_func: Callable,
     accum_func_name: str,
-    mask_a: float,
-    mask_b: float,
     examples: str,
 ) -> Callable:
     @Substitution(
@@ -11205,61 +11199,15 @@ def cum_func(self, axis=None, skipna=True, *args, **kwargs):
         if axis == 1:
             return cum_func(self.T, axis=0, skipna=skipna, *args, **kwargs).T
 
-        def na_accum_func(blk_values):
-            # We will be applying this function to block values
-            if blk_values.dtype.kind in ["m", "M"]:
-                # GH#30460, GH#29058
-                # numpy 1.18 started sorting NaTs at the end instead of beginning,
-                # so we need to work around to maintain backwards-consistency.
-                orig_dtype = blk_values.dtype
-
-                # We need to define mask before masking NaTs
-                mask = isna(blk_values)
-
-                if accum_func == np.minimum.accumulate:
-                    # Note: the accum_func comparison fails as an "is" comparison
-                    y = blk_values.view("i8")
-                    y[mask] = np.iinfo(np.int64).max
-                    changed = True
-                else:
-                    y = blk_values
-                    changed = False
-
-                result = accum_func(y.view("i8"), axis)
-                if skipna:
-                    np.putmask(result, mask, iNaT)
-                elif accum_func == np.minimum.accumulate:
-                    # Restore NaTs that we masked previously
-                    nz = (~np.asarray(mask)).nonzero()[0]
-                    if len(nz):
-                        # everything up to the first non-na entry stays NaT
-                        result[: nz[0]] = iNaT
-
-                if changed:
-                    # restore NaT elements
-                    y[mask] = iNaT  # TODO: could try/finally for this?
-
-                if isinstance(blk_values, np.ndarray):
-                    result = result.view(orig_dtype)
-                else:
-                    # DatetimeArray
-                    result = type(blk_values)._from_sequence(result, dtype=orig_dtype)
-
-            elif skipna and not issubclass(
-                blk_values.dtype.type, (np.integer, np.bool_)
-            ):
-                vals = blk_values.copy().T
-                mask = isna(vals)
-                np.putmask(vals, mask, mask_a)
-                result = accum_func(vals, axis)
-                np.putmask(result, mask, mask_b)
-            else:
-                result = accum_func(blk_values.T, axis)
+        def block_accum_func(blk_values):
+            values = blk_values.T if hasattr(blk_values, "T") else blk_values
+
+            result = nanops.na_accum_func(values, accum_func, skipna=skipna)
 
-            # transpose back for ndarray, not for EA
-            return result.T if hasattr(result, "T") else result
+            result = result.T if hasattr(result, "T") else result
+            return result
 
-        result = self._data.apply(na_accum_func)
+        result = self._data.apply(block_accum_func)
 
         d = self._construct_axes_dict()
         d["copy"] = False

pandas/core/indexes/base.py (+1, -3)

@@ -3281,13 +3281,11 @@ def reindex(self, target, method=None, level=None, limit=None, tolerance=None):
         target = _ensure_has_len(target)  # target may be an iterator
 
         if not isinstance(target, Index) and len(target) == 0:
-            attrs = self._get_attributes_dict()
-            attrs.pop("freq", None)  # don't preserve freq
             if isinstance(self, ABCRangeIndex):
                 values = range(0)
             else:
                 values = self._data[:0]  # appropriately-dtyped empty array
-            target = self._simple_new(values, **attrs)
+            target = self._simple_new(values, name=self.name)
         else:
             target = ensure_index(target)
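
Note (illustrative): when reindexing against an empty, non-Index target, the
empty target index is now built with only the name carried over rather than the
full attribute dict. A rough sketch of the affected path:

>>> import pandas as pd
>>> idx = pd.Index([1, 2, 3], name="x")
>>> target, indexer = idx.reindex([])
>>> target.name, len(target), target.dtype
('x', 0, dtype('int64'))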

pandas/core/indexes/category.py (+7, -7)

@@ -396,7 +396,7 @@ def _engine(self):
     def unique(self, level=None):
         if level is not None:
             self._validate_index_level(level)
-        result = self.values.unique()
+        result = self._values.unique()
         # Use _simple_new instead of _shallow_copy to ensure we keep dtype
         # of result, not self.
         return type(self)._simple_new(result, name=self.name)
@@ -423,7 +423,7 @@ def where(self, cond, other=None):
         # 3. Rebuild CategoricalIndex.
         if other is None:
             other = self._na_value
-        values = np.where(cond, self.values, other)
+        values = np.where(cond, self._values, other)
         cat = Categorical(values, dtype=self.dtype)
         return type(self)._simple_new(cat, name=self.name)
@@ -532,13 +532,13 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
                 "method='nearest' not implemented yet for CategoricalIndex"
             )
 
-        if isinstance(target, CategoricalIndex) and self.values.is_dtype_equal(target):
-            if self.values.equals(target.values):
+        if isinstance(target, CategoricalIndex) and self._values.is_dtype_equal(target):
+            if self._values.equals(target._values):
                 # we have the same codes
                 codes = target.codes
             else:
                 codes = _recode_for_categories(
-                    target.codes, target.categories, self.values.categories
+                    target.codes, target.categories, self._values.categories
                 )
         else:
             if isinstance(target, CategoricalIndex):
@@ -560,7 +560,7 @@ def get_indexer_non_unique(self, target):
             target = target.codes
             indexer, missing = self._engine.get_indexer_non_unique(target)
             return ensure_platform_int(indexer), missing
-        target = target.values
+        target = target._values
 
         codes = self.categories.get_indexer(target)
         indexer, missing = self._engine.get_indexer_non_unique(codes)
@@ -679,7 +679,7 @@ def map(self, mapper):
         >>> idx.map({'a': 'first', 'b': 'second'})
         Index(['first', 'second', nan], dtype='object')
         """
-        return self._shallow_copy_with_infer(self.values.map(mapper))
+        return self._shallow_copy_with_infer(self._values.map(mapper))
 
     def delete(self, loc):
         """