Skip to content

Commit c3dd5b9

Browse files
committed
Merge remote-tracking branch 'upstream/master' into map-na-action
2 parents c1d928d + e77f09c commit c3dd5b9

28 files changed

+216
-290
lines changed

doc/source/development/internals.rst

+2-8
Original file line numberDiff line numberDiff line change
@@ -89,16 +89,10 @@ pandas extends NumPy's type system with custom types, like ``Categorical`` or
8989
datetimes with a timezone, so we have multiple notions of "values". For 1-D
9090
containers (``Index`` classes and ``Series``) we have the following convention:
9191

92-
* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
93-
``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
94-
this returns the codes, not the array of objects.
9592
* ``cls._values`` is the "best possible" array. This could be an
96-
``ndarray``, ``ExtensionArray``, or in ``Index`` subclass (note: we're in the
97-
process of removing the index subclasses here so that it's always an
98-
``ndarray`` or ``ExtensionArray``).
93+
``ndarray`` or ``ExtensionArray``.
9994

100-
So, for example, ``Series[category]._values`` is a ``Categorical``, while
101-
``Series[category]._ndarray_values`` is the underlying codes.
95+
So, for example, ``Series[category]._values`` is a ``Categorical``.
10296

10397
.. _ref-subclassing-pandas:
10498

doc/source/reference/extensions.rst

-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ objects.
3737
api.extensions.ExtensionArray._from_factorized
3838
api.extensions.ExtensionArray._from_sequence
3939
api.extensions.ExtensionArray._from_sequence_of_strings
40-
api.extensions.ExtensionArray._ndarray_values
4140
api.extensions.ExtensionArray._reduce
4241
api.extensions.ExtensionArray._values_for_argsort
4342
api.extensions.ExtensionArray._values_for_factorize

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ Indexing
303303
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
304304
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
305305
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
306+
- Bug in :meth:`Series.__getitem__` indexing with non-standard scalars, e.g. ``np.dtype`` (:issue:`32684`)
306307

307308
Missing
308309
^^^^^^^

pandas/_libs/groupby.pyx

+12-4
Original file line numberDiff line numberDiff line change
@@ -869,7 +869,9 @@ def group_last(rank_t[:, :] out,
869869

870870
assert min_count == -1, "'min_count' only used in add and prod"
871871

872-
if not len(values) == len(labels):
872+
# TODO(cython 3.0):
873+
# Instead of `labels.shape[0]` use `len(labels)`
874+
if not len(values) == labels.shape[0]:
873875
raise AssertionError("len(index) != len(labels)")
874876

875877
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -960,7 +962,9 @@ def group_nth(rank_t[:, :] out,
960962

961963
assert min_count == -1, "'min_count' only used in add and prod"
962964

963-
if not len(values) == len(labels):
965+
# TODO(cython 3.0):
966+
# Instead of `labels.shape[0]` use `len(labels)`
967+
if not len(values) == labels.shape[0]:
964968
raise AssertionError("len(index) != len(labels)")
965969

966970
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -1254,7 +1258,9 @@ def group_max(groupby_t[:, :] out,
12541258

12551259
assert min_count == -1, "'min_count' only used in add and prod"
12561260

1257-
if not len(values) == len(labels):
1261+
# TODO(cython 3.0):
1262+
# Instead of `labels.shape[0]` use `len(labels)`
1263+
if not len(values) == labels.shape[0]:
12581264
raise AssertionError("len(index) != len(labels)")
12591265

12601266
nobs = np.zeros((<object>out).shape, dtype=np.int64)
@@ -1327,7 +1333,9 @@ def group_min(groupby_t[:, :] out,
13271333

13281334
assert min_count == -1, "'min_count' only used in add and prod"
13291335

1330-
if not len(values) == len(labels):
1336+
# TODO(cython 3.0):
1337+
# Instead of `labels.shape[0]` use `len(labels)`
1338+
if not len(values) == labels.shape[0]:
13311339
raise AssertionError("len(index) != len(labels)")
13321340

13331341
nobs = np.zeros((<object>out).shape, dtype=np.int64)

pandas/_libs/tslibs/strptime.pyx

+11-11
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import time
44
import locale
55
import calendar
66
import re
7-
from datetime import date as datetime_date
7+
import datetime
88

99
from _thread import allocate_lock as _thread_allocate_lock
1010

@@ -288,20 +288,20 @@ def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'
288288
elif iso_year != -1 and iso_week != -1:
289289
year, julian = _calc_julian_from_V(iso_year, iso_week,
290290
weekday + 1)
291-
# Cannot pre-calculate datetime_date() since can change in Julian
291+
# Cannot pre-calculate datetime.date() since can change in Julian
292292
# calculation and thus could have different value for the day of the wk
293293
# calculation.
294294
try:
295295
if julian == -1:
296296
# Need to add 1 to result since first day of the year is 1, not
297297
# 0.
298-
ordinal = datetime_date(year, month, day).toordinal()
299-
julian = ordinal - datetime_date(year, 1, 1).toordinal() + 1
298+
ordinal = datetime.date(year, month, day).toordinal()
299+
julian = ordinal - datetime.date(year, 1, 1).toordinal() + 1
300300
else:
301301
# Assume that if they bothered to include Julian day it will
302302
# be accurate.
303-
datetime_result = datetime_date.fromordinal(
304-
(julian - 1) + datetime_date(year, 1, 1).toordinal())
303+
datetime_result = datetime.date.fromordinal(
304+
(julian - 1) + datetime.date(year, 1, 1).toordinal())
305305
year = datetime_result.year
306306
month = datetime_result.month
307307
day = datetime_result.day
@@ -311,7 +311,7 @@ def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'
311311
continue
312312
raise
313313
if weekday == -1:
314-
weekday = datetime_date(year, month, day).weekday()
314+
weekday = datetime.date(year, month, day).weekday()
315315

316316
dts.year = year
317317
dts.month = month
@@ -649,7 +649,7 @@ cdef int _calc_julian_from_U_or_W(int year, int week_of_year,
649649
cdef:
650650
int first_weekday, week_0_length, days_to_week
651651

652-
first_weekday = datetime_date(year, 1, 1).weekday()
652+
first_weekday = datetime.date(year, 1, 1).weekday()
653653
# If we are dealing with the %U directive (week starts on Sunday), it's
654654
# easier to just shift the view to Sunday being the first day of the
655655
# week.
@@ -692,14 +692,14 @@ cdef (int, int) _calc_julian_from_V(int iso_year, int iso_week, int iso_weekday)
692692
cdef:
693693
int correction, ordinal
694694

695-
correction = datetime_date(iso_year, 1, 4).isoweekday() + 3
695+
correction = datetime.date(iso_year, 1, 4).isoweekday() + 3
696696
ordinal = (iso_week * 7) + iso_weekday - correction
697697
# ordinal may be negative or 0 now, which means the date is in the previous
698698
# calendar year
699699
if ordinal < 1:
700-
ordinal += datetime_date(iso_year, 1, 1).toordinal()
700+
ordinal += datetime.date(iso_year, 1, 1).toordinal()
701701
iso_year -= 1
702-
ordinal -= datetime_date(iso_year, 1, 1).toordinal()
702+
ordinal -= datetime.date(iso_year, 1, 1).toordinal()
703703
return iso_year, ordinal
704704

705705

pandas/core/array_algos/__init__.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
"""
2+
core.array_algos is for algorithms that operate on ndarray and ExtensionArray.
3+
These should:
4+
5+
- Assume that any Index, Series, or DataFrame objects have already been unwrapped.
6+
- Assume that any list arguments have already been cast to ndarray/EA.
7+
- Not depend on Index, Series, or DataFrame, nor import any of these.
8+
- May dispatch to ExtensionArray methods, but should not import from core.arrays.
9+
"""

pandas/core/array_algos/transforms.py

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
"""
2+
transforms.py is for shape-preserving functions.
3+
"""
4+
5+
import numpy as np
6+
7+
from pandas.core.dtypes.common import ensure_platform_int
8+
9+
10+
def shift(values: np.ndarray, periods: int, axis: int, fill_value) -> np.ndarray:
    """
    Shift the elements of ``values`` along ``axis`` by ``periods`` positions.

    Positions vacated by the shift are overwritten with ``fill_value``.

    Parameters
    ----------
    values : np.ndarray
        Array to shift. It is not modified when it is non-empty, because
        ``np.roll`` produces a new array that is then filled.
    periods : int
        Number of positions to shift by. Positive values move elements
        towards higher indices, negative values towards lower indices.
        Zero leaves the data unshifted.
    axis : int
        Axis of ``values`` along which to shift.
    fill_value : scalar
        Value assigned to the vacated positions.

    Returns
    -------
    np.ndarray
        The shifted array, with the same shape as ``values``.
    """
    new_values = values

    # make sure array sent to np.roll is c_contiguous
    f_ordered = values.flags.f_contiguous
    if f_ordered:
        new_values = new_values.T
        # the transpose reverses the axis order, so remap the target axis
        axis = new_values.ndim - axis - 1

    # np.roll is only applied when there is at least one element
    if np.prod(new_values.shape):
        new_values = np.roll(new_values, ensure_platform_int(periods), axis=axis)

    # A zero shift vacates nothing.  Without this guard the ``else`` branch
    # below would build ``slice(0, None)`` and overwrite the entire axis
    # with ``fill_value``.
    if periods != 0:
        axis_indexer = [slice(None)] * values.ndim
        if periods > 0:
            # leading ``periods`` positions were wrapped around by np.roll
            axis_indexer[axis] = slice(None, periods)
        else:
            # trailing ``abs(periods)`` positions were wrapped around
            axis_indexer[axis] = slice(periods, None)
        new_values[tuple(axis_indexer)] = fill_value

    # restore original order
    if f_ordered:
        new_values = new_values.T

    return new_values

pandas/core/arrays/base.py

-17
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,6 @@ class ExtensionArray:
9393
_from_factorized
9494
_from_sequence
9595
_from_sequence_of_strings
96-
_ndarray_values
9796
_reduce
9897
_values_for_argsort
9998
_values_for_factorize
@@ -1046,22 +1045,6 @@ def _concat_same_type(
10461045
# of objects
10471046
_can_hold_na = True
10481047

1049-
@property
1050-
def _ndarray_values(self) -> np.ndarray:
1051-
"""
1052-
Internal pandas method for lossy conversion to a NumPy ndarray.
1053-
1054-
This method is not part of the pandas interface.
1055-
1056-
The expectation is that this is cheap to compute, and is primarily
1057-
used for interacting with our indexers.
1058-
1059-
Returns
1060-
-------
1061-
array : ndarray
1062-
"""
1063-
return np.array(self)
1064-
10651048
def _reduce(self, name, skipna=True, **kwargs):
10661049
"""
10671050
Return a scalar result of performing the reduction operation.

pandas/core/arrays/categorical.py

+2-11
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,6 @@ def dtype(self) -> CategoricalDtype:
451451
"""
452452
return self._dtype
453453

454-
@property
455-
def _ndarray_values(self) -> np.ndarray:
456-
return self.codes
457-
458454
@property
459455
def _constructor(self) -> Type["Categorical"]:
460456
return Categorical
@@ -2567,12 +2563,7 @@ def _get_codes_for_values(values, categories):
25672563
"""
25682564
dtype_equal = is_dtype_equal(values.dtype, categories.dtype)
25692565

2570-
if dtype_equal:
2571-
# To prevent erroneous dtype coercion in _get_data_algo, retrieve
2572-
# the underlying numpy array. gh-22702
2573-
values = getattr(values, "_ndarray_values", values)
2574-
categories = getattr(categories, "_ndarray_values", categories)
2575-
elif is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
2566+
if is_extension_array_dtype(categories.dtype) and is_object_dtype(values):
25762567
# Support inferring the correct extension dtype from an array of
25772568
# scalar objects. e.g.
25782569
# Categorical(array[Period, Period], categories=PeriodIndex(...))
@@ -2582,7 +2573,7 @@ def _get_codes_for_values(values, categories):
25822573
# exception raised in _from_sequence
25832574
values = ensure_object(values)
25842575
categories = ensure_object(categories)
2585-
else:
2576+
elif not dtype_equal:
25862577
values = ensure_object(values)
25872578
categories = ensure_object(categories)
25882579

pandas/core/arrays/datetimelike.py

+2-24
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040

4141
from pandas.core import missing, nanops, ops
4242
from pandas.core.algorithms import checked_add_with_arr, take, unique1d, value_counts
43+
from pandas.core.array_algos.transforms import shift
4344
from pandas.core.arrays.base import ExtensionArray, ExtensionOpsMixin
4445
import pandas.core.common as com
4546
from pandas.core.construction import array, extract_array
@@ -455,10 +456,6 @@ def asi8(self) -> np.ndarray:
455456
# do not cache or you'll create a memory leak
456457
return self._data.view("i8")
457458

458-
@property
459-
def _ndarray_values(self):
460-
return self._data
461-
462459
# ----------------------------------------------------------------
463460
# Rendering Methods
464461

@@ -773,26 +770,7 @@ def shift(self, periods=1, fill_value=None, axis=0):
773770

774771
fill_value = self._unbox_scalar(fill_value)
775772

776-
new_values = self._data
777-
778-
# make sure array sent to np.roll is c_contiguous
779-
f_ordered = new_values.flags.f_contiguous
780-
if f_ordered:
781-
new_values = new_values.T
782-
axis = new_values.ndim - axis - 1
783-
784-
new_values = np.roll(new_values, periods, axis=axis)
785-
786-
axis_indexer = [slice(None)] * self.ndim
787-
if periods > 0:
788-
axis_indexer[axis] = slice(None, periods)
789-
else:
790-
axis_indexer[axis] = slice(periods, None)
791-
new_values[tuple(axis_indexer)] = fill_value
792-
793-
# restore original order
794-
if f_ordered:
795-
new_values = new_values.T
773+
new_values = shift(self._data, periods, axis, fill_value)
796774

797775
return type(self)._simple_new(new_values, dtype=self.dtype)
798776

pandas/core/arrays/integer.py

-12
Original file line numberDiff line numberDiff line change
@@ -478,18 +478,6 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
478478
data = self.to_numpy(dtype=dtype, **kwargs)
479479
return astype_nansafe(data, dtype, copy=False)
480480

481-
@property
482-
def _ndarray_values(self) -> np.ndarray:
483-
"""
484-
Internal pandas method for lossy conversion to a NumPy ndarray.
485-
486-
This method is not part of the pandas interface.
487-
488-
The expectation is that this is cheap to compute, and is primarily
489-
used for interacting with our indexers.
490-
"""
491-
return self._data
492-
493481
def _values_for_factorize(self) -> Tuple[np.ndarray, float]:
494482
# TODO: https://github.com/pandas-dev/pandas/issues/30037
495483
# use masked algorithms, rather than object-dtype / np.nan.

pandas/core/base.py

-17
Original file line numberDiff line numberDiff line change
@@ -855,23 +855,6 @@ def to_numpy(self, dtype=None, copy=False, na_value=lib.no_default, **kwargs):
855855
result[self.isna()] = na_value
856856
return result
857857

858-
@property
859-
def _ndarray_values(self) -> np.ndarray:
860-
"""
861-
The data as an ndarray, possibly losing information.
862-
863-
The expectation is that this is cheap to compute, and is primarily
864-
used for interacting with our indexers.
865-
866-
- categorical -> codes
867-
"""
868-
if is_extension_array_dtype(self):
869-
return self.array._ndarray_values
870-
# As a mixin, we depend on the mixing class having values.
871-
# Special mixin syntax may be developed in the future:
872-
# https://github.com/python/typing/issues/246
873-
return self.values # type: ignore
874-
875858
@property
876859
def empty(self):
877860
return not self.size

0 commit comments

Comments
 (0)