Skip to content

Commit 6cb81b7

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into no-values
2 parents f3a4d38 + 402c5cd commit 6cb81b7

File tree

17 files changed

+203
-119
lines changed

17 files changed

+203
-119
lines changed

doc/source/whatsnew/v1.0.3.rst

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ including other versions of pandas.
1515

1616
Fixed regressions
1717
~~~~~~~~~~~~~~~~~
18+
- Fixed regression in ``resample.agg`` when the underlying data is non-writeable (:issue:`31710`)
1819

1920
.. _whatsnew_103.bug_fixes:
2021

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ Indexing
301301
- Bug in :meth:`DataFrame.iat` incorrectly returning ``Timestamp`` instead of ``datetime`` in some object-dtype cases (:issue:`32809`)
302302
- Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` when indexing with an integer key on an object-dtype :class:`Index` that is not all-integers (:issue:`31905`)
303303
- Bug in :meth:`DataFrame.iloc.__setitem__` on a :class:`DataFrame` with duplicate columns incorrectly setting values for all matching columns (:issue:`15686`, :issue:`22036`)
304+
- Bug in :meth:`DataFrame.loc` and :meth:`Series.loc` with a :class:`DatetimeIndex`, :class:`TimedeltaIndex`, or :class:`PeriodIndex` incorrectly allowing lookups of non-matching datetime-like dtypes (:issue:`32650`)
304305

305306
Missing
306307
^^^^^^^

pandas/_libs/groupby.pyx

+8-4
Original file line numberDiff line numberDiff line change
@@ -848,11 +848,13 @@ cdef inline bint _treat_as_na(rank_t val, bint is_datetimelike) nogil:
848848
return val != val
849849

850850

851+
# GH#31710 use memoryviews once cython 0.30 is released so we can
852+
# use `const rank_t[:, :] values`
851853
@cython.wraparound(False)
852854
@cython.boundscheck(False)
853855
def group_last(rank_t[:, :] out,
854856
int64_t[:] counts,
855-
rank_t[:, :] values,
857+
ndarray[rank_t, ndim=2] values,
856858
const int64_t[:] labels,
857859
Py_ssize_t min_count=-1):
858860
"""
@@ -937,11 +939,13 @@ def group_last(rank_t[:, :] out,
937939
raise RuntimeError("empty group with uint64_t")
938940

939941

942+
# GH#31710 use memoryviews once cython 0.30 is released so we can
943+
# use `const rank_t[:, :] values`
940944
@cython.wraparound(False)
941945
@cython.boundscheck(False)
942946
def group_nth(rank_t[:, :] out,
943947
int64_t[:] counts,
944-
rank_t[:, :] values,
948+
ndarray[rank_t, ndim=2] values,
945949
const int64_t[:] labels, int64_t rank=1,
946950
Py_ssize_t min_count=-1):
947951
"""
@@ -1235,7 +1239,7 @@ ctypedef fused groupby_t:
12351239
@cython.boundscheck(False)
12361240
def group_max(groupby_t[:, :] out,
12371241
int64_t[:] counts,
1238-
groupby_t[:, :] values,
1242+
ndarray[groupby_t, ndim=2] values,
12391243
const int64_t[:] labels,
12401244
Py_ssize_t min_count=-1):
12411245
"""
@@ -1308,7 +1312,7 @@ def group_max(groupby_t[:, :] out,
13081312
@cython.boundscheck(False)
13091313
def group_min(groupby_t[:, :] out,
13101314
int64_t[:] counts,
1311-
groupby_t[:, :] values,
1315+
ndarray[groupby_t, ndim=2] values,
13121316
const int64_t[:] labels,
13131317
Py_ssize_t min_count=-1):
13141318
"""

pandas/_libs/src/ujson/python/objToJSON.c

+8-2
Original file line numberDiff line numberDiff line change
@@ -222,13 +222,19 @@ static PyObject *get_values(PyObject *obj) {
222222

223223
PRINTMARK();
224224

225-
if (PyObject_HasAttrString(obj, "_internal_get_values")) {
225+
if (PyObject_TypeCheck(obj, cls_index) || PyObject_TypeCheck(obj, cls_series)) {
226+
// The special cases to worry about are dt64tz and category[dt64tz].
227+
// In both cases we want the UTC-localized datetime64 ndarray,
228+
// without going through an object array of Timestamps.
226229
PRINTMARK();
227-
values = PyObject_CallMethod(obj, "_internal_get_values", NULL);
230+
values = PyObject_GetAttrString(obj, "values");
228231

229232
if (values == NULL) {
230233
// Clear so we can subsequently try another method
231234
PyErr_Clear();
235+
} else if (PyObject_HasAttrString(values, "__array__")) {
236+
// We may have gotten a Categorical or Sparse array so call np.array
237+
values = PyObject_CallMethod(values, "__array__", NULL);
232238
} else if (!PyArray_CheckExact(values)) {
233239
// Didn't get a numpy array, so keep trying
234240
PRINTMARK();

pandas/core/generic.py

-20
Original file line numberDiff line numberDiff line change
@@ -5382,26 +5382,6 @@ def _values(self) -> np.ndarray:
53825382
"""internal implementation"""
53835383
return self.values
53845384

5385-
def _internal_get_values(self) -> np.ndarray:
5386-
"""
5387-
Return an ndarray after converting sparse values to dense.
5388-
5389-
This is the same as ``.values`` for non-sparse data. For sparse
5390-
data contained in a `SparseArray`, the data are first
5391-
converted to a dense representation.
5392-
5393-
Returns
5394-
-------
5395-
numpy.ndarray
5396-
Numpy representation of DataFrame.
5397-
5398-
See Also
5399-
--------
5400-
values : Numpy representation of DataFrame.
5401-
SparseArray : Container for sparse data.
5402-
"""
5403-
return self.values
5404-
54055385
@property
54065386
def dtypes(self):
54075387
"""

pandas/core/indexes/base.py

+4-48
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
ensure_platform_int,
3030
is_bool,
3131
is_bool_dtype,
32-
is_categorical,
3332
is_categorical_dtype,
3433
is_datetime64_any_dtype,
3534
is_dtype_equal,
@@ -532,6 +531,9 @@ def _shallow_copy_with_infer(self, values, **kwargs):
532531
return self._constructor(values, **attributes)
533532
except (TypeError, ValueError):
534533
pass
534+
535+
# Remove tz so Index will try non-DatetimeIndex inference
536+
attributes.pop("tz", None)
535537
return Index(values, **attributes)
536538

537539
def _update_inplace(self, result, **kwargs):
@@ -3870,50 +3872,6 @@ def _values(self) -> Union[ExtensionArray, np.ndarray]:
38703872
"""
38713873
return self._data
38723874

3873-
def _internal_get_values(self) -> np.ndarray:
3874-
"""
3875-
Return `Index` data as an `numpy.ndarray`.
3876-
3877-
Returns
3878-
-------
3879-
numpy.ndarray
3880-
A one-dimensional numpy array of the `Index` values.
3881-
3882-
See Also
3883-
--------
3884-
Index.values : The attribute that _internal_get_values wraps.
3885-
3886-
Examples
3887-
--------
3888-
Getting the `Index` values of a `DataFrame`:
3889-
3890-
>>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
3891-
... index=['a', 'b', 'c'], columns=['A', 'B', 'C'])
3892-
>>> df
3893-
A B C
3894-
a 1 2 3
3895-
b 4 5 6
3896-
c 7 8 9
3897-
>>> df.index._internal_get_values()
3898-
array(['a', 'b', 'c'], dtype=object)
3899-
3900-
Standalone `Index` values:
3901-
3902-
>>> idx = pd.Index(['1', '2', '3'])
3903-
>>> idx._internal_get_values()
3904-
array(['1', '2', '3'], dtype=object)
3905-
3906-
`MultiIndex` arrays also have only one dimension:
3907-
3908-
>>> midx = pd.MultiIndex.from_arrays([[1, 2, 3], ['a', 'b', 'c']],
3909-
... names=('number', 'letter'))
3910-
>>> midx._internal_get_values()
3911-
array([(1, 'a'), (2, 'b'), (3, 'c')], dtype=object)
3912-
>>> midx._internal_get_values().ndim
3913-
1
3914-
"""
3915-
return self.values
3916-
39173875
def _get_engine_target(self) -> np.ndarray:
39183876
"""
39193877
Get the ndarray that we can pass to the IndexEngine constructor.
@@ -4657,10 +4615,8 @@ def get_indexer_non_unique(self, target):
46574615
if pself is not self or ptarget is not target:
46584616
return pself.get_indexer_non_unique(ptarget)
46594617

4660-
if is_categorical(target):
4618+
if is_categorical_dtype(target.dtype):
46614619
tgt_values = np.asarray(target)
4662-
elif self.is_all_dates and target.is_all_dates: # GH 30399
4663-
tgt_values = target.asi8
46644620
else:
46654621
tgt_values = target._get_engine_target()
46664622

pandas/core/indexes/datetimelike.py

+24-2
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,14 @@
88

99
from pandas._libs import NaT, iNaT, join as libjoin, lib
1010
from pandas._libs.tslibs import timezones
11-
from pandas._typing import Label
11+
from pandas._typing import DtypeObj, Label
1212
from pandas.compat.numpy import function as nv
1313
from pandas.errors import AbstractMethodError
1414
from pandas.util._decorators import Appender, cache_readonly, doc
1515

1616
from pandas.core.dtypes.common import (
1717
ensure_int64,
18+
ensure_platform_int,
1819
is_bool_dtype,
1920
is_categorical_dtype,
2021
is_dtype_equal,
@@ -32,7 +33,7 @@
3233
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
3334
from pandas.core.base import IndexOpsMixin
3435
import pandas.core.indexes.base as ibase
35-
from pandas.core.indexes.base import Index, _index_shared_docs
36+
from pandas.core.indexes.base import Index, _index_shared_docs, ensure_index
3637
from pandas.core.indexes.extension import (
3738
ExtensionIndex,
3839
inherit_names,
@@ -101,6 +102,12 @@ class DatetimeIndexOpsMixin(ExtensionIndex):
101102
def is_all_dates(self) -> bool:
102103
return True
103104

105+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
106+
"""
107+
Can we compare values of the given dtype to our own?
108+
"""
109+
raise AbstractMethodError(self)
110+
104111
# ------------------------------------------------------------------------
105112
# Abstract data attributes
106113

@@ -426,6 +433,21 @@ def _partial_date_slice(
426433
# try to find the dates
427434
return (lhs_mask & rhs_mask).nonzero()[0]
428435

436+
@Appender(Index.get_indexer_non_unique.__doc__)
437+
def get_indexer_non_unique(self, target):
438+
target = ensure_index(target)
439+
pself, ptarget = self._maybe_promote(target)
440+
if pself is not self or ptarget is not target:
441+
return pself.get_indexer_non_unique(ptarget)
442+
443+
if not self._is_comparable_dtype(target.dtype):
444+
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
445+
return no_matches, no_matches
446+
447+
tgt_values = target.asi8
448+
indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
449+
return ensure_platform_int(indexer), missing
450+
429451
# --------------------------------------------------------------------
430452

431453
__add__ = make_wrapped_arith_op("__add__")

pandas/core/indexes/datetimes.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,18 @@
77

88
from pandas._libs import NaT, Period, Timestamp, index as libindex, lib, tslib as libts
99
from pandas._libs.tslibs import fields, parsing, timezones
10-
from pandas._typing import Label
10+
from pandas._typing import DtypeObj, Label
1111
from pandas.util._decorators import cache_readonly
1212

13-
from pandas.core.dtypes.common import _NS_DTYPE, is_float, is_integer, is_scalar
13+
from pandas.core.dtypes.common import (
14+
_NS_DTYPE,
15+
is_datetime64_any_dtype,
16+
is_datetime64_dtype,
17+
is_datetime64tz_dtype,
18+
is_float,
19+
is_integer,
20+
is_scalar,
21+
)
1422
from pandas.core.dtypes.missing import is_valid_nat_for_dtype
1523

1624
from pandas.core.arrays.datetimes import DatetimeArray, tz_to_dtype
@@ -298,6 +306,18 @@ def _convert_for_op(self, value):
298306
return Timestamp(value).asm8
299307
raise ValueError("Passed item and index have different timezone")
300308

309+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
310+
"""
311+
Can we compare values of the given dtype to our own?
312+
"""
313+
if not is_datetime64_any_dtype(dtype):
314+
return False
315+
if self.tz is not None:
316+
# If we have tz, we can compare to tzaware
317+
return is_datetime64tz_dtype(dtype)
318+
# if we don't have tz, we can only compare to tznaive
319+
return is_datetime64_dtype(dtype)
320+
301321
# --------------------------------------------------------------------
302322
# Rendering Methods
303323

pandas/core/indexes/multi.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -3243,9 +3243,13 @@ def union(self, other, sort=None):
32433243

32443244
# TODO: Index.union returns other when `len(self)` is 0.
32453245

3246-
uniq_tuples = lib.fast_unique_multiple(
3247-
[self._values, other._ndarray_values], sort=sort
3248-
)
3246+
if not is_object_dtype(other.dtype):
3247+
raise NotImplementedError(
3248+
"Can only union MultiIndex with MultiIndex or Index of tuples, "
3249+
"try mi.to_flat_index().union(other) instead."
3250+
)
3251+
3252+
uniq_tuples = lib.fast_unique_multiple([self._values, other._values], sort=sort)
32493253

32503254
return MultiIndex.from_arrays(
32513255
zip(*uniq_tuples), sortorder=0, names=result_names
@@ -3279,8 +3283,18 @@ def intersection(self, other, sort=False):
32793283
if self.equals(other):
32803284
return self
32813285

3286+
if not is_object_dtype(other.dtype):
3287+
# The intersection is empty
3288+
# TODO: we have no tests that get here
3289+
return MultiIndex(
3290+
levels=self.levels,
3291+
codes=[[]] * self.nlevels,
3292+
names=result_names,
3293+
verify_integrity=False,
3294+
)
3295+
32823296
lvals = self._values
3283-
rvals = other._ndarray_values
3297+
rvals = other._values
32843298

32853299
uniq_tuples = None # flag whether _inner_indexer was successful
32863300
if self.is_monotonic and other.is_monotonic:

pandas/core/indexes/period.py

+14-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas._libs.tslibs import frequencies as libfrequencies, resolution
1010
from pandas._libs.tslibs.parsing import parse_time_string
1111
from pandas._libs.tslibs.period import Period
12-
from pandas._typing import Label
12+
from pandas._typing import DtypeObj, Label
1313
from pandas.util._decorators import Appender, cache_readonly
1414

1515
from pandas.core.dtypes.common import (
@@ -23,6 +23,7 @@
2323
is_scalar,
2424
pandas_dtype,
2525
)
26+
from pandas.core.dtypes.dtypes import PeriodDtype
2627

2728
from pandas.core.arrays.period import (
2829
PeriodArray,
@@ -298,6 +299,14 @@ def _maybe_convert_timedelta(self, other):
298299
# raise when input doesn't have freq
299300
raise raise_on_incompatible(self, None)
300301

302+
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
303+
"""
304+
Can we compare values of the given dtype to our own?
305+
"""
306+
if not isinstance(dtype, PeriodDtype):
307+
return False
308+
return dtype.freq == self.freq
309+
301310
# ------------------------------------------------------------------------
302311
# Rendering Methods
303312

@@ -454,12 +463,11 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None):
454463
def get_indexer_non_unique(self, target):
455464
target = ensure_index(target)
456465

457-
if isinstance(target, PeriodIndex):
458-
if target.freq != self.freq:
459-
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
460-
return no_matches, no_matches
466+
if not self._is_comparable_dtype(target.dtype):
467+
no_matches = -1 * np.ones(self.shape, dtype=np.intp)
468+
return no_matches, no_matches
461469

462-
target = target.asi8
470+
target = target.asi8
463471

464472
indexer, missing = self._int64index.get_indexer_non_unique(target)
465473
return ensure_platform_int(indexer), missing

0 commit comments

Comments
 (0)