Skip to content

Commit bb13ec4

Browse files
committed
Merge remote-tracking branch 'upstream/main' into ci/align_azure
2 parents feab7ee + 7102f81 commit bb13ec4

File tree

12 files changed

+125
-72
lines changed

12 files changed

+125
-72
lines changed

doc/source/whatsnew/v1.5.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ Indexing
266266
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
267267
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
268268
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
269+
- Bug in :meth:`Series.__setitem__` when setting incompatible values into a ``PeriodDtype`` or ``IntervalDtype`` :class:`Series` raising when indexing with a boolean mask but coercing when indexing with otherwise-equivalent indexers; these now consistently coerce, along with :meth:`Series.mask` and :meth:`Series.where` (:issue:`45768`)
269270
- Bug in :meth:`Series.loc.__setitem__` and :meth:`Series.loc.__getitem__` not raising when using multiple keys without using a :class:`MultiIndex` (:issue:`13831`)
270271
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
271272
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)

pandas/core/indexes/base.py

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -391,7 +391,6 @@ def _outer_indexer(
391391
_comparables: list[str] = ["name"]
392392
_attributes: list[str] = ["name"]
393393
_is_numeric_dtype: bool = False
394-
_can_hold_na: bool = True
395394
_can_hold_strings: bool = True
396395

397396
# Whether this index is a NumericIndex, but not a Int64Index, Float64Index,
@@ -2206,6 +2205,20 @@ def _get_grouper_for_level(self, mapper, *, level=None):
22062205
# --------------------------------------------------------------------
22072206
# Introspection Methods
22082207

2208+
@cache_readonly
2209+
@final
2210+
def _can_hold_na(self) -> bool:
2211+
if isinstance(self.dtype, ExtensionDtype):
2212+
if isinstance(self.dtype, IntervalDtype):
2213+
# FIXME(GH#45720): this is inaccurate for integer-backed
2214+
# IntervalArray, but without it other.categories.take raises
2215+
# in IntervalArray._cmp_method
2216+
return True
2217+
return self.dtype._can_hold_na
2218+
if self.dtype.kind in ["i", "u", "b"]:
2219+
return False
2220+
return True
2221+
22092222
@final
22102223
@property
22112224
def is_monotonic(self) -> bool:
@@ -2662,10 +2675,21 @@ def inferred_type(self) -> str_t:
26622675
return lib.infer_dtype(self._values, skipna=False)
26632676

26642677
@cache_readonly
2678+
@final
26652679
def _is_all_dates(self) -> bool:
26662680
"""
26672681
Whether or not the index values only consist of dates.
26682682
"""
2683+
2684+
if needs_i8_conversion(self.dtype):
2685+
return True
2686+
elif self.dtype != _dtype_obj:
2687+
# TODO(ExtensionIndex): 3rd party EA might override?
2688+
# Note: this includes IntervalIndex, even when the left/right
2689+
# contain datetime-like objects.
2690+
return False
2691+
elif self._is_multi:
2692+
return False
26692693
return is_datetime_array(ensure_object(self._values))
26702694

26712695
@cache_readonly
@@ -6159,6 +6183,10 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
61596183
"""
61606184
Can we compare values of the given dtype to our own?
61616185
"""
6186+
if self.dtype.kind == "b":
6187+
return dtype.kind == "b"
6188+
elif is_numeric_dtype(self.dtype):
6189+
return is_numeric_dtype(dtype)
61626190
return True
61636191

61646192
@final

pandas/core/indexes/datetimelike.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,6 @@ class DatetimeIndexOpsMixin(NDArrayBackedExtensionIndex):
100100
),
101101
)
102102

103-
@property
104-
def _is_all_dates(self) -> bool:
105-
return True
106-
107103
# ------------------------------------------------------------------------
108104

109105
def equals(self, other: Any) -> bool:
@@ -151,8 +147,6 @@ def __contains__(self, key: Any) -> bool:
151147
return False
152148
return True
153149

154-
_can_hold_na = True
155-
156150
def _convert_tolerance(self, tolerance, target):
157151
tolerance = np.asarray(to_timedelta(tolerance).to_numpy())
158152
return super()._convert_tolerance(tolerance, target)

pandas/core/indexes/interval.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -897,14 +897,6 @@ def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
897897

898898
# --------------------------------------------------------------------
899899

900-
@property
901-
def _is_all_dates(self) -> bool:
902-
"""
903-
This is False even when left/right contain datetime-like objects,
904-
as the check is done on the Interval itself
905-
"""
906-
return False
907-
908900
def _get_engine_target(self) -> np.ndarray:
909901
# Note: we _could_ use libjoin functions by either casting to object
910902
# dtype or constructing tuples (faster than constructing Intervals)

pandas/core/indexes/multi.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,10 +1841,6 @@ def to_flat_index(self) -> Index:
18411841
"""
18421842
return Index(self._values, tupleize_cols=False)
18431843

1844-
@property
1845-
def _is_all_dates(self) -> bool:
1846-
return False
1847-
18481844
def is_lexsorted(self) -> bool:
18491845
warnings.warn(
18501846
"MultiIndex.is_lexsorted is deprecated as a public function, "

pandas/core/indexes/numeric.py

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
)
1515
from pandas._typing import (
1616
Dtype,
17-
DtypeObj,
1817
npt,
1918
)
2019
from pandas.util._decorators import (
@@ -91,14 +90,6 @@ class NumericIndex(Index):
9190
_can_hold_strings = False
9291
_is_backward_compat_public_numeric_index: bool = True
9392

94-
# error: Signature of "_can_hold_na" incompatible with supertype "Index"
95-
@cache_readonly
96-
def _can_hold_na(self) -> bool: # type: ignore[override]
97-
if is_float_dtype(self.dtype):
98-
return True
99-
else:
100-
return False
101-
10293
_engine_types: dict[np.dtype, type[libindex.IndexEngine]] = {
10394
np.dtype(np.int8): libindex.Int8Engine,
10495
np.dtype(np.int16): libindex.Int16Engine,
@@ -268,10 +259,6 @@ def _convert_tolerance(self, tolerance, target):
268259
)
269260
return tolerance
270261

271-
def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
272-
# If we ever have BoolIndex or ComplexIndex, this may need to be tightened
273-
return is_numeric_dtype(dtype)
274-
275262
@classmethod
276263
def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None:
277264
"""
@@ -284,13 +271,6 @@ def _assert_safe_casting(cls, data: np.ndarray, subarr: np.ndarray) -> None:
284271
if not np.array_equal(data, subarr):
285272
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
286273

287-
@property
288-
def _is_all_dates(self) -> bool:
289-
"""
290-
Checks that all the labels are datetime objects.
291-
"""
292-
return False
293-
294274
def _format_native_types(
295275
self, *, na_rep="", float_format=None, decimal=".", quoting=None, **kwargs
296276
):

pandas/core/internals/blocks.py

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,6 +1376,8 @@ def where(self, other, cond) -> list[Block]:
13761376

13771377
cond = extract_bool_array(cond)
13781378

1379+
orig_other = other
1380+
orig_cond = cond
13791381
other = self._maybe_squeeze_arg(other)
13801382
cond = self._maybe_squeeze_arg(cond)
13811383

@@ -1395,21 +1397,15 @@ def where(self, other, cond) -> list[Block]:
13951397

13961398
if is_interval_dtype(self.dtype):
13971399
# TestSetitemFloatIntervalWithIntIntervalValues
1398-
blk = self.coerce_to_target_dtype(other)
1399-
if blk.dtype == _dtype_obj:
1400-
# For now at least only support casting e.g.
1401-
# Interval[int64]->Interval[float64]
1402-
raise
1403-
return blk.where(other, cond)
1400+
blk = self.coerce_to_target_dtype(orig_other)
1401+
nbs = blk.where(orig_other, orig_cond)
1402+
return self._maybe_downcast(nbs, "infer")
14041403

14051404
elif isinstance(self, NDArrayBackedExtensionBlock):
14061405
# NB: not (yet) the same as
14071406
# isinstance(values, NDArrayBackedExtensionArray)
1408-
if isinstance(self.dtype, PeriodDtype):
1409-
# TODO: don't special-case
1410-
raise
1411-
blk = self.coerce_to_target_dtype(other)
1412-
nbs = blk.where(other, cond)
1407+
blk = self.coerce_to_target_dtype(orig_other)
1408+
nbs = blk.where(orig_other, orig_cond)
14131409
return self._maybe_downcast(nbs, "infer")
14141410

14151411
else:
@@ -1426,6 +1422,8 @@ def putmask(self, mask, new) -> list[Block]:
14261422

14271423
values = self.values
14281424

1425+
orig_new = new
1426+
orig_mask = mask
14291427
new = self._maybe_squeeze_arg(new)
14301428
mask = self._maybe_squeeze_arg(mask)
14311429

@@ -1438,21 +1436,14 @@ def putmask(self, mask, new) -> list[Block]:
14381436
if is_interval_dtype(self.dtype):
14391437
# Discussion about what we want to support in the general
14401438
# case GH#39584
1441-
blk = self.coerce_to_target_dtype(new)
1442-
if blk.dtype == _dtype_obj:
1443-
# For now at least, only support casting e.g.
1444-
# Interval[int64]->Interval[float64],
1445-
raise
1446-
return blk.putmask(mask, new)
1439+
blk = self.coerce_to_target_dtype(orig_new)
1440+
return blk.putmask(orig_mask, orig_new)
14471441

14481442
elif isinstance(self, NDArrayBackedExtensionBlock):
14491443
# NB: not (yet) the same as
14501444
# isinstance(values, NDArrayBackedExtensionArray)
1451-
if isinstance(self.dtype, PeriodDtype):
1452-
# TODO: don't special-case
1453-
raise
1454-
blk = self.coerce_to_target_dtype(new)
1455-
return blk.putmask(mask, new)
1445+
blk = self.coerce_to_target_dtype(orig_new)
1446+
return blk.putmask(orig_mask, orig_new)
14561447

14571448
else:
14581449
raise

pandas/tests/arrays/interval/test_interval.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,10 +76,16 @@ def test_set_closed(self, closed, new_closed):
7676
],
7777
)
7878
def test_where_raises(self, other):
79+
# GH#45768 The IntervalArray method raises; the Series method coerces
7980
ser = pd.Series(IntervalArray.from_breaks([1, 2, 3, 4], closed="left"))
81+
mask = np.array([True, False, True])
8082
match = "'value.closed' is 'right', expected 'left'."
8183
with pytest.raises(ValueError, match=match):
82-
ser.where([True, False, True], other=other)
84+
ser.array._where(mask, other)
85+
86+
res = ser.where(mask, other=other)
87+
expected = ser.astype(object).where(mask, other)
88+
tm.assert_series_equal(res, expected)
8389

8490
def test_shift(self):
8591
# https://github.com/pandas-dev/pandas/issues/31495, GH#22428, GH#31502

pandas/tests/arrays/test_period.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,16 @@ def test_sub_period():
124124
[pd.Period("2000", freq="H"), period_array(["2000", "2001", "2000"], freq="H")],
125125
)
126126
def test_where_different_freq_raises(other):
127+
# GH#45768 The PeriodArray method raises, the Series method coerces
127128
ser = pd.Series(period_array(["2000", "2001", "2002"], freq="D"))
128129
cond = np.array([True, False, True])
130+
129131
with pytest.raises(IncompatibleFrequency, match="freq"):
130-
ser.where(cond, other)
132+
ser.array._where(cond, other)
133+
134+
res = ser.where(cond, other)
135+
expected = ser.astype(object).where(cond, other)
136+
tm.assert_series_equal(res, expected)
131137

132138

133139
# ----------------------------------------------------------------------------

pandas/tests/frame/indexing/test_where.py

Lines changed: 44 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,20 @@ def test_where_interval_noop(self):
706706
res = ser.where(ser.notna())
707707
tm.assert_series_equal(res, ser)
708708

709+
def test_where_interval_fullop_downcast(self, frame_or_series):
710+
# GH#45768
711+
obj = frame_or_series([pd.Interval(0, 0)] * 2)
712+
other = frame_or_series([1.0, 2.0])
713+
res = obj.where(~obj.notna(), other)
714+
715+
# since all entries are being changed, we will downcast result
716+
# from object to ints (not floats)
717+
tm.assert_equal(res, other.astype(np.int64))
718+
719+
# unlike where, Block.putmask does not downcast
720+
obj.mask(obj.notna(), other, inplace=True)
721+
tm.assert_equal(obj, other.astype(object))
722+
709723
@pytest.mark.parametrize(
710724
"dtype",
711725
[
@@ -736,6 +750,16 @@ def test_where_datetimelike_noop(self, dtype):
736750
res4 = df.mask(mask2, "foo")
737751
tm.assert_frame_equal(res4, df)
738752

753+
# opposite case where we are replacing *all* values -> we downcast
754+
# from object dtype # GH#45768
755+
res5 = df.where(mask2, 4)
756+
expected = DataFrame(4, index=df.index, columns=df.columns)
757+
tm.assert_frame_equal(res5, expected)
758+
759+
# unlike where, Block.putmask does not downcast
760+
df.mask(~mask2, 4, inplace=True)
761+
tm.assert_frame_equal(df, expected.astype(object))
762+
739763

740764
def test_where_try_cast_deprecated(frame_or_series):
741765
obj = DataFrame(np.random.randn(4, 3))
@@ -894,14 +918,29 @@ def test_where_period_invalid_na(frame_or_series, as_cat, request):
894918
else:
895919
msg = "value should be a 'Period'"
896920

897-
with pytest.raises(TypeError, match=msg):
898-
obj.where(mask, tdnat)
921+
if as_cat:
922+
with pytest.raises(TypeError, match=msg):
923+
obj.where(mask, tdnat)
899924

900-
with pytest.raises(TypeError, match=msg):
901-
obj.mask(mask, tdnat)
925+
with pytest.raises(TypeError, match=msg):
926+
obj.mask(mask, tdnat)
927+
928+
with pytest.raises(TypeError, match=msg):
929+
obj.mask(mask, tdnat, inplace=True)
930+
931+
else:
932+
# With PeriodDtype, ser[i] = tdnat coerces instead of raising,
933+
# so for consistency, ser[mask] = tdnat must as well
934+
expected = obj.astype(object).where(mask, tdnat)
935+
result = obj.where(mask, tdnat)
936+
tm.assert_equal(result, expected)
937+
938+
expected = obj.astype(object).mask(mask, tdnat)
939+
result = obj.mask(mask, tdnat)
940+
tm.assert_equal(result, expected)
902941

903-
with pytest.raises(TypeError, match=msg):
904942
obj.mask(mask, tdnat, inplace=True)
943+
tm.assert_equal(obj, expected)
905944

906945

907946
def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype):

pandas/tests/io/parser/test_unsupported.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,10 @@
1212

1313
import pytest
1414

15-
from pandas.compat import is_platform_windows
15+
from pandas.compat import (
16+
is_platform_mac,
17+
is_platform_windows,
18+
)
1619
from pandas.errors import ParserError
1720

1821
import pandas._testing as tm
@@ -174,9 +177,9 @@ def test_close_file_handle_on_invalid_usecols(all_parsers):
174177
if parser.engine == "pyarrow":
175178
pyarrow = pytest.importorskip("pyarrow")
176179
error = pyarrow.lib.ArrowKeyError
177-
if is_platform_windows():
178-
# GH#45547 causes timeouts on windows builds
179-
pytest.skip("GH#45547 causing timeouts on windows builds 2022-01-22")
180+
if is_platform_windows() or is_platform_mac():
181+
# GH#45547 causes timeouts on windows/mac builds
182+
pytest.skip("GH#45547 causing timeouts on windows/mac builds 2022-01-22")
180183

181184
with tm.ensure_clean("test.csv") as fname:
182185
Path(fname).write_text("col1,col2\na,b\n1,2")

pandas/tests/series/indexing/test_setitem.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
IntervalIndex,
1919
MultiIndex,
2020
NaT,
21+
Period,
2122
Series,
2223
Timedelta,
2324
Timestamp,
@@ -1317,6 +1318,22 @@ def obj(self):
13171318
return Series(timedelta_range("1 day", periods=4))
13181319

13191320

1321+
@pytest.mark.parametrize(
1322+
"val", ["foo", Period("2016", freq="Y"), Interval(1, 2, closed="both")]
1323+
)
1324+
@pytest.mark.parametrize("exp_dtype", [object])
1325+
class TestPeriodIntervalCoercion(CoercionTest):
1326+
# GH#45768
1327+
@pytest.fixture(
1328+
params=[
1329+
period_range("2016-01-01", periods=3, freq="D"),
1330+
interval_range(1, 5),
1331+
]
1332+
)
1333+
def obj(self, request):
1334+
return Series(request.param)
1335+
1336+
13201337
def test_20643():
13211338
# closed by GH#45121
13221339
orig = Series([0, 1, 2], index=["a", "b", "c"])

0 commit comments

Comments
 (0)