Skip to content

Commit 9eda931

Browse files
committed
Merge branch 'main' into do-not-error-on-other-dbapi2-connections
2 parents 0fa71e1 + bcf0af0 commit 9eda931

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+432
-323
lines changed

doc/source/user_guide/reshaping.rst

+61-90
Large diffs are not rendered by default.

doc/source/whatsnew/v1.4.1.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,10 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717
- Regression in :meth:`Series.mask` with ``inplace=True`` and ``PeriodDtype`` and an incompatible ``other`` coercing to a common dtype instead of raising (:issue:`45546`)
18+
- Regression in :func:`.assert_frame_equal` not respecting ``check_flags=False`` (:issue:`45554`)
19+
- Regression in :meth:`Series.fillna` with ``downcast=False`` incorrectly downcasting ``object`` dtype (:issue:`45603`)
1820
- Regression in :meth:`DataFrame.loc.__setitem__` losing :class:`Index` name if :class:`DataFrame` was empty before (:issue:`45621`)
19-
-
21+
- Regression in :func:`pandasSQL_builder` whereby ``sqlalchemy`` was incorrectly required as a dependency for DBAPI2 connection objects that aren't from ``sqlite3`` or ``sqlalchemy`` (:issue:`45660`)
2022

2123
.. ---------------------------------------------------------------------------
2224

doc/source/whatsnew/v1.5.0.rst

+5
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,7 @@ Timezones
234234
Numeric
235235
^^^^^^^
236236
- Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`)
237+
- Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an arraylike with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`)
237238
-
238239

239240
Conversion
@@ -259,12 +260,15 @@ Indexing
259260
^^^^^^^^
260261
- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`)
261262
- Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`)
263+
- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`)
262264
- Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised instead of casting to a common dtype (:issue:`45070`)
263265
- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`)
264266
- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`)
265267
- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`)
266268
- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtype :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`)
269+
- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`)
267270
- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`)
271+
- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`)
268272
-
269273

270274
Missing
@@ -305,6 +309,7 @@ Groupby/resample/rolling
305309
Reshaping
306310
^^^^^^^^^
307311
- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`)
312+
- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`)
308313
-
309314

310315
Sparse

pandas/_libs/tslibs/parsing.pyx

+11-1
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ def parse_datetime_string(
291291
return dt
292292

293293

294-
def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None):
294+
def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
295295
"""
296296
Try hard to parse datetime string, leveraging dateutil plus some extra
297297
goodies like quarter recognition.
@@ -312,6 +312,16 @@ def parse_time_string(arg: str, freq=None, dayfirst=None, yearfirst=None):
312312
str
313313
Describing resolution of parsed string.
314314
"""
315+
if type(arg) is not str:
316+
# GH#45580 np.str_ satisfies isinstance(obj, str) but if we annotate
317+
# arg as "str" this raises here
318+
if not isinstance(arg, np.str_):
319+
raise TypeError(
320+
"Argument 'arg' has incorrect type "
321+
f"(expected str, got {type(arg).__name__})"
322+
)
323+
arg = str(arg)
324+
315325
if is_offset_object(freq):
316326
freq = freq.rule_code
317327

pandas/_testing/asserters.py

+1
Original file line numberDiff line numberDiff line change
@@ -1344,6 +1344,7 @@ def assert_frame_equal(
13441344
rtol=rtol,
13451345
atol=atol,
13461346
check_index=False,
1347+
check_flags=False,
13471348
)
13481349

13491350

pandas/core/algorithms.py

+4
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,10 @@ def factorize(
631631
cut : Discretize continuous-valued array.
632632
unique : Find the unique value in an array.
633633
634+
Notes
635+
-----
636+
Reference :ref:`the user guide <reshaping.factorize>` for more examples.
637+
634638
Examples
635639
--------
636640
These examples all show factorize as a top-level method like

pandas/core/arrays/boolean.py

+4-20
Original file line numberDiff line numberDiff line change
@@ -349,11 +349,10 @@ def _coerce_to_array(
349349
def _logical_method(self, other, op):
350350

351351
assert op.__name__ in {"or_", "ror_", "and_", "rand_", "xor", "rxor"}
352-
other_is_booleanarray = isinstance(other, BooleanArray)
353352
other_is_scalar = lib.is_scalar(other)
354353
mask = None
355354

356-
if other_is_booleanarray:
355+
if isinstance(other, BooleanArray):
357356
other, mask = other._data, other._mask
358357
elif is_list_like(other):
359358
other = np.asarray(other, dtype="bool")
@@ -370,7 +369,7 @@ def _logical_method(self, other, op):
370369
)
371370

372371
if not other_is_scalar and len(self) != len(other):
373-
raise ValueError("Lengths must match to compare")
372+
raise ValueError("Lengths must match")
374373

375374
if op.__name__ in {"or_", "ror_"}:
376375
result, mask = ops.kleene_or(self._data, other, self._mask, mask)
@@ -387,7 +386,7 @@ def _arith_method(self, other, op):
387386
mask = None
388387
op_name = op.__name__
389388

390-
if isinstance(other, BooleanArray):
389+
if isinstance(other, BaseMaskedArray):
391390
other, mask = other._data, other._mask
392391

393392
elif is_list_like(other):
@@ -397,14 +396,7 @@ def _arith_method(self, other, op):
397396
if len(self) != len(other):
398397
raise ValueError("Lengths must match")
399398

400-
# nans propagate
401-
if mask is None:
402-
mask = self._mask
403-
if other is libmissing.NA:
404-
# GH#45421 don't alter inplace
405-
mask = mask | True
406-
else:
407-
mask = self._mask | mask
399+
mask = self._propagate_mask(mask, other)
408400

409401
if other is libmissing.NA:
410402
# if other is NA, the result will be all NA and we can't run the
@@ -425,14 +417,6 @@ def _arith_method(self, other, op):
425417
with np.errstate(all="ignore"):
426418
result = op(self._data, other)
427419

428-
# divmod returns a tuple
429-
if op_name == "divmod":
430-
div, mod = result
431-
return (
432-
self._maybe_mask_result(div, mask, other, "floordiv"),
433-
self._maybe_mask_result(mod, mask, other, "mod"),
434-
)
435-
436420
return self._maybe_mask_result(result, mask, other, op_name)
437421

438422
def __abs__(self):

pandas/core/arrays/interval.py

+6-23
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,7 @@
1818

1919
from pandas._config import get_option
2020

21-
from pandas._libs import (
22-
NaT,
23-
lib,
24-
)
21+
from pandas._libs import lib
2522
from pandas._libs.interval import (
2623
VALID_CLOSED,
2724
Interval,
@@ -44,8 +41,6 @@
4441

4542
from pandas.core.dtypes.common import (
4643
is_categorical_dtype,
47-
is_datetime64_dtype,
48-
is_datetime64tz_dtype,
4944
is_dtype_equal,
5045
is_float_dtype,
5146
is_integer_dtype,
@@ -54,7 +49,6 @@
5449
is_object_dtype,
5550
is_scalar,
5651
is_string_dtype,
57-
is_timedelta64_dtype,
5852
needs_i8_conversion,
5953
pandas_dtype,
6054
)
@@ -1103,30 +1097,23 @@ def _validate_scalar(self, value):
11031097
# TODO: check subdtype match like _validate_setitem_value?
11041098
elif is_valid_na_for_dtype(value, self.left.dtype):
11051099
# GH#18295
1106-
left = right = value
1100+
left = right = self.left._na_value
11071101
else:
11081102
raise TypeError(
11091103
"can only insert Interval objects and NA into an IntervalArray"
11101104
)
11111105
return left, right
11121106

11131107
def _validate_setitem_value(self, value):
1114-
needs_float_conversion = False
11151108

11161109
if is_valid_na_for_dtype(value, self.left.dtype):
11171110
# na value: need special casing to set directly on numpy arrays
1111+
value = self.left._na_value
11181112
if is_integer_dtype(self.dtype.subtype):
11191113
# can't set NaN on a numpy integer array
1120-
needs_float_conversion = True
1121-
elif is_datetime64_dtype(self.dtype.subtype):
1122-
# need proper NaT to set directly on the numpy array
1123-
value = np.datetime64("NaT")
1124-
elif is_datetime64tz_dtype(self.dtype.subtype):
1125-
# need proper NaT to set directly on the DatetimeArray array
1126-
value = NaT
1127-
elif is_timedelta64_dtype(self.dtype.subtype):
1128-
# need proper NaT to set directly on the numpy array
1129-
value = np.timedelta64("NaT")
1114+
# GH#45484 TypeError, not ValueError, matches what we get with
1115+
# non-NA un-holdable value.
1116+
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
11301117
value_left, value_right = value, value
11311118

11321119
elif isinstance(value, Interval):
@@ -1139,10 +1126,6 @@ def _validate_setitem_value(self, value):
11391126
else:
11401127
return self._validate_listlike(value)
11411128

1142-
if needs_float_conversion:
1143-
# GH#45484 TypeError, not ValueError, matches what we get with
1144-
# non-NA un-holdable value.
1145-
raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
11461129
return value_left, value_right
11471130

11481131
def value_counts(self, dropna: bool = True):

pandas/core/arrays/masked.py

+26-9
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import numpy as np
1414

1515
from pandas._libs import (
16-
iNaT,
1716
lib,
1817
missing as libmissing,
1918
)
@@ -582,6 +581,18 @@ def _hasna(self) -> bool:
582581
# error: Incompatible return value type (got "bool_", expected "bool")
583582
return self._mask.any() # type: ignore[return-value]
584583

584+
def _propagate_mask(
585+
self, mask: npt.NDArray[np.bool_] | None, other
586+
) -> npt.NDArray[np.bool_]:
587+
if mask is None:
588+
mask = self._mask.copy() # TODO: need test for BooleanArray needing a copy
589+
if other is libmissing.NA:
590+
# GH#45421 don't alter inplace
591+
mask = mask | True
592+
else:
593+
mask = self._mask | mask
594+
return mask
595+
585596
def _cmp_method(self, other, op) -> BooleanArray:
586597
from pandas.core.arrays import BooleanArray
587598

@@ -619,12 +630,7 @@ def _cmp_method(self, other, op) -> BooleanArray:
619630
if result is NotImplemented:
620631
result = invalid_comparison(self._data, other, op)
621632

622-
# nans propagate
623-
if mask is None:
624-
mask = self._mask.copy()
625-
else:
626-
mask = self._mask | mask
627-
633+
mask = self._propagate_mask(mask, other)
628634
return BooleanArray(result, mask, copy=False)
629635

630636
def _maybe_mask_result(self, result, mask, other, op_name: str):
@@ -636,6 +642,14 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
636642
other : scalar or array-like
637643
op_name : str
638644
"""
645+
if op_name == "divmod":
646+
# divmod returns a tuple
647+
div, mod = result
648+
return (
649+
self._maybe_mask_result(div, mask, other, "floordiv"),
650+
self._maybe_mask_result(mod, mask, other, "mod"),
651+
)
652+
639653
# if we have a float operand we are by-definition
640654
# a float result
641655
# or our op is a divide
@@ -657,8 +671,11 @@ def _maybe_mask_result(self, result, mask, other, op_name: str):
657671
# e.g. test_numeric_arr_mul_tdscalar_numexpr_path
658672
from pandas.core.arrays import TimedeltaArray
659673

660-
result[mask] = iNaT
661-
return TimedeltaArray._simple_new(result)
674+
if not isinstance(result, TimedeltaArray):
675+
result = TimedeltaArray._simple_new(result)
676+
677+
result[mask] = result.dtype.type("NaT")
678+
return result
662679

663680
elif is_integer_dtype(result):
664681
from pandas.core.arrays import IntegerArray

0 commit comments

Comments
 (0)