Skip to content

Commit eb23909

Browse files
Merge branch 'main' into fix_to_string_backslash
2 parents 5edba1b + d3bc372 commit eb23909

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+329
-317
lines changed

asv_bench/benchmarks/dtypes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525

2626
class Dtypes:
27-
params = _dtypes + list(map(lambda dt: dt.name, _dtypes))
27+
params = _dtypes + [dt.name for dt in _dtypes]
2828
param_names = ["dtype"]
2929

3030
def time_pandas_dtype(self, dtype):

doc/source/whatsnew/v2.0.2.rst

+5-2
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,22 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16-
- Bug in :func:`DataFrame.to_string` with ``header=True`` that printed a backslash at the end of the first row of the data, instead of the headers, when the DataFrame doesn't fit the line width (:issue:`53054`)
16+
- Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`)
17+
- Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`)
1718

1819
.. ---------------------------------------------------------------------------
1920
.. _whatsnew_202.bug_fixes:
2021

2122
Bug fixes
2223
~~~~~~~~~
24+
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
2325
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame` objects of incorrect sizes when called on slices (:issue:`52824`)
2426
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
27+
- Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
2528
- Bug in :meth:`DataFrame.convert_dtypes` ignoring ``convert_*`` keywords set to ``False`` when ``dtype_backend="pyarrow"`` (:issue:`52872`)
2629
- Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`)
2730
- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
28-
-
31+
2932

3033
.. ---------------------------------------------------------------------------
3134
.. _whatsnew_202.other:

doc/source/whatsnew/v2.1.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ Performance improvements
285285
- Performance improvement accessing :attr:`arrays.IntegerArray.dtype` and :attr:`arrays.FloatingArray.dtype` (:issue:`52998`)
286286
- Performance improvement in :class:`Series` reductions (:issue:`52341`)
287287
- Performance improvement in :func:`concat` when ``axis=1`` and objects have different indexes (:issue:`52541`)
288+
- Performance improvement in :meth:`DataFrame.loc` when selecting rows and columns (:issue:`53014`)
288289
- Performance improvement in :meth:`Series.corr` and :meth:`Series.cov` for extension dtypes (:issue:`52502`)
289290
- Performance improvement in :meth:`Series.to_numpy` when dtype is a numpy float dtype and ``na_value`` is ``np.nan`` (:issue:`52430`)
290291
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.to_numpy` (:issue:`52525`)
@@ -307,6 +308,8 @@ Datetimelike
307308
- Bug in :func:`date_range` when ``freq`` was a :class:`DateOffset` with ``nanoseconds`` (:issue:`46877`)
308309
- Bug in :meth:`Timestamp.round` with values close to the implementation bounds returning incorrect results instead of raising ``OutOfBoundsDatetime`` (:issue:`51494`)
309310
- Bug in :meth:`arrays.DatetimeArray.map` and :meth:`DatetimeIndex.map`, where the supplied callable operated array-wise instead of element-wise (:issue:`51977`)
311+
- Bug in parsing datetime strings with weekday but no day e.g. "2023 Sept Thu" incorrectly raising ``AttributeError`` instead of ``ValueError`` (:issue:`52659`)
312+
-
310313

311314
Timedelta
312315
^^^^^^^^^

pandas/_libs/tslibs/offsets.pyx

+7-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ from cpython.datetime cimport (
1515

1616
import_datetime()
1717

18-
from dateutil.easter import easter
19-
from dateutil.relativedelta import relativedelta
2018
import numpy as np
2119

2220
cimport numpy as cnp
@@ -348,6 +346,8 @@ cdef _determine_offset(kwds):
348346
kwds_no_nanos["microseconds"] = kwds_no_nanos.get("microseconds", 0) + micro
349347

350348
if all(k in kwds_use_relativedelta for k in kwds_no_nanos):
349+
from dateutil.relativedelta import relativedelta
350+
351351
return relativedelta(**kwds_no_nanos), True
352352

353353
raise ValueError(
@@ -3691,6 +3691,8 @@ cdef class Easter(SingleConstructorOffset):
36913691

36923692
@apply_wraps
36933693
def _apply(self, other: datetime) -> datetime:
3694+
from dateutil.easter import easter
3695+
36943696
current_easter = easter(other.year)
36953697
current_easter = datetime(
36963698
current_easter.year, current_easter.month, current_easter.day
@@ -3721,6 +3723,9 @@ cdef class Easter(SingleConstructorOffset):
37213723
def is_on_offset(self, dt: datetime) -> bool:
37223724
if self.normalize and not _is_normalized(dt):
37233725
return False
3726+
3727+
from dateutil.easter import easter
3728+
37243729
return date(dt.year, dt.month, dt.day) == easter(dt.year)
37253730

37263731

pandas/_libs/tslibs/parsing.pyx

+5-2
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@ from dateutil.parser import (
4646
DEFAULTPARSER,
4747
parse as du_parse,
4848
)
49-
from dateutil.relativedelta import relativedelta
5049
from dateutil.tz import (
5150
tzlocal as _dateutil_tzlocal,
5251
tzoffset,
@@ -692,7 +691,11 @@ cdef datetime dateutil_parse(
692691
) from err
693692

694693
if res.weekday is not None and not res.day:
695-
ret = ret + relativedelta.relativedelta(weekday=res.weekday)
694+
# GH#52659
695+
raise ValueError(
696+
"Parsing datetimes with weekday but no day information is "
697+
"not supported"
698+
)
696699
if not ignoretz:
697700
if res.tzname and res.tzname in time.tzname:
698701
# GH#50791

pandas/_libs/tslibs/timedeltas.pyx

+3-3
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import_datetime()
3434

3535

3636
cimport pandas._libs.tslibs.util as util
37+
from pandas._libs.missing cimport checknull_with_nat_and_na
3738
from pandas._libs.tslibs.base cimport ABCTimestamp
3839
from pandas._libs.tslibs.conversion cimport (
3940
cast_from_unit,
@@ -341,8 +342,7 @@ cdef convert_to_timedelta64(object ts, str unit):
341342
Return an ns based int64
342343
"""
343344
# Caller is responsible for checking unit not in ["Y", "y", "M"]
344-
345-
if checknull_with_nat(ts):
345+
if checknull_with_nat_and_na(ts):
346346
return np.timedelta64(NPY_NAT, "ns")
347347
elif isinstance(ts, _Timedelta):
348348
# already in the proper format
@@ -1808,7 +1808,7 @@ class Timedelta(_Timedelta):
18081808
# unit=None is de-facto 'ns'
18091809
unit = parse_timedelta_unit(unit)
18101810
value = convert_to_timedelta64(value, unit)
1811-
elif checknull_with_nat(value):
1811+
elif checknull_with_nat_and_na(value):
18121812
return NaT
18131813
else:
18141814
raise ValueError(

pandas/_typing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@
155155

156156
RandomState = Union[
157157
int,
158-
ArrayLike,
158+
np.ndarray,
159159
np.random.Generator,
160160
np.random.BitGenerator,
161161
np.random.RandomState,

pandas/core/array_algos/putmask.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
136136
other : Any
137137
"""
138138
if values.dtype == object:
139-
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
139+
dtype, _ = infer_dtype_from(other)
140140

141141
if isinstance(dtype, np.dtype) and dtype.kind in "mM":
142142
# https://github.com/numpy/numpy/issues/12550

pandas/core/arrays/arrow/array.py

+17-18
Original file line numberDiff line numberDiff line change
@@ -18,22 +18,6 @@
1818
import numpy as np
1919

2020
from pandas._libs import lib
21-
from pandas._typing import (
22-
ArrayLike,
23-
AxisInt,
24-
Dtype,
25-
FillnaOptions,
26-
Iterator,
27-
NpDtype,
28-
PositionalIndexer,
29-
Scalar,
30-
Self,
31-
SortKind,
32-
TakeIndexer,
33-
TimeAmbiguous,
34-
TimeNonexistent,
35-
npt,
36-
)
3721
from pandas.compat import (
3822
pa_version_under7p0,
3923
pa_version_under8p0,
@@ -140,8 +124,22 @@ def floordiv_compat(
140124

141125
if TYPE_CHECKING:
142126
from pandas._typing import (
127+
ArrayLike,
128+
AxisInt,
129+
Dtype,
130+
FillnaOptions,
131+
Iterator,
132+
NpDtype,
143133
NumpySorter,
144134
NumpyValueArrayLike,
135+
PositionalIndexer,
136+
Scalar,
137+
Self,
138+
SortKind,
139+
TakeIndexer,
140+
TimeAmbiguous,
141+
TimeNonexistent,
142+
npt,
145143
)
146144

147145
from pandas import Series
@@ -805,8 +803,9 @@ def fillna(
805803
fallback_performancewarning()
806804
return super().fillna(value=value, method=method, limit=limit)
807805

808-
if is_array_like(value):
809-
value = cast(ArrayLike, value)
806+
if isinstance(value, (np.ndarray, ExtensionArray)):
807+
# Similar to check_value_size, but we do not mask here since we may
808+
# end up passing it to the super() method.
810809
if len(value) != len(self):
811810
raise ValueError(
812811
f"Length of 'value' does not match. Got ({len(value)}) "

pandas/core/arrays/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@
3838
validate_insert_loc,
3939
)
4040

41-
from pandas.core.dtypes.cast import maybe_cast_to_extension_array
41+
from pandas.core.dtypes.cast import maybe_cast_pointwise_result
4242
from pandas.core.dtypes.common import (
4343
is_list_like,
4444
is_scalar,
@@ -1957,7 +1957,7 @@ def _maybe_convert(arr):
19571957
# https://github.com/pandas-dev/pandas/issues/22850
19581958
# We catch all regular exceptions here, and fall back
19591959
# to an ndarray.
1960-
res = maybe_cast_to_extension_array(type(self), arr)
1960+
res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
19611961
if not isinstance(res, type(self)):
19621962
# exception raised in _from_sequence; ensure we have ndarray
19631963
res = np.asarray(arr)

pandas/core/arrays/masked.py

+7-15
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,11 @@
7575
from pandas.core.array_algos.quantile import quantile_with_mask
7676
from pandas.core.arraylike import OpsMixin
7777
from pandas.core.arrays.base import ExtensionArray
78-
from pandas.core.construction import ensure_wrapped_if_datetimelike
78+
from pandas.core.construction import (
79+
array as pd_array,
80+
ensure_wrapped_if_datetimelike,
81+
extract_array,
82+
)
7983
from pandas.core.indexers import check_array_indexer
8084
from pandas.core.ops import invalid_comparison
8185

@@ -645,20 +649,8 @@ def _arith_method(self, other, op):
645649
and len(other) == len(self)
646650
):
647651
# Try inferring masked dtype instead of casting to object
648-
inferred_dtype = lib.infer_dtype(other, skipna=True)
649-
if inferred_dtype == "integer":
650-
from pandas.core.arrays import IntegerArray
651-
652-
other = IntegerArray._from_sequence(other)
653-
elif inferred_dtype in ["floating", "mixed-integer-float"]:
654-
from pandas.core.arrays import FloatingArray
655-
656-
other = FloatingArray._from_sequence(other)
657-
658-
elif inferred_dtype in ["boolean"]:
659-
from pandas.core.arrays import BooleanArray
660-
661-
other = BooleanArray._from_sequence(other)
652+
other = pd_array(other)
653+
other = extract_array(other, extract_numpy=True)
662654

663655
if isinstance(other, BaseMaskedArray):
664656
other, omask = other._data, other._mask

pandas/core/common.py

+4-24
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,9 @@
3333

3434
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
3535
from pandas.core.dtypes.common import (
36-
is_array_like,
3736
is_bool_dtype,
3837
is_integer,
3938
)
40-
from pandas.core.dtypes.dtypes import ExtensionDtype
4139
from pandas.core.dtypes.generic import (
4240
ABCExtensionArray,
4341
ABCIndex,
@@ -120,9 +118,7 @@ def is_bool_indexer(key: Any) -> bool:
120118
check_array_indexer : Check that `key` is a valid array to index,
121119
and convert to an ndarray.
122120
"""
123-
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex)) or (
124-
is_array_like(key) and isinstance(key.dtype, ExtensionDtype)
125-
):
121+
if isinstance(key, (ABCSeries, np.ndarray, ABCIndex, ABCExtensionArray)):
126122
if key.dtype == np.object_:
127123
key_array = np.asarray(key)
128124

@@ -420,7 +416,7 @@ def random_state(state: np.random.Generator) -> np.random.Generator:
420416

421417
@overload
422418
def random_state(
423-
state: int | ArrayLike | np.random.BitGenerator | np.random.RandomState | None,
419+
state: int | np.ndarray | np.random.BitGenerator | np.random.RandomState | None,
424420
) -> np.random.RandomState:
425421
...
426422

@@ -445,24 +441,8 @@ def random_state(state: RandomState | None = None):
445441
np.random.RandomState or np.random.Generator. If state is None, returns np.random
446442
447443
"""
448-
if (
449-
is_integer(state)
450-
or is_array_like(state)
451-
or isinstance(state, np.random.BitGenerator)
452-
):
453-
# error: Argument 1 to "RandomState" has incompatible type "Optional[Union[int,
454-
# Union[ExtensionArray, ndarray[Any, Any]], Generator, RandomState]]"; expected
455-
# "Union[None, Union[Union[_SupportsArray[dtype[Union[bool_, integer[Any]]]],
456-
# Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]],
457-
# Sequence[Sequence[_SupportsArray[dtype[Union[bool_, integer[Any]]]]]],
458-
# Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
459-
# integer[Any]]]]]]],
460-
# Sequence[Sequence[Sequence[Sequence[_SupportsArray[dtype[Union[bool_,
461-
# integer[Any]]]]]]]]], Union[bool, int, Sequence[Union[bool, int]],
462-
# Sequence[Sequence[Union[bool, int]]], Sequence[Sequence[Sequence[Union[bool,
463-
# int]]]], Sequence[Sequence[Sequence[Sequence[Union[bool, int]]]]]]],
464-
# BitGenerator]"
465-
return np.random.RandomState(state) # type: ignore[arg-type]
444+
if is_integer(state) or isinstance(state, (np.ndarray, np.random.BitGenerator)):
445+
return np.random.RandomState(state)
466446
elif isinstance(state, np.random.RandomState):
467447
return state
468448
elif isinstance(state, np.random.Generator):

pandas/core/computation/expr.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def _filter_nodes(superclass, all_nodes=_all_nodes):
192192
return frozenset(node_names)
193193

194194

195-
_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
195+
_all_node_names = frozenset(x.__name__ for x in _all_nodes)
196196
_mod_nodes = _filter_nodes(ast.mod)
197197
_stmt_nodes = _filter_nodes(ast.stmt)
198198
_expr_nodes = _filter_nodes(ast.expr)

0 commit comments

Comments
 (0)