Skip to content

Commit 59a07c2

Browse files
authored
Merge branch 'master' into BUG_GH35558_merge_asof_tolerance
2 parents 06e6a27 + 9a8152c commit 59a07c2

31 files changed

+333
-132
lines changed

doc/source/user_guide/indexing.rst

+2-6
Original file line numberDiff line numberDiff line change
@@ -1532,12 +1532,8 @@ Setting metadata
15321532
~~~~~~~~~~~~~~~~
15331533

15341534
Indexes are "mostly immutable", but it is possible to set and change their
1535-
metadata, like the index ``name`` (or, for ``MultiIndex``, ``levels`` and
1536-
``codes``).
1537-
1538-
You can use the ``rename``, ``set_names``, ``set_levels``, and ``set_codes``
1539-
to set these attributes directly. They default to returning a copy; however,
1540-
you can specify ``inplace=True`` to have the data change in place.
1535+
``name`` attribute. You can use the ``rename``, ``set_names`` to set these attributes
1536+
directly, and they default to returning a copy.
15411537

15421538
See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
15431539

doc/source/whatsnew/v1.1.1.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,8 @@ Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

1818
- Fixed regression where :meth:`DataFrame.to_numpy` would raise a ``RuntimeError`` for mixed dtypes when converting to ``str`` (:issue:`35455`)
19-
- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`)
19+
- Fixed regression where :func:`read_csv` would raise a ``ValueError`` when ``pandas.options.mode.use_inf_as_na`` was set to ``True`` (:issue:`35493`).
20+
- Fixed regression where :func:`pandas.testing.assert_series_equal` would raise an error when non-numeric dtypes were passed with ``check_exact=True`` (:issue:`35446`)
2021
- Fixed regression in :class:`pandas.core.groupby.RollingGroupby` where column selection was ignored (:issue:`35486`)
2122
- Fixed regression in :meth:`DataFrame.shift` with ``axis=1`` and heterogeneous dtypes (:issue:`35488`)
2223
- Fixed regression in ``.groupby(..).rolling(..)`` where a segfault would occur with ``center=True`` and an odd number of values (:issue:`35552`)
@@ -52,6 +53,10 @@ Categorical
5253
-
5354
-
5455

56+
**Groupby/resample/rolling**
57+
58+
- Bug in :class:`pandas.core.groupby.RollingGroupby` where passing ``closed`` with column selection would raise a ``ValueError`` (:issue:`35549`)
59+
5560
**Plotting**
5661

5762
-

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Other enhancements
4747

4848
Deprecations
4949
~~~~~~~~~~~~
50-
50+
- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
5151
-
5252
-
5353

pandas/_testing.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -1339,10 +1339,8 @@ def assert_series_equal(
13391339
else:
13401340
assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}")
13411341

1342-
if check_exact:
1343-
if not is_numeric_dtype(left.dtype):
1344-
raise AssertionError("check_exact may only be used with numeric Series")
1345-
1342+
if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype(right.dtype):
1343+
# Only check exact if dtype is numeric
13461344
assert_numpy_array_equal(
13471345
left._values,
13481346
right._values,

pandas/conftest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,7 @@ def multiindex_year_month_day_dataframe_random_data():
359359
tdf = tm.makeTimeDataFrame(100)
360360
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
361361
# use Int64Index, to make sure things work
362-
ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
362+
ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels])
363363
ymd.index.set_names(["year", "month", "day"], inplace=True)
364364
return ymd
365365

pandas/core/dtypes/common.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pandas._libs import Interval, Period, algos
1111
from pandas._libs.tslibs import conversion
12-
from pandas._typing import ArrayLike, DtypeObj
12+
from pandas._typing import ArrayLike, DtypeObj, Optional
1313

1414
from pandas.core.dtypes.base import registry
1515
from pandas.core.dtypes.dtypes import (
@@ -1732,6 +1732,32 @@ def _validate_date_like_dtype(dtype) -> None:
17321732
)
17331733

17341734

1735+
def validate_all_hashable(*args, error_name: Optional[str] = None) -> None:
1736+
"""
1737+
Return None if all args are hashable, else raise a TypeError.
1738+
1739+
Parameters
1740+
----------
1741+
*args
1742+
Arguments to validate.
1743+
error_name : str, optional
1744+
The name to include in the error message if a TypeError is raised.
1745+
1746+
Raises
1747+
------
1748+
TypeError : If an argument is not hashable
1749+
1750+
Returns
1751+
-------
1752+
None
1753+
"""
1754+
if not all(is_hashable(arg) for arg in args):
1755+
if error_name:
1756+
raise TypeError(f"{error_name} must be a hashable type")
1757+
else:
1758+
raise TypeError("All elements must be hashable")
1759+
1760+
17351761
def pandas_dtype(dtype) -> DtypeObj:
17361762
"""
17371763
Convert input into a pandas only dtype object or a numpy dtype object.

pandas/core/generic.py

+22-1
Original file line numberDiff line numberDiff line change
@@ -1772,7 +1772,28 @@ def empty(self) -> bool_t:
17721772
def __array__(self, dtype=None) -> np.ndarray:
17731773
return np.asarray(self._values, dtype=dtype)
17741774

1775-
def __array_wrap__(self, result, context=None):
1775+
def __array_wrap__(
1776+
self,
1777+
result: np.ndarray,
1778+
context: Optional[Tuple[Callable, Tuple[Any, ...], int]] = None,
1779+
):
1780+
"""
1781+
Gets called after a ufunc and other functions.
1782+
1783+
Parameters
1784+
----------
1785+
result: np.ndarray
1786+
The result of the ufunc or other function called on the NumPy array
1787+
returned by __array__
1788+
context: tuple of (func, tuple, int)
1789+
This parameter is returned by ufuncs as a 3-element tuple: (name of the
1790+
ufunc, arguments of the ufunc, domain of the ufunc), but is not set by
1791+
other numpy functions.
1792+
1793+
Notes
1794+
-----
1795+
Series implements __array_ufunc__ so this is not called for ufuncs on Series.
1796+
"""
17761797
result = lib.item_from_zerodim(result)
17771798
if is_scalar(result):
17781799
# e.g. we get here with np.ptp(series)

pandas/core/groupby/generic.py

+47-50
Original file line numberDiff line numberDiff line change
@@ -1029,11 +1029,36 @@ def _cython_agg_blocks(
10291029
agg_blocks: List[Block] = []
10301030
new_items: List[np.ndarray] = []
10311031
deleted_items: List[np.ndarray] = []
1032-
# Some object-dtype blocks might be split into List[Block[T], Block[U]]
1033-
split_items: List[np.ndarray] = []
1034-
split_frames: List[DataFrame] = []
10351032

10361033
no_result = object()
1034+
1035+
def cast_result_block(result, block: "Block", how: str) -> "Block":
1036+
# see if we can cast the block to the desired dtype
1037+
# this may not be the original dtype
1038+
assert not isinstance(result, DataFrame)
1039+
assert result is not no_result
1040+
1041+
dtype = maybe_cast_result_dtype(block.dtype, how)
1042+
result = maybe_downcast_numeric(result, dtype)
1043+
1044+
if block.is_extension and isinstance(result, np.ndarray):
1045+
# e.g. block.values was an IntegerArray
1046+
# (1, N) case can occur if block.values was Categorical
1047+
# and result is ndarray[object]
1048+
# TODO(EA2D): special casing not needed with 2D EAs
1049+
assert result.ndim == 1 or result.shape[0] == 1
1050+
try:
1051+
# Cast back if feasible
1052+
result = type(block.values)._from_sequence(
1053+
result.ravel(), dtype=block.values.dtype
1054+
)
1055+
except (ValueError, TypeError):
1056+
# reshape to be valid for non-Extension Block
1057+
result = result.reshape(1, -1)
1058+
1059+
agg_block: Block = block.make_block(result)
1060+
return agg_block
1061+
10371062
for block in data.blocks:
10381063
# Avoid inheriting result from earlier in the loop
10391064
result = no_result
@@ -1065,9 +1090,9 @@ def _cython_agg_blocks(
10651090
# not try to add missing categories if grouping over multiple
10661091
# Categoricals. This will done by later self._reindex_output()
10671092
# Doing it here creates an error. See GH#34951
1068-
s = get_groupby(obj, self.grouper, observed=True)
1093+
sgb = get_groupby(obj, self.grouper, observed=True)
10691094
try:
1070-
result = s.aggregate(lambda x: alt(x, axis=self.axis))
1095+
result = sgb.aggregate(lambda x: alt(x, axis=self.axis))
10711096
except TypeError:
10721097
# we may have an exception in trying to aggregate
10731098
# continue and exclude the block
@@ -1081,54 +1106,26 @@ def _cython_agg_blocks(
10811106
# about a single block input returning a single block output
10821107
# is a lie. To keep the code-path for the typical non-split case
10831108
# clean, we choose to clean up this mess later on.
1084-
split_items.append(locs)
1085-
split_frames.append(result)
1086-
continue
1087-
1088-
assert len(result._mgr.blocks) == 1
1089-
result = result._mgr.blocks[0].values
1090-
if isinstance(result, np.ndarray) and result.ndim == 1:
1091-
result = result.reshape(1, -1)
1092-
1093-
assert not isinstance(result, DataFrame)
1094-
1095-
if result is not no_result:
1096-
# see if we can cast the block to the desired dtype
1097-
# this may not be the original dtype
1098-
dtype = maybe_cast_result_dtype(block.dtype, how)
1099-
result = maybe_downcast_numeric(result, dtype)
1100-
1101-
if block.is_extension and isinstance(result, np.ndarray):
1102-
# e.g. block.values was an IntegerArray
1103-
# (1, N) case can occur if block.values was Categorical
1104-
# and result is ndarray[object]
1105-
# TODO(EA2D): special casing not needed with 2D EAs
1106-
assert result.ndim == 1 or result.shape[0] == 1
1107-
try:
1108-
# Cast back if feasible
1109-
result = type(block.values)._from_sequence(
1110-
result.ravel(), dtype=block.values.dtype
1111-
)
1112-
except (ValueError, TypeError):
1113-
# reshape to be valid for non-Extension Block
1114-
result = result.reshape(1, -1)
1115-
1116-
agg_block: Block = block.make_block(result)
1117-
1118-
new_items.append(locs)
1119-
agg_blocks.append(agg_block)
1109+
assert len(locs) == result.shape[1]
1110+
for i, loc in enumerate(locs):
1111+
new_items.append(np.array([loc], dtype=locs.dtype))
1112+
agg_block = result.iloc[:, [i]]._mgr.blocks[0]
1113+
agg_blocks.append(agg_block)
1114+
else:
1115+
result = result._mgr.blocks[0].values
1116+
if isinstance(result, np.ndarray) and result.ndim == 1:
1117+
result = result.reshape(1, -1)
1118+
agg_block = cast_result_block(result, block, how)
1119+
new_items.append(locs)
1120+
agg_blocks.append(agg_block)
1121+
else:
1122+
agg_block = cast_result_block(result, block, how)
1123+
new_items.append(locs)
1124+
agg_blocks.append(agg_block)
11201125

1121-
if not (agg_blocks or split_frames):
1126+
if not agg_blocks:
11221127
raise DataError("No numeric types to aggregate")
11231128

1124-
if split_items:
1125-
# Clean up the mess left over from split blocks.
1126-
for locs, result in zip(split_items, split_frames):
1127-
assert len(locs) == result.shape[1]
1128-
for i, loc in enumerate(locs):
1129-
new_items.append(np.array([loc], dtype=locs.dtype))
1130-
agg_blocks.append(result.iloc[:, [i]]._mgr.blocks[0])
1131-
11321129
# reset the locs in the blocks to correspond to our
11331130
# current ordering
11341131
indexer = np.concatenate(new_items)

pandas/core/indexes/base.py

+23-15
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
is_timedelta64_dtype,
5959
is_unsigned_integer_dtype,
6060
pandas_dtype,
61+
validate_all_hashable,
6162
)
6263
from pandas.core.dtypes.concat import concat_compat
6364
from pandas.core.dtypes.generic import (
@@ -574,7 +575,7 @@ def __array__(self, dtype=None) -> np.ndarray:
574575

575576
def __array_wrap__(self, result, context=None):
576577
"""
577-
Gets called after a ufunc.
578+
Gets called after a ufunc and other functions.
578579
"""
579580
result = lib.item_from_zerodim(result)
580581
if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
@@ -812,13 +813,11 @@ def copy(self, name=None, deep=False, dtype=None, names=None):
812813
In most cases, there should be no functional difference from using
813814
``deep``, but if ``deep`` is passed it will attempt to deepcopy.
814815
"""
816+
name = self._validate_names(name=name, names=names, deep=deep)[0]
815817
if deep:
816-
new_index = self._shallow_copy(self._data.copy())
818+
new_index = self._shallow_copy(self._data.copy(), name=name)
817819
else:
818-
new_index = self._shallow_copy()
819-
820-
names = self._validate_names(name=name, names=names, deep=deep)
821-
new_index = new_index.set_names(names)
820+
new_index = self._shallow_copy(name=name)
822821

823822
if dtype:
824823
new_index = new_index.astype(dtype)
@@ -1186,7 +1185,7 @@ def name(self, value):
11861185
maybe_extract_name(value, None, type(self))
11871186
self._name = value
11881187

1189-
def _validate_names(self, name=None, names=None, deep: bool = False):
1188+
def _validate_names(self, name=None, names=None, deep: bool = False) -> List[Label]:
11901189
"""
11911190
Handles the quirks of having a singular 'name' parameter for general
11921191
Index and plural 'names' parameter for MultiIndex.
@@ -1196,15 +1195,25 @@ def _validate_names(self, name=None, names=None, deep: bool = False):
11961195
if names is not None and name is not None:
11971196
raise TypeError("Can only provide one of `names` and `name`")
11981197
elif names is None and name is None:
1199-
return deepcopy(self.names) if deep else self.names
1198+
new_names = deepcopy(self.names) if deep else self.names
12001199
elif names is not None:
12011200
if not is_list_like(names):
12021201
raise TypeError("Must pass list-like as `names`.")
1203-
return names
1202+
new_names = names
1203+
elif not is_list_like(name):
1204+
new_names = [name]
12041205
else:
1205-
if not is_list_like(name):
1206-
return [name]
1207-
return name
1206+
new_names = name
1207+
1208+
if len(new_names) != len(self.names):
1209+
raise ValueError(
1210+
f"Length of new names must be {len(self.names)}, got {len(new_names)}"
1211+
)
1212+
1213+
# All items in 'new_names' need to be hashable
1214+
validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
1215+
1216+
return new_names
12081217

12091218
def _get_names(self):
12101219
return FrozenList((self.name,))
@@ -1232,9 +1241,8 @@ def _set_names(self, values, level=None):
12321241

12331242
# GH 20527
12341243
# All items in 'name' need to be hashable:
1235-
for name in values:
1236-
if not is_hashable(name):
1237-
raise TypeError(f"{type(self).__name__}.name must be a hashable type")
1244+
validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
1245+
12381246
self._name = values[0]
12391247

12401248
names = property(fset=_set_names, fget=_get_names)

pandas/core/indexes/datetimelike.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ def values(self):
116116

117117
def __array_wrap__(self, result, context=None):
118118
"""
119-
Gets called after a ufunc.
119+
Gets called after a ufunc and other functions.
120120
"""
121121
result = lib.item_from_zerodim(result)
122122
if is_bool_dtype(result) or lib.is_scalar(result):

0 commit comments

Comments
 (0)