Skip to content

Commit 7ab6598

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into sty-private
2 parents 049b551 + 70c056b commit 7ab6598

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+375
-360
lines changed

ci/code_checks.sh

+7-4
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
187187
invgrep -R --include="*.py" -E "super\(\w*, (self|cls)\)" pandas
188188
RET=$(($RET + $?)) ; echo $MSG "DONE"
189189

190+
MSG='Check for use of builtin filter function' ; echo $MSG
191+
invgrep -R --include="*.py" -P '(?<!def)[\(\s]filter\(' pandas
192+
RET=$(($RET + $?)) ; echo $MSG "DONE"
193+
190194
# Check for the following code in testing: `np.testing` and `np.array_equal`
191195
MSG='Check for invalid testing' ; echo $MSG
192196
invgrep -r -E --include '*.py' --exclude testing.py '(numpy|np)(\.testing|\.array_equal)' pandas/tests/
@@ -238,10 +242,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
238242
invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas
239243
RET=$(($RET + $?)) ; echo $MSG "DONE"
240244

241-
# https://github.com/python/mypy/issues/7384
242-
# MSG='Check for missing error codes with # type: ignore' ; echo $MSG
243-
# invgrep -R --include="*.py" -P '# type: ignore(?!\[)' pandas
244-
# RET=$(($RET + $?)) ; echo $MSG "DONE"
245+
MSG='Check for missing error codes with # type: ignore' ; echo $MSG
246+
invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas
247+
RET=$(($RET + $?)) ; echo $MSG "DONE"
245248

246249
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
247250
invgrep -R --include=*.{py,pyx} '\.__class__' pandas

doc/source/user_guide/missing_data.rst

-26
Original file line numberDiff line numberDiff line change
@@ -689,32 +689,6 @@ You can also operate on the DataFrame in place:
689689
690690
df.replace(1.5, np.nan, inplace=True)
691691
692-
.. warning::
693-
694-
When replacing multiple ``bool`` or ``datetime64`` objects, the first
695-
argument to ``replace`` (``to_replace``) must match the type of the value
696-
being replaced. For example,
697-
698-
.. code-block:: python
699-
700-
>>> s = pd.Series([True, False, True])
701-
>>> s.replace({'a string': 'new value', True: False}) # raises
702-
TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
703-
704-
will raise a ``TypeError`` because one of the ``dict`` keys is not of the
705-
correct type for replacement.
706-
707-
However, when replacing a *single* object such as,
708-
709-
.. ipython:: python
710-
711-
s = pd.Series([True, False, True])
712-
s.replace('a string', 'another string')
713-
714-
the original ``NDFrame`` object will be returned untouched. We're working on
715-
unifying this API, but for backwards compatibility reasons we cannot break
716-
the latter behavior. See :issue:`6354` for more details.
717-
718692
Missing data casting rules and indexing
719693
---------------------------------------
720694

doc/source/whatsnew/v1.2.0.rst

+4-1
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,8 @@ Performance improvements
214214

215215
Bug fixes
216216
~~~~~~~~~
217-
217+
- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
218+
-
218219

219220
Categorical
220221
^^^^^^^^^^^
@@ -311,6 +312,7 @@ Groupby/resample/rolling
311312
- Bug in :meth:`DataFrameGroupBy.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
312313
- Bug when subsetting columns on a :class:`~pandas.core.groupby.DataFrameGroupBy` (e.g. ``df.groupby('a')[['b']]``) would reset the attributes ``axis``, ``dropna``, ``group_keys``, ``level``, ``mutated``, ``sort``, and ``squeeze`` to their default values. (:issue:`9959`)
313314
- Bug in :meth:`DataFrameGroupBy.tshift` failing to raise ``ValueError`` when a frequency cannot be inferred for the index of a group (:issue:`35937`)
315+
- Bug in :meth:`DataFrame.groupby` not always maintaining the column index name for ``any``, ``all``, ``bfill``, ``ffill``, ``shift`` (:issue:`29764`)
314316
-
315317

316318
Reshaping
@@ -337,6 +339,7 @@ ExtensionArray
337339
Other
338340
^^^^^
339341
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
342+
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
340343
-
341344

342345
.. ---------------------------------------------------------------------------

pandas/_typing.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@
6262
# other
6363

6464
Dtype = Union[
65-
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]
65+
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]]
6666
]
6767
DtypeObj = Union[np.dtype, "ExtensionDtype"]
6868
FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase]

pandas/core/algorithms.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import operator
88
from textwrap import dedent
9-
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
9+
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast
1010
from warnings import catch_warnings, simplefilter, warn
1111

1212
import numpy as np
@@ -60,7 +60,7 @@
6060
from pandas.core.indexers import validate_indices
6161

6262
if TYPE_CHECKING:
63-
from pandas import DataFrame, Series
63+
from pandas import Categorical, DataFrame, Series
6464

6565
_shared_docs: Dict[str, str] = {}
6666

@@ -429,8 +429,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
429429
if is_categorical_dtype(comps):
430430
# TODO(extension)
431431
# handle categoricals
432-
# error: "ExtensionArray" has no attribute "isin" [attr-defined]
433-
return comps.isin(values) # type: ignore[attr-defined]
432+
return cast("Categorical", comps).isin(values)
434433

435434
comps, dtype = _ensure_data(comps)
436435
values, _ = _ensure_data(values, dtype=dtype)

pandas/core/array_algos/replace.py

+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
"""
2+
Methods used by Block.replace and related methods.
3+
"""
4+
import operator
5+
import re
6+
from typing import Optional, Pattern, Union
7+
8+
import numpy as np
9+
10+
from pandas._typing import ArrayLike, Scalar
11+
12+
from pandas.core.dtypes.common import (
13+
is_datetimelike_v_numeric,
14+
is_numeric_v_string_like,
15+
is_scalar,
16+
)
17+
from pandas.core.dtypes.missing import isna
18+
19+
20+
def compare_or_regex_search(
    a: ArrayLike,
    b: Union[Scalar, Pattern],
    regex: bool = False,
    mask: Optional[ArrayLike] = None,
) -> Union[ArrayLike, bool]:
    """
    Compare an array_like input against a scalar or regex pattern.

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool, default False
    mask : array_like or None (default)
        Boolean array selecting the elements of ``a`` that are compared.
        When omitted and ``a`` is an ndarray (and ``b`` is not), the
        non-NA positions of ``a`` are used.

    Returns
    -------
    mask : array_like of bool
        When a mask was applied, the result has the shape of that mask and
        is False at every position the mask excluded. A scalar ``False`` is
        returned for a datetimelike-vs-numeric comparison when ``a`` is not
        an ndarray.

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison produced a scalar, i.e.
        the two operands could not be compared element-wise.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raise TypeError if comparing ndarray `a` with `b` gave a scalar result.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            a_name = f"ndarray(dtype={a.dtype})"
            b_name = type(b).__name__
            raise TypeError(f"Cannot compare types {repr(a_name)} and {repr(b_name)}")

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        # otypes is given explicitly: without it np.vectorize cannot infer
        # the output dtype and raises on size-0 input, which happens when
        # the mask below excludes every element (e.g. an all-NA array).
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False,
            otypes=[bool],
        )

    # GH#32621 use mask to avoid comparing to NAs
    if mask is None and isinstance(a, np.ndarray) and not isinstance(b, np.ndarray):
        mask = np.reshape(~(isna(a)), a.shape)

    # Only narrow `a` when there really is a mask; indexing with None would
    # silently prepend an axis instead of selecting elements.
    masked = isinstance(a, np.ndarray) and mask is not None
    if masked:
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # Report "no match" in the shape of the mask, not in the shape of
        # the masked (flattened, possibly shorter) `a`, so that this
        # shortcut agrees with the general path below.
        shape = mask.shape if masked else a.shape
        return np.zeros(shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ from that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result

pandas/core/arrays/categorical.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,19 @@ class Categorical(NDArrayBackedExtensionArray, PandasObject):
280280
['a', 'b', 'c', 'a', 'b', 'c']
281281
Categories (3, object): ['a', 'b', 'c']
282282
283+
Missing values are not included as a category.
284+
285+
>>> c = pd.Categorical([1, 2, 3, 1, 2, 3, np.nan])
286+
>>> c
287+
[1, 2, 3, 1, 2, 3, NaN]
288+
Categories (3, int64): [1, 2, 3]
289+
290+
However, their presence is indicated in the `codes` attribute
291+
by code `-1`.
292+
293+
>>> c.codes
294+
array([ 0, 1, 2, 0, 1, 2, -1], dtype=int8)
295+
283296
Ordered `Categoricals` can be sorted according to the custom order
284297
of the categories and can have a min and max value.
285298
@@ -2316,7 +2329,7 @@ def _concat_same_type(self, to_concat):
23162329

23172330
return union_categoricals(to_concat)
23182331

2319-
def isin(self, values):
2332+
def isin(self, values) -> np.ndarray:
23202333
"""
23212334
Check whether `values` are contained in Categorical.
23222335

pandas/core/arrays/datetimelike.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -468,10 +468,9 @@ def _ndarray(self) -> np.ndarray:
468468

469469
def _from_backing_data(self: _T, arr: np.ndarray) -> _T:
470470
# Note: we do not retain `freq`
471+
# error: Too many arguments for "NDArrayBackedExtensionArray"
471472
# error: Unexpected keyword argument "dtype" for "NDArrayBackedExtensionArray"
472-
# TODO: add my error code
473-
# https://github.com/python/mypy/issues/7384
474-
return type(self)(arr, dtype=self.dtype) # type: ignore
473+
return type(self)(arr, dtype=self.dtype) # type: ignore[call-arg]
475474

476475
# ------------------------------------------------------------------
477476

pandas/core/construction.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -335,7 +335,7 @@ def array(
335335
return result
336336

337337

338-
def extract_array(obj, extract_numpy: bool = False):
338+
def extract_array(obj: AnyArrayLike, extract_numpy: bool = False) -> ArrayLike:
339339
"""
340340
Extract the ndarray or ExtensionArray from a Series or Index.
341341
@@ -383,7 +383,9 @@ def extract_array(obj, extract_numpy: bool = False):
383383
if extract_numpy and isinstance(obj, ABCPandasArray):
384384
obj = obj.to_numpy()
385385

386-
return obj
386+
# error: Incompatible return value type (got "Index", expected "ExtensionArray")
387+
# error: Incompatible return value type (got "Series", expected "ExtensionArray")
388+
return obj # type: ignore[return-value]
387389

388390

389391
def sanitize_array(

pandas/core/dtypes/cast.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1488,7 +1488,7 @@ def find_common_type(types: List[DtypeObj]) -> DtypeObj:
14881488
if has_bools:
14891489
for t in types:
14901490
if is_integer_dtype(t) or is_float_dtype(t) or is_complex_dtype(t):
1491-
return object
1491+
return np.dtype("object")
14921492

14931493
return np.find_common_type(types, [])
14941494

@@ -1550,7 +1550,7 @@ def construct_1d_arraylike_from_scalar(
15501550
elif isinstance(dtype, np.dtype) and dtype.kind in ("U", "S"):
15511551
# we need to coerce to object dtype
15521552
# to allow numpy to take our string as a scalar value
1553-
dtype = object
1553+
dtype = np.dtype("object")
15541554
if not isna(value):
15551555
value = ensure_str(value)
15561556

pandas/core/dtypes/dtypes.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
395395
from pandas.core.dtypes.common import DT64NS_DTYPE, is_datetime64tz_dtype
396396

397397
from pandas.core.util.hashing import (
398-
_combine_hash_arrays,
398+
combine_hash_arrays,
399399
hash_array,
400400
hash_tuples,
401401
)
@@ -427,7 +427,7 @@ def _hash_categories(categories, ordered: Ordered = True) -> int:
427427
)
428428
else:
429429
cat_array = [cat_array]
430-
hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
430+
hashed = combine_hash_arrays(iter(cat_array), num_items=len(cat_array))
431431
return np.bitwise_xor.reduce(hashed)
432432

433433
@classmethod

pandas/core/generic.py

-14
Original file line numberDiff line numberDiff line change
@@ -6561,20 +6561,6 @@ def replace(
65616561
1 new new
65626562
2 bait xyz
65636563
6564-
Note that when replacing multiple ``bool`` or ``datetime64`` objects,
6565-
the data types in the `to_replace` parameter must match the data
6566-
type of the value being replaced:
6567-
6568-
>>> df = pd.DataFrame({{'A': [True, False, True],
6569-
... 'B': [False, True, False]}})
6570-
>>> df.replace({{'a string': 'new value', True: False}}) # raises
6571-
Traceback (most recent call last):
6572-
...
6573-
TypeError: Cannot compare types 'ndarray(dtype=bool)' and 'str'
6574-
6575-
This raises a ``TypeError`` because one of the ``dict`` keys is not of
6576-
the correct type for replacement.
6577-
65786564
Compare the behavior of ``s.replace({{'a': None}})`` and
65796565
``s.replace('a', None)`` to understand the peculiarities
65806566
of the `to_replace` parameter:

pandas/core/groupby/categorical.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,10 @@ def recode_from_groupby(
9898
"""
9999
# we re-order to the original category orderings
100100
if sort:
101-
return ci.set_categories(c.categories) # type: ignore [attr-defined]
101+
# error: "CategoricalIndex" has no attribute "set_categories"
102+
return ci.set_categories(c.categories) # type: ignore[attr-defined]
102103

103104
# we are not sorting, so add unobserved to the end
104105
new_cats = c.categories[~c.categories.isin(ci.categories)]
105-
return ci.add_categories(new_cats) # type: ignore [attr-defined]
106+
# error: "CategoricalIndex" has no attribute "add_categories"
107+
return ci.add_categories(new_cats) # type: ignore[attr-defined]

pandas/core/groupby/generic.py

+2
Original file line numberDiff line numberDiff line change
@@ -1084,6 +1084,7 @@ def blk_func(bvalues: ArrayLike) -> ArrayLike:
10841084
assert how == "ohlc"
10851085
raise
10861086

1087+
# We get here with a) EADtypes and b) object dtype
10871088
obj: Union[Series, DataFrame]
10881089
# call our grouper again with only this block
10891090
if isinstance(bvalues, ExtensionArray):
@@ -1694,6 +1695,7 @@ def _wrap_transformed_output(
16941695
"""
16951696
indexed_output = {key.position: val for key, val in output.items()}
16961697
columns = Index(key.label for key in output)
1698+
columns.name = self.obj.columns.name
16971699

16981700
result = self.obj._constructor(indexed_output)
16991701
result.columns = columns

pandas/core/groupby/groupby.py

+2
Original file line numberDiff line numberDiff line change
@@ -1012,6 +1012,8 @@ def _agg_general(
10121012
# raised in _get_cython_function, in some cases can
10131013
# be trimmed by implementing cython funcs for more dtypes
10141014
pass
1015+
else:
1016+
raise
10151017

10161018
# apply a non-cython aggregation
10171019
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))

0 commit comments

Comments
 (0)