Skip to content

Commit 79741d1

Browse files
committed
Merge branch 'master' of github.com:pandas-dev/pandas
2 parents ecad7f9 + b4e9566 commit 79741d1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+422
-292
lines changed

asv_bench/benchmarks/groupby.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -480,7 +480,19 @@ class GroupByCythonAgg:
480480
param_names = ["dtype", "method"]
481481
params = [
482482
["float64"],
483-
["sum", "prod", "min", "max", "mean", "median", "var", "first", "last"],
483+
[
484+
"sum",
485+
"prod",
486+
"min",
487+
"max",
488+
"mean",
489+
"median",
490+
"var",
491+
"first",
492+
"last",
493+
"any",
494+
"all",
495+
],
484496
]
485497

486498
def setup(self, dtype, method):

ci/azure/windows.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
vmImage: ${{ parameters.vmImage }}
99
strategy:
1010
matrix:
11-
py37_np16:
11+
py37_np17:
1212
ENV_FILE: ci/deps/azure-windows-37.yaml
1313
CONDA_PY: "37"
1414
PATTERN: "not slow and not network"

ci/deps/actions-37-minimum_versions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ dependencies:
1818
- jinja2=2.10
1919
- numba=0.46.0
2020
- numexpr=2.6.8
21-
- numpy=1.16.5
21+
- numpy=1.17.3
2222
- openpyxl=3.0.0
2323
- pytables=3.5.1
2424
- python-dateutil=2.7.3

ci/deps/azure-macos-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dependencies:
1919
- matplotlib=2.2.3
2020
- nomkl
2121
- numexpr
22-
- numpy=1.16.5
22+
- numpy=1.17.3
2323
- openpyxl
2424
- pyarrow=0.15.1
2525
- pytables

ci/deps/azure-windows-37.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ dependencies:
2424
- moto>=1.3.14
2525
- flask
2626
- numexpr
27-
- numpy=1.16.*
27+
- numpy=1.17.*
2828
- openpyxl
2929
- pyarrow=0.15
3030
- pytables

doc/source/getting_started/install.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -222,7 +222,7 @@ Dependencies
222222
Package Minimum supported version
223223
================================================================ ==========================
224224
`setuptools <https://setuptools.readthedocs.io/en/latest/>`__ 38.6.0
225-
`NumPy <https://numpy.org>`__ 1.16.5
225+
`NumPy <https://numpy.org>`__ 1.17.3
226226
`python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__ 2.7.3
227227
`pytz <https://pypi.org/project/pytz/>`__ 2017.3
228228
================================================================ ==========================

doc/source/whatsnew/v1.3.0.rst

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ Other enhancements
217217
- :class:`RangeIndex` can now be constructed by passing a ``range`` object directly e.g. ``pd.RangeIndex(range(3))`` (:issue:`12067`)
218218
- :meth:`round` is now enabled for the nullable integer and floating dtypes (:issue:`38844`)
219219
- :meth:`pandas.read_csv` and :meth:`pandas.read_json` expose the argument ``encoding_errors`` to control how encoding errors are handled (:issue:`39450`)
220+
- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` use Kleene logic with nullable data types (:issue:`37506`)
221+
- :meth:`.GroupBy.any` and :meth:`.GroupBy.all` return a ``BooleanDtype`` for columns with nullable data types (:issue:`33449`)
222+
-
220223

221224
.. ---------------------------------------------------------------------------
222225
@@ -465,7 +468,7 @@ If installed, we now require:
465468
+-----------------+-----------------+----------+---------+
466469
| Package | Minimum Version | Required | Changed |
467470
+=================+=================+==========+=========+
468-
| numpy | 1.16.5 | X | |
471+
| numpy | 1.17.3 | X | X |
469472
+-----------------+-----------------+----------+---------+
470473
| pytz | 2017.3 | X | |
471474
+-----------------+-----------------+----------+---------+
@@ -711,7 +714,7 @@ Missing
711714

712715
- Bug in :class:`Grouper` not correctly propagating the ``dropna`` argument; :meth:`DataFrameGroupBy.transform` now correctly handles missing values for ``dropna=True`` (:issue:`35612`)
713716
- Bug in :func:`isna`, and :meth:`Series.isna`, :meth:`Index.isna`, :meth:`DataFrame.isna` (and the corresponding ``notna`` functions) not recognizing ``Decimal("NaN")`` objects (:issue:`39409`)
714-
-
717+
- Bug in :meth:`DataFrame.fillna` not accepting dictionary for ``downcast`` keyword (:issue:`40809`)
715718

716719
MultiIndex
717720
^^^^^^^^^^
@@ -787,6 +790,8 @@ Groupby/resample/rolling
787790
- Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`)
788791
- Bug in aggregation functions for :class:`DataFrame` not respecting ``numeric_only`` argument when ``level`` keyword was given (:issue:`40660`)
789792
- Bug in :class:`core.window.RollingGroupby` where ``as_index=False`` argument in ``groupby`` was ignored (:issue:`39433`)
793+
- Bug in :meth:`.GroupBy.any` and :meth:`.GroupBy.all` raising ``ValueError`` when used with nullable type columns holding ``NA``, even with ``skipna=True`` (:issue:`40585`)
794+
790795

791796
Reshaping
792797
^^^^^^^^^

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ channels:
33
- conda-forge
44
dependencies:
55
# required
6-
- numpy>=1.16.5
6+
- numpy>=1.17.3
77
- python=3
88
- python-dateutil>=2.7.3
99
- pytz

pandas/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020

2121
# numpy compat
2222
from pandas.compat import (
23-
np_version_under1p17 as _np_version_under1p17,
2423
np_version_under1p18 as _np_version_under1p18,
2524
is_numpy_dev as _is_numpy_dev,
2625
)

pandas/_libs/groupby.pyx

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -388,40 +388,47 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
388388

389389
@cython.boundscheck(False)
390390
@cython.wraparound(False)
391-
def group_any_all(uint8_t[::1] out,
392-
const uint8_t[::1] values,
391+
def group_any_all(int8_t[::1] out,
392+
const int8_t[::1] values,
393393
const intp_t[:] labels,
394394
const uint8_t[::1] mask,
395395
str val_test,
396-
bint skipna) -> None:
396+
bint skipna,
397+
bint nullable) -> None:
397398
"""
398-
Aggregated boolean values to show truthfulness of group elements.
399+
Aggregated boolean values to show truthfulness of group elements. If the
400+
input is a nullable type (nullable=True), the result will be computed
401+
using Kleene logic.
399402

400403
Parameters
401404
----------
402-
out : np.ndarray[np.uint8]
405+
out : np.ndarray[np.int8]
403406
Values into which this method will write its results.
404407
labels : np.ndarray[np.intp]
405408
Array containing unique label for each group, with its
406409
ordering matching up to the corresponding record in `values`
407-
values : np.ndarray[np.uint8]
410+
values : np.ndarray[np.int8]
408411
Containing the truth value of each element.
409412
mask : np.ndarray[np.uint8]
410413
Indicating whether a value is na or not.
411414
val_test : {'any', 'all'}
412415
String object dictating whether to use any or all truth testing
413416
skipna : bool
414417
Flag to ignore nan values during truth testing
418+
nullable : bool
419+
Whether or not the input is a nullable type. If True, the
420+
result will be computed using Kleene logic
415421

416422
Notes
417423
-----
418424
This method modifies the `out` parameter rather than returning an object.
419-
The returned values will either be 0 or 1 (False or True, respectively).
425+
The returned values will either be 0, 1 (False or True, respectively), or
426+
-1 to signify a masked position in the case of a nullable input.
420427
"""
421428
cdef:
422429
Py_ssize_t i, N = len(labels)
423430
intp_t lab
424-
uint8_t flag_val
431+
int8_t flag_val
425432

426433
if val_test == 'all':
427434
# Because the 'all' value of an empty iterable in Python is True we can
@@ -444,6 +451,16 @@ def group_any_all(uint8_t[::1] out,
444451
if lab < 0 or (skipna and mask[i]):
445452
continue
446453

454+
if nullable and mask[i]:
455+
# Set the position as masked if `out[lab] != flag_val`, which
456+
# would indicate True/False has not yet been seen for any/all,
457+
# so by Kleene logic the result is currently unknown
458+
if out[lab] != flag_val:
459+
out[lab] = -1
460+
continue
461+
462+
# If True and 'any' or False and 'all', the result is
463+
# already determined
447464
if values[i] == flag_val:
448465
out[lab] = flag_val
449466

pandas/compat/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
is_numpy_dev,
1717
np_array_datetime64_compat,
1818
np_datetime64_compat,
19-
np_version_under1p17,
2019
np_version_under1p18,
2120
np_version_under1p19,
2221
np_version_under1p20,
@@ -133,7 +132,6 @@ def get_lzma_file(lzma):
133132
"is_numpy_dev",
134133
"np_array_datetime64_compat",
135134
"np_datetime64_compat",
136-
"np_version_under1p17",
137135
"np_version_under1p18",
138136
"np_version_under1p19",
139137
"np_version_under1p20",

pandas/compat/numpy/__init__.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,11 @@
88
# numpy versioning
99
_np_version = np.__version__
1010
_nlv = LooseVersion(_np_version)
11-
np_version_under1p17 = _nlv < LooseVersion("1.17")
1211
np_version_under1p18 = _nlv < LooseVersion("1.18")
1312
np_version_under1p19 = _nlv < LooseVersion("1.19")
1413
np_version_under1p20 = _nlv < LooseVersion("1.20")
1514
is_numpy_dev = ".dev" in str(_nlv)
16-
_min_numpy_ver = "1.16.5"
15+
_min_numpy_ver = "1.17.3"
1716

1817

1918
if _nlv < _min_numpy_ver:
@@ -65,6 +64,5 @@ def np_array_datetime64_compat(arr, dtype="M8[ns]"):
6564
__all__ = [
6665
"np",
6766
"_np_version",
68-
"np_version_under1p17",
6967
"is_numpy_dev",
7068
]

pandas/core/array_algos/masked_reductions.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import numpy as np
99

1010
from pandas._libs import missing as libmissing
11-
from pandas.compat import np_version_under1p17
1211

1312
from pandas.core.nanops import check_below_min_count
1413

@@ -46,11 +45,7 @@ def _sumprod(
4645
else:
4746
if check_below_min_count(values.shape, mask, min_count):
4847
return libmissing.NA
49-
50-
if np_version_under1p17:
51-
return func(values[~mask])
52-
else:
53-
return func(values, where=~mask)
48+
return func(values, where=~mask)
5449

5550

5651
def sum(

pandas/core/arrays/datetimes.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
190190
_infer_matches = ("datetime", "datetime64", "date")
191191

192192
# define my properties & methods for delegation
193-
_bool_ops = [
193+
_bool_ops: list[str] = [
194194
"is_month_start",
195195
"is_month_end",
196196
"is_quarter_start",
@@ -199,8 +199,8 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
199199
"is_year_end",
200200
"is_leap_year",
201201
]
202-
_object_ops = ["freq", "tz"]
203-
_field_ops = [
202+
_object_ops: list[str] = ["freq", "tz"]
203+
_field_ops: list[str] = [
204204
"year",
205205
"month",
206206
"day",
@@ -220,9 +220,9 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps):
220220
"microsecond",
221221
"nanosecond",
222222
]
223-
_other_ops = ["date", "time", "timetz"]
224-
_datetimelike_ops = _field_ops + _object_ops + _bool_ops + _other_ops
225-
_datetimelike_methods = [
223+
_other_ops: list[str] = ["date", "time", "timetz"]
224+
_datetimelike_ops: list[str] = _field_ops + _object_ops + _bool_ops + _other_ops
225+
_datetimelike_methods: list[str] = [
226226
"to_period",
227227
"tz_localize",
228228
"tz_convert",

pandas/core/arrays/interval.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787

8888
IntervalArrayT = TypeVar("IntervalArrayT", bound="IntervalArray")
8989

90-
_interval_shared_docs = {}
90+
_interval_shared_docs: dict[str, str] = {}
9191

9292
_shared_docs_kwargs = {
9393
"klass": "IntervalArray",

pandas/core/arrays/period.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -157,9 +157,9 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps):
157157

158158
# Names others delegate to us
159159
_other_ops: list[str] = []
160-
_bool_ops = ["is_leap_year"]
161-
_object_ops = ["start_time", "end_time", "freq"]
162-
_field_ops = [
160+
_bool_ops: list[str] = ["is_leap_year"]
161+
_object_ops: list[str] = ["start_time", "end_time", "freq"]
162+
_field_ops: list[str] = [
163163
"year",
164164
"month",
165165
"day",

pandas/core/generic.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1797,9 +1797,9 @@ def _drop_labels_or_levels(self, keys, axis: int = 0):
17971797
17981798
Parameters
17991799
----------
1800-
keys: str or list of str
1800+
keys : str or list of str
18011801
labels or levels to drop
1802-
axis: int, default 0
1802+
axis : int, default 0
18031803
Axis that levels are associated with (0 for index, 1 for columns)
18041804
18051805
Returns
@@ -6446,11 +6446,13 @@ def fillna(
64466446
)
64476447

64486448
result = self if inplace else self.copy()
6449+
is_dict = isinstance(downcast, dict)
64496450
for k, v in value.items():
64506451
if k not in result:
64516452
continue
64526453
obj = result[k]
6453-
obj.fillna(v, limit=limit, inplace=True, downcast=downcast)
6454+
downcast_k = downcast if not is_dict else downcast.get(k)
6455+
obj.fillna(v, limit=limit, inplace=True, downcast=downcast_k)
64546456
return result if not inplace else None
64556457

64566458
elif not is_list_like(value):

pandas/core/groupby/base.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
hold the allowlist of methods that are exposed on the
44
SeriesGroupBy and the DataFrameGroupBy objects.
55
"""
6+
from __future__ import annotations
7+
68
import collections
7-
from typing import List
89

910
from pandas._typing import final
1011

@@ -19,7 +20,7 @@
1920

2021

2122
class ShallowMixin(PandasObject):
22-
_attributes: List[str] = []
23+
_attributes: list[str] = []
2324

2425
@final
2526
def _shallow_copy(self, obj, **kwargs):
@@ -39,7 +40,7 @@ class GotItemMixin(PandasObject):
3940
Provide the groupby facilities to the mixed object.
4041
"""
4142

42-
_attributes: List[str]
43+
_attributes: list[str]
4344

4445
@final
4546
def _gotitem(self, key, ndim, subset=None):
@@ -106,12 +107,16 @@ def _gotitem(self, key, ndim, subset=None):
106107
| plotting_methods
107108
)
108109

109-
series_apply_allowlist = (
110+
series_apply_allowlist: frozenset[str] = (
110111
common_apply_allowlist
111-
| {"nlargest", "nsmallest", "is_monotonic_increasing", "is_monotonic_decreasing"}
112+
| frozenset(
113+
{"nlargest", "nsmallest", "is_monotonic_increasing", "is_monotonic_decreasing"}
114+
)
112115
) | frozenset(["dtype", "unique"])
113116

114-
dataframe_apply_allowlist = common_apply_allowlist | frozenset(["dtypes", "corrwith"])
117+
dataframe_apply_allowlist: frozenset[str] = common_apply_allowlist | frozenset(
118+
["dtypes", "corrwith"]
119+
)
115120

116121
# cythonized transformations or canned "agg+broadcast", which do not
117122
# require postprocessing of the result by transform.

0 commit comments

Comments
 (0)