Skip to content

Commit eb8bc34

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into ref-numeric-validate_fill_value
2 parents 631cf65 + 03b9ad8 commit eb8bc34

File tree

23 files changed

+757
-447
lines changed

23 files changed

+757
-447
lines changed

ci/deps/azure-39.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ dependencies:
1515
- numpy
1616
- python-dateutil
1717
- pytz
18+
19+
# optional dependencies
20+
- pytables
21+
- scipy
22+
- pyarrow=1.0

doc/source/user_guide/groupby.rst

+9
Original file line numberDiff line numberDiff line change
@@ -524,6 +524,15 @@ index are the group names and whose values are the sizes of each group.
524524
525525
grouped.describe()
526526
527+
Another aggregation example is to compute the number of unique values of each group. This is similar to the ``value_counts`` function, except that it only counts unique values.
528+
529+
.. ipython:: python
530+
531+
ll = [['foo', 1], ['foo', 2], ['foo', 2], ['bar', 1], ['bar', 1]]
532+
df4 = pd.DataFrame(ll, columns=["A", "B"])
533+
df4
534+
df4.groupby("A")["B"].nunique()
535+
527536
.. note::
528537

529538
Aggregation functions **will not** return the groups that you are aggregating over

doc/source/whatsnew/v1.1.5.rst

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Bug fixes
2828
- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
2929
- Bug in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`)
3030
- Bug in :class:`RollingGroupby` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`)
31+
- Bug in pytables methods in python 3.9 (:issue:`38041`)
3132
-
3233

3334
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.2.0.rst

+151-144
Large diffs are not rendered by default.

pandas/core/algorithms.py

+33-21
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@
4949
)
5050
from pandas.core.dtypes.generic import (
5151
ABCExtensionArray,
52-
ABCIndex,
5352
ABCIndexClass,
5453
ABCMultiIndex,
5554
ABCSeries,
@@ -69,7 +68,7 @@
6968
# dtype access #
7069
# --------------- #
7170
def _ensure_data(
72-
values, dtype: Optional[DtypeObj] = None
71+
values: ArrayLike, dtype: Optional[DtypeObj] = None
7372
) -> Tuple[np.ndarray, DtypeObj]:
7473
"""
7574
routine to ensure that our data is of the correct
@@ -95,6 +94,12 @@ def _ensure_data(
9594
pandas_dtype : np.dtype or ExtensionDtype
9695
"""
9796

97+
if dtype is not None:
98+
# We only have non-None dtype when called from `isin`, and
99+
# both Datetimelike and Categorical dispatch before getting here.
100+
assert not needs_i8_conversion(dtype)
101+
assert not is_categorical_dtype(dtype)
102+
98103
if not isinstance(values, ABCMultiIndex):
99104
# extract_array would raise
100105
values = extract_array(values, extract_numpy=True)
@@ -131,21 +136,20 @@ def _ensure_data(
131136
return ensure_object(values), np.dtype("object")
132137

133138
# datetimelike
134-
vals_dtype = getattr(values, "dtype", None)
135-
if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
136-
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
139+
if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype):
140+
if is_period_dtype(values.dtype) or is_period_dtype(dtype):
137141
from pandas import PeriodIndex
138142

139-
values = PeriodIndex(values)
143+
values = PeriodIndex(values)._data
140144
dtype = values.dtype
141-
elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
145+
elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype):
142146
from pandas import TimedeltaIndex
143147

144-
values = TimedeltaIndex(values)
148+
values = TimedeltaIndex(values)._data
145149
dtype = values.dtype
146150
else:
147151
# Datetime
148-
if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
152+
if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype):
149153
# Avoid calling the DatetimeIndex constructor as it is 1D only
150154
# Note: this is reached by DataFrame.rank calls GH#27027
151155
# TODO(EA2D): special case not needed with 2D EAs
@@ -155,12 +159,12 @@ def _ensure_data(
155159

156160
from pandas import DatetimeIndex
157161

158-
values = DatetimeIndex(values)
162+
values = DatetimeIndex(values)._data
159163
dtype = values.dtype
160164

161165
return values.asi8, dtype
162166

163-
elif is_categorical_dtype(vals_dtype) and (
167+
elif is_categorical_dtype(values.dtype) and (
164168
is_categorical_dtype(dtype) or dtype is None
165169
):
166170
values = values.codes
@@ -237,11 +241,11 @@ def _ensure_arraylike(values):
237241
}
238242

239243

240-
def _get_hashtable_algo(values):
244+
def _get_hashtable_algo(values: np.ndarray):
241245
"""
242246
Parameters
243247
----------
244-
values : arraylike
248+
values : np.ndarray
245249
246250
Returns
247251
-------
@@ -255,15 +259,15 @@ def _get_hashtable_algo(values):
255259
return htable, values
256260

257261

258-
def _get_values_for_rank(values):
262+
def _get_values_for_rank(values: ArrayLike):
259263
if is_categorical_dtype(values):
260-
values = values._values_for_rank()
264+
values = cast("Categorical", values)._values_for_rank()
261265

262266
values, _ = _ensure_data(values)
263267
return values
264268

265269

266-
def get_data_algo(values):
270+
def get_data_algo(values: ArrayLike):
267271
values = _get_values_for_rank(values)
268272

269273
ndtype = _check_object_for_strings(values)
@@ -421,20 +425,28 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
421425
f"to isin(), you passed a [{type(values).__name__}]"
422426
)
423427

424-
if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
428+
if not isinstance(
429+
values, (ABCIndexClass, ABCSeries, ABCExtensionArray, np.ndarray)
430+
):
425431
values = construct_1d_object_array_from_listlike(list(values))
426432
# TODO: could use ensure_arraylike here
433+
elif isinstance(values, ABCMultiIndex):
434+
# Avoid raising in extract_array
435+
values = np.array(values)
427436

428437
comps = _ensure_arraylike(comps)
429438
comps = extract_array(comps, extract_numpy=True)
430-
if is_categorical_dtype(comps):
439+
if is_categorical_dtype(comps.dtype):
431440
# TODO(extension)
432441
# handle categoricals
433442
return cast("Categorical", comps).isin(values)
434443

435-
if needs_i8_conversion(comps):
444+
if needs_i8_conversion(comps.dtype):
436445
# Dispatch to DatetimeLikeArrayMixin.isin
437446
return array(comps).isin(values)
447+
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):
448+
# e.g. comps are integers and values are datetime64s
449+
return np.zeros(comps.shape, dtype=bool)
438450

439451
comps, dtype = _ensure_data(comps)
440452
values, _ = _ensure_data(values, dtype=dtype)
@@ -474,7 +486,7 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
474486

475487

476488
def factorize_array(
477-
values, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None
489+
values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None
478490
) -> Tuple[np.ndarray, np.ndarray]:
479491
"""
480492
Factorize an array-like to codes and uniques.
@@ -838,7 +850,7 @@ def value_counts_arraylike(values, dropna: bool):
838850
return keys, counts
839851

840852

841-
def duplicated(values, keep="first") -> np.ndarray:
853+
def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
842854
"""
843855
Return boolean ndarray denoting duplicate values.
844856

pandas/core/arraylike.py

+143-1
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,15 @@
55
ExtensionArray
66
"""
77
import operator
8+
from typing import Any, Callable
9+
import warnings
810

9-
from pandas.core.ops import roperator
11+
import numpy as np
12+
13+
from pandas._libs import lib
14+
15+
from pandas.core.construction import extract_array
16+
from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator
1017
from pandas.core.ops.common import unpack_zerodim_and_defer
1118

1219

@@ -140,3 +147,138 @@ def __pow__(self, other):
140147
@unpack_zerodim_and_defer("__rpow__")
141148
def __rpow__(self, other):
142149
return self._arith_method(other, roperator.rpow)
150+
151+
152+
def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
153+
"""
154+
Compatibility with numpy ufuncs.
155+
156+
See also
157+
--------
158+
numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
159+
"""
160+
from pandas.core.generic import NDFrame
161+
from pandas.core.internals import BlockManager
162+
163+
cls = type(self)
164+
165+
# for binary ops, use our custom dunder methods
166+
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
167+
if result is not NotImplemented:
168+
return result
169+
170+
# Determine if we should defer.
171+
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
172+
173+
for item in inputs:
174+
higher_priority = (
175+
hasattr(item, "__array_priority__")
176+
and item.__array_priority__ > self.__array_priority__
177+
)
178+
has_array_ufunc = (
179+
hasattr(item, "__array_ufunc__")
180+
and type(item).__array_ufunc__ not in no_defer
181+
and not isinstance(item, self._HANDLED_TYPES)
182+
)
183+
if higher_priority or has_array_ufunc:
184+
return NotImplemented
185+
186+
# align all the inputs.
187+
types = tuple(type(x) for x in inputs)
188+
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
189+
190+
if len(alignable) > 1:
191+
# This triggers alignment.
192+
# At the moment, there aren't any ufuncs with more than two inputs
193+
# so this ends up just being x1.index | x2.index, but we write
194+
# it to handle *args.
195+
196+
if len(set(types)) > 1:
197+
# We currently don't handle ufunc(DataFrame, Series)
198+
# well. Previously this raised an internal ValueError. We might
199+
# support it someday, so raise a NotImplementedError.
200+
raise NotImplementedError(
201+
"Cannot apply ufunc {} to mixed DataFrame and Series "
202+
"inputs.".format(ufunc)
203+
)
204+
axes = self.axes
205+
for obj in alignable[1:]:
206+
# this relies on the fact that we aren't handling mixed
207+
# series / frame ufuncs.
208+
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
209+
axes[i] = ax1.union(ax2)
210+
211+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
212+
inputs = tuple(
213+
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
214+
for x, t in zip(inputs, types)
215+
)
216+
else:
217+
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
218+
219+
if self.ndim == 1:
220+
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
221+
name = names[0] if len(set(names)) == 1 else None
222+
reconstruct_kwargs = {"name": name}
223+
else:
224+
reconstruct_kwargs = {}
225+
226+
def reconstruct(result):
227+
if lib.is_scalar(result):
228+
return result
229+
if result.ndim != self.ndim:
230+
if method == "outer":
231+
if self.ndim == 2:
232+
# we already deprecated for Series
233+
msg = (
234+
"outer method for ufunc {} is not implemented on "
235+
"pandas objects. Returning an ndarray, but in the "
236+
"future this will raise a 'NotImplementedError'. "
237+
"Consider explicitly converting the DataFrame "
238+
"to an array with '.to_numpy()' first."
239+
)
240+
warnings.warn(msg.format(ufunc), FutureWarning, stacklevel=4)
241+
return result
242+
raise NotImplementedError
243+
return result
244+
if isinstance(result, BlockManager):
245+
# we went through BlockManager.apply
246+
result = self._constructor(result, **reconstruct_kwargs, copy=False)
247+
else:
248+
# we converted an array, lost our axes
249+
result = self._constructor(
250+
result, **reconstruct_axes, **reconstruct_kwargs, copy=False
251+
)
252+
# TODO: When we support multiple values in __finalize__, this
253+
# should pass alignable to `__finalize__` instead of self.
254+
# Then `np.add(a, b)` would consider attrs from both a and b
255+
# when a and b are NDFrames.
256+
if len(alignable) == 1:
257+
result = result.__finalize__(self)
258+
return result
259+
260+
if self.ndim > 1 and (
261+
len(inputs) > 1 or ufunc.nout > 1 # type: ignore[attr-defined]
262+
):
263+
# Just give up on preserving types in the complex case.
264+
# In theory we could preserve them for them.
265+
# * nout>1 is doable if BlockManager.apply took nout and
266+
# returned a Tuple[BlockManager].
267+
# * len(inputs) > 1 is doable when we know that we have
268+
# aligned blocks / dtypes.
269+
inputs = tuple(np.asarray(x) for x in inputs)
270+
result = getattr(ufunc, method)(*inputs)
271+
elif self.ndim == 1:
272+
# ufunc(series, ...)
273+
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
274+
result = getattr(ufunc, method)(*inputs, **kwargs)
275+
else:
276+
# ufunc(dataframe)
277+
mgr = inputs[0]._mgr
278+
result = mgr.apply(getattr(ufunc, method))
279+
280+
if ufunc.nout > 1: # type: ignore[attr-defined]
281+
result = tuple(reconstruct(x) for x in result)
282+
else:
283+
result = reconstruct(result)
284+
return result

pandas/core/computation/pytables.py

+4
Original file line numberDiff line numberDiff line change
@@ -430,6 +430,10 @@ def visit_Subscript(self, node, **kwargs):
430430
except AttributeError:
431431
pass
432432

433+
if isinstance(slobj, Term):
434+
# In py39 np.ndarray lookups with Term containing int raise
435+
slobj = slobj.value
436+
433437
try:
434438
return self.const_type(value[slobj], self.env)
435439
except TypeError as err:

0 commit comments

Comments
 (0)