Skip to content

Commit de71573

Browse files
committed
Merge remote-tracking branch 'upstream/master' into typ_c_parser
2 parents 9979f41 + 4bacee5 commit de71573

31 files changed

+332
-228
lines changed

doc/source/whatsnew/v0.10.0.rst

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ labeled the aggregated group with the end of the interval: the next day).
181181
``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
182182

183183
.. ipython:: python
184+
:okwarning:
184185
185186
import io
186187
@@ -197,11 +198,25 @@ labeled the aggregated group with the end of the interval: the next day).
197198
though this can be controlled by new ``true_values`` and ``false_values``
198199
arguments:
199200

200-
.. ipython:: python
201+
.. code-block:: ipython
201202
202-
print(data)
203-
pd.read_csv(io.StringIO(data))
204-
pd.read_csv(io.StringIO(data), true_values=["Yes"], false_values=["No"])
203+
In [4]: print(data)
204+
205+
a,b,c
206+
1,Yes,2
207+
3,No,4
208+
209+
In [5]: pd.read_csv(io.StringIO(data))
210+
Out[5]:
211+
a b c
212+
0 1 Yes 2
213+
1 3 No 4
214+
215+
In [6]: pd.read_csv(io.StringIO(data), true_values=["Yes"], false_values=["No"])
216+
Out[6]:
217+
a b c
218+
0 1 True 2
219+
1 3 False 4
205220
206221
- The file parsers will not recognize non-string values arising from a
207222
converter function as NA if passed in the ``na_values`` argument. It's better

doc/source/whatsnew/v1.4.0.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,14 @@ Previously, negative arguments returned empty frames.
164164
df.groupby("A").nth(slice(1, -1))
165165
df.groupby("A").nth([slice(None, 1), slice(-1, None)])
166166
167+
:meth:`.GroupBy.nth` now accepts index notation.
168+
169+
.. ipython:: python
170+
171+
df.groupby("A").nth[1, -1]
172+
df.groupby("A").nth[1:-1]
173+
df.groupby("A").nth[:1, -1:]
174+
167175
.. _whatsnew_140.dict_tight:
168176

169177
DataFrame.from_dict and DataFrame.to_dict have new ``'tight'`` option
@@ -804,6 +812,8 @@ ExtensionArray
804812
- Avoid raising ``PerformanceWarning`` about fragmented DataFrame when using many columns with an extension dtype (:issue:`44098`)
805813
- Bug in :class:`IntegerArray` and :class:`FloatingArray` construction incorrectly coercing mismatched NA values (e.g. ``np.timedelta64("NaT")``) to numeric NA (:issue:`44514`)
806814
- Bug in :meth:`BooleanArray.__eq__` and :meth:`BooleanArray.__ne__` raising ``TypeError`` on comparison with an incompatible type (like a string). This caused :meth:`DataFrame.replace` to sometimes raise a ``TypeError`` if a nullable boolean column was included (:issue:`44499`)
815+
- Bug in :func:`array` incorrectly raising when passed a ``ndarray`` with ``float16`` dtype (:issue:`44715`)
816+
- Bug in calling ``np.sqrt`` on :class:`BooleanArray` returning a malformed :class:`FloatingArray` (:issue:`44715`)
807817
- Bug in :meth:`Series.where` with ``ExtensionDtype`` when ``other`` is a NA scalar incompatible with the series dtype (e.g. ``NaT`` with a numeric dtype) incorrectly casting to a compatible NA value (:issue:`44697`)
808818
-
809819

pandas/core/arrays/_mixins.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -339,14 +339,6 @@ def fillna(
339339
# ------------------------------------------------------------------------
340340
# Reductions
341341

342-
def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
343-
meth = getattr(self, name, None)
344-
if meth:
345-
return meth(skipna=skipna, **kwargs)
346-
else:
347-
msg = f"'{type(self).__name__}' does not implement reduction '{name}'"
348-
raise TypeError(msg)
349-
350342
def _wrap_reduction_result(self, axis: int | None, result):
351343
if axis is None or self.ndim == 1:
352344
return self._box_func(result)

pandas/core/arrays/base.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1352,7 +1352,13 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
13521352
------
13531353
TypeError : subclass does not define reductions
13541354
"""
1355-
raise TypeError(f"cannot perform {name} with type {self.dtype}")
1355+
meth = getattr(self, name, None)
1356+
if meth is None:
1357+
raise TypeError(
1358+
f"'{type(self).__name__}' with dtype {self.dtype} "
1359+
f"does not support reduction '{name}'"
1360+
)
1361+
return meth(skipna=skipna, **kwargs)
13561362

13571363
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
13581364
# Incompatible types in assignment (expression has type "None", base class

pandas/core/arrays/categorical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,7 @@ def set_categories(
940940
"a future version. Removing unused categories will always "
941941
"return a new Categorical object.",
942942
FutureWarning,
943-
stacklevel=find_stack_level(),
943+
stacklevel=2,
944944
)
945945
else:
946946
inplace = False

pandas/core/arrays/floating.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,10 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
254254
"values should be floating numpy array. Use "
255255
"the 'pd.array' function instead"
256256
)
257+
if values.dtype == np.float16:
258+
# If we don't raise here, then accessing self.dtype would raise
259+
raise TypeError("FloatingArray does not support np.float16 dtype.")
260+
257261
super().__init__(values, mask, copy=copy)
258262

259263
@classmethod

pandas/core/arrays/masked.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,10 @@ def reconstruct(x):
466466
return IntegerArray(x, m)
467467
elif is_float_dtype(x.dtype):
468468
m = mask.copy()
469+
if x.dtype == np.float16:
470+
# reached in e.g. np.sqrt on BooleanArray
471+
# we don't support float16
472+
x = x.astype(np.float32)
469473
return FloatingArray(x, m)
470474
else:
471475
x[mask] = np.nan

pandas/core/arrays/sparse/array.py

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1363,13 +1363,6 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
13631363
else:
13641364
arr = self.dropna()
13651365

1366-
# we don't support these kwargs.
1367-
# They should only be present when called via pandas, so do it here.
1368-
# instead of in `any` / `all` (which will raise if they're present,
1369-
# thanks to nv.validate
1370-
kwargs.pop("filter_type", None)
1371-
kwargs.pop("numeric_only", None)
1372-
kwargs.pop("op", None)
13731366
return getattr(arr, name)(**kwargs)
13741367

13751368
def all(self, axis=None, *args, **kwargs):

pandas/core/arrays/string_arrow.py

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -335,12 +335,6 @@ def _as_pandas_scalar(self, arrow_scalar: pa.Scalar):
335335
else:
336336
return scalar
337337

338-
def _reduce(self, name: str, skipna: bool = True, **kwargs):
339-
if name in ["min", "max"]:
340-
return getattr(self, name)(skipna=skipna)
341-
342-
raise TypeError(f"Cannot perform reduction '{name}' with string dtype")
343-
344338
@property
345339
def nbytes(self) -> int:
346340
"""

pandas/core/construction.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,12 @@ def array(
353353
elif inferred_dtype == "integer":
354354
return IntegerArray._from_sequence(data, copy=copy)
355355

356-
elif inferred_dtype in ("floating", "mixed-integer-float"):
356+
elif (
357+
inferred_dtype in ("floating", "mixed-integer-float")
358+
and getattr(data, "dtype", None) != np.float16
359+
):
360+
# GH#44715 Exclude np.float16 bc FloatingArray does not support it;
361+
# we will fall back to PandasArray.
357362
return FloatingArray._from_sequence(data, copy=copy)
358363

359364
elif inferred_dtype == "boolean":

pandas/core/frame.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9183,6 +9183,29 @@ def join(
91839183
3 K3 A3 NaN
91849184
4 K4 A4 NaN
91859185
5 K5 A5 NaN
9186+
9187+
Using non-unique key values shows how they are matched.
9188+
9189+
>>> df = pd.DataFrame({'key': ['K0', 'K1', 'K1', 'K3', 'K0', 'K1'],
9190+
... 'A': ['A0', 'A1', 'A2', 'A3', 'A4', 'A5']})
9191+
9192+
>>> df
9193+
key A
9194+
0 K0 A0
9195+
1 K1 A1
9196+
2 K1 A2
9197+
3 K3 A3
9198+
4 K0 A4
9199+
5 K1 A5
9200+
9201+
>>> df.join(other.set_index('key'), on='key')
9202+
key A B
9203+
0 K0 A0 B0
9204+
1 K1 A1 B1
9205+
2 K1 A2 B1
9206+
3 K3 A3 NaN
9207+
4 K0 A4 B0
9208+
5 K1 A5 B1
91869209
"""
91879210
return self._join_compat(
91889211
other, on=on, how=how, lsuffix=lsuffix, rsuffix=rsuffix, sort=sort

pandas/core/groupby/groupby.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,10 @@ class providing the base-class of operations.
100100
numba_,
101101
ops,
102102
)
103-
from pandas.core.groupby.indexing import GroupByIndexingMixin
103+
from pandas.core.groupby.indexing import (
104+
GroupByIndexingMixin,
105+
GroupByNthSelector,
106+
)
104107
from pandas.core.indexes.api import (
105108
CategoricalIndex,
106109
Index,
@@ -902,6 +905,15 @@ def __getattr__(self, attr: str):
902905
f"'{type(self).__name__}' object has no attribute '{attr}'"
903906
)
904907

908+
def __getattribute__(self, attr: str):
909+
# Intercept nth to allow both call and index
910+
if attr == "nth":
911+
return GroupByNthSelector(self)
912+
elif attr == "nth_actual":
913+
return super().__getattribute__("nth")
914+
else:
915+
return super().__getattribute__(attr)
916+
905917
@final
906918
def _make_wrapper(self, name: str) -> Callable:
907919
assert name in self._apply_allowlist
@@ -2524,6 +2536,9 @@ def nth(
25242536
"""
25252537
Take the nth row from each group if n is an int, otherwise a subset of rows.
25262538
2539+
Can be either a call or an index. dropna is not available with index notation.
2540+
Index notation accepts a comma separated list of integers and slices.
2541+
25272542
If dropna, will take the nth non-null row, dropna is either
25282543
'all' or 'any'; this is equivalent to calling dropna(how=dropna)
25292544
before the groupby.
@@ -2535,6 +2550,7 @@ def nth(
25352550
25362551
.. versionchanged:: 1.4.0
25372552
Added slice and lists containing slices.
2553+
Added index notation.
25382554
25392555
dropna : {'any', 'all', None}, default None
25402556
Apply the specified dropna operation before counting which row is
@@ -2580,6 +2596,22 @@ def nth(
25802596
1 2.0
25812597
2 3.0
25822598
2599+
Index notation may also be used
2600+
2601+
>>> g.nth[0, 1]
2602+
B
2603+
A
2604+
1 NaN
2605+
1 2.0
2606+
2 3.0
2607+
2 5.0
2608+
>>> g.nth[:-1]
2609+
B
2610+
A
2611+
1 NaN
2612+
1 2.0
2613+
2 3.0
2614+
25832615
Specifying `dropna` allows count ignoring ``NaN``
25842616
25852617
>>> g.nth(0, dropna='any')

pandas/core/groupby/indexing.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from typing import (
44
TYPE_CHECKING,
55
Iterable,
6+
Literal,
67
cast,
78
)
89

@@ -281,3 +282,22 @@ def __getitem__(self, arg: PositionalIndexer | tuple) -> DataFrame | Series:
281282
self.groupby_object._reset_group_selection()
282283
mask = self.groupby_object._make_mask_from_positional_indexer(arg)
283284
return self.groupby_object._mask_selected_obj(mask)
285+
286+
287+
class GroupByNthSelector:
288+
"""
289+
Dynamically substituted for GroupBy.nth to enable both call and index
290+
"""
291+
292+
def __init__(self, groupby_object: groupby.GroupBy):
293+
self.groupby_object = groupby_object
294+
295+
def __call__(
296+
self,
297+
n: PositionalIndexer | tuple,
298+
dropna: Literal["any", "all", None] = None,
299+
) -> DataFrame | Series:
300+
return self.groupby_object.nth_actual(n, dropna)
301+
302+
def __getitem__(self, n: PositionalIndexer | tuple) -> DataFrame | Series:
303+
return self.groupby_object.nth_actual(n)

pandas/core/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7016,7 +7016,7 @@ def _maybe_cast_data_without_dtype(
70167016
"In a future version, the Index constructor will not infer numeric "
70177017
"dtypes when passed object-dtype sequences (matching Series behavior)",
70187018
FutureWarning,
7019-
stacklevel=find_stack_level(),
7019+
stacklevel=3,
70207020
)
70217021
if result.dtype.kind in ["b", "c"]:
70227022
return subarr

pandas/io/excel/_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1000,7 +1000,7 @@ def __init__(
10001000

10011001
# cast ExcelWriter to avoid adding 'if self.handles is not None'
10021002
self.handles = IOHandles(
1003-
cast(IO[bytes], path), compression={"copression": None}
1003+
cast(IO[bytes], path), compression={"compression": None}
10041004
)
10051005
if not isinstance(path, ExcelWriter):
10061006
self.handles = get_handle(

pandas/tests/arrays/boolean/test_function.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,17 @@ def test_ufuncs_unary(ufunc):
6666
tm.assert_series_equal(result, expected)
6767

6868

69+
def test_ufunc_numeric():
70+
# np.sqrt on np.bool returns float16, which we upcast to Float32
71+
# bc we do not have Float16
72+
arr = pd.array([True, False, None], dtype="boolean")
73+
74+
res = np.sqrt(arr)
75+
76+
expected = pd.array([1, 0, None], dtype="Float32")
77+
tm.assert_extension_array_equal(res, expected)
78+
79+
6980
@pytest.mark.parametrize("values", [[True, False], [True, None]])
7081
def test_ufunc_reduce_raises(values):
7182
a = pd.array(values, dtype="boolean")

pandas/tests/arrays/categorical/test_operators.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -371,17 +371,15 @@ def test_numeric_like_ops(self):
371371
# min/max)
372372
s = df["value_group"]
373373
for op in ["kurt", "skew", "var", "std", "mean", "sum", "median"]:
374-
msg = f"'Categorical' does not implement reduction '{op}'"
374+
msg = f"does not support reduction '{op}'"
375375
with pytest.raises(TypeError, match=msg):
376376
getattr(s, op)(numeric_only=False)
377377

378378
# mad technically works because it takes always the numeric data
379379

380380
# numpy ops
381381
s = Series(Categorical([1, 2, 3, 4]))
382-
with pytest.raises(
383-
TypeError, match="'Categorical' does not implement reduction 'sum'"
384-
):
382+
with pytest.raises(TypeError, match="does not support reduction 'sum'"):
385383
np.sum(s)
386384

387385
# numeric ops on a Series

pandas/tests/arrays/floating/test_construction.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,13 @@
1+
import locale
2+
13
import numpy as np
24
import pytest
35

6+
from pandas.compat import (
7+
is_platform_windows,
8+
np_version_under1p19,
9+
)
10+
411
import pandas as pd
512
import pandas._testing as tm
613
from pandas.core.arrays import FloatingArray
@@ -40,6 +47,33 @@ def test_floating_array_constructor():
4047
FloatingArray(values)
4148

4249

50+
def test_floating_array_disallows_float16(request):
51+
# GH#44715
52+
arr = np.array([1, 2], dtype=np.float16)
53+
mask = np.array([False, False])
54+
55+
msg = "FloatingArray does not support np.float16 dtype"
56+
with pytest.raises(TypeError, match=msg):
57+
FloatingArray(arr, mask)
58+
59+
if not np_version_under1p19:
60+
# Troubleshoot
61+
# https://github.com/numpy/numpy/issues/20512#issuecomment-985807740
62+
lowered = np.core._type_aliases.english_lower("Float16")
63+
assert lowered == "float16", lowered
64+
65+
if np_version_under1p19 or (
66+
locale.getlocale()[0] != "en_US" and not is_platform_windows()
67+
):
68+
# the locale condition may need to be refined; this fails on
69+
# the CI in the ZH_CN build
70+
mark = pytest.mark.xfail(reason="numpy does not raise on np.dtype('Float16')")
71+
request.node.add_marker(mark)
72+
73+
with pytest.raises(TypeError, match="data type 'Float16' not understood"):
74+
pd.array([1.0, 2.0], dtype="Float16")
75+
76+
4377
def test_floating_array_constructor_copy():
4478
values = np.array([1, 2, 3, 4], dtype="float64")
4579
mask = np.array([False, False, False, True], dtype="bool")

0 commit comments

Comments
 (0)