Skip to content

Commit 03829ce

Browse files
committed
Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels
2 parents 6ab5843 + ee42275 commit 03829ce

File tree

18 files changed

+180
-32
lines changed

18 files changed

+180
-32
lines changed

pandas/_typing.py

+13 -5
Original file line number | Diff line number | Diff line change
@@ -23,21 +23,29 @@
2323
from pandas.core.indexes.base import Index # noqa: F401
2424
from pandas.core.series import Series # noqa: F401
2525
from pandas.core.generic import NDFrame # noqa: F401
26+
from pandas import Interval # noqa: F401
2627

28+
# array-like
2729

2830
AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
2931
ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray)
32+
33+
# scalars
34+
35+
PythonScalar = Union[str, int, float, bool]
3036
DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta")
37+
PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
38+
Scalar = Union[PythonScalar, PandasScalar]
39+
40+
# other
41+
3142
Dtype = Union[str, np.dtype, "ExtensionDtype"]
3243
FilePathOrBuffer = Union[str, Path, IO[AnyStr]]
33-
3444
FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")
35-
Scalar = Union[str, int, float, bool]
3645
Axis = Union[str, int]
3746
Ordered = Optional[bool]
38-
JSONSerializable = Union[Scalar, List, Dict]
39-
47+
JSONSerializable = Union[PythonScalar, List, Dict]
4048
Axes = Collection
4149

4250
# to maintain type information across generic functions and parametrization
43-
_T = TypeVar("_T")
51+
T = TypeVar("T")

pandas/core/arrays/categorical.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
11
import operator
22
from shutil import get_terminal_size
3-
from typing import Type, Union, cast
3+
from typing import Dict, Hashable, List, Type, Union, cast
44
from warnings import warn
55

66
import numpy as np
77

88
from pandas._config import get_option
99

1010
from pandas._libs import algos as libalgos, hashtable as htable
11-
from pandas._typing import ArrayLike, Dtype, Ordered
11+
from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
1212
from pandas.compat.numpy import function as nv
1313
from pandas.util._decorators import (
1414
Appender,
@@ -511,7 +511,7 @@ def itemsize(self) -> int:
511511
"""
512512
return self.categories.itemsize
513513

514-
def tolist(self) -> list:
514+
def tolist(self) -> List[Scalar]:
515515
"""
516516
Return a list of the values.
517517
@@ -2067,7 +2067,7 @@ def __setitem__(self, key, value):
20672067
lindexer = self._maybe_coerce_indexer(lindexer)
20682068
self._codes[key] = lindexer
20692069

2070-
def _reverse_indexer(self):
2070+
def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
20712071
"""
20722072
Compute the inverse of a categorical, returning
20732073
a dict of categories -> indexers.
@@ -2097,8 +2097,8 @@ def _reverse_indexer(self):
20972097
self.codes.astype("int64"), categories.size
20982098
)
20992099
counts = counts.cumsum()
2100-
result = (r[start:end] for start, end in zip(counts, counts[1:]))
2101-
result = dict(zip(categories, result))
2100+
_result = (r[start:end] for start, end in zip(counts, counts[1:]))
2101+
result = dict(zip(categories, _result))
21022102
return result
21032103

21042104
# reduction ops #

pandas/core/common.py

+3 -2
Original file line number | Diff line number | Diff line change
@@ -9,11 +9,12 @@
99
from datetime import datetime, timedelta
1010
from functools import partial
1111
import inspect
12-
from typing import Any, Iterable, Union
12+
from typing import Any, Collection, Iterable, Union
1313

1414
import numpy as np
1515

1616
from pandas._libs import lib, tslibs
17+
from pandas._typing import T
1718

1819
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
1920
from pandas.core.dtypes.common import (
@@ -270,7 +271,7 @@ def maybe_make_list(obj):
270271
return obj
271272

272273

273-
def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
274+
def maybe_iterable_to_list(obj: Union[Iterable[T], T]) -> Union[Collection[T], T]:
274275
"""
275276
If obj is Iterable but not list-like, consume into list.
276277
"""

pandas/core/dtypes/base.py

+6 -4
Original file line number | Diff line number | Diff line change
@@ -276,10 +276,12 @@ def is_dtype(cls, dtype) -> bool:
276276
return False
277277
elif isinstance(dtype, cls):
278278
return True
279-
try:
280-
return cls.construct_from_string(dtype) is not None
281-
except TypeError:
282-
return False
279+
if isinstance(dtype, str):
280+
try:
281+
return cls.construct_from_string(dtype) is not None
282+
except TypeError:
283+
return False
284+
return False
283285

284286
@property
285287
def _is_numeric(self) -> bool:

pandas/core/dtypes/dtypes.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -882,7 +882,11 @@ def construct_from_string(cls, string):
882882
return cls(freq=string)
883883
except ValueError:
884884
pass
885-
raise TypeError(f"Cannot construct a 'PeriodDtype' from '{string}'")
885+
if isinstance(string, str):
886+
msg = f"Cannot construct a 'PeriodDtype' from '{string}'"
887+
else:
888+
msg = f"'construct_from_string' expects a string, got {type(string)}"
889+
raise TypeError(msg)
886890

887891
def __str__(self) -> str_type:
888892
return self.name

pandas/core/groupby/grouper.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
split-apply-combine paradigm.
44
"""
55

6-
from typing import Hashable, List, Optional, Tuple
6+
from typing import Dict, Hashable, List, Optional, Tuple
77

88
import numpy as np
99

@@ -419,7 +419,7 @@ def _make_codes(self) -> None:
419419
self._group_index = uniques
420420

421421
@cache_readonly
422-
def groups(self) -> dict:
422+
def groups(self) -> Dict[Hashable, np.ndarray]:
423423
return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
424424

425425

pandas/core/indexes/base.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime
22
import operator
33
from textwrap import dedent
4-
from typing import FrozenSet, Hashable, Optional, Union
4+
from typing import Dict, FrozenSet, Hashable, Optional, Union
55
import warnings
66

77
import numpy as np
@@ -4594,7 +4594,7 @@ def _maybe_promote(self, other):
45944594
return self.astype("object"), other.astype("object")
45954595
return self, other
45964596

4597-
def groupby(self, values):
4597+
def groupby(self, values) -> Dict[Hashable, np.ndarray]:
45984598
"""
45994599
Group the index labels by a given array of values.
46004600
@@ -4605,7 +4605,7 @@ def groupby(self, values):
46054605
46064606
Returns
46074607
-------
4608-
groups : dict
4608+
dict
46094609
{group name -> group labels}
46104610
"""
46114611

pandas/core/indexing.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
from typing import Tuple
1+
from typing import Hashable, List, Tuple, Union
22

33
import numpy as np
44

@@ -2224,7 +2224,7 @@ def _convert_key(self, key, is_setter: bool = False):
22242224
return key
22252225

22262226

2227-
def _tuplify(ndim: int, loc) -> tuple:
2227+
def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]:
22282228
"""
22292229
Given an indexer for the first dimension, create an equivalent tuple
22302230
for indexing over all dimensions.
@@ -2238,9 +2238,10 @@ def _tuplify(ndim: int, loc) -> tuple:
22382238
-------
22392239
tuple
22402240
"""
2241-
tup = [slice(None, None) for _ in range(ndim)]
2242-
tup[0] = loc
2243-
return tuple(tup)
2241+
_tup: List[Union[Hashable, slice]]
2242+
_tup = [slice(None, None) for _ in range(ndim)]
2243+
_tup[0] = loc
2244+
return tuple(_tup)
22442245

22452246

22462247
def convert_to_index_sliceable(obj, key):

pandas/io/pytables.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1452,7 +1452,7 @@ def copy(
14521452
data = self.select(k)
14531453
if isinstance(s, Table):
14541454

1455-
index: Union[bool, list] = False
1455+
index: Union[bool, List[str]] = False
14561456
if propindexes:
14571457
index = [a.name for a in s.axes if a.is_indexed]
14581458
new_store.append(

pandas/tests/dtypes/test_dtypes.py

+3
Original file line number | Diff line number | Diff line change
@@ -408,6 +408,9 @@ def test_construction_from_string(self):
408408
with pytest.raises(TypeError):
409409
PeriodDtype.construct_from_string("datetime64[ns, US/Eastern]")
410410

411+
with pytest.raises(TypeError, match="list"):
412+
PeriodDtype.construct_from_string([1, 2, 3])
413+
411414
def test_is_dtype(self):
412415
assert PeriodDtype.is_dtype(self.dtype)
413416
assert PeriodDtype.is_dtype("period[D]")

pandas/tests/extension/base/dtype.py

+3
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,9 @@ def test_is_dtype_from_self(self, dtype):
3737
result = type(dtype).is_dtype(dtype)
3838
assert result is True
3939

40+
def test_is_dtype_other_input(self, dtype):
41+
assert dtype.is_dtype([1, 2, 3]) is False
42+
4043
def test_is_not_string_type(self, dtype):
4144
return not pd.api.types.is_string_dtype(dtype)
4245

pandas/tests/frame/methods/test_replace.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from datetime import datetime
22
from io import StringIO
33
import re
4-
from typing import Dict
4+
from typing import Dict, List, Union
55

66
import numpy as np
77
import pytest
@@ -12,12 +12,12 @@
1212

1313

1414
@pytest.fixture
15-
def mix_ab() -> Dict[str, list]:
15+
def mix_ab() -> Dict[str, List[Union[int, str]]]:
1616
return {"a": list(range(4)), "b": list("ab..")}
1717

1818

1919
@pytest.fixture
20-
def mix_abc() -> Dict[str, list]:
20+
def mix_abc() -> Dict[str, List[Union[float, str]]]:
2121
return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
2222

2323

pandas/tests/groupby/test_groupby.py

+14
Original file line number | Diff line number | Diff line change
@@ -586,6 +586,20 @@ def test_groupby_multiple_columns(df, op):
586586
tm.assert_series_equal(result, expected)
587587

588588

589+
def test_as_index_select_column():
590+
# GH 5764
591+
df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"])
592+
result = df.groupby("A", as_index=False)["B"].get_group(1)
593+
expected = pd.Series([2, 4], name="B")
594+
tm.assert_series_equal(result, expected)
595+
596+
result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum())
597+
expected = pd.Series(
598+
[2, 6, 6], name="B", index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)])
599+
)
600+
tm.assert_series_equal(result, expected)
601+
602+
589603
def test_groupby_as_index_agg(df):
590604
grouped = df.groupby("A", as_index=False)
591605

pandas/tests/groupby/test_transform.py

+67
Original file line number | Diff line number | Diff line change
@@ -1103,3 +1103,70 @@ def test_transform_lambda_with_datetimetz():
11031103
name="time",
11041104
)
11051105
tm.assert_series_equal(result, expected)
1106+
1107+
1108+
def test_transform_fastpath_raises():
1109+
# GH#29631 case where fastpath defined in groupby.generic _choose_path
1110+
# raises, but slow_path does not
1111+
1112+
df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]})
1113+
gb = df.groupby("A")
1114+
1115+
def func(grp):
1116+
# we want a function such that func(frame) fails but func.apply(frame)
1117+
# works
1118+
if grp.ndim == 2:
1119+
# Ensure that fast_path fails
1120+
raise NotImplementedError("Don't cross the streams")
1121+
return grp * 2
1122+
1123+
# Check that the fastpath raises, see _transform_general
1124+
obj = gb._obj_with_exclusions
1125+
gen = gb.grouper.get_iterator(obj, axis=gb.axis)
1126+
fast_path, slow_path = gb._define_paths(func)
1127+
_, group = next(gen)
1128+
1129+
with pytest.raises(NotImplementedError, match="Don't cross the streams"):
1130+
fast_path(group)
1131+
1132+
result = gb.transform(func)
1133+
1134+
expected = pd.DataFrame([2, -2, 2, 4], columns=["B"])
1135+
tm.assert_frame_equal(result, expected)
1136+
1137+
1138+
def test_transform_lambda_indexing():
1139+
# GH 7883
1140+
df = pd.DataFrame(
1141+
{
1142+
"A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"],
1143+
"B": ["one", "one", "two", "three", "two", "six", "five", "three"],
1144+
"C": range(8),
1145+
"D": range(8),
1146+
"E": range(8),
1147+
}
1148+
)
1149+
df = df.set_index(["A", "B"])
1150+
df = df.sort_index()
1151+
result = df.groupby(level="A").transform(lambda x: x.iloc[-1])
1152+
expected = DataFrame(
1153+
{
1154+
"C": [3, 3, 7, 7, 4, 4, 4, 4],
1155+
"D": [3, 3, 7, 7, 4, 4, 4, 4],
1156+
"E": [3, 3, 7, 7, 4, 4, 4, 4],
1157+
},
1158+
index=MultiIndex.from_tuples(
1159+
[
1160+
("bar", "one"),
1161+
("bar", "three"),
1162+
("flux", "six"),
1163+
("flux", "three"),
1164+
("foo", "five"),
1165+
("foo", "one"),
1166+
("foo", "two"),
1167+
("foo", "two"),
1168+
],
1169+
names=["A", "B"],
1170+
),
1171+
)
1172+
tm.assert_frame_equal(result, expected)

pandas/tests/indexes/test_numeric.py

+6
Original file line number | Diff line number | Diff line change
@@ -736,6 +736,12 @@ def test_get_indexer(self):
736736
expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
737737
tm.assert_numpy_array_equal(indexer, expected)
738738

739+
def test_get_indexer_nan(self):
740+
# GH 7820
741+
result = Index([1, 2, np.nan]).get_indexer([np.nan])
742+
expected = np.array([2], dtype=np.intp)
743+
tm.assert_numpy_array_equal(result, expected)
744+
739745
def test_intersection(self):
740746
index = self.create_index()
741747
other = Index([1, 2, 3, 4, 5])

pandas/tests/indexing/test_loc.py

+14
Original file line number | Diff line number | Diff line change
@@ -966,3 +966,17 @@ def test_loc_getitem_label_list_integer_labels(
966966
expected = df.iloc[:, expected_columns]
967967
result = df.loc[["A", "B", "C"], column_key]
968968
tm.assert_frame_equal(result, expected, check_column_type=check_column_type)
969+
970+
971+
def test_loc_setitem_float_intindex():
972+
# GH 8720
973+
rand_data = np.random.randn(8, 4)
974+
result = pd.DataFrame(rand_data)
975+
result.loc[:, 0.5] = np.nan
976+
expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1)))
977+
expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5])
978+
tm.assert_frame_equal(result, expected)
979+
980+
result = pd.DataFrame(rand_data)
981+
result.loc[:, 0.5] = np.nan
982+
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)