Merge remote-tracking branch 'upstream/master' into docfix-multiindex-set_levels

hweecat · hweecat · commit 03829ce03001 · 2020-01-01T01:21:24.000+08:00
diff --git a/pandas/_typing.py b/pandas/_typing.py
@@ -23,21 +23,29 @@
     from pandas.core.indexes.base import Index  # noqa: F401
     from pandas.core.series import Series  # noqa: F401
     from pandas.core.generic import NDFrame  # noqa: F401
+    from pandas import Interval  # noqa: F401
 
+# array-like
 
 AnyArrayLike = TypeVar("AnyArrayLike", "ExtensionArray", "Index", "Series", np.ndarray)
 ArrayLike = TypeVar("ArrayLike", "ExtensionArray", np.ndarray)
+
+# scalars
+
+PythonScalar = Union[str, int, float, bool]
 DatetimeLikeScalar = TypeVar("DatetimeLikeScalar", "Period", "Timestamp", "Timedelta")
+PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
+Scalar = Union[PythonScalar, PandasScalar]
+
+# other
+
 Dtype = Union[str, np.dtype, "ExtensionDtype"]
 FilePathOrBuffer = Union[str, Path, IO[AnyStr]]
-
 FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")
-Scalar = Union[str, int, float, bool]
 Axis = Union[str, int]
 Ordered = Optional[bool]
-JSONSerializable = Union[Scalar, List, Dict]
-
+JSONSerializable = Union[PythonScalar, List, Dict]
 Axes = Collection
 
 # to maintain type information across generic functions and parametrization
-_T = TypeVar("_T")
+T = TypeVar("T")
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1,14 +1,14 @@
 import operator
 from shutil import get_terminal_size
-from typing import Type, Union, cast
+from typing import Dict, Hashable, List, Type, Union, cast
 from warnings import warn
 
 import numpy as np
 
 from pandas._config import get_option
 
 from pandas._libs import algos as libalgos, hashtable as htable
-from pandas._typing import ArrayLike, Dtype, Ordered
+from pandas._typing import ArrayLike, Dtype, Ordered, Scalar
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
     Appender,
@@ -511,7 +511,7 @@ def itemsize(self) -> int:
         """
         return self.categories.itemsize
 
-    def tolist(self) -> list:
+    def tolist(self) -> List[Scalar]:
         """
         Return a list of the values.
 
@@ -2067,7 +2067,7 @@ def __setitem__(self, key, value):
         lindexer = self._maybe_coerce_indexer(lindexer)
         self._codes[key] = lindexer
 
-    def _reverse_indexer(self):
+    def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
         """
         Compute the inverse of a categorical, returning
         a dict of categories -> indexers.
@@ -2097,8 +2097,8 @@ def _reverse_indexer(self):
             self.codes.astype("int64"), categories.size
         )
         counts = counts.cumsum()
-        result = (r[start:end] for start, end in zip(counts, counts[1:]))
-        result = dict(zip(categories, result))
+        _result = (r[start:end] for start, end in zip(counts, counts[1:]))
+        result = dict(zip(categories, _result))
         return result
 
     # reduction ops #
diff --git a/pandas/core/common.py b/pandas/core/common.py
@@ -9,11 +9,12 @@
 from datetime import datetime, timedelta
 from functools import partial
 import inspect
-from typing import Any, Iterable, Union
+from typing import Any, Collection, Iterable, Union
 
 import numpy as np
 
 from pandas._libs import lib, tslibs
+from pandas._typing import T
 
 from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
 from pandas.core.dtypes.common import (
@@ -270,7 +271,7 @@ def maybe_make_list(obj):
     return obj
 
 
-def maybe_iterable_to_list(obj: Union[Iterable, Any]) -> Union[list, Any]:
+def maybe_iterable_to_list(obj: Union[Iterable[T], T]) -> Union[Collection[T], T]:
     """
     If obj is Iterable but not list-like, consume into list.
     """
diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py
@@ -276,10 +276,12 @@ def is_dtype(cls, dtype) -> bool:
             return False
         elif isinstance(dtype, cls):
             return True
-        try:
-            return cls.construct_from_string(dtype) is not None
-        except TypeError:
-            return False
+        if isinstance(dtype, str):
+            try:
+                return cls.construct_from_string(dtype) is not None
+            except TypeError:
+                return False
+        return False
 
     @property
     def _is_numeric(self) -> bool:
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -882,7 +882,11 @@ def construct_from_string(cls, string):
                 return cls(freq=string)
             except ValueError:
                 pass
-        raise TypeError(f"Cannot construct a 'PeriodDtype' from '{string}'")
+        if isinstance(string, str):
+            msg = f"Cannot construct a 'PeriodDtype' from '{string}'"
+        else:
+            msg = f"'construct_from_string' expects a string, got {type(string)}"
+        raise TypeError(msg)
 
     def __str__(self) -> str_type:
         return self.name
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -3,7 +3,7 @@
 split-apply-combine paradigm.
 """
 
-from typing import Hashable, List, Optional, Tuple
+from typing import Dict, Hashable, List, Optional, Tuple
 
 import numpy as np
 
@@ -419,7 +419,7 @@ def _make_codes(self) -> None:
             self._group_index = uniques
 
     @cache_readonly
-    def groups(self) -> dict:
+    def groups(self) -> Dict[Hashable, np.ndarray]:
         return self.index.groupby(Categorical.from_codes(self.codes, self.group_index))
 
 
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 import operator
 from textwrap import dedent
-from typing import FrozenSet, Hashable, Optional, Union
+from typing import Dict, FrozenSet, Hashable, Optional, Union
 import warnings
 
 import numpy as np
@@ -4594,7 +4594,7 @@ def _maybe_promote(self, other):
                 return self.astype("object"), other.astype("object")
         return self, other
 
-    def groupby(self, values):
+    def groupby(self, values) -> Dict[Hashable, np.ndarray]:
         """
         Group the index labels by a given array of values.
 
@@ -4605,7 +4605,7 @@ def groupby(self, values):
 
         Returns
         -------
-        groups : dict
+        dict
             {group name -> group labels}
         """
 
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
@@ -1,4 +1,4 @@
-from typing import Tuple
+from typing import Hashable, List, Tuple, Union
 
 import numpy as np
 
@@ -2224,7 +2224,7 @@ def _convert_key(self, key, is_setter: bool = False):
         return key
 
 
-def _tuplify(ndim: int, loc) -> tuple:
+def _tuplify(ndim: int, loc: Hashable) -> Tuple[Union[Hashable, slice], ...]:
     """
     Given an indexer for the first dimension, create an equivalent tuple
     for indexing over all dimensions.
@@ -2238,9 +2238,10 @@ def _tuplify(ndim: int, loc) -> tuple:
     -------
     tuple
     """
-    tup = [slice(None, None) for _ in range(ndim)]
-    tup[0] = loc
-    return tuple(tup)
+    _tup: List[Union[Hashable, slice]]
+    _tup = [slice(None, None) for _ in range(ndim)]
+    _tup[0] = loc
+    return tuple(_tup)
 
 
 def convert_to_index_sliceable(obj, key):
diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -1452,7 +1452,7 @@ def copy(
                 data = self.select(k)
                 if isinstance(s, Table):
 
-                    index: Union[bool, list] = False
+                    index: Union[bool, List[str]] = False
                     if propindexes:
                         index = [a.name for a in s.axes if a.is_indexed]
                     new_store.append(
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
@@ -408,6 +408,9 @@ def test_construction_from_string(self):
         with pytest.raises(TypeError):
             PeriodDtype.construct_from_string("datetime64[ns, US/Eastern]")
 
+        with pytest.raises(TypeError, match="list"):
+            PeriodDtype.construct_from_string([1, 2, 3])
+
     def test_is_dtype(self):
         assert PeriodDtype.is_dtype(self.dtype)
         assert PeriodDtype.is_dtype("period[D]")
diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py
@@ -37,6 +37,9 @@ def test_is_dtype_from_self(self, dtype):
         result = type(dtype).is_dtype(dtype)
         assert result is True
 
+    def test_is_dtype_other_input(self, dtype):
+        assert dtype.is_dtype([1, 2, 3]) is False
+
     def test_is_not_string_type(self, dtype):
         return not pd.api.types.is_string_dtype(dtype)
 
diff --git a/pandas/tests/frame/methods/test_replace.py b/pandas/tests/frame/methods/test_replace.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 from io import StringIO
 import re
-from typing import Dict
+from typing import Dict, List, Union
 
 import numpy as np
 import pytest
@@ -12,12 +12,12 @@
 
 
 @pytest.fixture
-def mix_ab() -> Dict[str, list]:
+def mix_ab() -> Dict[str, List[Union[int, str]]]:
     return {"a": list(range(4)), "b": list("ab..")}
 
 
 @pytest.fixture
-def mix_abc() -> Dict[str, list]:
+def mix_abc() -> Dict[str, List[Union[float, str]]]:
     return {"a": list(range(4)), "b": list("ab.."), "c": ["a", "b", np.nan, "d"]}
 
 
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -586,6 +586,20 @@ def test_groupby_multiple_columns(df, op):
     tm.assert_series_equal(result, expected)
 
 
+def test_as_index_select_column():
+    # GH 5764
+    df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=["A", "B"])
+    result = df.groupby("A", as_index=False)["B"].get_group(1)
+    expected = pd.Series([2, 4], name="B")
+    tm.assert_series_equal(result, expected)
+
+    result = df.groupby("A", as_index=False)["B"].apply(lambda x: x.cumsum())
+    expected = pd.Series(
+        [2, 6, 6], name="B", index=pd.MultiIndex.from_tuples([(0, 0), (0, 1), (1, 2)])
+    )
+    tm.assert_series_equal(result, expected)
+
+
 def test_groupby_as_index_agg(df):
     grouped = df.groupby("A", as_index=False)
 
diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py
@@ -1103,3 +1103,70 @@ def test_transform_lambda_with_datetimetz():
         name="time",
     )
     tm.assert_series_equal(result, expected)
+
+
+def test_transform_fastpath_raises():
+    # GH#29631 case where fastpath defined in groupby.generic _choose_path
+    #  raises, but slow_path does not
+
+    df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]})
+    gb = df.groupby("A")
+
+    def func(grp):
+        # we want a function such that func(frame) fails but frame.apply(func)
+        #  works
+        if grp.ndim == 2:
+            # Ensure that fast_path fails
+            raise NotImplementedError("Don't cross the streams")
+        return grp * 2
+
+    # Check that the fastpath raises, see _transform_general
+    obj = gb._obj_with_exclusions
+    gen = gb.grouper.get_iterator(obj, axis=gb.axis)
+    fast_path, slow_path = gb._define_paths(func)
+    _, group = next(gen)
+
+    with pytest.raises(NotImplementedError, match="Don't cross the streams"):
+        fast_path(group)
+
+    result = gb.transform(func)
+
+    expected = pd.DataFrame([2, -2, 2, 4], columns=["B"])
+    tm.assert_frame_equal(result, expected)
+
+
+def test_transform_lambda_indexing():
+    # GH 7883
+    df = pd.DataFrame(
+        {
+            "A": ["foo", "bar", "foo", "bar", "foo", "flux", "foo", "flux"],
+            "B": ["one", "one", "two", "three", "two", "six", "five", "three"],
+            "C": range(8),
+            "D": range(8),
+            "E": range(8),
+        }
+    )
+    df = df.set_index(["A", "B"])
+    df = df.sort_index()
+    result = df.groupby(level="A").transform(lambda x: x.iloc[-1])
+    expected = DataFrame(
+        {
+            "C": [3, 3, 7, 7, 4, 4, 4, 4],
+            "D": [3, 3, 7, 7, 4, 4, 4, 4],
+            "E": [3, 3, 7, 7, 4, 4, 4, 4],
+        },
+        index=MultiIndex.from_tuples(
+            [
+                ("bar", "one"),
+                ("bar", "three"),
+                ("flux", "six"),
+                ("flux", "three"),
+                ("foo", "five"),
+                ("foo", "one"),
+                ("foo", "two"),
+                ("foo", "two"),
+            ],
+            names=["A", "B"],
+        ),
+    )
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -736,6 +736,12 @@ def test_get_indexer(self):
         expected = np.array([0, 1, 1, 2, 2, 3, 3, 4, 4, 5], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected)
 
+    def test_get_indexer_nan(self):
+        # GH 7820
+        result = Index([1, 2, np.nan]).get_indexer([np.nan])
+        expected = np.array([2], dtype=np.intp)
+        tm.assert_numpy_array_equal(result, expected)
+
     def test_intersection(self):
         index = self.create_index()
         other = Index([1, 2, 3, 4, 5])
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py
@@ -966,3 +966,17 @@ def test_loc_getitem_label_list_integer_labels(
     expected = df.iloc[:, expected_columns]
     result = df.loc[["A", "B", "C"], column_key]
     tm.assert_frame_equal(result, expected, check_column_type=check_column_type)
+
+
+def test_loc_setitem_float_intindex():
+    # GH 8720
+    rand_data = np.random.randn(8, 4)
+    result = pd.DataFrame(rand_data)
+    result.loc[:, 0.5] = np.nan
+    expected_data = np.hstack((rand_data, np.array([np.nan] * 8).reshape(8, 1)))
+    expected = pd.DataFrame(expected_data, columns=[0.0, 1.0, 2.0, 3.0, 0.5])
+    tm.assert_frame_equal(result, expected)
+
+    result = pd.DataFrame(rand_data)
+    result.loc[:, 0.5] = np.nan
+    tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py
diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py