
Commit 80d7f9d

Merge branch 'main' into reduction_dtypes_II
2 parents 52a7276 + 71cfd3a commit 80d7f9d

15 files changed: +138, -27 lines

ci/code_checks.sh (-7)

@@ -170,8 +170,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.Period.asfreq \
 pandas.Period.now \
 pandas.arrays.PeriodArray \
-pandas.arrays.IntervalArray.from_arrays \
-pandas.arrays.IntervalArray.to_tuples \
 pandas.Int8Dtype \
 pandas.Int16Dtype \
 pandas.Int32Dtype \
@@ -181,8 +179,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.UInt32Dtype \
 pandas.UInt64Dtype \
 pandas.NA \
-pandas.Float32Dtype \
-pandas.Float64Dtype \
 pandas.CategoricalDtype.categories \
 pandas.CategoricalDtype.ordered \
 pandas.Categorical.dtype \
@@ -258,9 +254,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.util.hash_pandas_object \
 pandas_object \
 pandas.api.interchange.from_dataframe \
-pandas.Index.T \
-pandas.Index.memory_usage \
-pandas.Index.copy \
 pandas.Index.drop \
 pandas.Index.identical \
 pandas.Index.insert \

pandas/_libs/groupby.pyx (+7)

@@ -1075,6 +1075,13 @@ def group_mean(
 y = val - compensation[lab, j]
 t = sumx[lab, j] + y
 compensation[lab, j] = t - sumx[lab, j] - y
+if compensation[lab, j] != compensation[lab, j]:
+    # GH#50367
+    # If val is +/- infinity, compensation is NaN
+    # which would lead to results being NaN instead
+    # of +/-infinity. We cannot use util.is_nan
+    # because of no gil
+    compensation[lab, j] = 0.
 sumx[lab, j] = t

 for i in range(ncounts):
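
The inline comment above explains the intent; as a hedged illustration in plain Python (not the actual Cython kernel), this is how a +/-inf value turns the Kahan compensation term into NaN, and why resetting it keeps the running sum at +/-inf instead of NaN:

import numpy as np

# Kahan summation over values containing +inf (illustrative sketch only).
total, comp = 0.0, 0.0
for val in [np.inf, 2.0, 4.0]:
    y = val - comp
    t = total + y
    comp = t - total - y      # inf - inf == NaN on the first step
    if comp != comp:          # NaN check that needs no helper (mirrors the no-gil constraint)
        comp = 0.0
    total = t
print(total)  # inf; without the reset the NaN compensation would make this NaN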

pandas/_libs/tslibs/timedeltas.pyx (+1 -1)

@@ -1592,7 +1592,7 @@ cdef class _Timedelta(timedelta):
 
 def as_unit(self, str unit, bint round_ok=True):
     """
-    Convert the underlying int64 representaton to the given unit.
+    Convert the underlying int64 representation to the given unit.
 
     Parameters
     ----------

pandas/core/algorithms.py (+5 -2)

@@ -32,7 +32,10 @@
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
+from pandas.core.dtypes.cast import (
+    construct_1d_object_array_from_listlike,
+    np_find_common_type,
+)
 from pandas.core.dtypes.common import (
     ensure_float64,
     ensure_object,
@@ -518,7 +521,7 @@ def f(c, v):
     f = np.in1d
 
 else:
-    common = np.find_common_type([values.dtype, comps_array.dtype], [])
+    common = np_find_common_type(values.dtype, comps_array.dtype)
     values = values.astype(common, copy=False)
     comps_array = comps_array.astype(common, copy=False)
     f = htable.ismember
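
The promotion step above is what lets isin compare values across dtypes. A hedged standalone sketch of the idea in plain NumPy (not the pandas internals), where both operands are cast to one common dtype before the membership check:

import numpy as np

# Cast both sides to a shared dtype so 1 (int64) and 1.0 (float64) compare equal.
values = np.array([1, 2, 3], dtype="int64")
comps = np.array([1.0, 2.5], dtype="float64")

common = np.result_type(values.dtype, comps.dtype)  # float64
values = values.astype(common, copy=False)
comps = comps.astype(common, copy=False)
print(np.isin(comps, values))  # [ True False]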

pandas/core/arrays/floating.py (+14)

@@ -134,6 +134,20 @@ class FloatingArray(NumericArray):
 Methods
 -------
 None
+
+Examples
+--------
+For Float32Dtype:
+
+>>> ser = pd.Series([2.25, pd.NA], dtype=pd.Float32Dtype())
+>>> ser.dtype
+Float32Dtype()
+
+For Float64Dtype:
+
+>>> ser = pd.Series([2.25, pd.NA], dtype=pd.Float64Dtype())
+>>> ser.dtype
+Float64Dtype()
 """
 
 # create the Dtype

pandas/core/arrays/interval.py (+23 -4)

@@ -509,6 +509,8 @@ def from_breaks(
     "name": "",
     "examples": textwrap.dedent(
         """\
+        Examples
+        --------
         >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
@@ -1635,9 +1637,8 @@ def __arrow_array__(self, type=None):
 
     return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
 
-_interval_shared_docs[
-    "to_tuples"
-] = """
+_interval_shared_docs["to_tuples"] = textwrap.dedent(
+    """
     Return an %(return_type)s of tuples of the form (left, right).
 
     Parameters
@@ -1651,9 +1652,27 @@ def __arrow_array__(self, type=None):
     tuples: %(return_type)s
     %(examples)s\
     """
+)
 
 @Appender(
-    _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
+    _interval_shared_docs["to_tuples"]
+    % {
+        "return_type": "ndarray",
+        "examples": textwrap.dedent(
+            """\
+
+        Examples
+        --------
+        >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
+        >>> idx
+        <IntervalArray>
+        [(0, 1], (1, 2]]
+        Length: 2, dtype: interval[int64, right]
+        >>> idx.to_tuples()
+        array([(0, 1), (1, 2)], dtype=object)
+        """
+        ),
+    }
 )
 def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
     tuples = com.asarray_tuplesafe(zip(self._left, self._right))

pandas/core/base.py (+14)

@@ -300,6 +300,8 @@ def transpose(self, *args, **kwargs) -> Self:
 
 Examples
 --------
+For Series:
+
 >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
 >>> s
 0 Ant
@@ -311,6 +313,12 @@ def transpose(self, *args, **kwargs) -> Self:
 1 Bear
 2 Cow
 dtype: object
+
+For Index:
+
+>>> idx = pd.Index([1, 2, 3])
+>>> idx.T
+Index([1, 2, 3], dtype='int64')
 """,
 )
 
@@ -1088,6 +1096,12 @@ def _memory_usage(self, deep: bool = False) -> int:
 -----
 Memory usage does not include memory consumed by elements that
 are not components of the array if deep=False or if used on PyPy
+
+Examples
+--------
+>>> idx = pd.Index([1, 2, 3])
+>>> idx.memory_usage()
+24
 """
 if hasattr(self.array, "memory_usage"):
     return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]

pandas/core/dtypes/cast.py (+27 -1)

@@ -1328,6 +1328,32 @@ def common_dtype_categorical_compat(
     return dtype
 
 
+def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
+    """
+    np.find_common_type implementation pre-1.25 deprecation using np.result_type
+    https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065
+
+    Parameters
+    ----------
+    dtypes : np.dtypes
+
+    Returns
+    -------
+    np.dtype
+    """
+    try:
+        common_dtype = np.result_type(*dtypes)
+        if common_dtype.kind in "mMSU":
+            # NumPy promotion currently (1.25) misbehaves for times and strings,
+            # so fall back to object (find_common_dtype did unless there
+            # was only one dtype)
+            common_dtype = np.dtype("O")
+
+    except TypeError:
+        common_dtype = np.dtype("O")
+    return common_dtype
+
+
 @overload
 def find_common_type(types: list[np.dtype]) -> np.dtype:
     ...
@@ -1395,7 +1421,7 @@ def find_common_type(types):
     if t.kind in "iufc":
         return np.dtype("object")
 
-    return np.find_common_type(types, [])
+    return np_find_common_type(*types)
 
 
 def construct_2d_arraylike_from_scalar(
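
The new helper above is small enough to exercise on its own. A hedged sketch of its behaviour, reimplemented here so it runs without this pandas branch (the name np_find_common_type_sketch is illustrative, not a pandas API):

import numpy as np

def np_find_common_type_sketch(*dtypes: np.dtype) -> np.dtype:
    # Mirror of the helper added above: prefer np.result_type, but fall
    # back to object for datetime/timedelta/string kinds or when NumPy
    # cannot promote the inputs at all.
    try:
        common = np.result_type(*dtypes)
        if common.kind in "mMSU":
            common = np.dtype("O")
    except TypeError:
        common = np.dtype("O")
    return common

print(np_find_common_type_sketch(np.dtype("int64"), np.dtype("float32")))  # float64
print(np_find_common_type_sketch(np.dtype("int64"), np.dtype("U5")))  # object either way (string kind or TypeError)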

pandas/core/dtypes/concat.py (+4 -5)

@@ -17,6 +17,7 @@
 from pandas.core.dtypes.cast import (
     common_dtype_categorical_compat,
     find_common_type,
+    np_find_common_type,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import (
@@ -156,11 +157,9 @@ def _get_result_dtype(
     target_dtype = np.dtype(object)
     kinds = {"o"}
 else:
-    # Argument 1 to "list" has incompatible type "Set[Union[ExtensionDtype,
-    # Any]]"; expected "Iterable[Union[dtype[Any], None, Type[Any],
-    # _SupportsDType[dtype[Any]], str, Tuple[Any, Union[SupportsIndex,
-    # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
-    target_dtype = np.find_common_type(list(dtypes), [])  # type: ignore[arg-type]
+    # error: Argument 1 to "np_find_common_type" has incompatible type
+    # "*Set[Union[ExtensionDtype, Any]]"; expected "dtype[Any]"
+    target_dtype = np_find_common_type(*dtypes)  # type: ignore[arg-type]
 
 return any_ea, kinds, target_dtype

pandas/core/dtypes/dtypes.py (+4 -2)

@@ -1921,6 +1921,8 @@ def _subtype_with_str(self):
 def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
     # TODO for now only handle SparseDtypes and numpy dtypes => extend
     # with other compatible extension dtypes
+    from pandas.core.dtypes.cast import np_find_common_type
+
     if any(
         isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype)
         for x in dtypes
@@ -1943,8 +1945,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
             stacklevel=find_stack_level(),
         )
 
-    np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
-    return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value)
+    np_dtypes = (x.subtype if isinstance(x, SparseDtype) else x for x in dtypes)
+    return SparseDtype(np_find_common_type(*np_dtypes), fill_value=fill_value)
 
 
 @register_extension_dtype

pandas/core/indexes/base.py (+7)

@@ -1221,6 +1221,13 @@ def copy(
 -----
 In most cases, there should be no functional difference from using
 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
+
+Examples
+--------
+>>> idx = pd.Index(['a', 'b', 'c'])
+>>> new_idx = idx.copy()
+>>> idx is new_idx
+False
 """
 
 name = self._validate_names(name=name, deep=deep)[0]

pandas/core/internals/array_manager.py (+2 -1)

@@ -29,6 +29,7 @@
     ensure_dtype_can_hold_na,
     find_common_type,
     infer_dtype_from_scalar,
+    np_find_common_type,
 )
 from pandas.core.dtypes.common import (
     ensure_platform_int,
@@ -1409,7 +1410,7 @@ def concat_arrays(to_concat: list) -> ArrayLike:
     target_dtype = to_concat_no_proxy[0].dtype
 elif all(x.kind in "iub" and isinstance(x, np.dtype) for x in dtypes):
     # GH#42092
-    target_dtype = np.find_common_type(list(dtypes), [])
+    target_dtype = np_find_common_type(*dtypes)
 else:
     target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

pandas/tests/dtypes/test_inference.py (+1 -3)

@@ -997,9 +997,7 @@ def test_maybe_convert_objects_itemsize(self, data0, data1):
 data = [data0, data1]
 arr = np.array(data, dtype="object")
 
-common_kind = np.find_common_type(
-    [type(data0), type(data1)], scalar_types=[]
-).kind
+common_kind = np.result_type(type(data0), type(data1)).kind
 kind0 = "python" if not hasattr(data0, "dtype") else data0.dtype.kind
 kind1 = "python" if not hasattr(data1, "dtype") else data1.dtype.kind
 if kind0 != "python" and kind1 != "python":

pandas/tests/groupby/test_libgroupby.py (+20)

@@ -282,3 +282,23 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
     tm.assert_numpy_array_equal(
         actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64")
     )
+
+
+def test_cython_group_mean_Inf_at_begining_and_end():
+    # GH 50367
+    actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
+    counts = np.array([0, 0], dtype="int64")
+    data = np.array(
+        [[np.inf, 1.0], [1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5, np.inf]],
+        dtype="float64",
+    )
+    labels = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
+
+    group_mean(actual, counts, data, labels, is_datetimelike=False)
+
+    expected = np.array([[np.inf, 3], [3, np.inf]], dtype="float64")
+
+    tm.assert_numpy_array_equal(
+        actual,
+        expected,
+    )

web/pandas/community/ecosystem.md (+9 -1)

@@ -321,7 +321,14 @@ which support geometric operations. If your work entails maps and
 geographical coordinates, and you love pandas, you should take a close
 look at Geopandas.
 
-### [staricase](https://github.com/staircase-dev/staircase)
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects.
+
+### [staircase](https://github.com/staircase-dev/staircase)
 
 staircase is a data analysis package, built upon pandas and numpy, for modelling and
 manipulation of mathematical step functions. It provides a rich variety of arithmetic
@@ -546,6 +553,7 @@ authors to coordinate on the namespace.
 | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
 | [datatest](https://datatest.readthedocs.io/en/stable/) | `validate` | `Series`, `DataFrame` |
 | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` |
 | [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` |
 | [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` |
