Merge branch 'add-FutureWarning-for-pandas.io.sql.execute' of https://github.com/luke396/pandas into add-FutureWarning-for-pandas.io.sql.execute

luke · luke · commit 3c3f908eebee · 2023-01-09T23:33:33.000+08:00
diff --git a/asv_bench/benchmarks/array.py b/asv_bench/benchmarks/array.py
@@ -44,6 +44,15 @@ def time_from_integer_array(self):
         pd.array(self.values_integer, dtype="Int64")
 
 
+class IntervalArray:
+    def setup(self):
+        N = 10_000
+        self.tuples = [(i, i + 1) for i in range(N)]
+
+    def time_from_tuples(self):
+        pd.arrays.IntervalArray.from_tuples(self.tuples)
+
+
 class StringArray:
     def setup(self):
         N = 100_000
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -83,6 +83,36 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
     $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
     RET=$(($RET + $?)) ; echo $MSG "DONE"
 
+    MSG='Partially validate docstrings (RT02)' ; echo $MSG
+    $BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
+    	    pandas.Series.align \
+	    pandas.Series.dt.total_seconds \
+	    pandas.Series.cat.rename_categories \
+	    pandas.Series.cat.reorder_categories \
+	    pandas.Series.cat.add_categories \
+	    pandas.Series.cat.remove_categories \
+	    pandas.Series.cat.remove_unused_categories \
+	    pandas.Index.all \
+	    pandas.Index.any \
+	    pandas.CategoricalIndex.rename_categories \
+	    pandas.CategoricalIndex.reorder_categories \
+	    pandas.CategoricalIndex.add_categories \
+	    pandas.CategoricalIndex.remove_categories \
+	    pandas.CategoricalIndex.remove_unused_categories \
+	    pandas.MultiIndex.drop \
+	    pandas.DatetimeIndex.to_pydatetime \
+	    pandas.TimedeltaIndex.to_pytimedelta \
+	    pandas.core.groupby.SeriesGroupBy.apply \
+	    pandas.core.groupby.DataFrameGroupBy.apply \
+	    pandas.io.formats.style.Styler.export \
+	    pandas.api.extensions.ExtensionArray.astype \
+	    pandas.api.extensions.ExtensionArray.dropna \
+	    pandas.api.extensions.ExtensionArray.isna \
+	    pandas.api.extensions.ExtensionArray.repeat \
+	    pandas.api.extensions.ExtensionArray.unique \
+	    pandas.DataFrame.align
+    RET=$(($RET + $?)) ; echo $MSG "DONE"
+
 fi
 
 ### DOCUMENTATION NOTEBOOKS ###
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -628,6 +628,7 @@ Removal of prior version deprecations/changes
 - Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
 - Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
 - Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
+- Disallow passing 2 non-keyword arguments to :meth:`DataFrame.reindex` (:issue:`17966`)
 - Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
 - Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
 - Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
@@ -752,6 +753,7 @@ Removal of prior version deprecations/changes
 Performance improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 - Performance improvement in :meth:`.DataFrameGroupBy.median` and :meth:`.SeriesGroupBy.median` and :meth:`.GroupBy.cumprod` for nullable dtypes (:issue:`37493`)
+- Performance improvement in :meth:`.DataFrameGroupBy.all`, :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.all`, and :meth:`.SeriesGroupBy.any` for object dtype (:issue:`50623`)
 - Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`)
 - Performance improvement in :meth:`MultiIndex.size` (:issue:`48723`)
 - Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`, :issue:`48752`)
@@ -775,6 +777,7 @@ Performance improvements
 - Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
 - Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
 - Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
+- Performance improvement in :meth:`~arrays.IntervalArray.from_tuples` (:issue:`50620`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
 - Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
 - Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)
diff --git a/pandas/_libs/interval.pyx b/pandas/_libs/interval.pyx
@@ -58,6 +58,21 @@ cdef class IntervalMixin:
         -------
         bool
             True if the Interval is closed on the left-side.
+
+        See Also
+        --------
+        Interval.closed_right : Check if the interval is closed on the right side.
+        Interval.open_left : Boolean inverse of closed_left.
+
+        Examples
+        --------
+        >>> iv = pd.Interval(0, 5, closed='left')
+        >>> iv.closed_left
+        True
+
+        >>> iv = pd.Interval(0, 5, closed='right')
+        >>> iv.closed_left
+        False
         """
         return self.closed in ("left", "both")
 
@@ -72,6 +87,21 @@ cdef class IntervalMixin:
         -------
         bool
-            True if the Interval is closed on the left-side.
+            True if the Interval is closed on the right-side.
+
+        See Also
+        --------
+        Interval.closed_left : Check if the interval is closed on the left side.
+        Interval.open_right : Boolean inverse of closed_right.
+
+        Examples
+        --------
+        >>> iv = pd.Interval(0, 5, closed='both')
+        >>> iv.closed_right
+        True
+
+        >>> iv = pd.Interval(0, 5, closed='left')
+        >>> iv.closed_right
+        False
         """
         return self.closed in ("right", "both")
 
@@ -86,6 +116,21 @@ cdef class IntervalMixin:
         -------
         bool
             True if the Interval is not closed on the left-side.
+
+        See Also
+        --------
+        Interval.open_right : Check if the interval is open on the right side.
+        Interval.closed_left : Boolean inverse of open_left.
+
+        Examples
+        --------
+        >>> iv = pd.Interval(0, 5, closed='neither')
+        >>> iv.open_left
+        True
+
+        >>> iv = pd.Interval(0, 5, closed='both')
+        >>> iv.open_left
+        False
         """
         return not self.closed_left
 
@@ -100,6 +145,21 @@ cdef class IntervalMixin:
         -------
         bool
-            True if the Interval is not closed on the left-side.
+            True if the Interval is not closed on the right-side.
+
+        See Also
+        --------
+        Interval.open_left : Check if the interval is open on the left side.
+        Interval.closed_right : Boolean inverse of open_right.
+
+        Examples
+        --------
+        >>> iv = pd.Interval(0, 5, closed='left')
+        >>> iv.open_right
+        True
+
+        >>> iv = pd.Interval(0, 5)
+        >>> iv.open_right
+        False
         """
         return not self.closed_right
 
@@ -124,6 +184,10 @@ cdef class IntervalMixin:
     def length(self):
         """
         Return the length of the Interval.
+
+        See Also
+        --------
+        Interval.is_empty : Indicates if an interval contains no points.
         """
         return self.right - self.left
 
@@ -140,6 +204,10 @@ cdef class IntervalMixin:
             an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
             empty.
 
+        See Also
+        --------
+        Interval.length : Return the length of the Interval.
+
         Examples
         --------
         An :class:`Interval` that contains points is not empty:
diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx
@@ -579,7 +579,16 @@ cdef class _Timestamp(ABCTimestamp):
     @property
     def is_month_start(self) -> bool:
         """
-        Return True if date is first day of month.
+        Check if the date is the first day of the month.
+
+        Returns
+        -------
+        bool
+            True if the date is the first day of the month.
+
+        See Also
+        --------
+        Timestamp.is_month_end : Similar property indicating the last day of the month.
 
         Examples
         --------
@@ -596,7 +605,16 @@ cdef class _Timestamp(ABCTimestamp):
     @property
     def is_month_end(self) -> bool:
         """
-        Return True if date is last day of month.
+        Check if the date is the last day of the month.
+
+        Returns
+        -------
+        bool
+            True if the date is the last day of the month.
+
+        See Also
+        --------
+        Timestamp.is_month_start : Similar property indicating month start.
 
         Examples
         --------
@@ -613,7 +631,17 @@ cdef class _Timestamp(ABCTimestamp):
     @property
     def is_quarter_start(self) -> bool:
         """
-        Return True if date is first day of the quarter.
+        Check if the date is the first day of the quarter.
+
+        Returns
+        -------
+        bool
+            True if the date is the first day of the quarter.
+
+        See Also
+        --------
+        Timestamp.is_quarter_end : Similar property indicating the quarter end.
+        Timestamp.quarter : Return the quarter of the date.
 
         Examples
         --------
@@ -630,7 +658,17 @@ cdef class _Timestamp(ABCTimestamp):
     @property
     def is_quarter_end(self) -> bool:
         """
-        Return True if date is last day of the quarter.
+        Check if the date is the last day of the quarter.
+
+        Returns
+        -------
+        bool
+            True if the date is the last day of the quarter.
+
+        See Also
+        --------
+        Timestamp.is_quarter_start : Similar property indicating the quarter start.
+        Timestamp.quarter : Return the quarter of the date.
 
         Examples
         --------
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -608,7 +608,7 @@ def from_tuples(
             left = right = data
 
         for d in data:
-            if isna(d):
+            if not isinstance(d, tuple) and isna(d):
                 lhs = rhs = np.nan
             else:
                 name = cls.__name__
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1774,22 +1774,16 @@ def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool):
         """
 
         def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
-            if is_object_dtype(vals.dtype):
+            if is_object_dtype(vals.dtype) and skipna:
                 # GH#37501: don't raise on pd.NA when skipna=True
-                if skipna:
-                    func = np.vectorize(
-                        lambda x: bool(x) if not isna(x) else True, otypes=[bool]
-                    )
-                    vals = func(vals)
-                else:
-                    vals = vals.astype(bool, copy=False)
-
-                vals = cast(np.ndarray, vals)
+                mask = isna(vals)
+                if mask.any():
+                    # mask on original values computed separately
+                    vals = vals.copy()
+                    vals[mask] = True
             elif isinstance(vals, BaseMaskedArray):
-                vals = vals._data.astype(bool, copy=False)
-            else:
-                vals = vals.astype(bool, copy=False)
-
+                vals = vals._data
+            vals = vals.astype(bool, copy=False)
             return vals.view(np.int8), bool
 
         def result_to_bool(
diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py
@@ -388,7 +388,9 @@ def test_ea_with_na(self, any_numeric_ea_dtype):
         # GH#48778
 
         df = DataFrame({"a": [1, pd.NA, pd.NA], "b": pd.NA}, dtype=any_numeric_ea_dtype)
-        result = df.describe()
+        # Warning from numpy for taking std of single element
+        with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
+            result = df.describe()
         expected = DataFrame(
             {"a": [1.0, 1.0, pd.NA] + [1.0] * 5, "b": [0.0] + [pd.NA] * 7},
             index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
diff --git a/pandas/tests/frame/methods/test_reindex.py b/pandas/tests/frame/methods/test_reindex.py
@@ -840,14 +840,12 @@ def test_reindex_axis_style(self):
         result = df.reindex([0, 1, 3], axis="index")
         tm.assert_frame_equal(result, expected)
 
-    def test_reindex_positional_warns(self):
+    def test_reindex_positional_raises(self):
         # https://github.com/pandas-dev/pandas/issues/12392
+        # Enforced in 2.0
         df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
-        expected = DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]})
-        with tm.assert_produces_warning(FutureWarning):
-            result = df.reindex([0, 1], ["A", "B", "C"])
-
-        tm.assert_frame_equal(result, expected)
+        with pytest.raises(TypeError, match=r".* is ambiguous."):
+            df.reindex([0, 1], ["A", "B", "C"])
 
     def test_reindex_axis_style_raises(self):
         # https://github.com/pandas-dev/pandas/issues/12392
@@ -914,9 +912,7 @@ def test_reindex_api_equivalence(self):
         for res in [res2, res3]:
             tm.assert_frame_equal(res1, res)
 
-        with tm.assert_produces_warning(FutureWarning) as m:
-            res1 = df.reindex(["b", "a"], ["e", "d"])
-        assert "reindex" in str(m[0].message)
+        res1 = df.reindex(index=["b", "a"], columns=["e", "d"])
         res2 = df.reindex(columns=["e", "d"], index=["b", "a"])
         res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1)
         for res in [res2, res3]:
diff --git a/pandas/tests/frame/methods/test_shift.py b/pandas/tests/frame/methods/test_shift.py
@@ -397,6 +397,13 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager):
         result = df3.shift(2, axis=1)
 
         expected = df3.take([-1, -1, 0, 1, 2], axis=1)
+        # Explicit cast to float to avoid implicit cast when setting nan.
+        # Column names aren't unique, so directly calling `expected.astype` won't work.
+        expected = expected.pipe(
+            lambda df: df.set_axis(range(df.shape[1]), axis=1)
+            .astype({0: "float", 1: "float"})
+            .set_axis(df.columns, axis=1)
+        )
         expected.iloc[:, :2] = np.nan
         expected.columns = df3.columns
 
@@ -410,6 +417,13 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager):
         result = df3.shift(-2, axis=1)
 
         expected = df3.take([2, 3, 4, -1, -1], axis=1)
+        # Explicit cast to float to avoid implicit cast when setting nan.
+        # Column names aren't unique, so directly calling `expected.astype` won't work.
+        expected = expected.pipe(
+            lambda df: df.set_axis(range(df.shape[1]), axis=1)
+            .astype({3: "float", 4: "float"})
+            .set_axis(df.columns, axis=1)
+        )
         expected.iloc[:, -2:] = np.nan
         expected.columns = df3.columns
 
diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py
diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py
diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py