Merge branch 'pandas-dev:main' into reflect-changes-in-components

diogomsmiranda · web-flow · commit e041e8cca5a4 · 2024-04-01T18:49:28.000+01:00
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -72,10 +72,6 @@ jobs:
           no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
           command: |
             pip3 install cibuildwheel==2.15.0
-            # When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels:
-            if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then
-                export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
-            fi
             cibuildwheel --prerelease-pythons --output-dir wheelhouse
 
           environment:
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
@@ -148,18 +148,6 @@ jobs:
           CIBW_PRERELEASE_PYTHONS: True
           CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
 
-      - name: Build nightly wheels (with NumPy pre-release)
-        if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
-        uses: pypa/cibuildwheel@v2.17.0
-        with:
-         package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
-        env:
-          # The nightly wheels should be build witht he NumPy 2.0 pre-releases
-          # which requires the additional URL.
-          CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
-          CIBW_PRERELEASE_PYTHONS: True
-          CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}
-
       - name: Set up Python
         uses: mamba-org/setup-micromamba@v1
         with:
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.assign SA01" \
         -i "pandas.DataFrame.at_time PR01" \
         -i "pandas.DataFrame.axes SA01" \
-        -i "pandas.DataFrame.backfill PR01,SA01" \
         -i "pandas.DataFrame.bfill SA01" \
         -i "pandas.DataFrame.columns SA01" \
         -i "pandas.DataFrame.copy SA01" \
@@ -104,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.mean RT03,SA01" \
         -i "pandas.DataFrame.median RT03,SA01" \
         -i "pandas.DataFrame.min RT03" \
-        -i "pandas.DataFrame.pad PR01,SA01" \
         -i "pandas.DataFrame.plot PR02,SA01" \
         -i "pandas.DataFrame.pop SA01" \
         -i "pandas.DataFrame.prod RT03" \
@@ -119,17 +117,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DataFrame.sparse.to_dense SA01" \
         -i "pandas.DataFrame.std PR01,RT03,SA01" \
         -i "pandas.DataFrame.sum RT03" \
-        -i "pandas.DataFrame.swapaxes PR01,SA01" \
         -i "pandas.DataFrame.swaplevel SA01" \
         -i "pandas.DataFrame.to_feather SA01" \
         -i "pandas.DataFrame.to_markdown SA01" \
         -i "pandas.DataFrame.to_parquet RT03" \
         -i "pandas.DataFrame.to_period SA01" \
         -i "pandas.DataFrame.to_timestamp SA01" \
         -i "pandas.DataFrame.tz_convert SA01" \
-        -i "pandas.DataFrame.tz_localize SA01" \
-        -i "pandas.DataFrame.unstack RT03" \
-        -i "pandas.DataFrame.value_counts RT03" \
         -i "pandas.DataFrame.var PR01,RT03,SA01" \
         -i "pandas.DataFrame.where RT03" \
         -i "pandas.DatetimeIndex.ceil SA01" \
@@ -226,7 +220,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Index.to_list RT03" \
         -i "pandas.Index.union PR07,RT03,SA01" \
         -i "pandas.Index.unique RT03" \
-        -i "pandas.Index.value_counts RT03" \
         -i "pandas.Index.view GL08" \
         -i "pandas.Int16Dtype SA01" \
         -i "pandas.Int32Dtype SA01" \
@@ -482,10 +475,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Series.to_timestamp RT03,SA01" \
         -i "pandas.Series.truediv PR07" \
         -i "pandas.Series.tz_convert SA01" \
-        -i "pandas.Series.tz_localize SA01" \
-        -i "pandas.Series.unstack SA01" \
         -i "pandas.Series.update PR07,SA01" \
-        -i "pandas.Series.value_counts RT03" \
         -i "pandas.Series.var PR01,RT03,SA01" \
         -i "pandas.Series.where RT03" \
         -i "pandas.SparseDtype SA01" \
diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -924,6 +924,7 @@ def value_counts(
         Returns
         -------
         Series
+            Series containing counts of unique values.
 
         See Also
         --------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7162,7 +7162,7 @@ def value_counts(
         dropna: bool = True,
     ) -> Series:
         """
-        Return a Series containing the frequency of each distinct row in the Dataframe.
+        Return a Series containing the frequency of each distinct row in the DataFrame.
 
         Parameters
         ----------
@@ -7175,13 +7175,14 @@ def value_counts(
         ascending : bool, default False
             Sort in ascending order.
         dropna : bool, default True
-            Don't include counts of rows that contain NA values.
+            Do not include counts of rows that contain NA values.
 
             .. versionadded:: 1.3.0
 
         Returns
         -------
         Series
+            Series containing the frequency of each distinct row in the DataFrame.
 
         See Also
         --------
@@ -7192,8 +7193,8 @@ def value_counts(
         The returned Series will have a MultiIndex with one level per input
         column but an Index (non-multi) for a single label. By default, rows
         that contain any NA values are omitted from the result. By default,
-        the resulting Series will be in descending order so that the first
-        element is the most frequently-occurring row.
+        the resulting Series will be sorted by frequencies in descending order so that
+        the first element is the most frequently-occurring row.
 
         Examples
         --------
@@ -9658,6 +9659,8 @@ def unstack(
         Returns
         -------
         Series or DataFrame
+            If index is a MultiIndex: DataFrame with pivoted index labels as new
+            inner-most level column labels, else Series.
 
         See Also
         --------
@@ -11494,7 +11497,7 @@ def any(
         **kwargs,
     ) -> Series | bool: ...
 
-    @doc(make_doc("any", ndim=2))
+    @doc(make_doc("any", ndim=1))
     def any(
         self,
         *,
@@ -11540,7 +11543,7 @@ def all(
         **kwargs,
     ) -> Series | bool: ...
 
-    @doc(make_doc("all", ndim=2))
+    @doc(make_doc("all", ndim=1))
     def all(
         self,
         axis: Axis | None = 0,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -10485,10 +10485,10 @@ def tz_localize(
         nonexistent: TimeNonexistent = "raise",
     ) -> Self:
         """
-        Localize tz-naive index of a Series or DataFrame to target time zone.
+        Localize time zone naive index of a Series or DataFrame to target time zone.
 
         This operation localizes the Index. To localize the values in a
-        timezone-naive Series, use :meth:`Series.dt.tz_localize`.
+        time zone naive Series, use :meth:`Series.dt.tz_localize`.
 
         Parameters
         ----------
@@ -10548,13 +10548,19 @@ def tz_localize(
         Returns
         -------
         {klass}
-            Same type as the input.
+            Same type as the input, with time zone naive or aware index, depending on
+            ``tz``.
 
         Raises
         ------
         TypeError
             If the TimeSeries is tz-aware and tz is not None.
 
+        See Also
+        --------
+        Series.dt.tz_localize: Localize the values in a time zone naive Series.
+        Timestamp.tz_localize: Localize the Timestamp to a timezone.
+
         Examples
         --------
         Localize local times:
@@ -11712,7 +11718,7 @@ def last_valid_index(self) -> Hashable:
 skipna : bool, default True
     Exclude NA/null values when computing the result.
 numeric_only : bool, default False
-    Include only float, int, boolean columns. Not implemented for Series.
+    Include only float, int, boolean columns.
 
 {min_count}\
 **kwargs
@@ -11881,9 +11887,9 @@ def last_valid_index(self) -> Hashable:
 
 Returns
 -------
-{name1} or {name2}
-    If level is specified, then, {name2} is returned; otherwise, {name1}
-    is returned.
+{name2} or {name1}
+    If axis=None, then a scalar boolean is returned.
+    Otherwise a Series is returned with index matching the index argument.
 
 {see_also}
 {examples}"""
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4257,6 +4257,10 @@ def unstack(
         DataFrame
             Unstacked Series.
 
+        See Also
+        --------
+        DataFrame.unstack : Pivot the MultiIndex of a DataFrame.
+
         Notes
         -----
         Reference :ref:`the user guide <reshaping.stacking>` for more examples.
diff --git a/pandas/io/html.py b/pandas/io/html.py
@@ -584,14 +584,8 @@ class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
     :class:`pandas.io.html._HtmlFrameParser`.
     """
 
-    def __init__(self, *args, **kwargs) -> None:
-        super().__init__(*args, **kwargs)
-        from bs4 import SoupStrainer
-
-        self._strainer = SoupStrainer("table")
-
     def _parse_tables(self, document, match, attrs):
-        element_name = self._strainer.name
+        element_name = "table"
         tables = document.find_all(element_name, attrs=attrs)
         if not tables:
             raise ValueError("No tables found")
diff --git a/pyproject.toml b/pyproject.toml
@@ -152,6 +152,9 @@ setup = ['--vsenv'] # For Windows
 skip = "cp36-* cp37-* cp38-* pp* *_i686 *_ppc64le *_s390x"
 build-verbosity = "3"
 environment = {LDFLAGS="-Wl,--strip-all"}
+# TODO: remove this once numpy 2.0 proper releases
+# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml
+before-build = "pip install numpy==2.0.0rc1"
 test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0"
 test-command = """
   PANDAS_CI='1' python -c 'import pandas as pd; \
@@ -160,7 +163,9 @@ test-command = """
   """
 
 [tool.cibuildwheel.windows]
-before-build = "pip install delvewheel"
+# TODO: remove this once numpy 2.0 proper releases
+# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml
+before-build = "pip install delvewheel numpy==2.0.0rc1"
 repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}"
 
 [[tool.cibuildwheel.overrides]]