
BUG: Timestamp.unit now reflects changes in components after Timestamp.replace #58106

4 changes: 0 additions & 4 deletions .circleci/config.yml
@@ -72,10 +72,6 @@ jobs:
no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
command: |
pip3 install cibuildwheel==2.15.0
# When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels:
if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then
export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple"
fi
cibuildwheel --prerelease-pythons --output-dir wheelhouse

environment:
12 changes: 0 additions & 12 deletions .github/workflows/wheels.yml
@@ -148,18 +148,6 @@ jobs:
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Build nightly wheels (with NumPy pre-release)
if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }}
uses: pypa/[email protected]
with:
package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }}
env:
# The nightly wheels should be built with the NumPy 2.0 pre-releases
# which requires the additional URL.
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple
CIBW_PRERELEASE_PYTHONS: True
CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }}

- name: Set up Python
uses: mamba-org/setup-micromamba@v1
with:
10 changes: 0 additions & 10 deletions ci/code_checks.sh
@@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.assign SA01" \
-i "pandas.DataFrame.at_time PR01" \
-i "pandas.DataFrame.axes SA01" \
-i "pandas.DataFrame.backfill PR01,SA01" \
-i "pandas.DataFrame.bfill SA01" \
-i "pandas.DataFrame.columns SA01" \
-i "pandas.DataFrame.copy SA01" \
@@ -104,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.mean RT03,SA01" \
-i "pandas.DataFrame.median RT03,SA01" \
-i "pandas.DataFrame.min RT03" \
-i "pandas.DataFrame.pad PR01,SA01" \
-i "pandas.DataFrame.plot PR02,SA01" \
-i "pandas.DataFrame.pop SA01" \
-i "pandas.DataFrame.prod RT03" \
@@ -119,17 +117,13 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.DataFrame.sparse.to_dense SA01" \
-i "pandas.DataFrame.std PR01,RT03,SA01" \
-i "pandas.DataFrame.sum RT03" \
-i "pandas.DataFrame.swapaxes PR01,SA01" \
-i "pandas.DataFrame.swaplevel SA01" \
-i "pandas.DataFrame.to_feather SA01" \
-i "pandas.DataFrame.to_markdown SA01" \
-i "pandas.DataFrame.to_parquet RT03" \
-i "pandas.DataFrame.to_period SA01" \
-i "pandas.DataFrame.to_timestamp SA01" \
-i "pandas.DataFrame.tz_convert SA01" \
-i "pandas.DataFrame.tz_localize SA01" \
-i "pandas.DataFrame.unstack RT03" \
-i "pandas.DataFrame.value_counts RT03" \
-i "pandas.DataFrame.var PR01,RT03,SA01" \
-i "pandas.DataFrame.where RT03" \
-i "pandas.DatetimeIndex.ceil SA01" \
@@ -226,7 +220,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Index.to_list RT03" \
-i "pandas.Index.union PR07,RT03,SA01" \
-i "pandas.Index.unique RT03" \
-i "pandas.Index.value_counts RT03" \
-i "pandas.Index.view GL08" \
-i "pandas.Int16Dtype SA01" \
-i "pandas.Int32Dtype SA01" \
@@ -482,10 +475,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
-i "pandas.Series.to_timestamp RT03,SA01" \
-i "pandas.Series.truediv PR07" \
-i "pandas.Series.tz_convert SA01" \
-i "pandas.Series.tz_localize SA01" \
-i "pandas.Series.unstack SA01" \
-i "pandas.Series.update PR07,SA01" \
-i "pandas.Series.value_counts RT03" \
-i "pandas.Series.var PR01,RT03,SA01" \
-i "pandas.Series.where RT03" \
-i "pandas.SparseDtype SA01" \
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
@@ -338,6 +338,7 @@ Bug fixes
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
- Fixed bug in :meth:`Timestamp.replace` where :attr:`Timestamp.unit` did not reflect the resolution of the replaced components. (:issue:`57749`)

Categorical
^^^^^^^^^^^
26 changes: 21 additions & 5 deletions pandas/_libs/tslibs/timestamps.pyx
@@ -2439,10 +2439,12 @@ default 'raise'
datetime ts_input
tzinfo_type tzobj
_TSObject ts
NPY_DATETIMEUNIT rep_reso

# set to naive if needed
tzobj = self.tzinfo
value = self._value
rep_reso = self._creso

# GH 37610. Preserve fold when replacing.
if fold is None:
@@ -2466,40 +2468,54 @@

if year is not None:
dts.year = validate("year", year)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_Y
if month is not None:
dts.month = validate("month", month)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_M
if day is not None:
dts.day = validate("day", day)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_D
if hour is not None:
dts.hour = validate("hour", hour)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_h
if minute is not None:
dts.min = validate("minute", minute)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_m
if second is not None:
dts.sec = validate("second", second)
rep_reso = NPY_DATETIMEUNIT.NPY_FR_s
if microsecond is not None:
dts.us = validate("microsecond", microsecond)
if microsecond > 999:
rep_reso = NPY_DATETIMEUNIT.NPY_FR_us
else:
rep_reso = NPY_DATETIMEUNIT.NPY_FR_ms
if nanosecond is not None:
dts.ps = validate("nanosecond", nanosecond) * 1000
rep_reso = NPY_DATETIMEUNIT.NPY_FR_ns
if tzinfo is not object:
tzobj = tzinfo

if rep_reso < self._creso:
rep_reso = self._creso

# reconstruct & check bounds
if tzobj is None:
# We can avoid going through pydatetime paths, which is robust
# to datetimes outside of pydatetime range.
ts = _TSObject()
try:
ts.value = npy_datetimestruct_to_datetime(self._creso, &dts)
ts.value = npy_datetimestruct_to_datetime(rep_reso, &dts)
except OverflowError as err:
fmt = dts_to_iso_string(&dts)
raise OutOfBoundsDatetime(
f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'"
) from err
ts.dts = dts
ts.creso = self._creso
ts.creso = rep_reso
ts.fold = fold
return create_timestamp_from_ts(
ts.value, dts, tzobj, fold, reso=self._creso
ts.value, dts, tzobj, fold, reso=rep_reso
)

elif tzobj is not None and treat_tz_as_pytz(tzobj):
@@ -2518,10 +2534,10 @@
ts_input = datetime(**kwargs)

ts = convert_datetime_to_tsobject(
ts_input, tzobj, nanos=dts.ps // 1000, reso=self._creso
ts_input, tzobj, nanos=dts.ps // 1000, reso=rep_reso
)
return create_timestamp_from_ts(
ts.value, dts, tzobj, fold, reso=self._creso
ts.value, dts, tzobj, fold, reso=rep_reso
)

def to_julian_date(self) -> np.float64:
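The patch above derives the new resolution from whichever replaced component is finest, then refuses to drop below the Timestamp's existing `_creso`. A plain-Python sketch of that promotion rule (the `RESO_RANK` table and `replaced_unit` helper are illustrative names, not pandas internals):

```python
# Rank datetime units from coarsest to finest, mirroring NPY_DATETIMEUNIT order.
RESO_RANK = {"Y": 0, "M": 1, "D": 2, "h": 3, "m": 4, "s": 5, "ms": 6, "us": 7, "ns": 8}

FIELD_UNIT = {
    "year": "Y", "month": "M", "day": "D",
    "hour": "h", "minute": "m", "second": "s",
    "nanosecond": "ns",
}

def replaced_unit(current_unit, **components):
    """Unit a Timestamp should report after replace(), per the patch's logic."""
    reso = current_unit
    for field, value in components.items():
        if value is None:
            continue
        if field == "microsecond":
            # values above 999 need full microsecond resolution; smaller
            # ones fit in milliseconds (matches the microsecond branch above)
            unit = "us" if value > 999 else "ms"
        else:
            unit = FIELD_UNIT[field]
        # never downgrade below the Timestamp's existing resolution
        if RESO_RANK[unit] > RESO_RANK[reso]:
            reso = unit
    return reso
```

For example, `replaced_unit("s", nanosecond=123)` yields `"ns"`, matching the new test's expectation that `replace(nanosecond=...)` upgrades a second-resolution timestamp.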
1 change: 1 addition & 0 deletions pandas/core/base.py
@@ -924,6 +924,7 @@ def value_counts(
Returns
-------
Series
Series containing counts of unique values.

See Also
--------
15 changes: 9 additions & 6 deletions pandas/core/frame.py
@@ -7162,7 +7162,7 @@ def value_counts(
dropna: bool = True,
) -> Series:
"""
Return a Series containing the frequency of each distinct row in the Dataframe.
Return a Series containing the frequency of each distinct row in the DataFrame.

Parameters
----------
@@ -7175,13 +7175,14 @@
ascending : bool, default False
Sort in ascending order.
dropna : bool, default True
Don't include counts of rows that contain NA values.
Do not include counts of rows that contain NA values.

.. versionadded:: 1.3.0

Returns
-------
Series
Series containing the frequency of each distinct row in the DataFrame.

See Also
--------
@@ -7192,8 +7193,8 @@
The returned Series will have a MultiIndex with one level per input
column but an Index (non-multi) for a single label. By default, rows
that contain any NA values are omitted from the result. By default,
the resulting Series will be in descending order so that the first
element is the most frequently-occurring row.
the resulting Series will be sorted by frequencies in descending order so that
the first element is the most frequently-occurring row.

Examples
--------
@@ -9658,6 +9659,8 @@ def unstack(
Returns
-------
Series or DataFrame
If index is a MultiIndex: DataFrame with pivoted index labels as new
inner-most level column labels, else Series.

See Also
--------
@@ -11494,7 +11497,7 @@ def any(
**kwargs,
) -> Series | bool: ...

@doc(make_doc("any", ndim=2))
@doc(make_doc("any", ndim=1))
def any(
self,
*,
@@ -11540,7 +11543,7 @@ def all(
**kwargs,
) -> Series | bool: ...

@doc(make_doc("all", ndim=2))
@doc(make_doc("all", ndim=1))
def all(
self,
axis: Axis | None = 0,
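The reworded `DataFrame.value_counts` docstring can be sanity-checked with a quick example; this is a minimal illustration of the documented behavior, not part of the PR:

```python
import pandas as pd

# The result has one MultiIndex level per input column and is sorted by
# frequency in descending order, so the most frequent row comes first.
df = pd.DataFrame({"num_legs": [2, 4, 4, 6], "num_wings": [2, 0, 0, 0]})
counts = df.value_counts()

assert counts.index.nlevels == 2          # one level per input column
assert counts.iloc[0] == counts.max()     # descending by frequency
```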
20 changes: 13 additions & 7 deletions pandas/core/generic.py
@@ -10485,10 +10485,10 @@ def tz_localize(
nonexistent: TimeNonexistent = "raise",
) -> Self:
"""
Localize tz-naive index of a Series or DataFrame to target time zone.
Localize time zone naive index of a Series or DataFrame to target time zone.

This operation localizes the Index. To localize the values in a
timezone-naive Series, use :meth:`Series.dt.tz_localize`.
time zone naive Series, use :meth:`Series.dt.tz_localize`.

Parameters
----------
@@ -10548,13 +10548,19 @@ def tz_localize(
Returns
-------
{klass}
Same type as the input.
Same type as the input, with time zone naive or aware index, depending on
``tz``.

Raises
------
TypeError
If the TimeSeries is tz-aware and tz is not None.

See Also
--------
Series.dt.tz_localize: Localize the values in a time zone naive Series.
Timestamp.tz_localize: Localize the Timestamp to a timezone.

Examples
--------
Localize local times:
@@ -11712,7 +11718,7 @@ def last_valid_index(self) -> Hashable:
skipna : bool, default True
Exclude NA/null values when computing the result.
numeric_only : bool, default False
Include only float, int, boolean columns. Not implemented for Series.
Include only float, int, boolean columns.

{min_count}\
**kwargs
@@ -11881,9 +11887,9 @@ def last_valid_index(self) -> Hashable:

Returns
-------
{name1} or {name2}
If level is specified, then, {name2} is returned; otherwise, {name1}
is returned.
{name2} or {name1}
If axis=None, then a scalar boolean is returned.
Otherwise a Series is returned with index matching the index argument.

{see_also}
{examples}"""
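The clarified `tz_localize` docstring describes localizing the index rather than the values; a small sketch of that distinction (illustrative only, not part of the PR):

```python
import pandas as pd

# tz_localize acts on the (naive) index and returns the same type with a
# time zone aware index; the values themselves are untouched.
s = pd.Series([1, 2, 3], index=pd.date_range("2023-01-01", periods=3))
localized = s.tz_localize("UTC")

assert localized.index.tz is not None
assert list(localized) == [1, 2, 3]
```

Localizing the values of a Series instead goes through `Series.dt.tz_localize`, the relationship the new See Also entry points to.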
4 changes: 4 additions & 0 deletions pandas/core/series.py
@@ -4257,6 +4257,10 @@ def unstack(
DataFrame
Unstacked Series.

See Also
--------
DataFrame.unstack : Pivot the MultiIndex of a DataFrame.

Notes
-----
Reference :ref:`the user guide <reshaping.stacking>` for more examples.
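The new See Also entry names the pivot relationship between `Series.unstack` and `DataFrame.unstack`; a minimal illustration (not part of the PR):

```python
import pandas as pd

# Series.unstack pivots the inner MultiIndex level into columns,
# producing a DataFrame.
s = pd.Series(
    [1, 2, 3, 4],
    index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]]),
)
df = s.unstack()

assert df.shape == (2, 2)
assert df.loc["a", "y"] == 2
```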
8 changes: 1 addition & 7 deletions pandas/io/html.py
@@ -584,14 +584,8 @@ class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser):
:class:`pandas.io.html._HtmlFrameParser`.
"""

def __init__(self, *args, **kwargs) -> None:
super().__init__(*args, **kwargs)
from bs4 import SoupStrainer

self._strainer = SoupStrainer("table")

def _parse_tables(self, document, match, attrs):
element_name = self._strainer.name
element_name = "table"
tables = document.find_all(element_name, attrs=attrs)
if not tables:
raise ValueError("No tables found")
10 changes: 10 additions & 0 deletions pandas/tests/scalar/timestamp/methods/test_replace.py
@@ -189,3 +189,13 @@ def test_replace_preserves_fold(self, fold):
ts_replaced = ts.replace(second=1)

assert ts_replaced.fold == fold

def test_replace_unit(self):
# GH#57749
ts = Timestamp("2023-07-15 23:08:12")
ts1 = Timestamp("2023-07-15 23:08:12.134567")
ts2 = Timestamp("2023-07-15 23:08:12.134567123")
ts = ts.replace(microsecond=ts1.microsecond)
assert ts == ts1
ts = ts.replace(nanosecond=ts2.nanosecond)
assert ts == ts2
7 changes: 6 additions & 1 deletion pyproject.toml
@@ -152,6 +152,9 @@ setup = ['--vsenv'] # For Windows
skip = "cp36-* cp37-* cp38-* pp* *_i686 *_ppc64le *_s390x"
build-verbosity = "3"
environment = {LDFLAGS="-Wl,--strip-all"}
# TODO: remove this once numpy 2.0 proper releases
# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml
before-build = "pip install numpy==2.0.0rc1"
test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0"
test-command = """
PANDAS_CI='1' python -c 'import pandas as pd; \
@@ -160,7 +163,9 @@
"""

[tool.cibuildwheel.windows]
before-build = "pip install delvewheel"
# TODO: remove this once numpy 2.0 proper releases
# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml
before-build = "pip install delvewheel numpy==2.0.0rc1"
repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}"

[[tool.cibuildwheel.overrides]]