diff --git a/.circleci/config.yml b/.circleci/config.yml index ea93575ac9430..6f134c9a7a7bd 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -72,10 +72,6 @@ jobs: no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that command: | pip3 install cibuildwheel==2.15.0 - # When this is a nightly wheel build, allow picking up NumPy 2.0 dev wheels: - if [[ "$IS_SCHEDULE_DISPATCH" == "true" || "$IS_PUSH" != 'true' ]]; then - export CIBW_ENVIRONMENT="PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple" - fi cibuildwheel --prerelease-pythons --output-dir wheelhouse environment: diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 470c044d2e99e..d6cfd3b0ad257 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -148,18 +148,6 @@ jobs: CIBW_PRERELEASE_PYTHONS: True CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} - - name: Build nightly wheels (with NumPy pre-release) - if: ${{ (env.IS_SCHEDULE_DISPATCH == 'true' && env.IS_PUSH != 'true') }} - uses: pypa/cibuildwheel@v2.17.0 - with: - package-dir: ./dist/${{ startsWith(matrix.buildplat[1], 'macosx') && env.sdist_name || needs.build_sdist.outputs.sdist_file }} - env: - # The nightly wheels should be build witht he NumPy 2.0 pre-releases - # which requires the additional URL. - CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple - CIBW_PRERELEASE_PYTHONS: True - CIBW_BUILD: ${{ matrix.python[0] }}-${{ matrix.buildplat[1] }} - - name: Set up Python uses: mamba-org/setup-micromamba@v1 with: diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 0c4e6641444f1..f4ea0a1e2bc28 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -84,7 +84,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.assign SA01" \ -i "pandas.DataFrame.at_time PR01" \ -i "pandas.DataFrame.axes SA01" \ - -i "pandas.DataFrame.backfill PR01,SA01" \ -i "pandas.DataFrame.bfill SA01" \ -i "pandas.DataFrame.columns SA01" \ -i "pandas.DataFrame.copy SA01" \ @@ -104,7 +103,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.mean RT03,SA01" \ -i "pandas.DataFrame.median RT03,SA01" \ -i "pandas.DataFrame.min RT03" \ - -i "pandas.DataFrame.pad PR01,SA01" \ -i "pandas.DataFrame.plot PR02,SA01" \ -i "pandas.DataFrame.pop SA01" \ -i "pandas.DataFrame.prod RT03" \ @@ -119,7 +117,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.sparse.to_dense SA01" \ -i "pandas.DataFrame.std PR01,RT03,SA01" \ -i "pandas.DataFrame.sum RT03" \ - -i "pandas.DataFrame.swapaxes PR01,SA01" \ -i "pandas.DataFrame.swaplevel SA01" \ -i "pandas.DataFrame.to_feather SA01" \ -i "pandas.DataFrame.to_markdown SA01" \ @@ -127,9 +124,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.DataFrame.to_period SA01" \ -i "pandas.DataFrame.to_timestamp SA01" \ -i "pandas.DataFrame.tz_convert SA01" \ - -i "pandas.DataFrame.tz_localize SA01" \ - -i "pandas.DataFrame.unstack RT03" \ - -i "pandas.DataFrame.value_counts RT03" \ -i "pandas.DataFrame.var PR01,RT03,SA01" \ -i "pandas.DataFrame.where RT03" \ -i "pandas.DatetimeIndex.ceil SA01" \ @@ -226,7 +220,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Index.to_list RT03" \ -i "pandas.Index.union PR07,RT03,SA01" \ -i "pandas.Index.unique RT03" \ - -i "pandas.Index.value_counts RT03" \ -i "pandas.Index.view GL08" \ -i "pandas.Int16Dtype SA01" \ -i "pandas.Int32Dtype SA01" \ @@ -482,10 +475,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then -i "pandas.Series.to_timestamp RT03,SA01" \ -i "pandas.Series.truediv PR07" \ -i "pandas.Series.tz_convert SA01" \ - -i "pandas.Series.tz_localize SA01" \ - -i "pandas.Series.unstack SA01" \ -i "pandas.Series.update PR07,SA01" \ - -i "pandas.Series.value_counts RT03" \ -i "pandas.Series.var PR01,RT03,SA01" \ -i "pandas.Series.where RT03" \ -i "pandas.SparseDtype SA01" \ diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 15e85d0f90c5e..1f220c51d436c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -338,6 +338,7 @@ Bug fixes - Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`) - Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`) - Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`) +- Fixed bug in :meth:`Timestamp.replace` which was not reflecting the resulting changes in :meth:`Timestamp.unit`. (:issue:`57749`) Categorical ^^^^^^^^^^^ diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index d4cd90613ca5b..4c87132fbeeda 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -2439,10 +2439,12 @@ default 'raise' datetime ts_input tzinfo_type tzobj _TSObject ts + NPY_DATETIMEUNIT rep_reso # set to naive if needed tzobj = self.tzinfo value = self._value + rep_reso = self._creso # GH 37610. Preserve fold when replacing. if fold is None: @@ -2466,40 +2468,54 @@ default 'raise' if year is not None: dts.year = validate("year", year) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_Y if month is not None: dts.month = validate("month", month) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_M if day is not None: dts.day = validate("day", day) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_D if hour is not None: dts.hour = validate("hour", hour) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_h if minute is not None: dts.min = validate("minute", minute) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_m if second is not None: dts.sec = validate("second", second) + rep_reso = NPY_DATETIMEUNIT.NPY_FR_s if microsecond is not None: dts.us = validate("microsecond", microsecond) + if microsecond > 999: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_us + else: + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ms if nanosecond is not None: dts.ps = validate("nanosecond", nanosecond) * 1000 + rep_reso = NPY_DATETIMEUNIT.NPY_FR_ns if tzinfo is not object: tzobj = tzinfo + if rep_reso < self._creso: + rep_reso = self._creso + # reconstruct & check bounds if tzobj is None: # We can avoid going through pydatetime paths, which is robust # to datetimes outside of pydatetime range. ts = _TSObject() try: - ts.value = npy_datetimestruct_to_datetime(self._creso, &dts) + ts.value = npy_datetimestruct_to_datetime(rep_reso, &dts) except OverflowError as err: fmt = dts_to_iso_string(&dts) raise OutOfBoundsDatetime( f"Out of bounds timestamp: {fmt} with frequency '{self.unit}'" ) from err ts.dts = dts - ts.creso = self._creso + ts.creso = rep_reso ts.fold = fold return create_timestamp_from_ts( - ts.value, dts, tzobj, fold, reso=self._creso + ts.value, dts, tzobj, fold, reso=rep_reso ) elif tzobj is not None and treat_tz_as_pytz(tzobj): @@ -2518,10 +2534,10 @@ default 'raise' ts_input = datetime(**kwargs) ts = convert_datetime_to_tsobject( - ts_input, tzobj, nanos=dts.ps // 1000, reso=self._creso + ts_input, tzobj, nanos=dts.ps // 1000, reso=rep_reso ) return create_timestamp_from_ts( - ts.value, dts, tzobj, fold, reso=self._creso + ts.value, dts, tzobj, fold, reso=rep_reso ) def to_julian_date(self) -> np.float64: diff --git a/pandas/core/base.py b/pandas/core/base.py index 263265701691b..f923106e967b7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -924,6 +924,7 @@ def value_counts( Returns ------- Series + Series containing counts of unique values. See Also -------- diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 50a93994dc76b..4715164a4208a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7162,7 +7162,7 @@ def value_counts( dropna: bool = True, ) -> Series: """ - Return a Series containing the frequency of each distinct row in the Dataframe. + Return a Series containing the frequency of each distinct row in the DataFrame. Parameters ---------- @@ -7175,13 +7175,14 @@ def value_counts( ascending : bool, default False Sort in ascending order. dropna : bool, default True - Don't include counts of rows that contain NA values. + Do not include counts of rows that contain NA values. .. versionadded:: 1.3.0 Returns ------- Series + Series containing the frequency of each distinct row in the DataFrame. See Also -------- @@ -7192,8 +7193,8 @@ def value_counts( The returned Series will have a MultiIndex with one level per input column but an Index (non-multi) for a single label. By default, rows that contain any NA values are omitted from the result. By default, - the resulting Series will be in descending order so that the first - element is the most frequently-occurring row. + the resulting Series will be sorted by frequencies in descending order so that + the first element is the most frequently-occurring row. Examples -------- @@ -9658,6 +9659,8 @@ def unstack( Returns ------- Series or DataFrame + If index is a MultiIndex: DataFrame with pivoted index labels as new + inner-most level column labels, else Series. See Also -------- @@ -11494,7 +11497,7 @@ def any( **kwargs, ) -> Series | bool: ... - @doc(make_doc("any", ndim=2)) + @doc(make_doc("any", ndim=1)) def any( self, *, @@ -11540,7 +11543,7 @@ def all( **kwargs, ) -> Series | bool: ... - @doc(make_doc("all", ndim=2)) + @doc(make_doc("all", ndim=1)) def all( self, axis: Axis | None = 0, diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 858d2ba82a969..bfee4ced2b3d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10485,10 +10485,10 @@ def tz_localize( nonexistent: TimeNonexistent = "raise", ) -> Self: """ - Localize tz-naive index of a Series or DataFrame to target time zone. + Localize time zone naive index of a Series or DataFrame to target time zone. This operation localizes the Index. To localize the values in a - timezone-naive Series, use :meth:`Series.dt.tz_localize`. + time zone naive Series, use :meth:`Series.dt.tz_localize`. Parameters ---------- @@ -10548,13 +10548,19 @@ def tz_localize( Returns ------- {klass} - Same type as the input. + Same type as the input, with time zone naive or aware index, depending on + ``tz``. Raises ------ TypeError If the TimeSeries is tz-aware and tz is not None. + See Also + -------- + Series.dt.tz_localize: Localize the values in a time zone naive Series. + Timestamp.tz_localize: Localize the Timestamp to a timezone. + Examples -------- Localize local times: @@ -11712,7 +11718,7 @@ def last_valid_index(self) -> Hashable: skipna : bool, default True Exclude NA/null values when computing the result. numeric_only : bool, default False - Include only float, int, boolean columns. Not implemented for Series. + Include only float, int, boolean columns. {min_count}\ **kwargs @@ -11881,9 +11887,9 @@ def last_valid_index(self) -> Hashable: Returns ------- -{name1} or {name2} - If level is specified, then, {name2} is returned; otherwise, {name1} - is returned. +{name2} or {name1} + If axis=None, then a scalar boolean is returned. + Otherwise a Series is returned with index matching the index argument. {see_also} {examples}""" diff --git a/pandas/core/series.py b/pandas/core/series.py index b0dc05fce7913..843788273a6ef 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4257,6 +4257,10 @@ def unstack( DataFrame Unstacked Series. + See Also + -------- + DataFrame.unstack : Pivot the MultiIndex of a DataFrame. + Notes ----- Reference :ref:`the user guide ` for more examples. diff --git a/pandas/io/html.py b/pandas/io/html.py index b4f6a5508726b..42f5266e7649b 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -584,14 +584,8 @@ class _BeautifulSoupHtml5LibFrameParser(_HtmlFrameParser): :class:`pandas.io.html._HtmlFrameParser`. """ - def __init__(self, *args, **kwargs) -> None: - super().__init__(*args, **kwargs) - from bs4 import SoupStrainer - - self._strainer = SoupStrainer("table") - def _parse_tables(self, document, match, attrs): - element_name = self._strainer.name + element_name = "table" tables = document.find_all(element_name, attrs=attrs) if not tables: raise ValueError("No tables found") diff --git a/pandas/tests/scalar/timestamp/methods/test_replace.py b/pandas/tests/scalar/timestamp/methods/test_replace.py index d67de79a8dd10..70ab1a7580bda 100644 --- a/pandas/tests/scalar/timestamp/methods/test_replace.py +++ b/pandas/tests/scalar/timestamp/methods/test_replace.py @@ -189,3 +189,13 @@ def test_replace_preserves_fold(self, fold): ts_replaced = ts.replace(second=1) assert ts_replaced.fold == fold + + def test_replace_unit(self): + # GH#57749 + ts = Timestamp("2023-07-15 23:08:12") + ts1 = Timestamp("2023-07-15 23:08:12.134567") + ts2 = Timestamp("2023-07-15 23:08:12.134567123") + ts = ts.replace(microsecond=ts1.microsecond) + assert ts == ts1 + ts = ts.replace(nanosecond=ts2.nanosecond) + assert ts == ts2 diff --git a/pyproject.toml b/pyproject.toml index 5f5b013ca8461..259d003de92d5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -152,6 +152,9 @@ setup = ['--vsenv'] # For Windows skip = "cp36-* cp37-* cp38-* pp* *_i686 *_ppc64le *_s390x" build-verbosity = "3" environment = {LDFLAGS="-Wl,--strip-all"} +# TODO: remove this once numpy 2.0 proper releases +# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml +before-build = "pip install numpy==2.0.0rc1" test-requires = "hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0" test-command = """ PANDAS_CI='1' python -c 'import pandas as pd; \ @@ -160,7 +163,9 @@ test-command = """ """ [tool.cibuildwheel.windows] -before-build = "pip install delvewheel" +# TODO: remove this once numpy 2.0 proper releases +# and specify numpy 2.0 as a dependency in [build-system] requires in pyproject.toml +before-build = "pip install delvewheel numpy==2.0.0rc1" repair-wheel-command = "delvewheel repair -w {dest_dir} {wheel}" [[tool.cibuildwheel.overrides]]