pandas-dev
diff --git a/‎.github/ISSUE_TEMPLATE/pdep_vote.yaml
Lines changed: 74 additions & 0 deletions b/‎.github/ISSUE_TEMPLATE/pdep_vote.yaml
Lines changed: 74 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 24 additions & 0 deletions b/‎asv_bench/benchmarks/frame_methods.py
Lines changed: 24 additions & 0 deletions
diff --git a/‎ci/code_checks.sh
Lines changed: 0 additions & 5 deletions b/‎ci/code_checks.sh
Lines changed: 0 additions & 5 deletions
diff --git a/‎doc/source/getting_started/install.rst
Lines changed: 2 additions & 0 deletions b/‎doc/source/getting_started/install.rst
Lines changed: 2 additions & 0 deletions
diff --git a/‎doc/source/getting_started/intro_tutorials/02_read_write.rst
Lines changed: 6 additions & 0 deletions b/‎doc/source/getting_started/intro_tutorials/02_read_write.rst
Lines changed: 6 additions & 0 deletions
diff --git a/‎doc/source/user_guide/missing_data.rst
Lines changed: 21 additions & 0 deletions b/‎doc/source/user_guide/missing_data.rst
Lines changed: 21 additions & 0 deletions
diff --git a/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 24 additions & 24 deletions b/‎doc/source/whatsnew/v3.0.0.rst
Lines changed: 24 additions & 24 deletions
diff --git a/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 5 additions & 7 deletions b/‎pandas/_libs/tslibs/offsets.pyx
Lines changed: 5 additions & 7 deletions
diff --git a/‎pandas/_libs/tslibs/timestamps.pyx
Lines changed: 10 additions & 1 deletion b/‎pandas/_libs/tslibs/timestamps.pyx
Lines changed: 10 additions & 1 deletion
@@ -0,0 +1,74 @@
+name: PDEP Vote
+description: Call for a vote on a PDEP
+title: "VOTE: "
+labels: [Vote]
+
+body:
+  - type: markdown
+    attributes:
+      value: >
+        As per [PDEP-1](https://pandas.pydata.org/pdeps/0001-purpose-and-guidelines.html), the following issue template should be used when a
+        maintainer has opened a PDEP discussion and is ready to call for a vote.
+  - type: checkboxes
+    attributes:
+      label: Locked issue
+      options:
+        - label: >
+            I locked this voting issue so that only voting members are able to cast their votes or
+            comment on this issue.
+          required: true
+  - type: input
+    id: PDEP-name
+    attributes:
+      label: PDEP number and title
+      placeholder: >
+        PDEP-1: Purpose and guidelines
+    validations:
+      required: true
+  - type: input
+    id: PDEP-link
+    attributes:
+      label: Pull request with discussion
+      description: e.g. https://github.com/pandas-dev/pandas/pull/47444
+    validations:
+      required: true
+  - type: input
+    id: PDEP-rendered-link
+    attributes:
+      label: Rendered PDEP for easy reading
+      description: e.g. https://github.com/pandas-dev/pandas/pull/47444/files?short_path=7c449e6#diff-7c449e698132205b235c501f7e47ebba38da4d2b7f9492c98f16745dba787041
+    validations:
+      required: true
+  - type: input
+    id: PDEP-number-of-discussion-participants
+    attributes:
+      label: Discussion participants
+      description: >
+        You may find it useful to list or total the number of participating members in the
+        PDEP discussion PR. This would be the maximum possible number of disapprove votes.
+      placeholder: >
+        14 voting members participated in the PR discussion thus far.
+  - type: input
+    id: PDEP-vote-end
+    attributes:
+      label: Voting will close in 15 days.
+      description: The voting period end date. ('Voting will close in 15 days.' will be automatically written)
+  - type: markdown
+    attributes:
+      value: ---
+  - type: textarea
+    id: Vote
+    attributes:
+      label: Vote
+      value: |
+        Cast your vote in a comment below.
+        * +1: approve.
+        * 0: abstain.
+            * Reason: A one sentence reason is required.
+        * -1: disapprove
+            * Reason: A one sentence reason is required.
+        A disapprove vote requires prior participation in the linked discussion PR.
+
+        @pandas-dev/pandas-core
+    validations:
+      required: true
@@ -862,4 +862,28 @@ def time_last_valid_index(self, dtype):
         self.df.last_valid_index()
 
 
+class Update:  # asv timings for DataFrame.update
+    def setup(self):
+        rng = np.random.default_rng()  # unseeded generator
+        self.df = DataFrame(rng.uniform(size=(1_000_000, 10)))
+
+        idx = rng.choice(range(1_000_000), size=1_000_000, replace=False)
+        self.df_random = DataFrame(self.df, index=idx)  # index drawn in random order
+
+        idx = rng.choice(range(1_000_000), size=100_000, replace=False)
+        cols = rng.choice(range(10), size=2, replace=False)
+        self.df_sample = DataFrame(  # 100_000 sampled labels, 2 of the 10 columns
+            rng.uniform(size=(100_000, 2)), index=idx, columns=cols
+        )
+
+    def time_to_update_big_frame_small_arg(self):  # large caller, small argument
+        self.df.update(self.df_sample)
+
+    def time_to_update_random_indices(self):  # caller with randomly ordered index
+        self.df_random.update(self.df_sample)
+
+    def time_to_update_small_frame_big_arg(self):  # small caller, large argument
+        self.df_sample.update(self.df)
+
+
 from .pandas_vb_common import setup  # noqa: F401 isort:skip
@@ -153,11 +153,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.DatetimeTZDtype SA01" \
         -i "pandas.DatetimeTZDtype.tz SA01" \
         -i "pandas.DatetimeTZDtype.unit SA01" \
-        -i "pandas.ExcelFile PR01,SA01" \
-        -i "pandas.ExcelFile.parse PR01,SA01" \
-        -i "pandas.ExcelWriter SA01" \
-        -i "pandas.Float32Dtype SA01" \
-        -i "pandas.Float64Dtype SA01" \
         -i "pandas.Grouper PR02,SA01" \
         -i "pandas.HDFStore.append PR01,SA01" \
         -i "pandas.HDFStore.get SA01" \
 
@@ -269,6 +269,8 @@ SciPy                     1.10.0             computation     Miscellaneous stati
 xarray                    2022.12.0          computation     pandas-like API for N-dimensional data
 ========================= ================== =============== =============================================================
 
+.. _install.excel_dependencies:
+
 Excel files
 ^^^^^^^^^^^
 
 
@@ -111,6 +111,12 @@ strings (``object``).
 
 My colleague requested the Titanic data as a spreadsheet.
 
+.. note::
+    If you want to use :meth:`~pandas.DataFrame.to_excel` and :func:`~pandas.read_excel`,
+    you need to install an Excel reader as outlined in the
+    :ref:`Excel files <install.excel_dependencies>` section of the
+    installation documentation.
+
 .. ipython:: python
 
     titanic.to_excel("titanic.xlsx", sheet_name="passengers", index=False)
 
@@ -386,6 +386,27 @@ Replace NA with a scalar value
    df
    df.fillna(0)
 
+When the data has object dtype, you can control what type of NA values are present.
+
+.. ipython:: python
+
+   df = pd.DataFrame({"a": [pd.NA, np.nan, None]}, dtype=object)
+   df
+   df.fillna(None)
+   df.fillna(np.nan)
+   df.fillna(pd.NA)
+
+However, when the dtype is not object, these will all be replaced with the proper NA value for the dtype.
+
+.. ipython:: python
+
+   data = {"np": [1.0, np.nan, np.nan, 2], "arrow": pd.array([1.0, pd.NA, pd.NA, 2], dtype="float64[pyarrow]")}
+   df = pd.DataFrame(data)
+   df
+   df.fillna(None)
+   df.fillna(np.nan)
+   df.fillna(pd.NA)
+
 Fill gaps forward or backward
 
 .. ipython:: python
 
@@ -28,6 +28,7 @@ enhancement2
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+- :class:`pandas.api.typing.FrozenList` is available for typing the outputs of :attr:`MultiIndex.names`, :attr:`MultiIndex.codes` and :attr:`MultiIndex.levels` (:issue:`58237`)
 - :func:`DataFrame.to_excel` now raises an ``UserWarning`` when the character count in a cell exceeds Excel's limitation of 32767 characters (:issue:`56954`)
 - :func:`read_stata` now returns ``datetime64`` resolutions better matching those natively stored in the stata format (:issue:`55642`)
 - :meth:`Styler.set_tooltips` provides alternative method to storing tooltips by using title attribute of td elements. (:issue:`56981`)
@@ -37,6 +38,7 @@ Other enhancements
 - Users can globally disable any ``PerformanceWarning`` by setting the option ``mode.performance_warnings`` to ``False`` (:issue:`56920`)
 - :meth:`Styler.format_index_names` can now be used to format the index and column names (:issue:`48936` and :issue:`47489`)
 - :meth:`DataFrame.cummin`, :meth:`DataFrame.cummax`, :meth:`DataFrame.cumprod` and :meth:`DataFrame.cumsum` methods now have a ``numeric_only`` parameter (:issue:`53072`)
+- :meth:`DataFrame.fillna` and :meth:`Series.fillna` can now accept ``value=None``; for non-object dtype the corresponding NA value will be used (:issue:`57723`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.notable_bug_fixes:
@@ -208,6 +210,8 @@ Removal of prior version deprecations/changes
 - :meth:`SeriesGroupBy.agg` no longer pins the name of the group to the input passed to the provided ``func`` (:issue:`51703`)
 - All arguments except ``name`` in :meth:`Index.rename` are now keyword only (:issue:`56493`)
 - All arguments except the first ``path``-like argument in IO writers are now keyword only (:issue:`54229`)
+- Disallow logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``); wrap the objects in :class:`Series`, :class:`Index`, or ``np.array`` first instead (:issue:`52264`)
+- Disallow automatic casting to object in :class:`Series` logical operations (``&``, ``^``, ``|``) between series with mismatched indexes and dtypes other than ``object`` or ``bool`` (:issue:`52538`)
 - Disallow calling :meth:`Series.replace` or :meth:`DataFrame.replace` without a ``value`` and with non-dict-like ``to_replace`` (:issue:`33302`)
 - Disallow constructing a :class:`arrays.SparseArray` with scalar data (:issue:`53039`)
 - Disallow non-standard (``np.ndarray``, :class:`Index`, :class:`ExtensionArray`, or :class:`Series`) to :func:`isin`, :func:`unique`, :func:`factorize` (:issue:`52986`)
@@ -327,6 +331,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :meth:`to_hdf` by avoiding unnecessary reopenings of the HDF5 file, speeding up data addition to files with a very large number of groups. (:issue:`58248`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
@@ -336,19 +341,6 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-- Fixed bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
-- Fixed bug in :meth:`.DataFrameGroupBy.median` where nat values gave an incorrect result. (:issue:`57926`)
-- Fixed bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
-- Fixed bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
-- Fixed bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
-- Fixed bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
-- Fixed bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
-- Fixed bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
-- Fixed bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
-- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
-- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
-- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
-- Fixed bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 
 Categorical
 ^^^^^^^^^^^
@@ -357,14 +349,15 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
+- Bug in :class:`Timestamp` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``tzinfo`` or data (:issue:`48688`)
 - Bug in :func:`date_range` where the last valid timestamp would sometimes not be produced (:issue:`56134`)
 - Bug in :func:`date_range` where using a negative frequency value would not include all points between the start and end values (:issue:`56382`)
--
+- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
 
 Timedelta
 ^^^^^^^^^
 - Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`)
--
+- Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`)
 
 Timezones
 ^^^^^^^^^
@@ -378,6 +371,7 @@ Numeric
 
 Conversion
 ^^^^^^^^^^
+- Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`)
 - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`)
 - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`)
 
@@ -393,7 +387,7 @@ Interval
 
 Indexing
 ^^^^^^^^
--
+- Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`)
 -
 
 Missing
@@ -408,10 +402,10 @@ MultiIndex
 
 I/O
 ^^^
+- Bug in :class:`DataFrame` and :class:`Series` ``repr`` of :py:class:`collections.abc.Mapping` elements. (:issue:`57915`)
 - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`)
-- Now all ``Mapping`` s are pretty printed correctly. Before only literal ``dict`` s were. (:issue:`57915`)
--
--
+- Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
+- Bug in :meth:`read_csv` raising ``TypeError`` when ``index_col`` is specified and ``na_values`` is a dict containing the key ``None``. (:issue:`57547`)
 
 Period
 ^^^^^^
@@ -426,23 +420,25 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 - Bug in :meth:`.DataFrameGroupBy.groups` and :meth:`.SeriesGroupby.groups` that would not respect groupby argument ``dropna`` (:issue:`55919`)
+- Bug in :meth:`.DataFrameGroupBy.median` where ``NaT`` values gave an incorrect result. (:issue:`57926`)
 - Bug in :meth:`.DataFrameGroupBy.quantile` when ``interpolation="nearest"`` is inconsistent with :meth:`DataFrame.quantile` (:issue:`47942`)
 - Bug in :meth:`DataFrame.ewm` and :meth:`Series.ewm` when passed ``times`` and aggregation functions other than mean (:issue:`51695`)
--
+- Bug in :meth:`DataFrameGroupBy.apply` that was returning a completely empty DataFrame when all return values of ``func`` were ``None`` instead of returning an empty DataFrame with the original columns and dtypes. (:issue:`57775`)
+
 
 Reshaping
 ^^^^^^^^^
--
+- Bug in :meth:`DataFrame.join` inconsistently setting result index name (:issue:`55815`)
 -
 
 Sparse
 ^^^^^^
--
+- Bug in :class:`SparseDtype` for equal comparison with na fill value. (:issue:`54770`)
 -
 
 ExtensionArray
 ^^^^^^^^^^^^^^
-- Fixed bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
+- Bug in :meth:`api.types.is_datetime64_any_dtype` where a custom :class:`ExtensionDtype` would return ``False`` for array-likes (:issue:`57055`)
 -
 
 Styler
@@ -452,11 +448,15 @@ Styler
 Other
 ^^^^^
 - Bug in :class:`DataFrame` when passing a ``dict`` with a NA scalar and ``columns`` that would always return ``np.nan`` (:issue:`57205`)
-- Bug in :func:`tseries.api.guess_datetime_format` would fail to infer time format when "%Y" == "%H%M" (:issue:`57452`)
 - Bug in :func:`unique` on :class:`Index` not always returning :class:`Index` (:issue:`57043`)
+- Bug in :meth:`DataFrame.eval` and :meth:`DataFrame.query` which caused an exception when using NumPy attributes via ``@`` notation, e.g., ``df.eval("@np.floor(a)")``. (:issue:`58041`)
 - Bug in :meth:`DataFrame.sort_index` when passing ``axis="columns"`` and ``ignore_index=True`` and ``ascending=False`` not returning a :class:`RangeIndex` columns (:issue:`57293`)
+- Bug in :meth:`DataFrame.transform` that was returning the wrong order unless the index was monotonically increasing. (:issue:`57069`)
 - Bug in :meth:`DataFrame.where` where using a non-bool type array in the function would return a ``ValueError`` instead of a ``TypeError`` (:issue:`56330`)
 - Bug in :meth:`Index.sort_values` when passing a key function that turns values into tuples, e.g. ``key=natsort.natsort_key``, would raise ``TypeError`` (:issue:`56081`)
+- Bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
+- Bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
+- Bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
 - Bug in Dataframe Interchange Protocol implementation was returning incorrect results for data buffers' associated dtype, for string and datetime columns (:issue:`54781`)
 
 .. ***DO NOT USE THIS SECTION***
 
@@ -219,8 +219,7 @@ cdef _get_calendar(weekmask, holidays, calendar):
         holidays = holidays + calendar.holidays().tolist()
     except AttributeError:
         pass
-    holidays = [_to_dt64D(dt) for dt in holidays]
-    holidays = tuple(sorted(holidays))
+    holidays = tuple(sorted(_to_dt64D(dt) for dt in holidays))
 
     kwargs = {"weekmask": weekmask}
     if holidays:
@@ -419,11 +418,10 @@ cdef class BaseOffset:
 
         if "holidays" in all_paras and not all_paras["holidays"]:
             all_paras.pop("holidays")
-        exclude = ["kwds", "name", "calendar"]
-        attrs = [(k, v) for k, v in all_paras.items()
-                 if (k not in exclude) and (k[0] != "_")]
-        attrs = sorted(set(attrs))
-        params = tuple([str(type(self))] + attrs)
+        exclude = {"kwds", "name", "calendar"}
+        attrs = {(k, v) for k, v in all_paras.items()
+                 if (k not in exclude) and (k[0] != "_")}
+        params = tuple([str(type(self))] + sorted(attrs))
         return params
 
     @property
 
@@ -1751,7 +1751,7 @@ class Timestamp(_Timestamp):
         tzinfo_type tzinfo=None,
         *,
         nanosecond=None,
-        tz=None,
+        tz=_no_input,
         unit=None,
         fold=None,
     ):
@@ -1783,6 +1783,10 @@ class Timestamp(_Timestamp):
         _date_attributes = [year, month, day, hour, minute, second,
                             microsecond, nanosecond]
 
+        explicit_tz_none = tz is None
+        if tz is _no_input:
+            tz = None
+
         if tzinfo is not None:
             # GH#17690 tzinfo must be a datetime.tzinfo object, ensured
             #  by the cython annotation.
@@ -1883,6 +1887,11 @@ class Timestamp(_Timestamp):
         if ts.value == NPY_NAT:
             return NaT
 
+        if ts.tzinfo is not None and explicit_tz_none:
+            raise ValueError(
+                "Passed data is timezone-aware, incompatible with 'tz=None'."
+            )
+
         return create_timestamp_from_ts(ts.value, ts.dts, ts.tzinfo, ts.fold, ts.creso)
 
     def _round(self, freq, mode, ambiguous="raise", nonexistent="raise"):