pandas-dev
diff --git a/.github/actions/build_pandas/action.yml
+7 b/.github/actions/build_pandas/action.yml
+7
diff --git a/ci/code_checks.sh
+1-10 b/ci/code_checks.sh
+1-10
diff --git a/doc/source/development/contributing_codebase.rst
+1-1 b/doc/source/development/contributing_codebase.rst
+1-1
diff --git a/doc/source/reference/extensions.rst
+1 b/doc/source/reference/extensions.rst
+1
diff --git a/doc/source/user_guide/io.rst
+1-1 b/doc/source/user_guide/io.rst
+1-1
diff --git a/doc/source/whatsnew/v0.10.0.rst
+1-1 b/doc/source/whatsnew/v0.10.0.rst
+1-1
diff --git a/doc/source/whatsnew/v0.24.0.rst
+2-2 b/doc/source/whatsnew/v0.24.0.rst
+2-2
diff --git a/doc/source/whatsnew/v2.0.3.rst
+1-3 b/doc/source/whatsnew/v2.0.3.rst
+1-3
diff --git a/doc/source/whatsnew/v2.1.0.rst
+7-3 b/doc/source/whatsnew/v2.1.0.rst
+7-3
diff --git a/pandas/_libs/parsers.pyi
+1 b/pandas/_libs/parsers.pyi
+1
diff --git a/pandas/_libs/parsers.pyx
+3-1 b/pandas/_libs/parsers.pyx
+3-1
diff --git a/pandas/_typing.py
+20 b/pandas/_typing.py
+20
diff --git a/pandas/core/arrays/base.py
+26 b/pandas/core/arrays/base.py
+26
diff --git a/pandas/core/arrays/datetimelike.py
+6-5 b/pandas/core/arrays/datetimelike.py
+6-5
diff --git a/pandas/core/arrays/numpy_.py
+6-5 b/pandas/core/arrays/numpy_.py
+6-5
@@ -14,6 +14,13 @@ runs:
         micromamba list
       shell: bash -el {0}
 
+    - name: Uninstall existing Pandas installation
+      run: |
+        if pip list | grep -q ^pandas; then
+          pip uninstall -y pandas || true
+        fi
+      shell: bash -el {0}
+
     - name: Build Pandas
       run: |
         if [[ ${{ inputs.editable }} == "true" ]]; then
 
@@ -105,12 +105,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.errors.UnsupportedFunctionCall \
         pandas.test \
         pandas.NaT \
-        pandas.read_clipboard \
-        pandas.ExcelFile \
-        pandas.ExcelFile.parse \
         pandas.io.formats.style.Styler.to_html \
-        pandas.HDFStore.groups \
-        pandas.HDFStore.walk \
         pandas.read_feather \
         pandas.DataFrame.to_feather \
         pandas.read_parquet \
@@ -123,11 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.io.stata.StataReader.value_labels \
         pandas.io.stata.StataReader.variable_labels \
         pandas.io.stata.StataWriter.write_file \
-        pandas.core.resample.Resampler.__iter__ \
-        pandas.core.resample.Resampler.groups \
-        pandas.core.resample.Resampler.indices \
-        pandas.core.resample.Resampler.get_group \
-        pandas.core.resample.Resampler.ffill \
         pandas.core.resample.Resampler.asfreq \
         pandas.core.resample.Resampler.count \
         pandas.core.resample.Resampler.nunique \
@@ -241,6 +231,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         pandas.api.extensions.ExtensionArray.factorize \
         pandas.api.extensions.ExtensionArray.fillna \
         pandas.api.extensions.ExtensionArray.insert \
+        pandas.api.extensions.ExtensionArray.interpolate \
         pandas.api.extensions.ExtensionArray.isin \
         pandas.api.extensions.ExtensionArray.isna \
         pandas.api.extensions.ExtensionArray.ravel \
 
@@ -861,7 +861,7 @@ performance regressions. pandas is in the process of migrating to
 `asv benchmarks <https://github.com/airspeed-velocity/asv>`__
 to enable easy monitoring of the performance of critical pandas operations.
 These benchmarks are all found in the ``pandas/asv_bench`` directory, and the
-test results can be found `here <https://pandas.pydata.org/speed/pandas/>`__.
+test results can be found `here <https://asv-runner.github.io/asv-collection/pandas>`__.
 
 To use all features of asv, you will need either ``conda`` or
 ``virtualenv``. For more details please check the `asv installation
 
@@ -52,6 +52,7 @@ objects.
       api.extensions.ExtensionArray.factorize
       api.extensions.ExtensionArray.fillna
       api.extensions.ExtensionArray.insert
+      api.extensions.ExtensionArray.interpolate
       api.extensions.ExtensionArray.isin
       api.extensions.ExtensionArray.isna
       api.extensions.ExtensionArray.ravel
 
@@ -2664,7 +2664,7 @@ Links can be extracted from cells along with the text using ``extract_links="all
     """
 
     df = pd.read_html(
-        html_table,
+        StringIO(html_table),
         extract_links="all"
     )[0]
     df
 
@@ -181,7 +181,7 @@ labeled the aggregated group with the end of the interval: the next day).
   ``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
 
 .. ipython:: python
-   :okwarning:
+   :okexcept:
 
     import io
 
 
@@ -286,7 +286,7 @@ value. (:issue:`17054`)
 
 .. ipython:: python
 
-    result = pd.read_html("""
+    result = pd.read_html(StringIO("""
       <table>
         <thead>
           <tr>
@@ -298,7 +298,7 @@ value. (:issue:`17054`)
             <td colspan="2">1</td><td>2</td>
           </tr>
         </tbody>
-      </table>""")
+      </table>"""))
 
 *Previous behavior*:
 
 
@@ -1,6 +1,6 @@
 .. _whatsnew_203:
 
-What's new in 2.0.3 (July XX, 2023)
+What's new in 2.0.3 (June 28, 2023)
 -----------------------------------
 
 These are the changes in pandas 2.0.3. See :ref:`release` for a full changelog
@@ -17,7 +17,6 @@ Fixed regressions
 - Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
 - Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
 - For external ExtensionArray implementations, restored the default use of ``_values_for_factorize`` for hashing arrays (:issue:`53475`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_203.bug_fixes:
@@ -38,7 +37,6 @@ Bug fixes
 
 Other
 ~~~~~
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_203.contributors:
 
@@ -142,20 +142,21 @@ Other enhancements
 - Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
 - Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
 - :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
-- :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
 - :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
 - :meth:`DataFrameGroupby.agg` and :meth:`DataFrameGroupby.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
 - :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
 - :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`)
 - :meth:`SeriesGroupby.agg` and :meth:`DataFrameGroupby.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
 - :meth:`SeriesGroupby.transform` and :meth:`DataFrameGroupby.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`)
+- Added :meth:`api.extensions.ExtensionArray.interpolate`, used by :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`53659`)
 - Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
 - Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`).
 - Groupby aggregations (such as :meth:`DataFrameGroupby.sum`) now can preserve the dtype of the input instead of casting to ``float64`` (:issue:`44952`)
 - Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`)
 - Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)
 - Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
 - Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
+-
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_210.notable_bug_fixes:
@@ -293,6 +294,7 @@ Deprecations
 - Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)
 - Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
 - Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
+- Deprecated the ``downcast`` keyword in :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`Series.ffill`, :meth:`DataFrame.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.bfill` (:issue:`40988`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
 - Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
 - Deprecated the behavior of :func:`concat` with both ``len(keys) != len(objs)``, in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`)
@@ -330,13 +332,13 @@ Deprecations
 - Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
 - Deprecated :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`53631`)
 - Deprecated :meth:`Series.last` and :meth:`DataFrame.last` (please create a mask and filter using ``.loc`` instead) (:issue:`53692`)
-- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
 - Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
 - Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
 - Deprecated bytes input to :func:`read_excel`. To read a file path, use a string or path-like object. (:issue:`53767`)
 - Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
 - Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
 - Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
+- Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`)
 - Deprecated option "mode.use_inf_as_na", convert inf entries to ``NaN`` before instead (:issue:`51684`)
 - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
 - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
@@ -541,7 +543,8 @@ Reshaping
 - Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
 - Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
 - Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
-- Bug in :meth:`DataFrame.stack` sorting columns lexicographically (:issue:`53786`)
+- Bug in :meth:`DataFrame.stack` sorting columns lexicographically in rare cases (:issue:`53786`)
+- Bug in :meth:`DataFrame.stack` sorting index lexicographically in rare cases (:issue:`53824`)
 - Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
 - Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
 -
@@ -554,6 +557,7 @@ Sparse
 
 ExtensionArray
 ^^^^^^^^^^^^^^
+- Bug in :class:`DataFrame` constructor not copying :class:`Series` with extension dtype when given in a dict (:issue:`53744`)
 - Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`53171`)
 - Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`)
 - Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)
 
@@ -12,6 +12,7 @@ from pandas._typing import (
 )
 
 STR_NA_VALUES: set[str]
+DEFAULT_BUFFER_HEURISTIC: int
 
 def sanitize_objects(
     values: npt.NDArray[np.object_],
 
@@ -118,6 +118,8 @@ cdef:
     float64_t NEGINF = -INF
     int64_t DEFAULT_CHUNKSIZE = 256 * 1024
 
+DEFAULT_BUFFER_HEURISTIC = 2 ** 20
+
 
 cdef extern from "pandas/portable.h":
     # I *think* this is here so that strcasecmp is defined on Windows
@@ -584,7 +586,7 @@ cdef class TextReader:
             raise EmptyDataError("No columns to parse from file")
 
         # Compute buffer_lines as function of table width.
-        heuristic = 2**20 // self.table_width
+        heuristic = DEFAULT_BUFFER_HEURISTIC // self.table_width
         self.buffer_lines = 1
         while self.buffer_lines * 2 < heuristic:
             self.buffer_lines *= 2
 
@@ -307,6 +307,26 @@ def closed(self) -> bool:
 
 # Arguments for fillna()
 FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
+InterpolateOptions = Literal[
+    "linear",
+    "time",
+    "index",
+    "values",
+    "nearest",
+    "zero",
+    "slinear",
+    "quadratic",
+    "cubic",
+    "barycentric",
+    "polynomial",
+    "krogh",
+    "piecewise_polynomial",
+    "spline",
+    "pchip",
+    "akima",
+    "cubicspline",
+    "from_derivatives",
+]
 
 # internals
 Manager = Union[
 
@@ -78,6 +78,7 @@
         AxisInt,
         Dtype,
         FillnaOptions,
+        InterpolateOptions,
         NumpySorter,
         NumpyValueArrayLike,
         PositionalIndexer,
@@ -90,6 +91,8 @@
         npt,
     )
 
+    from pandas import Index
+
 _extension_array_shared_docs: dict[str, str] = {}
 
 
@@ -118,6 +121,7 @@ class ExtensionArray:
     fillna
     equals
     insert
+    interpolate
     isin
     isna
     ravel
@@ -156,6 +160,7 @@ class ExtensionArray:
     * take
     * copy
     * _concat_same_type
+    * interpolate
 
     A default repr displaying the type, (truncated) data, length,
     and dtype is provided. It can be customized or replaced by
@@ -755,6 +760,27 @@ def argmax(self, skipna: bool = True) -> int:
             raise NotImplementedError
         return nargminmax(self, "argmax")
 
+    def interpolate(
+        self,
+        *,
+        method: InterpolateOptions,
+        axis: int,
+        index: Index,
+        limit,
+        limit_direction,
+        limit_area,
+        fill_value,
+        copy: bool,
+        **kwargs,
+    ) -> Self:
+        """
+        See DataFrame.interpolate.__doc__.
+        """
+        # NB: we return type(self) even if copy=False
+        raise NotImplementedError(
+            f"{type(self).__name__} does not implement interpolate"
+        )
+
     def fillna(
         self,
         value: object | ArrayLike | None = None,
 
@@ -58,6 +58,7 @@
     Dtype,
     DtypeObj,
     F,
+    InterpolateOptions,
     NpDtype,
     PositionalIndexer2D,
     PositionalIndexerTuple,
@@ -2233,23 +2234,23 @@ def copy(self, order: str = "C") -> Self:
     def interpolate(
         self,
         *,
-        method,
+        method: InterpolateOptions,
         axis: int,
         index: Index,
         limit,
         limit_direction,
         limit_area,
-        inplace: bool,
+        copy: bool,
         **kwargs,
     ) -> Self:
         """
         See NDFrame.interpolate.__doc__.
         """
-        # NB: we return type(self) even if inplace=True
+        # NB: we return type(self) even if copy=False
         if method != "linear":
             raise NotImplementedError
 
-        if inplace:
+        if not copy:
             out_data = self._ndarray
         else:
             out_data = self._ndarray.copy()
@@ -2264,7 +2265,7 @@ def interpolate(
             limit_area=limit_area,
             **kwargs,
         )
-        if inplace:
+        if not copy:
             return self
         return type(self)._simple_new(out_data, dtype=self.dtype)
 
 
@@ -36,6 +36,7 @@
         AxisInt,
         Dtype,
         FillnaOptions,
+        InterpolateOptions,
         NpDtype,
         Scalar,
         Self,
@@ -261,20 +262,20 @@ def pad_or_backfill(
     def interpolate(
         self,
         *,
-        method,
+        method: InterpolateOptions,
         axis: int,
         index: Index,
         limit,
         limit_direction,
         limit_area,
-        inplace: bool,
+        copy: bool,
         **kwargs,
     ) -> Self:
         """
         See NDFrame.interpolate.__doc__.
         """
-        # NB: we return type(self) even if inplace=True
-        if inplace:
+        # NB: we return type(self) even if copy=False
+        if not copy:
             out_data = self._ndarray
         else:
             out_data = self._ndarray.copy()
@@ -290,7 +291,7 @@ def interpolate(
             limit_area=limit_area,
             **kwargs,
         )
-        if inplace:
+        if not copy:
             return self
         return type(self)._simple_new(out_data, dtype=self.dtype)
Original file line number	Diff line number	Diff line change
`@@ -12,6 +12,7 @@ from pandas._typing import (`
`12`	`12`	`)`
`13`	`13`
`14`	`14`	`STR_NA_VALUES: set[str]`
	`15`	`+DEFAULT_BUFFER_HEURISTIC: int`
`15`	`16`
`16`	`17`	`def sanitize_objects(`
`17`	`18`	`values: npt.NDArray[np.object_],`