Skip to content

Commit dd0bfe8

Browse files
committed
Merge branch 'master' into reduction_dtypes_II
2 parents 467073a + c96827b commit dd0bfe8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+1586
-918
lines changed

.github/actions/build_pandas/action.yml

+7
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ runs:
1414
micromamba list
1515
shell: bash -el {0}
1616

17+
- name: Uninstall existing Pandas installation
18+
run: |
19+
if pip list | grep -q ^pandas; then
20+
pip uninstall -y pandas || true
21+
fi
22+
shell: bash -el {0}
23+
1724
- name: Build Pandas
1825
run: |
1926
if [[ ${{ inputs.editable }} == "true" ]]; then

ci/code_checks.sh

+1-10
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
105105
pandas.errors.UnsupportedFunctionCall \
106106
pandas.test \
107107
pandas.NaT \
108-
pandas.read_clipboard \
109-
pandas.ExcelFile \
110-
pandas.ExcelFile.parse \
111108
pandas.io.formats.style.Styler.to_html \
112-
pandas.HDFStore.groups \
113-
pandas.HDFStore.walk \
114109
pandas.read_feather \
115110
pandas.DataFrame.to_feather \
116111
pandas.read_parquet \
@@ -123,11 +118,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
123118
pandas.io.stata.StataReader.value_labels \
124119
pandas.io.stata.StataReader.variable_labels \
125120
pandas.io.stata.StataWriter.write_file \
126-
pandas.core.resample.Resampler.__iter__ \
127-
pandas.core.resample.Resampler.groups \
128-
pandas.core.resample.Resampler.indices \
129-
pandas.core.resample.Resampler.get_group \
130-
pandas.core.resample.Resampler.ffill \
131121
pandas.core.resample.Resampler.asfreq \
132122
pandas.core.resample.Resampler.count \
133123
pandas.core.resample.Resampler.nunique \
@@ -241,6 +231,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
241231
pandas.api.extensions.ExtensionArray.factorize \
242232
pandas.api.extensions.ExtensionArray.fillna \
243233
pandas.api.extensions.ExtensionArray.insert \
234+
pandas.api.extensions.ExtensionArray.interpolate \
244235
pandas.api.extensions.ExtensionArray.isin \
245236
pandas.api.extensions.ExtensionArray.isna \
246237
pandas.api.extensions.ExtensionArray.ravel \

doc/source/development/contributing_codebase.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -861,7 +861,7 @@ performance regressions. pandas is in the process of migrating to
861861
`asv benchmarks <https://github.com/airspeed-velocity/asv>`__
862862
to enable easy monitoring of the performance of critical pandas operations.
863863
These benchmarks are all found in the ``pandas/asv_bench`` directory, and the
864-
test results can be found `here <https://pandas.pydata.org/speed/pandas/>`__.
864+
test results can be found `here <https://asv-runner.github.io/asv-collection/pandas>`__.
865865

866866
To use all features of asv, you will need either ``conda`` or
867867
``virtualenv``. For more details please check the `asv installation

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ objects.
5252
api.extensions.ExtensionArray.factorize
5353
api.extensions.ExtensionArray.fillna
5454
api.extensions.ExtensionArray.insert
55+
api.extensions.ExtensionArray.interpolate
5556
api.extensions.ExtensionArray.isin
5657
api.extensions.ExtensionArray.isna
5758
api.extensions.ExtensionArray.ravel

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -2664,7 +2664,7 @@ Links can be extracted from cells along with the text using ``extract_links="all
26642664
"""
26652665
26662666
df = pd.read_html(
2667-
html_table,
2667+
StringIO(html_table),
26682668
extract_links="all"
26692669
)[0]
26702670
df

doc/source/whatsnew/v0.10.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ labeled the aggregated group with the end of the interval: the next day).
181181
``X0``, ``X1``, ...) can be reproduced by specifying ``prefix='X'``:
182182

183183
.. ipython:: python
184-
:okwarning:
184+
:okexcept:
185185
186186
import io
187187

doc/source/whatsnew/v0.24.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -286,7 +286,7 @@ value. (:issue:`17054`)
286286

287287
.. ipython:: python
288288
289-
result = pd.read_html("""
289+
result = pd.read_html(StringIO("""
290290
<table>
291291
<thead>
292292
<tr>
@@ -298,7 +298,7 @@ value. (:issue:`17054`)
298298
<td colspan="2">1</td><td>2</td>
299299
</tr>
300300
</tbody>
301-
</table>""")
301+
</table>"""))
302302
303303
*Previous behavior*:
304304

doc/source/whatsnew/v2.0.3.rst

+1-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
.. _whatsnew_203:
22

3-
What's new in 2.0.3 (July XX, 2023)
3+
What's new in 2.0.3 (June 28, 2023)
44
-----------------------------------
55

66
These are the changes in pandas 2.0.3. See :ref:`release` for a full changelog
@@ -17,7 +17,6 @@ Fixed regressions
1717
- Fixed performance regression in merging on datetime-like columns (:issue:`53231`)
1818
- Fixed regression when :meth:`DataFrame.to_string` creates extra space for string dtypes (:issue:`52690`)
1919
- For external ExtensionArray implementations, restored the default use of ``_values_for_factorize`` for hashing arrays (:issue:`53475`)
20-
-
2120

2221
.. ---------------------------------------------------------------------------
2322
.. _whatsnew_203.bug_fixes:
@@ -38,7 +37,6 @@ Bug fixes
3837

3938
Other
4039
~~~~~
41-
-
4240

4341
.. ---------------------------------------------------------------------------
4442
.. _whatsnew_203.contributors:

doc/source/whatsnew/v2.1.0.rst

+7-3
Original file line numberDiff line numberDiff line change
@@ -142,20 +142,21 @@ Other enhancements
142142
- Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
143143
- Performance improvement in :func:`read_csv` with ``engine="c"`` (:issue:`52632`)
144144
- :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
145-
- :meth:`DataFrame.stack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
146145
- :meth:`DataFrame.unstack` gained the ``sort`` keyword to dictate whether the resulting :class:`MultiIndex` levels are sorted (:issue:`15105`)
147146
- :meth:`DataFrameGroupBy.agg` and :meth:`DataFrameGroupBy.transform` now support grouping by multiple keys when the index is not a :class:`MultiIndex` for ``engine="numba"`` (:issue:`53486`)
148147
- :meth:`Series.explode` now supports pyarrow-backed list types (:issue:`53602`)
149148
- :meth:`Series.str.join` now supports ``ArrowDtype(pa.string())`` (:issue:`53646`)
150149
- :meth:`SeriesGroupBy.agg` and :meth:`DataFrameGroupBy.agg` now support passing in multiple functions for ``engine="numba"`` (:issue:`53486`)
151150
- :meth:`SeriesGroupBy.transform` and :meth:`DataFrameGroupBy.transform` now support passing in a string as the function for ``engine="numba"`` (:issue:`53579`)
151+
- Added :meth:`ExtensionArray.interpolate` used by :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`53659`)
152152
- Added ``engine_kwargs`` parameter to :meth:`DataFrame.to_excel` (:issue:`53220`)
153153
- Added a new parameter ``by_row`` to :meth:`Series.apply`. When set to ``False`` the supplied callables will always operate on the whole Series (:issue:`53400`).
154154
- Groupby aggregations (such as :meth:`DataFrameGroupBy.sum`) can now preserve the dtype of the input instead of casting to ``float64`` (:issue:`44952`)
155155
- Improved error message when :meth:`DataFrameGroupBy.agg` failed (:issue:`52930`)
156156
- Many read/to_* functions, such as :meth:`DataFrame.to_pickle` and :func:`read_csv`, support forwarding compression arguments to lzma.LZMAFile (:issue:`52979`)
157157
- Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
158158
- Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
159+
-
159160

160161
.. ---------------------------------------------------------------------------
161162
.. _whatsnew_210.notable_bug_fixes:
@@ -293,6 +294,7 @@ Deprecations
293294
- Deprecated making the functions in a list of functions given to :meth:`DataFrame.agg` attempt to operate on each element in the :class:`DataFrame` and only operate on the columns of the :class:`DataFrame` if the elementwise operations failed. To keep the current behavior, use :meth:`DataFrame.transform` instead. (:issue:`53325`)
294295
- Deprecated passing a :class:`DataFrame` to :meth:`DataFrame.from_records`, use :meth:`DataFrame.set_index` or :meth:`DataFrame.drop` instead (:issue:`51353`)
295296
- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`)
297+
- Deprecated the ``downcast`` keyword in :meth:`Series.interpolate`, :meth:`DataFrame.interpolate`, :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`Series.ffill`, :meth:`DataFrame.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.bfill` (:issue:`40988`)
296298
- Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
297299
- Deprecated the ``axis`` keyword in :meth:`DataFrame.resample`, :meth:`Series.resample` (:issue:`51778`)
298300
- Deprecated the behavior of :func:`concat` when ``len(keys) != len(objs)``; in a future version this will raise instead of truncating to the shorter of the two sequences (:issue:`43485`)
@@ -330,13 +332,13 @@ Deprecations
330332
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
331333
- Deprecated :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`53631`)
332334
- Deprecated :meth:`Series.last` and :meth:`DataFrame.last` (please create a mask and filter using ``.loc`` instead) (:issue:`53692`)
333-
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
334335
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
335336
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
336337
- Deprecated bytes input to :func:`read_excel`. To read a file path, use a string or path-like object. (:issue:`53767`)
337338
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
338339
- Deprecated falling back to filling when ``value`` is not specified in :meth:`DataFrame.replace` and :meth:`Series.replace` with non-dict-like ``to_replace`` (:issue:`33302`)
339340
- Deprecated literal json input to :func:`read_json`. Wrap literal json string input in ``io.StringIO`` instead. (:issue:`53409`)
341+
- Deprecated literal string/bytes input to :func:`read_html`. Wrap literal string/bytes input in ``io.StringIO`` / ``io.BytesIO`` instead. (:issue:`53767`)
340342
- Deprecated option ``mode.use_inf_as_na``; convert inf entries to ``NaN`` beforehand instead (:issue:`51684`)
341343
- Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`)
342344
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
@@ -541,7 +543,8 @@ Reshaping
541543
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
542544
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
543545
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
544-
- Bug in :meth:`DataFrame.stack` sorting columns lexicographically (:issue:`53786`)
546+
- Bug in :meth:`DataFrame.stack` sorting columns lexicographically in rare cases (:issue:`53786`)
547+
- Bug in :meth:`DataFrame.stack` sorting index lexicographically in rare cases (:issue:`53824`)
545548
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
546549
- Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
547550
-
@@ -554,6 +557,7 @@ Sparse
554557

555558
ExtensionArray
556559
^^^^^^^^^^^^^^
560+
- Bug in :class:`DataFrame` constructor not copying :class:`Series` with extension dtype when given in dict (:issue:`53744`)
557561
- Bug in :class:`~arrays.ArrowExtensionArray` converting pandas non-nanosecond temporal objects from non-zero values to zero values (:issue:`53171`)
558562
- Bug in :meth:`Series.quantile` for pyarrow temporal types raising ArrowInvalid (:issue:`52678`)
559563
- Bug in :meth:`Series.rank` returning wrong order for small values with ``Float64`` dtype (:issue:`52471`)

pandas/_libs/parsers.pyi

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from pandas._typing import (
1212
)
1313

1414
STR_NA_VALUES: set[str]
15+
DEFAULT_BUFFER_HEURISTIC: int
1516

1617
def sanitize_objects(
1718
values: npt.NDArray[np.object_],

pandas/_libs/parsers.pyx

+3-1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ cdef:
118118
float64_t NEGINF = -INF
119119
int64_t DEFAULT_CHUNKSIZE = 256 * 1024
120120

121+
DEFAULT_BUFFER_HEURISTIC = 2 ** 20
122+
121123

122124
cdef extern from "pandas/portable.h":
123125
# I *think* this is here so that strcasecmp is defined on Windows
@@ -584,7 +586,7 @@ cdef class TextReader:
584586
raise EmptyDataError("No columns to parse from file")
585587

586588
# Compute buffer_lines as function of table width.
587-
heuristic = 2**20 // self.table_width
589+
heuristic = DEFAULT_BUFFER_HEURISTIC // self.table_width
588590
self.buffer_lines = 1
589591
while self.buffer_lines * 2 < heuristic:
590592
self.buffer_lines *= 2

pandas/_typing.py

+20
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,26 @@ def closed(self) -> bool:
307307

308308
# Arguments for fillna()
309309
FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
310+
InterpolateOptions = Literal[
311+
"linear",
312+
"time",
313+
"index",
314+
"values",
315+
"nearest",
316+
"zero",
317+
"slinear",
318+
"quadratic",
319+
"cubic",
320+
"barycentric",
321+
"polynomial",
322+
"krogh",
323+
"piecewise_polynomial",
324+
"spline",
325+
"pchip",
326+
"akima",
327+
"cubicspline",
328+
"from_derivatives",
329+
]
310330

311331
# internals
312332
Manager = Union[

pandas/core/arrays/base.py

+26
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
AxisInt,
7979
Dtype,
8080
FillnaOptions,
81+
InterpolateOptions,
8182
NumpySorter,
8283
NumpyValueArrayLike,
8384
PositionalIndexer,
@@ -90,6 +91,8 @@
9091
npt,
9192
)
9293

94+
from pandas import Index
95+
9396
_extension_array_shared_docs: dict[str, str] = {}
9497

9598

@@ -118,6 +121,7 @@ class ExtensionArray:
118121
fillna
119122
equals
120123
insert
124+
interpolate
121125
isin
122126
isna
123127
ravel
@@ -156,6 +160,7 @@ class ExtensionArray:
156160
* take
157161
* copy
158162
* _concat_same_type
163+
* interpolate
159164
160165
A default repr displaying the type, (truncated) data, length,
161166
and dtype is provided. It can be customized or replaced by
@@ -755,6 +760,27 @@ def argmax(self, skipna: bool = True) -> int:
755760
raise NotImplementedError
756761
return nargminmax(self, "argmax")
757762

763+
def interpolate(
764+
self,
765+
*,
766+
method: InterpolateOptions,
767+
axis: int,
768+
index: Index,
769+
limit,
770+
limit_direction,
771+
limit_area,
772+
fill_value,
773+
copy: bool,
774+
**kwargs,
775+
) -> Self:
776+
"""
777+
See DataFrame.interpolate.__doc__.
778+
"""
779+
# NB: we return type(self) even if copy=False
780+
raise NotImplementedError(
781+
f"{type(self).__name__} does not implement interpolate"
782+
)
783+
758784
def fillna(
759785
self,
760786
value: object | ArrayLike | None = None,

pandas/core/arrays/datetimelike.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
Dtype,
5959
DtypeObj,
6060
F,
61+
InterpolateOptions,
6162
NpDtype,
6263
PositionalIndexer2D,
6364
PositionalIndexerTuple,
@@ -2233,23 +2234,23 @@ def copy(self, order: str = "C") -> Self:
22332234
def interpolate(
22342235
self,
22352236
*,
2236-
method,
2237+
method: InterpolateOptions,
22372238
axis: int,
22382239
index: Index,
22392240
limit,
22402241
limit_direction,
22412242
limit_area,
2242-
inplace: bool,
2243+
copy: bool,
22432244
**kwargs,
22442245
) -> Self:
22452246
"""
22462247
See NDFrame.interpolate.__doc__.
22472248
"""
2248-
# NB: we return type(self) even if inplace=True
2249+
# NB: we return type(self) even if copy=False
22492250
if method != "linear":
22502251
raise NotImplementedError
22512252

2252-
if inplace:
2253+
if not copy:
22532254
out_data = self._ndarray
22542255
else:
22552256
out_data = self._ndarray.copy()
@@ -2264,7 +2265,7 @@ def interpolate(
22642265
limit_area=limit_area,
22652266
**kwargs,
22662267
)
2267-
if inplace:
2268+
if not copy:
22682269
return self
22692270
return type(self)._simple_new(out_data, dtype=self.dtype)
22702271

pandas/core/arrays/numpy_.py

+6-5
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
AxisInt,
3737
Dtype,
3838
FillnaOptions,
39+
InterpolateOptions,
3940
NpDtype,
4041
Scalar,
4142
Self,
@@ -261,20 +262,20 @@ def pad_or_backfill(
261262
def interpolate(
262263
self,
263264
*,
264-
method,
265+
method: InterpolateOptions,
265266
axis: int,
266267
index: Index,
267268
limit,
268269
limit_direction,
269270
limit_area,
270-
inplace: bool,
271+
copy: bool,
271272
**kwargs,
272273
) -> Self:
273274
"""
274275
See NDFrame.interpolate.__doc__.
275276
"""
276-
# NB: we return type(self) even if inplace=True
277-
if inplace:
277+
# NB: we return type(self) even if copy=False
278+
if not copy:
278279
out_data = self._ndarray
279280
else:
280281
out_data = self._ndarray.copy()
@@ -290,7 +291,7 @@ def interpolate(
290291
limit_area=limit_area,
291292
**kwargs,
292293
)
293-
if inplace:
294+
if not copy:
294295
return self
295296
return type(self)._simple_new(out_data, dtype=self.dtype)
296297

0 commit comments

Comments
 (0)