Skip to content

Commit 398ec4c

Browse files
Merge remote-tracking branch 'upstream/master' into bisect
2 parents 7fbb2f9 + ffa6e20 commit 398ec4c

File tree

214 files changed

+7830
-6837
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

214 files changed

+7830
-6837
lines changed

.github/workflows/ci.yml

+6-1
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,17 @@ jobs:
153153
run: |
154154
source activate pandas-dev
155155
pytest pandas/tests/frame/methods --array-manager
156+
pytest pandas/tests/frame/test_reductions.py --array-manager
157+
pytest pandas/tests/reductions/ --array-manager
158+
pytest pandas/tests/generic/test_generic.py --array-manager
156159
pytest pandas/tests/arithmetic/ --array-manager
160+
pytest pandas/tests/groupby/aggregate/ --array-manager
161+
pytest pandas/tests/reshape/merge --array-manager
157162
158163
# indexing subset (temporary since other tests don't pass yet)
159164
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_boolean --array-manager
160165
pytest pandas/tests/frame/indexing/test_where.py --array-manager
161-
pytest pandas/tests/frame/indexing/test_indexing.py::TestDataFrameIndexing::test_setitem_multi_index --array-manager
166+
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_multi_index --array-manager
162167
pytest pandas/tests/frame/indexing/test_setitem.py::TestDataFrameSetItem::test_setitem_listlike_indexer_duplicate_columns --array-manager
163168
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_astype_assignment_with_dups --array-manager
164169
pytest pandas/tests/indexing/multiindex/test_setitem.py::TestMultiIndexSetItem::test_frame_setitem_multi_column --array-manager

.pre-commit-config.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,11 @@ repos:
163163
entry: np\.bool[^_8]
164164
language: pygrep
165165
types_or: [python, cython, rst]
166+
- id: np-object
167+
name: Check for use of np.object instead of np.object_
168+
entry: np\.object[^_8]
169+
language: pygrep
170+
types_or: [python, cython, rst]
166171
- id: no-os-remove
167172
name: Check code for instances of os.remove
168173
entry: os\.remove

asv_bench/benchmarks/algos/isin.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def time_isin(self, dtype, exponent, title):
104104

105105
class IsinWithRandomFloat:
106106
params = [
107-
[np.float64, np.object],
107+
[np.float64, np.object_],
108108
[
109109
1_300,
110110
2_000,
@@ -134,7 +134,7 @@ def time_isin(self, dtype, size, title):
134134

135135
class IsinWithArangeSorted:
136136
params = [
137-
[np.float64, np.int64, np.uint64, np.object],
137+
[np.float64, np.int64, np.uint64, np.object_],
138138
[
139139
1_000,
140140
2_000,
@@ -155,7 +155,7 @@ def time_isin(self, dtype, size):
155155

156156
class IsinWithArange:
157157
params = [
158-
[np.float64, np.int64, np.uint64, np.object],
158+
[np.float64, np.int64, np.uint64, np.object_],
159159
[
160160
1_000,
161161
2_000,
@@ -273,6 +273,7 @@ class IsInLongSeriesLookUpDominates:
273273
def setup(self, dtype, MaxNumber, series_type):
274274
N = 10 ** 7
275275

276+
# https://github.com/pandas-dev/pandas/issues/39844
276277
if not np_version_under1p20 and dtype in ("Int64", "Float64"):
277278
raise NotImplementedError
278279

@@ -303,6 +304,11 @@ class IsInLongSeriesValuesDominate:
303304

304305
def setup(self, dtype, series_type):
305306
N = 10 ** 7
307+
308+
# https://github.com/pandas-dev/pandas/issues/39844
309+
if not np_version_under1p20 and dtype in ("Int64", "Float64"):
310+
raise NotImplementedError
311+
306312
if series_type == "random":
307313
np.random.seed(42)
308314
vals = np.random.randint(0, 10 * N, N)

asv_bench/benchmarks/rolling.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ def time_ewm(self, constructor, window, dtype, method):
114114
getattr(self.ewm, method)()
115115

116116
def time_ewm_times(self, constructor, window, dtype, method):
117-
self.ewm.mean()
117+
self.ewm_times.mean()
118118

119119

120120
class VariableWindowMethods(Methods):

doc/source/ecosystem.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,7 @@ A good implementation for Python users is `has2k1/plotnine <https://github.com/h
156156
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
157157

158158
`IPython Vega <https://github.com/vega/ipyvega>`__ leverages `Vega
159-
<https://github.com/trifacta/vega>`__ to create plots within Jupyter Notebook.
159+
<https://github.com/vega/vega>`__ to create plots within Jupyter Notebook.
160160

161161
`Plotly <https://plot.ly/python>`__
162162
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

doc/source/getting_started/install.rst

+84-30
Original file line numberDiff line numberDiff line change
@@ -255,47 +255,52 @@ For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
255255
optional dependency is not installed, pandas will raise an ``ImportError`` when
256256
the method requiring that dependency is called.
257257

258+
Visualization
259+
^^^^^^^^^^^^^
260+
258261
========================= ================== =============================================================
259262
Dependency Minimum Version Notes
260263
========================= ================== =============================================================
261-
BeautifulSoup4 4.6.0 HTML parser for read_html (see :ref:`note <optional_html>`)
264+
matplotlib 2.2.3 Plotting library
262265
Jinja2 2.10 Conditional formatting with DataFrame.style
263-
PyQt4 Clipboard I/O
264-
PyQt5 Clipboard I/O
265-
PyTables 3.5.1 HDF5-based reading / writing
266-
SQLAlchemy 1.3.0 SQL support for databases other than sqlite
266+
tabulate 0.8.7 Printing in Markdown-friendly format (see `tabulate`_)
267+
========================= ================== =============================================================
268+
269+
Computation
270+
^^^^^^^^^^^
271+
272+
========================= ================== =============================================================
273+
Dependency Minimum Version Notes
274+
========================= ================== =============================================================
267275
SciPy 1.2.0 Miscellaneous statistical functions
268-
xlsxwriter 1.0.2 Excel writing
269-
blosc 1.17.0 Compression for HDF5
270-
fsspec 0.7.4 Handling files aside from local and HTTP
271-
fastparquet 0.4.0 Parquet reading / writing
272-
gcsfs 0.6.0 Google Cloud Storage access
273-
html5lib 1.0.1 HTML parser for read_html (see :ref:`note <optional_html>`)
274-
lxml 4.3.0 HTML parser for read_html (see :ref:`note <optional_html>`)
275-
matplotlib 2.2.3 Visualization
276276
numba 0.46.0 Alternative execution engine for rolling operations
277+
(see :ref:`Enhancing Performance <enhancingperf.numba>`)
278+
xarray 0.12.3 pandas-like API for N-dimensional data
279+
========================= ================== =============================================================
280+
281+
Excel files
282+
^^^^^^^^^^^
283+
284+
========================= ================== =============================================================
285+
Dependency Minimum Version Notes
286+
========================= ================== =============================================================
287+
xlrd 1.2.0 Reading Excel
288+
xlwt 1.3.0 Writing Excel
289+
xlsxwriter 1.0.2 Writing Excel
277290
openpyxl 3.0.0 Reading / writing for xlsx files
278-
pandas-gbq 0.12.0 Google Big Query access
279-
psycopg2 2.7 PostgreSQL engine for sqlalchemy
280-
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
281-
pymysql 0.8.1 MySQL engine for sqlalchemy
282-
pyreadstat SPSS files (.sav) reading
283291
pyxlsb 1.0.6 Reading for xlsb files
284-
qtpy Clipboard I/O
285-
s3fs 0.4.0 Amazon S3 access
286-
tabulate 0.8.7 Printing in Markdown-friendly format (see `tabulate`_)
287-
xarray 0.12.3 pandas-like API for N-dimensional data
288-
xclip Clipboard I/O on linux
289-
xlrd 1.2.0 Excel reading
290-
xlwt 1.3.0 Excel writing
291-
xsel Clipboard I/O on linux
292-
zlib Compression for HDF5
293292
========================= ================== =============================================================
294293

295-
.. _optional_html:
294+
HTML
295+
^^^^
296296

297-
Optional dependencies for parsing HTML
298-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
297+
========================= ================== =============================================================
298+
Dependency Minimum Version Notes
299+
========================= ================== =============================================================
300+
BeautifulSoup4 4.6.0 HTML parser for read_html
301+
html5lib 1.0.1 HTML parser for read_html
302+
lxml 4.3.0 HTML parser for read_html
303+
========================= ================== =============================================================
299304

300305
One of the following combinations of libraries is needed to use the
301306
top-level :func:`~pandas.read_html` function:
@@ -320,3 +325,52 @@ top-level :func:`~pandas.read_html` function:
320325
.. _BeautifulSoup4: https://www.crummy.com/software/BeautifulSoup
321326
.. _lxml: https://lxml.de
322327
.. _tabulate: https://github.com/astanin/python-tabulate
328+
329+
SQL databases
330+
^^^^^^^^^^^^^
331+
332+
========================= ================== =============================================================
333+
Dependency Minimum Version Notes
334+
========================= ================== =============================================================
335+
SQLAlchemy 1.3.0 SQL support for databases other than sqlite
336+
psycopg2 2.7 PostgreSQL engine for sqlalchemy
337+
pymysql 0.8.1 MySQL engine for sqlalchemy
338+
========================= ================== =============================================================
339+
340+
Other data sources
341+
^^^^^^^^^^^^^^^^^^
342+
343+
========================= ================== =============================================================
344+
Dependency Minimum Version Notes
345+
========================= ================== =============================================================
346+
PyTables 3.5.1 HDF5-based reading / writing
347+
blosc 1.17.0 Compression for HDF5
348+
zlib Compression for HDF5
349+
fastparquet 0.4.0 Parquet reading / writing
350+
pyarrow 0.15.0 Parquet, ORC, and feather reading / writing
351+
pyreadstat SPSS files (.sav) reading
352+
========================= ================== =============================================================
353+
354+
Access data in the cloud
355+
^^^^^^^^^^^^^^^^^^^^^^^^
356+
357+
========================= ================== =============================================================
358+
Dependency Minimum Version Notes
359+
========================= ================== =============================================================
360+
fsspec 0.7.4 Handling files aside from simple local and HTTP
361+
gcsfs 0.6.0 Google Cloud Storage access
362+
pandas-gbq 0.12.0 Google Big Query access
363+
s3fs 0.4.0 Amazon S3 access
364+
========================= ================== =============================================================
365+
366+
Clipboard
367+
^^^^^^^^^
368+
369+
========================= ================== =============================================================
370+
Dependency Minimum Version Notes
371+
========================= ================== =============================================================
372+
PyQt4/PyQt5 Clipboard I/O
373+
qtpy Clipboard I/O
374+
xclip Clipboard I/O on Linux
375+
xsel Clipboard I/O on Linux
376+
========================= ================== =============================================================

doc/source/user_guide/cookbook.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -1410,7 +1410,7 @@ Often it's useful to obtain the lower (or upper) triangular form of a correlatio
14101410
14111411
corr_mat.where(mask)
14121412
1413-
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the ``distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>``__ matrix for a ``DataFrame`` object.
1413+
The ``method`` argument within ``DataFrame.corr`` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a ``DataFrame`` object.
14141414

14151415
.. ipython:: python
14161416

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like
2929
binary;`Feather Format <https://github.com/wesm/feather>`__;:ref:`read_feather<io.feather>`;:ref:`to_feather<io.feather>`
3030
binary;`Parquet Format <https://parquet.apache.org/>`__;:ref:`read_parquet<io.parquet>`;:ref:`to_parquet<io.parquet>`
3131
binary;`ORC Format <https://orc.apache.org/>`__;:ref:`read_orc<io.orc>`;
32-
binary;`Msgpack <https://msgpack.org/index.html>`__;:ref:`read_msgpack<io.msgpack>`;:ref:`to_msgpack<io.msgpack>`
32+
binary;`Msgpack <https://msgpack.org/>`__;:ref:`read_msgpack<io.msgpack>`;:ref:`to_msgpack<io.msgpack>`
3333
binary;`Stata <https://en.wikipedia.org/wiki/Stata>`__;:ref:`read_stata<io.stata_reader>`;:ref:`to_stata<io.stata_writer>`
3434
binary;`SAS <https://en.wikipedia.org/wiki/SAS_(software)>`__;:ref:`read_sas<io.sas_reader>`;
3535
binary;`SPSS <https://en.wikipedia.org/wiki/SPSS>`__;:ref:`read_spss<io.spss_reader>`;

doc/source/user_guide/style.ipynb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1154,7 +1154,7 @@
11541154
"metadata": {},
11551155
"outputs": [],
11561156
"source": [
1157-
"from IPython.html import widgets\n",
1157+
"from ipywidgets import widgets\n",
11581158
"@widgets.interact\n",
11591159
"def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):\n",
11601160
" return df.style.background_gradient(\n",

doc/source/whatsnew/v0.24.0.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1755,8 +1755,8 @@ Missing
17551755

17561756
- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
17571757
- Bug in :func:`Series.hasnans` that could be incorrectly cached and return incorrect answers if null elements are introduced after an initial call (:issue:`19700`)
1758-
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
1759-
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
1758+
- :func:`Series.isin` now treats all NaN-floats as equal also for ``np.object_``-dtype. This behavior is consistent with the behavior for float64 (:issue:`22119`)
1759+
- :func:`unique` no longer mangles NaN-floats and the ``NaT``-object for ``np.object_``-dtype, i.e. ``NaT`` is no longer coerced to a NaN-value and is treated as a different entity. (:issue:`22295`)
17601760
- :class:`DataFrame` and :class:`Series` now properly handle numpy masked arrays with hardened masks. Previously, constructing a DataFrame or Series from a masked array with a hard mask would create a pandas object containing the underlying value, rather than the expected NaN. (:issue:`24574`)
17611761
- Bug in :class:`DataFrame` constructor where ``dtype`` argument was not honored when handling numpy masked record arrays. (:issue:`24874`)
17621762

doc/source/whatsnew/v1.2.3.rst

+9-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,15 @@ including other versions of pandas.
1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
1717

18-
- Fixed regression in :func:`pandas.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`)
19-
-
18+
- Fixed regression in :meth:`~DataFrame.to_excel` raising ``KeyError`` when giving duplicate columns with ``columns`` attribute (:issue:`39695`)
19+
- Fixed regression in nullable integer unary ops propagating mask on assignment (:issue:`39943`)
20+
- Fixed regression in :meth:`DataFrame.__setitem__` not aligning :class:`DataFrame` on right-hand side for boolean indexer (:issue:`39931`)
21+
- Fixed regression in :meth:`~DataFrame.to_json` failing to use ``compression`` with URL-like paths that are internally opened in binary mode or with user-provided file objects that are opened in binary mode (:issue:`39985`)
22+
- Fixed regression in :meth:`~Series.sort_index` and :meth:`~DataFrame.sort_index`,
23+
which raised an unhelpful error when ``ascending=None`` was passed (:issue:`39434`).
24+
Passing ``ascending=None`` is still considered invalid,
25+
and the new error message suggests a proper usage
26+
(``ascending`` must be a boolean or a list-like of booleans).
2027

2128
.. ---------------------------------------------------------------------------
2229

0 commit comments

Comments
 (0)