Skip to content

Commit bebcc6e

Browse files
authored
Merge branch 'main' into ngroup-axis-1
2 parents b356c6a + 56c1b20 commit bebcc6e

File tree

182 files changed

+3983
-2274
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

182 files changed

+3983
-2274
lines changed

.github/ISSUE_TEMPLATE/bug_report.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ body:
1717
[latest version](https://pandas.pydata.org/docs/whatsnew/index.html) of pandas.
1818
required: true
1919
- label: >
20-
I have confirmed this bug exists on the [main branch]
21-
(https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
20+
I have confirmed this bug exists on the
21+
[main branch](https://pandas.pydata.org/docs/dev/getting_started/install.html#installing-the-development-version-of-pandas)
2222
of pandas.
2323
- type: textarea
2424
id: example

.github/workflows/docbuild-and-upload.yml

-6
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,6 @@ jobs:
4646
- name: Build Pandas
4747
uses: ./.github/actions/build_pandas
4848

49-
- name: Set up maintainers cache
50-
uses: actions/cache@v3
51-
with:
52-
path: maintainers.json
53-
key: maintainers
54-
5549
- name: Build website
5650
run: python web/pandas_web.py web/pandas --target-path=web/build
5751

.github/workflows/macos-windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
strategy:
3232
matrix:
3333
os: [macos-latest, windows-latest]
34-
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
34+
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
3535
fail-fast: false
3636
runs-on: ${{ matrix.os }}
3737
name: ${{ format('{0} {1}', matrix.os, matrix.env_file) }}

.github/workflows/python-dev.yml

+6-4
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,14 @@ name: Python Dev
2323
on:
2424
push:
2525
branches:
26-
- main
27-
- 1.5.x
26+
# - main
27+
# - 1.5.x
28+
- None
2829
pull_request:
2930
branches:
30-
- main
31-
- 1.5.x
31+
# - main
32+
# - 1.5.x
33+
- None
3234
paths-ignore:
3335
- "doc/**"
3436

.github/workflows/ubuntu.yml

+8-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ jobs:
2727
timeout-minutes: 180
2828
strategy:
2929
matrix:
30-
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml]
30+
env_file: [actions-38.yaml, actions-39.yaml, actions-310.yaml, actions-311.yaml]
3131
pattern: ["not single_cpu", "single_cpu"]
3232
pyarrow_version: ["7", "8", "9", "10"]
3333
include:
@@ -77,7 +77,7 @@ jobs:
7777
- name: "Numpy Dev"
7878
env_file: actions-310-numpydev.yaml
7979
pattern: "not slow and not network and not single_cpu"
80-
test_args: "-W error::DeprecationWarning:numpy -W error::FutureWarning:numpy"
80+
test_args: "-W error::DeprecationWarning -W error::FutureWarning"
8181
error_on_warnings: "0"
8282
exclude:
8383
- env_file: actions-38.yaml
@@ -92,6 +92,12 @@ jobs:
9292
pyarrow_version: "8"
9393
- env_file: actions-39.yaml
9494
pyarrow_version: "9"
95+
- env_file: actions-311.yaml
96+
pyarrow_version: "7"
97+
- env_file: actions-311.yaml
98+
pyarrow_version: "8"
99+
- env_file: actions-311.yaml
100+
pyarrow_version: "9"
95101
fail-fast: false
96102
name: ${{ matrix.name || format('{0} pyarrow={1} {2}', matrix.env_file, matrix.pyarrow_version, matrix.pattern) }}
97103
env:

.pre-commit-config.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,3 @@ repos:
443443
types: [python]
444444
files: ^pandas/tests
445445
language: python
446-
exclude: |
447-
(?x)
448-
^pandas/tests/generic/test_generic.py # GH50380

asv_bench/benchmarks/array.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ def time_setitem(self, multiple_chunks):
9393
self.array[i] = "foo"
9494

9595
def time_setitem_list(self, multiple_chunks):
96-
indexer = list(range(0, 50)) + list(range(-50, 0))
96+
indexer = list(range(0, 50)) + list(range(-1000, 0, 50))
9797
self.array[indexer] = ["foo"] * len(indexer)
9898

9999
def time_setitem_slice(self, multiple_chunks):

asv_bench/benchmarks/io/hdf.py

+8
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,17 @@ def setup(self, format):
128128
self.df["object"] = tm.makeStringIndex(N)
129129
self.df.to_hdf(self.fname, "df", format=format)
130130

131+
# Numeric df
132+
self.df1 = self.df.copy()
133+
self.df1 = self.df1.reset_index()
134+
self.df1.to_hdf(self.fname, "df1", format=format)
135+
131136
def time_read_hdf(self, format):
132137
read_hdf(self.fname, "df")
133138

139+
def peakmem_read_hdf(self, format):
140+
read_hdf(self.fname, "df")
141+
134142
def time_write_hdf(self, format):
135143
self.df.to_hdf(self.fname, "df", format=format)
136144

asv_bench/benchmarks/io/json.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,8 @@ def time_float_longint_str_lines(self):
294294
class ToJSONMem:
295295
def setup_cache(self):
296296
df = DataFrame([[1]])
297-
frames = {"int": df, "float": df.astype(float)}
297+
df2 = DataFrame(range(8), date_range("1/1/2000", periods=8, freq="T"))
298+
frames = {"int": df, "float": df.astype(float), "datetime": df2}
298299

299300
return frames
300301

@@ -308,5 +309,10 @@ def peakmem_float(self, frames):
308309
for _ in range(100_000):
309310
df.to_json()
310311

312+
def peakmem_time(self, frames):
313+
df = frames["datetime"]
314+
for _ in range(10_000):
315+
df.to_json(orient="table")
316+
311317

312318
from ..pandas_vb_common import setup # noqa: F401 isort:skip

ci/code_checks.sh

+2-27
Original file line numberDiff line numberDiff line change
@@ -79,33 +79,8 @@ fi
7979
### DOCSTRINGS ###
8080
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8181

82-
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
83-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
84-
RET=$(($RET + $?)) ; echo $MSG "DONE"
85-
86-
MSG='Partially validate docstrings (RT02)' ; echo $MSG
87-
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
88-
pandas.Series.align \
89-
pandas.Series.dt.total_seconds \
90-
pandas.Series.cat.rename_categories \
91-
pandas.Series.cat.reorder_categories \
92-
pandas.Series.cat.add_categories \
93-
pandas.Series.cat.remove_categories \
94-
pandas.Series.cat.remove_unused_categories \
95-
pandas.Index.all \
96-
pandas.Index.any \
97-
pandas.MultiIndex.drop \
98-
pandas.DatetimeIndex.to_pydatetime \
99-
pandas.TimedeltaIndex.to_pytimedelta \
100-
pandas.core.groupby.SeriesGroupBy.apply \
101-
pandas.core.groupby.DataFrameGroupBy.apply \
102-
pandas.io.formats.style.Styler.export \
103-
pandas.api.extensions.ExtensionArray.astype \
104-
pandas.api.extensions.ExtensionArray.dropna \
105-
pandas.api.extensions.ExtensionArray.isna \
106-
pandas.api.extensions.ExtensionArray.repeat \
107-
pandas.api.extensions.ExtensionArray.unique \
108-
pandas.DataFrame.align
82+
MSG='Validate docstrings (EX04, GL01, GL02, GL03, GL04, GL05, GL06, GL07, GL09, GL10, PR03, PR04, PR05, PR06, PR08, PR09, PR10, RT01, RT02, RT04, RT05, SA02, SA03, SA04, SS01, SS02, SS03, SS04, SS05, SS06)' ; echo $MSG
83+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
10984
RET=$(($RET + $?)) ; echo $MSG "DONE"
11085

11186
fi

ci/deps/actions-311.yaml

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
name: pandas-dev
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- python=3.11
6+
7+
# build dependencies
8+
- versioneer[toml]
9+
- cython>=0.29.32
10+
11+
# test dependencies
12+
- pytest>=7.0
13+
- pytest-cov
14+
- pytest-xdist>=2.2.0
15+
- psutil
16+
- pytest-asyncio>=0.17
17+
- boto3
18+
19+
# required dependencies
20+
- python-dateutil
21+
- numpy<1.24.0
22+
- pytz
23+
24+
# optional dependencies
25+
- beautifulsoup4
26+
- blosc
27+
- bottleneck
28+
- brotlipy
29+
- fastparquet
30+
- fsspec
31+
- html5lib
32+
- hypothesis
33+
- gcsfs
34+
- jinja2
35+
- lxml
36+
- matplotlib>=3.6.1
37+
# - numba not compatible with 3.11
38+
- numexpr
39+
- openpyxl
40+
- odfpy
41+
- pandas-gbq
42+
- psycopg2
43+
- pymysql
44+
# - pytables>=3.8.0 # first version that supports 3.11
45+
- pyarrow
46+
- pyreadstat
47+
- python-snappy
48+
- pyxlsb
49+
- s3fs>=2021.08.0
50+
- scipy
51+
- sqlalchemy<1.4.46
52+
- tabulate
53+
- tzdata>=2022a
54+
- xarray
55+
- xlrd
56+
- xlsxwriter
57+
- zstandard

doc/source/development/community.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ The pandas Community Meeting is a regular sync meeting for the project's
2222
maintainers which is open to the community. Everyone is welcome to attend and
2323
contribute to conversations.
2424

25-
The meetings take place on the second Wednesday of each month at 18:00 UTC.
25+
The meetings take place on the second and fourth Wednesdays of each month at 18:00 UTC.
2626

2727
The minutes of past meetings are available in `this Google Document <https://docs.google.com/document/d/1tGbTiYORHiSPgVMXawiweGJlBw5dOkVJLY-licoBmBU/edit?usp=sharing>`__.
2828

doc/source/development/contributing_gitpod.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ docs you need to run the following command in the docs directory::
145145

146146
Alternatively you can build a single page with::
147147

148-
python make.py html python make.py --single development/contributing_gitpod.rst
148+
python make.py --single development/contributing_gitpod.rst
149149

150150
You have two main options to render the documentation in Gitpod.
151151

doc/source/development/maintaining.rst

+13-6
Original file line numberDiff line numberDiff line change
@@ -458,8 +458,8 @@ which will be triggered when the tag is pushed.
458458
git checkout master
459459
git pull --ff-only upstream master
460460
git checkout -B RLS-<version>
461-
sed -i 's/BUILD_COMMIT: "v.*/BUILD_COMMIT: "'<version>'"/' azure/windows.yml azure/posix.yml
462-
sed -i 's/BUILD_COMMIT="v.*/BUILD_COMMIT="'<version>'"/' .travis.yml
461+
sed -i 's/BUILD_COMMIT: "v.*/BUILD_COMMIT: "'v<version>'"/' azure/windows.yml azure/posix.yml
462+
sed -i 's/BUILD_COMMIT="v.*/BUILD_COMMIT="'v<version>'"/' .travis.yml
463463
git commit -am "RLS <version>"
464464
git push -u origin RLS-<version>
465465

@@ -474,14 +474,21 @@ which will be triggered when the tag is pushed.
474474
Post-Release
475475
````````````
476476

477-
1. Close the milestone and the issue for the released version.
477+
1. Update symlink to stable documentation by logging in to our web server, and
478+
editing ``/var/www/html/pandas-docs/stable`` to point to ``version/<latest-version>``.
478479

479-
2. Create a new issue for the next release, with the estimated date or release.
480+
2. If releasing a major or minor release, open a PR in our source code to update
481+
``web/pandas/versions.json``, to have the desired versions in the documentation
482+
dropdown menu.
480483

481-
3. Open a PR with the placeholder for the release notes of the next version. See
484+
3. Close the milestone and the issue for the released version.
485+
486+
4. Create a new issue for the next release, with the estimated date of release.
487+
488+
5. Open a PR with the placeholder for the release notes of the next version. See
482489
for example `the PR for 1.5.3 <https://github.com/pandas-dev/pandas/pull/49843/files>`__.
483490

484-
4. Announce the new release in the official channels (use previous announcements
491+
6. Announce the new release in the official channels (use previous announcements
485492
for reference):
486493

487494
- The pandas-dev and pydata mailing lists

doc/source/user_guide/timeseries.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -292,7 +292,7 @@ The default behavior, ``errors='raise'``, is to raise when unparsable:
292292
.. code-block:: ipython
293293
294294
In [2]: pd.to_datetime(['2009/07/31', 'asd'], errors='raise')
295-
ValueError: Unknown string format
295+
ValueError: Unknown datetime string format
296296
297297
Pass ``errors='ignore'`` to return the original input when unparsable:
298298

doc/source/whatsnew/index.rst

+1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Version 1.5
2424
.. toctree::
2525
:maxdepth: 2
2626

27+
v1.5.4
2728
v1.5.3
2829
v1.5.2
2930
v1.5.1

doc/source/whatsnew/v1.5.1.rst

+4-4
Original file line numberDiff line numberDiff line change
@@ -76,14 +76,14 @@ Fixed regressions
7676
- Fixed regression in :meth:`DataFrame.loc` raising ``FutureWarning`` when setting an empty :class:`DataFrame` (:issue:`48480`)
7777
- Fixed regression in :meth:`DataFrame.describe` raising ``TypeError`` when result contains ``NA`` (:issue:`48778`)
7878
- Fixed regression in :meth:`DataFrame.plot` ignoring invalid ``colormap`` for ``kind="scatter"`` (:issue:`48726`)
79-
- Fixed regression in :meth:`MultiIndex.values`` resetting ``freq`` attribute of underlying :class:`Index` object (:issue:`49054`)
79+
- Fixed regression in :meth:`MultiIndex.values` resetting ``freq`` attribute of underlying :class:`Index` object (:issue:`49054`)
8080
- Fixed performance regression in :func:`factorize` when ``na_sentinel`` is not ``None`` and ``sort=False`` (:issue:`48620`)
8181
- Fixed regression causing an ``AttributeError`` during warning emitted if the provided table name in :meth:`DataFrame.to_sql` and the table name actually used in the database do not match (:issue:`48733`)
8282
- Fixed regression in :func:`to_datetime` when ``arg`` was a date string with nanosecond and ``format`` contained ``%f`` would raise a ``ValueError`` (:issue:`48767`)
83-
- Fixed regression in :func:`assert_frame_equal` raising for :class:`MultiIndex` with :class:`Categorical` and ``check_like=True`` (:issue:`48975`)
83+
- Fixed regression in :func:`testing.assert_frame_equal` raising for :class:`MultiIndex` with :class:`Categorical` and ``check_like=True`` (:issue:`48975`)
8484
- Fixed regression in :meth:`DataFrame.fillna` replacing wrong values for ``datetime64[ns]`` dtype and ``inplace=True`` (:issue:`48863`)
8585
- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
86-
- Fixed Regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`)
86+
- Fixed regression in :meth:`.DataFrameGroupBy.apply` when a user-defined function is called on an empty :class:`DataFrame` (:issue:`47985`)
8787
- Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`)
8888
- Fixed regression in :meth:`Series.groupby` and :meth:`DataFrame.groupby` when the grouper is a nullable data type (e.g. :class:`Int64`) or a PyArrow-backed string array, contains null values, and ``dropna=False`` (:issue:`48794`)
8989
- Fixed performance regression in :meth:`Series.isin` with mismatching dtypes (:issue:`49162`)
@@ -99,7 +99,7 @@ Bug fixes
9999
~~~~~~~~~
100100
- Bug in :meth:`Series.__getitem__` not falling back to positional for integer keys and boolean :class:`Index` (:issue:`48653`)
101101
- Bug in :meth:`DataFrame.to_hdf` raising ``AssertionError`` with boolean index (:issue:`48667`)
102-
- Bug in :func:`assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`)
102+
- Bug in :func:`testing.assert_index_equal` for extension arrays with non matching ``NA`` raising ``ValueError`` (:issue:`48608`)
103103
- Bug in :meth:`DataFrame.pivot_table` raising unexpected ``FutureWarning`` when setting datetime column as index (:issue:`48683`)
104104
- Bug in :meth:`DataFrame.sort_values` emitting unnecessary ``FutureWarning`` when called on :class:`DataFrame` with boolean sparse columns (:issue:`48784`)
105105
- Bug in :class:`.arrays.ArrowExtensionArray` with a comparison operator to an invalid object would not raise a ``NotImplementedError`` (:issue:`48833`)

doc/source/whatsnew/v1.5.2.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,4 +43,4 @@ Other
4343
Contributors
4444
~~~~~~~~~~~~
4545

46-
.. contributors:: v1.5.1..v1.5.2|HEAD
46+
.. contributors:: v1.5.1..v1.5.2

doc/source/whatsnew/v1.5.3.rst

+7-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
.. _whatsnew_153:
22

3-
What's new in 1.5.3 (December ??, 2022)
4-
---------------------------------------
3+
What's new in 1.5.3 (January 18, 2023)
4+
--------------------------------------
55

66
These are the changes in pandas 1.5.3. See :ref:`release` for a full changelog
77
including other versions of pandas.
@@ -15,12 +15,11 @@ Fixed regressions
1515
~~~~~~~~~~~~~~~~~
1616
- Fixed performance regression in :meth:`Series.isin` when ``values`` is empty (:issue:`49839`)
1717
- Fixed regression in :meth:`DataFrame.memory_usage` showing unnecessary ``FutureWarning`` when :class:`DataFrame` is empty (:issue:`50066`)
18-
- Fixed regression in :meth:`DataFrameGroupBy.transform` when used with ``as_index=False`` (:issue:`49834`)
18+
- Fixed regression in :meth:`.DataFrameGroupBy.transform` when used with ``as_index=False`` (:issue:`49834`)
1919
- Enforced reversion of ``color`` as an alias for ``c`` and ``size`` as an alias for ``s`` in function :meth:`DataFrame.plot.scatter` (:issue:`49732`)
20-
- Fixed regression in :meth:`SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
20+
- Fixed regression in :meth:`.SeriesGroupBy.apply` setting a ``name`` attribute on the result if the result was a :class:`DataFrame` (:issue:`49907`)
2121
- Fixed performance regression in setting with the :meth:`~DataFrame.at` indexer (:issue:`49771`)
2222
- Fixed regression in :func:`to_datetime` raising ``ValueError`` when parsing array of ``float`` containing ``np.nan`` (:issue:`50237`)
23-
-
2423

2524
.. ---------------------------------------------------------------------------
2625
.. _whatsnew_153.bug_fixes:
@@ -32,8 +31,8 @@ Bug fixes
3231
- Bug in :meth:`Series.quantile` emitting warning from NumPy when :class:`Series` has only ``NA`` values (:issue:`50681`)
3332
- Bug when chaining several :meth:`.Styler.concat` calls, only the last styler was concatenated (:issue:`49207`)
3433
- Fixed bug when instantiating a :class:`DataFrame` subclass inheriting from ``typing.Generic`` that triggered a ``UserWarning`` on python 3.11 (:issue:`49649`)
34+
- Bug in :func:`pivot_table` with NumPy 1.24 or greater when the :class:`DataFrame` columns have nested elements (:issue:`50342`)
3535
- Bug in :func:`pandas.testing.assert_series_equal` (and equivalent ``assert_`` functions) when having nested data and using numpy >= 1.25 (:issue:`50360`)
36-
-
3736

3837
.. ---------------------------------------------------------------------------
3938
.. _whatsnew_153.other:
@@ -48,12 +47,12 @@ Other
4847
as pandas works toward compatibility with SQLAlchemy 2.0.
4948

5049
- Reverted deprecation (:issue:`45324`) of behavior of :meth:`Series.__getitem__` and :meth:`Series.__setitem__` slicing with an integer :class:`Index`; this will remain positional (:issue:`49612`)
51-
-
50+
- A ``FutureWarning`` raised when attempting to set values inplace with :meth:`DataFrame.loc` or :meth:`DataFrame.iloc` has been changed to a ``DeprecationWarning`` (:issue:`48673`)
5251

5352
.. ---------------------------------------------------------------------------
5453
.. _whatsnew_153.contributors:
5554

5655
Contributors
5756
~~~~~~~~~~~~
5857

59-
.. contributors:: v1.5.2..v1.5.3|HEAD
58+
.. contributors:: v1.5.2..v1.5.3

0 commit comments

Comments
 (0)