Skip to content

Commit df8b523

Browse files
authored
Merge branch 'pandas-dev:main' into issue-50395
2 parents 863c068 + 9aa3f95 commit df8b523

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

58 files changed

+1635
-793
lines changed

.circleci/config.yml

+2-3
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
name: Build aarch64 wheels
4949
no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
5050
command: |
51-
pip3 install cibuildwheel==2.14.1
51+
pip3 install cibuildwheel==2.15.0
5252
cibuildwheel --prerelease-pythons --output-dir wheelhouse
5353
environment:
5454
CIBW_BUILD: << parameters.cibw-build >>
@@ -92,5 +92,4 @@ workflows:
9292
only: /^v.*/
9393
matrix:
9494
parameters:
95-
# TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
96-
cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]
95+
cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64", "cp312-manylinux_aarch64"]

.github/workflows/wheels.yml

+9-7
Original file line numberDiff line numberDiff line change
@@ -97,8 +97,7 @@ jobs:
9797
- [macos-12, macosx_*]
9898
- [windows-2022, win_amd64]
9999
# TODO: support PyPy?
100-
# TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
101-
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
100+
python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]]
102101
env:
103102
IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
104103
IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -150,8 +149,10 @@ jobs:
150149
uses: mamba-org/setup-micromamba@v1
151150
with:
152151
environment-name: wheel-env
152+
# Use a fixed Python, since we might have an unreleased Python not
153+
# yet present on conda-forge
153154
create-args: >-
154-
python=${{ matrix.python[1] }}
155+
python=3.11
155156
anaconda-client
156157
wheel
157158
cache-downloads: true
@@ -167,12 +168,13 @@ jobs:
167168
shell: pwsh
168169
run: |
169170
$TST_CMD = @"
170-
python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
171-
python -m pip install --find-links=pandas\wheelhouse --no-index pandas;
171+
python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
172+
python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
172173
python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
173174
"@
174-
docker pull python:${{ matrix.python[1] }}-windowsservercore
175-
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD
175+
# add rc to the end of the image name if the Python version is unreleased
176+
docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
177+
docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
176178
177179
- uses: actions/upload-artifact@v3
178180
with:

doc/source/user_guide/cookbook.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
459459
df
460460
461461
# List the size of the animals with the highest weight.
462-
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
462+
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
463463
464464
`Using get_group
465465
<https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
482482
return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
483483
484484
485-
expected_df = gb.apply(GrowUp)
485+
expected_df = gb.apply(GrowUp, include_groups=False)
486486
expected_df
487487
488488
`Expanding apply

doc/source/user_guide/groupby.rst

+10-4
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,12 @@ This is mainly syntactic sugar for the alternative, which is much more verbose:
420420
Additionally, this method avoids recomputing the internal grouping information
421421
derived from the passed key.
422422

423+
You can also include the grouping columns if you want to operate on them.
424+
425+
.. ipython:: python
426+
427+
grouped[["A", "B"]].sum()
428+
423429
.. _groupby.iterating-label:
424430

425431
Iterating through groups
@@ -1053,7 +1059,7 @@ missing values with the ``ffill()`` method.
10531059
).set_index("date")
10541060
df_re
10551061
1056-
df_re.groupby("group").resample("1D").ffill()
1062+
df_re.groupby("group").resample("1D", include_groups=False).ffill()
10571063
10581064
.. _groupby.filter:
10591065

@@ -1219,13 +1225,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare
12191225

12201226
.. ipython:: python
12211227
1222-
df.groupby("A", group_keys=True).apply(lambda x: x)
1228+
df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
12231229
12241230
with
12251231

12261232
.. ipython:: python
12271233
1228-
df.groupby("A", group_keys=False).apply(lambda x: x)
1234+
df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
12291235
12301236
12311237
Numba Accelerated Routines
@@ -1709,7 +1715,7 @@ column index name will be used as the name of the inserted column:
17091715
result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
17101716
return pd.Series(result, name="metrics")
17111717
1712-
result = df.groupby("a").apply(compute_metrics)
1718+
result = df.groupby("a").apply(compute_metrics, include_groups=False)
17131719
17141720
result
17151721

doc/source/whatsnew/v0.14.0.rst

+16-5
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,24 @@ More consistent behavior for some groupby methods:
328328

329329
- groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:
330330

331-
.. ipython:: python
331+
.. code-block:: ipython
332332
333-
df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
334-
g = df.groupby('A')
335-
g.head(1) # filters DataFrame
333+
In [1]: df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
336334
337-
g.apply(lambda x: x.head(1)) # used to simply fall-through
335+
In [2]: g = df.groupby('A')
336+
337+
In [3]: g.head(1) # filters DataFrame
338+
Out[3]:
339+
A B
340+
0 1 2
341+
2 5 6
342+
343+
In [4]: g.apply(lambda x: x.head(1)) # used to simply fall-through
344+
Out[4]:
345+
A B
346+
A
347+
1 0 1 2
348+
5 2 5 6
338349
339350
- groupby head and tail respect column selection:
340351

doc/source/whatsnew/v0.18.1.rst

+87-6
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,52 @@ Previously you would have to do this to get a rolling window mean per-group:
7777
df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
7878
df
7979
80-
.. ipython:: python
80+
.. code-block:: ipython
8181
82-
df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
82+
In [1]: df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
83+
Out[1]:
84+
A
85+
1 0 NaN
86+
1 NaN
87+
2 NaN
88+
3 1.5
89+
4 2.5
90+
5 3.5
91+
6 4.5
92+
7 5.5
93+
8 6.5
94+
9 7.5
95+
10 8.5
96+
11 9.5
97+
12 10.5
98+
13 11.5
99+
14 12.5
100+
15 13.5
101+
16 14.5
102+
17 15.5
103+
18 16.5
104+
19 17.5
105+
2 20 NaN
106+
21 NaN
107+
22 NaN
108+
23 21.5
109+
24 22.5
110+
25 23.5
111+
26 24.5
112+
27 25.5
113+
28 26.5
114+
29 27.5
115+
30 28.5
116+
31 29.5
117+
3 32 NaN
118+
33 NaN
119+
34 NaN
120+
35 33.5
121+
36 34.5
122+
37 35.5
123+
38 36.5
124+
39 37.5
125+
Name: B, dtype: float64
83126
84127
Now you can do:
85128

@@ -101,15 +144,53 @@ For ``.resample(..)`` type of operations, previously you would have to:
101144
102145
df
103146
104-
.. ipython:: python
147+
.. code-block:: ipython
105148
106-
df.groupby("group").apply(lambda x: x.resample("1D").ffill())
149+
In [1]: df.groupby("group").apply(lambda x: x.resample("1D").ffill())
150+
Out[1]:
151+
group val
152+
group date
153+
1 2016-01-03 1 5
154+
2016-01-04 1 5
155+
2016-01-05 1 5
156+
2016-01-06 1 5
157+
2016-01-07 1 5
158+
2016-01-08 1 5
159+
2016-01-09 1 5
160+
2016-01-10 1 6
161+
2 2016-01-17 2 7
162+
2016-01-18 2 7
163+
2016-01-19 2 7
164+
2016-01-20 2 7
165+
2016-01-21 2 7
166+
2016-01-22 2 7
167+
2016-01-23 2 7
168+
2016-01-24 2 8
107169
108170
Now you can do:
109171

110-
.. ipython:: python
172+
.. code-block:: ipython
111173
112-
df.groupby("group").resample("1D").ffill()
174+
In [1]: df.groupby("group").resample("1D").ffill()
175+
Out[1]:
176+
group val
177+
group date
178+
1 2016-01-03 1 5
179+
2016-01-04 1 5
180+
2016-01-05 1 5
181+
2016-01-06 1 5
182+
2016-01-07 1 5
183+
2016-01-08 1 5
184+
2016-01-09 1 5
185+
2016-01-10 1 6
186+
2 2016-01-17 2 7
187+
2016-01-18 2 7
188+
2016-01-19 2 7
189+
2016-01-20 2 7
190+
2016-01-21 2 7
191+
2016-01-22 2 7
192+
2016-01-23 2 7
193+
2016-01-24 2 8
113194
114195
.. _whatsnew_0181.enhancements.method_chain:
115196

doc/source/whatsnew/v2.1.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Fixed regressions
2121
- Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
2222
- Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``filter`` (:issue:`54980`)
2323
- Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
24+
- Fixed regression in :meth:`DataFrameGroupBy.agg` when aggregating a DataFrame with duplicate column names using a dictionary (:issue:`55006`)
2425
- Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
2526
- Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
2627
- Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)

doc/source/whatsnew/v2.2.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -146,12 +146,12 @@ Deprecations
146146
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
147147
- Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
148148
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
149+
- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
149150
- Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
150151
- Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
151152
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
152153
- Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
153154
- Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
154-
-
155155

156156
.. ---------------------------------------------------------------------------
157157
.. _whatsnew_220.performance:
@@ -227,6 +227,7 @@ MultiIndex
227227

228228
I/O
229229
^^^
230+
- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
230231
- Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
231232

232233
Period

generate_version.py

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#!/usr/bin/env python3
2+
13
# Note: This file has to live next to setup.py or versioneer will not work
24
import argparse
35
import os

meson.build

+9-7
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,17 @@
22
project(
33
'pandas',
44
'c', 'cpp', 'cython',
5-
version: run_command(['python', 'generate_version.py', '--print'], check: true).stdout().strip(),
5+
version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(),
66
license: 'BSD-3',
77
meson_version: '>=1.0.1',
88
default_options: [
99
'buildtype=release',
10-
# TODO: Reactivate werror, some warnings on Windows
11-
#'werror=true',
1210
'c_std=c99'
1311
]
1412
)
1513

1614
fs = import('fs')
17-
py = import('python').find_installation()
15+
py = import('python').find_installation(pure: false)
1816
tempita = files('generate_pxi.py')
1917
versioneer = files('generate_version.py')
2018

@@ -30,7 +28,7 @@ add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c
3028

3129

3230
if fs.exists('_version_meson.py')
33-
py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
31+
py.install_sources('_version_meson.py', subdir: 'pandas')
3432
else
3533
custom_target('write_version_file',
3634
output: '_version_meson.py',
@@ -40,11 +38,15 @@ else
4038
build_by_default: true,
4139
build_always_stale: true,
4240
install: true,
43-
install_dir: py.get_install_dir(pure: false) / 'pandas'
41+
install_dir: py.get_install_dir() / 'pandas'
4442
)
4543
meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
4644
endif
4745

4846
# Needed by pandas.test() when it looks for the pytest ini options
49-
py.install_sources('pyproject.toml', pure: false, subdir: 'pandas')
47+
py.install_sources(
48+
'pyproject.toml',
49+
subdir: 'pandas'
50+
)
51+
5052
subdir('pandas')

pandas/_libs/meson.build

+4-3
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,9 @@ foreach ext_name, ext_dict : libs_sources
114114
)
115115
endforeach
116116

117-
py.install_sources('__init__.py',
118-
pure: false,
119-
subdir: 'pandas/_libs')
117+
py.install_sources(
118+
'__init__.py',
119+
subdir: 'pandas/_libs'
120+
)
120121

121122
subdir('window')

pandas/_libs/parsers.pyx

+9-4
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ from csv import (
66
QUOTE_NONE,
77
QUOTE_NONNUMERIC,
88
)
9-
import sys
109
import time
1110
import warnings
1211

@@ -880,9 +879,15 @@ cdef class TextReader:
880879

881880
cdef _check_tokenize_status(self, int status):
882881
if self.parser.warn_msg != NULL:
883-
print(PyUnicode_DecodeUTF8(
884-
self.parser.warn_msg, strlen(self.parser.warn_msg),
885-
self.encoding_errors), file=sys.stderr)
882+
warnings.warn(
883+
PyUnicode_DecodeUTF8(
884+
self.parser.warn_msg,
885+
strlen(self.parser.warn_msg),
886+
self.encoding_errors
887+
),
888+
ParserWarning,
889+
stacklevel=find_stack_level()
890+
)
886891
free(self.parser.warn_msg)
887892
self.parser.warn_msg = NULL
888893

pandas/_libs/tslibs/meson.build

+4-3
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ foreach ext_name, ext_dict : tslibs_sources
3131
)
3232
endforeach
3333

34-
py.install_sources('__init__.py',
35-
pure: false,
36-
subdir: 'pandas/_libs/tslibs')
34+
py.install_sources(
35+
'__init__.py',
36+
subdir: 'pandas/_libs/tslibs'
37+
)

0 commit comments

Comments
 (0)