pandas-dev
diff --git a/‎.circleci/config.yml
+2-3 b/‎.circleci/config.yml
+2-3
diff --git a/‎.github/workflows/wheels.yml
+9-7 b/‎.github/workflows/wheels.yml
+9-7
diff --git a/‎doc/source/user_guide/cookbook.rst
+2-2 b/‎doc/source/user_guide/cookbook.rst
+2-2
diff --git a/‎doc/source/user_guide/groupby.rst
+10-4 b/‎doc/source/user_guide/groupby.rst
+10-4
diff --git a/‎doc/source/whatsnew/v0.14.0.rst
+16-5 b/‎doc/source/whatsnew/v0.14.0.rst
+16-5
diff --git a/‎doc/source/whatsnew/v0.18.1.rst
+87-6 b/‎doc/source/whatsnew/v0.18.1.rst
+87-6
diff --git a/‎doc/source/whatsnew/v2.1.1.rst
+1 b/‎doc/source/whatsnew/v2.1.1.rst
+1
diff --git a/‎doc/source/whatsnew/v2.2.0.rst
+2-1 b/‎doc/source/whatsnew/v2.2.0.rst
+2-1
diff --git a/‎generate_version.py
+2 b/‎generate_version.py
+2
diff --git a/‎meson.build
+9-7 b/‎meson.build
+9-7
diff --git a/‎pandas/_libs/meson.build
+4-3 b/‎pandas/_libs/meson.build
+4-3
diff --git a/‎pandas/_libs/parsers.pyx
+9-4 b/‎pandas/_libs/parsers.pyx
+9-4
diff --git a/‎pandas/_libs/tslibs/meson.build
+4-3 b/‎pandas/_libs/tslibs/meson.build
+4-3
@@ -48,7 +48,7 @@ jobs:
           name: Build aarch64 wheels
           no_output_timeout: 30m # Sometimes the tests won't generate any output, make sure the job doesn't get killed by that
           command: |
-            pip3 install cibuildwheel==2.14.1
+            pip3 install cibuildwheel==2.15.0
             cibuildwheel --prerelease-pythons --output-dir wheelhouse
           environment:
             CIBW_BUILD: << parameters.cibw-build >>
@@ -92,5 +92,4 @@ workflows:
               only: /^v.*/
           matrix:
             parameters:
-              # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
-              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64"]#, "cp312-manylinux_aarch64"]
+              cibw-build: ["cp39-manylinux_aarch64", "cp310-manylinux_aarch64", "cp311-manylinux_aarch64", "cp312-manylinux_aarch64"]
@@ -97,8 +97,7 @@ jobs:
         - [macos-12, macosx_*]
         - [windows-2022, win_amd64]
         # TODO: support PyPy?
-        # TODO: Enable Python 3.12 wheels when numpy releases a version that supports Python 3.12
-        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"]]#, ["cp312", "3.12"]]
+        python: [["cp39", "3.9"], ["cp310", "3.10"], ["cp311", "3.11"], ["cp312", "3.12"]]
     env:
       IS_PUSH: ${{ github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') }}
       IS_SCHEDULE_DISPATCH: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
@@ -150,8 +149,10 @@ jobs:
         uses: mamba-org/setup-micromamba@v1
         with:
           environment-name: wheel-env
+          # Use a fixed Python, since we might have an unreleased Python not
+          # yet present on conda-forge
           create-args: >-
-            python=${{ matrix.python[1] }}
+            python=3.11
             anaconda-client
             wheel
           cache-downloads: true
@@ -167,12 +168,13 @@ jobs:
         shell: pwsh
         run: |
           $TST_CMD = @"
-          python -m pip install pytz six numpy python-dateutil tzdata>=2022.1 hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
-          python -m pip install --find-links=pandas\wheelhouse --no-index pandas;
+          python -m pip install hypothesis>=6.46.1 pytest>=7.3.2 pytest-xdist>=2.2.0 pytest-asyncio>=0.17;
+          python -m pip install `$(Get-Item pandas\wheelhouse\*.whl);
           python -c `'import pandas as pd; pd.test(extra_args=[\"`\"--no-strict-data-files`\"\", \"`\"-m not clipboard and not single_cpu and not slow and not network and not db`\"\"])`';
           "@
-          docker pull python:${{ matrix.python[1] }}-windowsservercore
-          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] }}-windowsservercore powershell -Command $TST_CMD
+          # use the release-candidate image tag (3.12-rc) if the Python version is unreleased
+          docker pull python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }}
+          docker run --env PANDAS_CI='1' -v ${PWD}:C:\pandas python:${{ matrix.python[1] == '3.12' && '3.12-rc' || format('{0}-windowsservercore', matrix.python[1]) }} powershell -Command $TST_CMD
 
       - uses: actions/upload-artifact@v3
         with:
 
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
    df
 
    # List the size of the animals with the highest weight.
-   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
+   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)
 
 `Using get_group
 <https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
        return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])
 
 
-   expected_df = gb.apply(GrowUp)
+   expected_df = gb.apply(GrowUp, include_groups=False)
    expected_df
 
 `Expanding apply
 
@@ -420,6 +420,12 @@ This is mainly syntactic sugar for the alternative, which is much more verbose:
 Additionally, this method avoids recomputing the internal grouping information
 derived from the passed key.
 
+You can also include the grouping columns if you want to operate on them.
+
+.. ipython:: python
+
+   grouped[["A", "B"]].sum()
+
 .. _groupby.iterating-label:
 
 Iterating through groups
@@ -1053,7 +1059,7 @@ missing values with the ``ffill()`` method.
    ).set_index("date")
    df_re
 
-   df_re.groupby("group").resample("1D").ffill()
+   df_re.groupby("group").resample("1D", include_groups=False).ffill()
 
 .. _groupby.filter:
 
@@ -1219,13 +1225,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare
 
 .. ipython:: python
 
-    df.groupby("A", group_keys=True).apply(lambda x: x)
+    df.groupby("A", group_keys=True).apply(lambda x: x, include_groups=False)
 
 with
 
 .. ipython:: python
 
-    df.groupby("A", group_keys=False).apply(lambda x: x)
+    df.groupby("A", group_keys=False).apply(lambda x: x, include_groups=False)
 
 
 Numba Accelerated Routines
@@ -1709,7 +1715,7 @@ column index name will be used as the name of the inserted column:
        result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
        return pd.Series(result, name="metrics")
 
-   result = df.groupby("a").apply(compute_metrics)
+   result = df.groupby("a").apply(compute_metrics, include_groups=False)
 
    result
 
 
@@ -328,13 +328,24 @@ More consistent behavior for some groupby methods:
 
 - groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:
 
-  .. ipython:: python
+  .. code-block:: ipython
 
-     df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
-     g = df.groupby('A')
-     g.head(1)  # filters DataFrame
+     In [1]: df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
 
-     g.apply(lambda x: x.head(1))  # used to simply fall-through
+     In [2]: g = df.groupby('A')
+
+     In [3]: g.head(1)  # filters DataFrame
+     Out[3]:
+        A  B
+     0  1  2
+     2  5  6
+
+     In [4]: g.apply(lambda x: x.head(1))  # used to simply fall-through
+     Out[4]:
+          A  B
+     A
+     1 0  1  2
+     5 2  5  6
 
 - groupby head and tail respect column selection:
 
 
@@ -77,9 +77,52 @@ Previously you would have to do this to get a rolling window mean per-group:
    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
    df
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   In [1]: df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   Out[1]:
+   A
+   1  0      NaN
+      1      NaN
+      2      NaN
+      3      1.5
+      4      2.5
+      5      3.5
+      6      4.5
+      7      5.5
+      8      6.5
+      9      7.5
+      10     8.5
+      11     9.5
+      12    10.5
+      13    11.5
+      14    12.5
+      15    13.5
+      16    14.5
+      17    15.5
+      18    16.5
+      19    17.5
+   2  20     NaN
+      21     NaN
+      22     NaN
+      23    21.5
+      24    22.5
+      25    23.5
+      26    24.5
+      27    25.5
+      28    26.5
+      29    27.5
+      30    28.5
+      31    29.5
+   3  32     NaN
+      33     NaN
+      34     NaN
+      35    33.5
+      36    34.5
+      37    35.5
+      38    36.5
+      39    37.5
+   Name: B, dtype: float64
 
 Now you can do:
 
@@ -101,15 +144,53 @@ For ``.resample(..)`` type of operations, previously you would have to:
 
    df
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   In [1]: df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8
 
 Now you can do:
 
-.. ipython:: python
+.. code-block:: ipython
 
-   df.groupby("group").resample("1D").ffill()
+   In [1]: df.groupby("group").resample("1D").ffill()
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8
 
 .. _whatsnew_0181.enhancements.method_chain:
 
 
@@ -21,6 +21,7 @@ Fixed regressions
 - Fixed regression in :meth:`DataFrame.__setitem__` raising ``AssertionError`` when setting a :class:`Series` with a partial :class:`MultiIndex` (:issue:`54875`)
 - Fixed regression in :meth:`DataFrame.filter` not respecting the order of elements for ``filter`` (:issue:`54980`)
 - Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite (:issue:`54877`)
+- Fixed regression in :meth:`DataFrameGroupBy.agg` when aggregating a DataFrame with duplicate column names using a dictionary (:issue:`55006`)
 - Fixed regression in :meth:`MultiIndex.append` raising when appending overlapping :class:`IntervalIndex` levels (:issue:`54934`)
 - Fixed regression in :meth:`Series.drop_duplicates` for PyArrow strings (:issue:`54904`)
 - Fixed regression in :meth:`Series.interpolate` raising when ``fill_value`` was given (:issue:`54920`)
 
@@ -146,12 +146,12 @@ Deprecations
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_pickle` except ``path``. (:issue:`54229`)
 - Deprecated allowing non-keyword arguments in :meth:`DataFrame.to_string` except ``buf``. (:issue:`54229`)
 - Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.downcasting", True)`` (:issue:`53656`)
+- Deprecated including the groups in computations when using :meth:`DataFrameGroupBy.apply` and :meth:`DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
 - Deprecated not passing a tuple to :class:`DataFrameGroupBy.get_group` or :class:`SeriesGroupBy.get_group` when grouping by a length-1 list-like (:issue:`25971`)
 - Deprecated strings ``S``, ``U``, and ``N`` denoting units in :func:`to_timedelta` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Second`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`52536`)
 - Deprecated strings ``T``, ``S``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`52536`)
 - Deprecated the extension test classes ``BaseNoReduceTests``, ``BaseBooleanReduceTests``, and ``BaseNumericReduceTests``, use ``BaseReduceTests`` instead (:issue:`54663`)
--
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_220.performance:
@@ -227,6 +227,7 @@ MultiIndex
 
 I/O
 ^^^
+- Bug in :func:`read_csv` where ``on_bad_lines="warn"`` would write to ``stderr`` instead of raising a Python warning. This now yields a :class:`.errors.ParserWarning` (:issue:`54296`)
 - Bug in :func:`read_excel`, with ``engine="xlrd"`` (``xls`` files) erroring when file contains NaNs/Infs (:issue:`54564`)
 
 Period
 
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+
 # Note: This file has to live next to setup.py or versioneer will not work
 import argparse
 import os
 
@@ -2,19 +2,17 @@
 project(
     'pandas',
     'c', 'cpp', 'cython',
-    version: run_command(['python', 'generate_version.py', '--print'], check: true).stdout().strip(),
+    version: run_command(['generate_version.py', '--print'], check: true).stdout().strip(),
     license: 'BSD-3',
     meson_version: '>=1.0.1',
     default_options: [
         'buildtype=release',
-        # TODO: Reactivate werror, some warnings on Windows
-        #'werror=true',
         'c_std=c99'
     ]
 )
 
 fs = import('fs')
-py = import('python').find_installation()
+py = import('python').find_installation(pure: false)
 tempita = files('generate_pxi.py')
 versioneer = files('generate_version.py')
 
@@ -30,7 +28,7 @@ add_project_arguments('-DNPY_TARGET_VERSION=NPY_1_21_API_VERSION', language : 'c
 
 
 if fs.exists('_version_meson.py')
-    py.install_sources('_version_meson.py', pure: false, subdir: 'pandas')
+    py.install_sources('_version_meson.py', subdir: 'pandas')
 else
     custom_target('write_version_file',
         output: '_version_meson.py',
@@ -40,11 +38,15 @@ else
         build_by_default: true,
         build_always_stale: true,
         install: true,
-        install_dir: py.get_install_dir(pure: false) / 'pandas'
+        install_dir: py.get_install_dir() / 'pandas'
     )
     meson.add_dist_script(py, versioneer, '-o', '_version_meson.py')
 endif
 
 # Needed by pandas.test() when it looks for the pytest ini options
-py.install_sources('pyproject.toml', pure: false, subdir: 'pandas')
+py.install_sources(
+    'pyproject.toml',
+    subdir: 'pandas'
+)
+
 subdir('pandas')
@@ -114,8 +114,9 @@ foreach ext_name, ext_dict : libs_sources
     )
 endforeach
 
-py.install_sources('__init__.py',
-                    pure: false,
-                    subdir: 'pandas/_libs')
+py.install_sources(
+    '__init__.py',
+    subdir: 'pandas/_libs'
+)
 
 subdir('window')
@@ -6,7 +6,6 @@ from csv import (
     QUOTE_NONE,
     QUOTE_NONNUMERIC,
 )
-import sys
 import time
 import warnings
 
@@ -880,9 +879,15 @@ cdef class TextReader:
 
     cdef _check_tokenize_status(self, int status):
         if self.parser.warn_msg != NULL:
-            print(PyUnicode_DecodeUTF8(
-                self.parser.warn_msg, strlen(self.parser.warn_msg),
-                self.encoding_errors), file=sys.stderr)
+            warnings.warn(
+                PyUnicode_DecodeUTF8(
+                    self.parser.warn_msg,
+                    strlen(self.parser.warn_msg),
+                    self.encoding_errors
+                ),
+                ParserWarning,
+                stacklevel=find_stack_level()
+            )
             free(self.parser.warn_msg)
             self.parser.warn_msg = NULL
 
 
@@ -31,6 +31,7 @@ foreach ext_name, ext_dict : tslibs_sources
     )
 endforeach
 
-py.install_sources('__init__.py',
-                    pure: false,
-                    subdir: 'pandas/_libs/tslibs')
+py.install_sources(
+    '__init__.py',
+    subdir: 'pandas/_libs/tslibs'
+)
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,5 @@`
	`1`	`+#!/usr/bin/env python3`
	`2`	`+`
`1`	`3`	`# Note: This file has to live next to setup.py or versioneer will not work`
`2`	`4`	`import argparse`
`3`	`5`	`import os`