Commit 439385c

Merge branch 'main' into bug/dt_attr/non-nano
2 parents: bee2282 + a6d5db7

133 files changed (+1723, -795 lines)

.github/workflows/assign.yml (-19)

This file was deleted.

.github/workflows/preview-docs.yml renamed to .github/workflows/comment-commands.yml (+11, -5)
@@ -1,19 +1,25 @@
-name: Preview docs
+name: Comment Commands
 on:
   issue_comment:
     types: created

 permissions:
   contents: read
+  issues: write
+  pull-requests: write

 jobs:
+  issue_assign:
+    runs-on: ubuntu-22.04
+    steps:
+      - if: (!github.event.issue.pull_request) && github.event.comment.body == 'take'
+        run: |
+          echo "Assigning issue ${{ github.event.issue.number }} to ${{ github.event.comment.user.login }}"
+          curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"assignees": ["${{ github.event.comment.user.login }}"]}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/assignees
   preview_docs:
-    permissions:
-      issues: write
-      pull-requests: write
     runs-on: ubuntu-22.04
     steps:
-      - if: github.event.comment.body == '/preview'
+      - if: github.event.issue.pull_request && github.event.comment.body == '/preview'
         run: |
           if curl --output /dev/null --silent --head --fail "https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"; then
             curl -H "Authorization: token ${{ secrets.GITHUB_TOKEN }}" -d '{"body": "Website preview of this PR available at: https://pandas.pydata.org/preview/${{ github.event.issue.number }}/"}' https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.issue.number }}/comments

asv_bench/benchmarks/io/csv.py (+15)
@@ -555,4 +555,19 @@ def time_read_csv_index_col(self):
         read_csv(self.StringIO_input, index_col="a")


+class ReadCSVDatePyarrowEngine(StringIORewind):
+    def setup(self):
+        count_elem = 100_000
+        data = "a\n" + "2019-12-31\n" * count_elem
+        self.StringIO_input = StringIO(data)
+
+    def time_read_csv_index_col(self):
+        read_csv(
+            self.StringIO_input,
+            parse_dates=["a"],
+            engine="pyarrow",
+            dtype_backend="pyarrow",
+        )
+
+
 from ..pandas_vb_common import setup  # noqa: F401 isort:skip
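
Outside the asv harness, the new benchmark boils down to the following standalone sketch; the 1_000-row input is a scaled-down assumption (the benchmark itself uses 100_000 rows):

from io import StringIO

import pandas as pd

# What the benchmark measures: parsing a date column with the pyarrow
# engine and pyarrow-backed dtypes (pandas >= 2.0).
data = "a\n" + "2019-12-31\n" * 1_000
df = pd.read_csv(
    StringIO(data),
    parse_dates=["a"],
    engine="pyarrow",
    dtype_backend="pyarrow",
)
# With the GH 52546 fix, column "a" keeps a pyarrow timestamp dtype
# instead of being round-tripped through NumPy.
print(df.dtypes)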

doc/source/user_guide/cookbook.rst (+2, -2)
@@ -459,7 +459,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
    df

    # List the size of the animals with the highest weight.
-   df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()])
+   df.groupby("animal")[["size", "weight"]].apply(lambda subf: subf["size"][subf["weight"].idxmax()])

 `Using get_group
 <https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key>`__
@@ -482,7 +482,7 @@ Unlike agg, apply's callable is passed a sub-DataFrame which gives you access to
        return pd.Series(["L", avg_weight, True], index=["size", "weight", "adult"])

-   expected_df = gb.apply(GrowUp)
+   expected_df = gb[["size", "weight"]].apply(GrowUp)
    expected_df

 `Expanding apply
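
Both cookbook edits follow the same pattern: select the columns to operate on before apply, so the grouping column is no longer passed to the callable (something newer pandas deprecates). A small self-contained sketch of the first change; the frame here is illustrative, not the cookbook's:

import pandas as pd

df = pd.DataFrame(
    {
        "animal": ["cat", "dog", "cat", "dog"],
        "size": ["S", "M", "L", "XL"],
        "weight": [8, 10, 12, 30],
    }
)
# Selecting [["size", "weight"]] keeps "animal" out of the sub-DataFrame,
# so apply never touches the grouping column.
out = df.groupby("animal")[["size", "weight"]].apply(
    lambda subf: subf["size"][subf["weight"].idxmax()]
)
print(out)  # size of the heaviest animal per group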

doc/source/user_guide/groupby.rst (+10, -4)
@@ -430,6 +430,12 @@ This is mainly syntactic sugar for the alternative, which is much more verbose:
 Additionally, this method avoids recomputing the internal grouping information
 derived from the passed key.

+You can also include the grouping columns if you want to operate on them.
+
+.. ipython:: python
+
+   grouped[["A", "B"]].sum()
+
 .. _groupby.iterating-label:

 Iterating through groups
@@ -1067,7 +1073,7 @@ missing values with the ``ffill()`` method.
    ).set_index("date")
    df_re

-   df_re.groupby("group").resample("1D").ffill()
+   df_re.groupby("group")[["val"]].resample("1D").ffill()

 .. _groupby.filter:

@@ -1233,13 +1239,13 @@ the argument ``group_keys`` which defaults to ``True``. Compare

 .. ipython:: python

-   df.groupby("A", group_keys=True).apply(lambda x: x)
+   df.groupby("A", group_keys=True)[["B", "C", "D"]].apply(lambda x: x)

 with

 .. ipython:: python

-   df.groupby("A", group_keys=False).apply(lambda x: x)
+   df.groupby("A", group_keys=False)[["B", "C", "D"]].apply(lambda x: x)


 Numba Accelerated Routines
@@ -1722,7 +1728,7 @@ column index name will be used as the name of the inserted column:
        result = {"b_sum": x["b"].sum(), "c_mean": x["c"].mean()}
        return pd.Series(result, name="metrics")

-   result = df.groupby("a").apply(compute_metrics)
+   result = df.groupby("a")[["b", "c"]].apply(compute_metrics)

    result
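
The group_keys contrast these doc edits touch is easiest to see side by side. A compact sketch, using an illustrative frame rather than the guide's:

import pandas as pd

df = pd.DataFrame({"A": ["x", "x", "y"], "B": [1, 2, 3]})

# group_keys=True prepends the group labels to the result's index ...
with_keys = df.groupby("A", group_keys=True)[["B"]].apply(lambda g: g)
# ... while group_keys=False leaves the original index untouched.
without_keys = df.groupby("A", group_keys=False)[["B"]].apply(lambda g: g)

print(with_keys.index)     # MultiIndex: ("x", 0), ("x", 1), ("y", 2)
print(without_keys.index)  # original RangeIndex: 0, 1, 2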

doc/source/user_guide/io.rst (+12)
@@ -3449,6 +3449,18 @@ Reading Excel files
 In the most basic use-case, ``read_excel`` takes a path to an Excel
 file, and the ``sheet_name`` indicating which sheet to parse.

+When using the ``engine_kwargs`` parameter, pandas will pass these arguments to the
+engine. For this, it is important to know which function pandas is
+using internally.
+
+* For the engine openpyxl, pandas is using :func:`openpyxl.load_workbook` to read in (``.xlsx``) and (``.xlsm``) files.
+
+* For the engine xlrd, pandas is using :func:`xlrd.open_workbook` to read in (``.xls``) files.
+
+* For the engine pyxlsb, pandas is using :func:`pyxlsb.open_workbook` to read in (``.xlsb``) files.
+
+* For the engine odf, pandas is using :func:`odf.opendocument.load` to read in (``.ods``) files.
+
 .. code-block:: python

    # Returns a DataFrame
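
For illustration, a hedged sketch of engine_kwargs in use. keep_vba is a genuine openpyxl.load_workbook option, but the file path is a placeholder; note that pandas typically passes some load_workbook arguments (such as read_only) itself, so engine_kwargs is for options beyond those defaults:

import pandas as pd

# engine_kwargs is forwarded to openpyxl.load_workbook; keep_vba=True asks
# openpyxl to preserve VBA content when opening a macro-enabled workbook.
df = pd.read_excel(
    "macro_book.xlsm",  # placeholder path
    sheet_name=0,
    engine="openpyxl",
    engine_kwargs={"keep_vba": True},
)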

doc/source/user_guide/scale.rst (+4)
@@ -257,6 +257,7 @@ We'll import ``dask.dataframe`` and notice that the API feels similar to pandas.
 We can use Dask's ``read_parquet`` function, but provide a globstring of files to read in.

 .. ipython:: python
+   :okwarning:

    import dask.dataframe as dd
@@ -286,6 +287,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet.
 Rather than executing immediately, doing operations build up a **task graph**.

 .. ipython:: python
+   :okwarning:

    ddf
    ddf["name"]
@@ -300,6 +302,7 @@ returns a Dask Series with the same dtype and the same name.
 To get the actual result you can call ``.compute()``.

 .. ipython:: python
+   :okwarning:

    %time ddf["name"].value_counts().compute()
@@ -345,6 +348,7 @@ known automatically. In this case, since we created the parquet files manually,
 we need to supply the divisions manually.

 .. ipython:: python
+   :okwarning:

    N = 12
    starts = [f"20{i:>02d}-01-01" for i in range(N)]
doc/source/whatsnew/v0.14.0.rst

+18-6
Original file line numberDiff line numberDiff line change
@@ -328,13 +328,25 @@ More consistent behavior for some groupby methods:
328328

329329
- groupby ``head`` and ``tail`` now act more like ``filter`` rather than an aggregation:
330330

331-
.. ipython:: python
331+
.. code-block:: ipython
332332
333-
df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
334-
g = df.groupby('A')
335-
g.head(1) # filters DataFrame
333+
In [1]: df = pd.DataFrame([[1, 2], [1, 4], [5, 6]], columns=['A', 'B'])
334+
335+
In [2]: g = df.groupby('A')
336+
337+
In [3]: g.head(1) # filters DataFrame
338+
Out[3]:
339+
A B
340+
0 1 2
341+
2 5 6
342+
343+
In [4]: g.apply(lambda x: x.head(1)) # used to simply fall-through
344+
Out[4]:
345+
A B
346+
A
347+
1 0 1 2
348+
5 2 5 6
336349
337-
g.apply(lambda x: x.head(1)) # used to simply fall-through
338350
339351
- groupby head and tail respect column selection:
340352

@@ -494,7 +506,7 @@ See also issues (:issue:`6134`, :issue:`4036`, :issue:`3057`, :issue:`2598`, :is
494506

495507
You should specify all axes in the ``.loc`` specifier, meaning the indexer for the **index** and
496508
for the **columns**. Their are some ambiguous cases where the passed indexer could be mis-interpreted
497-
as indexing *both* axes, rather than into say the MuliIndex for the rows.
509+
as indexing *both* axes, rather than into say the MultiIndex for the rows.
498510

499511
You should do this:
500512
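
The .loc guidance in the second hunk is about disambiguation on a MultiIndex. A small sketch of what "specify all axes" means in practice; the frame is illustrative, not from the whatsnew:

import pandas as pd

df = pd.DataFrame(
    {"x": range(4)},
    index=pd.MultiIndex.from_product([["a", "b"], [1, 2]], names=["k1", "k2"]),
)

# A bare tuple is ambiguous: it could mean a row key, or (rows, columns).
# Giving indexers for BOTH axes makes the tuple unambiguously row-wise.
print(df.loc[("a", 1), :])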

doc/source/whatsnew/v0.18.1.rst (+87, -6)
@@ -77,9 +77,52 @@ Previously you would have to do this to get a rolling window mean per-group:
    df = pd.DataFrame({"A": [1] * 20 + [2] * 12 + [3] * 8, "B": np.arange(40)})
    df

-.. ipython:: python
+.. code-block:: ipython

-   df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   In [1]: df.groupby("A").apply(lambda x: x.rolling(4).B.mean())
+   Out[1]:
+   A
+   1  0      NaN
+      1      NaN
+      2      NaN
+      3      1.5
+      4      2.5
+      5      3.5
+      6      4.5
+      7      5.5
+      8      6.5
+      9      7.5
+      10     8.5
+      11     9.5
+      12    10.5
+      13    11.5
+      14    12.5
+      15    13.5
+      16    14.5
+      17    15.5
+      18    16.5
+      19    17.5
+   2  20     NaN
+      21     NaN
+      22     NaN
+      23    21.5
+      24    22.5
+      25    23.5
+      26    24.5
+      27    25.5
+      28    26.5
+      29    27.5
+      30    28.5
+      31    29.5
+   3  32     NaN
+      33     NaN
+      34     NaN
+      35    33.5
+      36    34.5
+      37    35.5
+      38    36.5
+      39    37.5
+   Name: B, dtype: float64

 Now you can do:

@@ -101,15 +144,53 @@ For ``.resample(..)`` type of operations, previously you would have to:

    df

-.. ipython:: python
+.. code-block:: ipython

-   df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   In [1]: df.groupby("group").apply(lambda x: x.resample("1D").ffill())
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8

 Now you can do:

-.. ipython:: python
+.. code-block:: ipython

-   df.groupby("group").resample("1D").ffill()
+   In [1]: df.groupby("group").resample("1D").ffill()
+   Out[1]:
+                     group  val
+   group date
+   1     2016-01-03      1    5
+         2016-01-04      1    5
+         2016-01-05      1    5
+         2016-01-06      1    5
+         2016-01-07      1    5
+         2016-01-08      1    5
+         2016-01-09      1    5
+         2016-01-10      1    6
+   2     2016-01-17      2    7
+         2016-01-18      2    7
+         2016-01-19      2    7
+         2016-01-20      2    7
+         2016-01-21      2    7
+         2016-01-22      2    7
+         2016-01-23      2    7
+         2016-01-24      2    8

 .. _whatsnew_0181.enhancements.method_chain:

doc/source/whatsnew/v2.0.1.rst (+9)
@@ -15,6 +15,9 @@ Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression for subclassed Series when constructing from a dictionary (:issue:`52445`)
 - Fixed regression in :meth:`Series.describe` showing ``RuntimeWarning`` for extension dtype :class:`Series` with one element (:issue:`52515`)
+- Fixed regression in :meth:`DataFrame.sort_values` not resetting index when :class:`DataFrame` is already sorted and ``ignore_index=True`` (:issue:`52553`)
+- Fixed regression in :meth:`MultiIndex.isin` raising ``TypeError`` for ``Generator`` (:issue:`52568`)
+- Fixed regression in :meth:`DataFrame.pivot` changing :class:`Index` name of input object (:issue:`52629`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_201.bug_fixes:
@@ -27,12 +30,18 @@ Bug fixes
 - Bug in :meth:`Series.describe` not returning :class:`ArrowDtype` with ``pyarrow.float64`` type with numeric data (:issue:`52427`)
 - Fixed segfault in :meth:`Series.to_numpy` with ``null[pyarrow]`` dtype (:issue:`52443`)
 - Bug in :func:`pandas.testing.assert_series_equal` where ``check_dtype=False`` would still raise for datetime or timedelta types with different resolutions (:issue:`52449`)
+- Bug in :meth:`DataFrame.max` and related casting different :class:`Timestamp` resolutions always to nanoseconds (:issue:`52524`)
+- Bug in :meth:`ArrowDtype.__from_arrow__` not respecting if dtype is explicitly given (:issue:`52533`)
+- Bug in :func:`read_csv` casting PyArrow datetimes to NumPy when ``dtype_backend="pyarrow"`` and ``parse_dates`` is set causing a performance bottleneck in the process (:issue:`52546`)
+- Bug in :class:`arrays.DatetimeArray` constructor returning an incorrect unit when passed a non-nanosecond numpy datetime array (:issue:`52555`)
+- Bug in :func:`to_numeric` with ``errors='coerce'`` and ``dtype_backend='pyarrow'`` with :class:`ArrowDtype` data (:issue:`52588`)

 .. ---------------------------------------------------------------------------
 .. _whatsnew_201.other:

 Other
 ~~~~~
+- Implemented :meth:`Series.str.split` and :meth:`Series.str.rsplit` for :class:`ArrowDtype` with ``pyarrow.string`` (:issue:`52401`)
 - :class:`DataFrame` created from empty dicts had :attr:`~DataFrame.columns` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
 - :class:`Series` created from empty dicts had :attr:`~Series.index` of dtype ``object``. It is now a :class:`RangeIndex` (:issue:`52404`)
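
One of the entries above (GH 52401) adds native splitting for pyarrow-backed strings. A brief hedged sketch of the new behavior:

import pandas as pd
import pyarrow as pa

# Series.str.split now works directly on ArrowDtype(pyarrow.string()) data
# (GH 52401); the result stays pyarrow-backed as a list-of-string column.
s = pd.Series(["a,b,c", "d,e"], dtype=pd.ArrowDtype(pa.string()))
print(s.str.split(","))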
