From b4b461e0c69e420b0b583bef15f4d740b276693e Mon Sep 17 00:00:00 2001 From: David Leon Date: Tue, 20 Dec 2022 19:02:33 +0100 Subject: [PATCH 1/3] added copy on write for droplevel --- pandas/core/generic.py | 3 ++- pandas/tests/copy_view/test_methods.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2de83bb7a4468..084ebc04694db 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -740,7 +740,8 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): else: # With copy=False, we create a new object but don't copy the # underlying data. - obj = self.copy(deep=copy) + if copy: + obj = self.copy(deep=None) setattr(obj, obj._get_axis_name(axis), labels) return obj diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index bf65f153b10dd..42fdc2d8353c2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -280,3 +280,20 @@ def test_head_tail(method, using_copy_on_write): # without CoW enabled, head and tail return views. Mutating df2 also mutates df. df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) + +def test_droplevel(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index(["a","b"]) + df_orig = df.copy() + df2 = df.droplevel(0) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + else: + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.loc["b","c"] = 1 + + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) From 9fbc21b3ffa96cf0e112c86eb80a35046f558edc Mon Sep 17 00:00:00 2001 From: David Leon Date: Mon, 2 Jan 2023 16:33:49 +0100 Subject: [PATCH 2/3] fixed copy on write for droplevel --- .pre-commit-config.yaml | 42 +++- pandas/_libs/tslibs/strptime.pxd | 4 + .../core/array_algos/masked_accumulations.py | 92 ++++++++ pandas/core/generic.py | 140 +++++------- pandas/core/methods/to_dict.py | 202 ++++++++++++++++++ pandas/tests/copy_view/test_methods.py | 178 ++++++++++++++- pandas/tests/copy_view/test_util.py | 14 ++ pandas/tests/extension/base/accumulate.py | 37 ++++ pandas/tests/groupby/test_raises.py | 178 +++++++++++++++ scripts/check_test_naming.py | 152 +++++++++++++ scripts/tests/test_check_test_naming.py | 54 +++++ 11 files changed, 1004 insertions(+), 89 deletions(-) create mode 100644 pandas/_libs/tslibs/strptime.pxd create mode 100644 pandas/core/array_algos/masked_accumulations.py create mode 100644 pandas/core/methods/to_dict.py create mode 100644 pandas/tests/copy_view/test_util.py create mode 100644 pandas/tests/extension/base/accumulate.py create mode 100644 pandas/tests/groupby/test_raises.py create mode 100644 scripts/check_test_naming.py create mode 100644 scripts/tests/test_check_test_naming.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0779f9c95f7b4..f3158e64df8dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,17 @@ minimum_pre_commit_version: 2.15.0 exclude: ^LICENSES/|\.(html|csv|svg)$ -# reserve "manual" for mypy and pyright -default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] +# reserve "manual" for relatively slow hooks which we still want to run in CI +default_stages: [ + 
commit, + merge-commit, + push, + prepare-commit-msg, + commit-msg, + post-checkout, + post-commit, + post-merge, + post-rewrite +] ci: autofix_prs: false repos: @@ -27,15 +37,18 @@ repos: rev: v0.9.1 hooks: - id: cython-lint + - id: double-quote-cython-strings - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: debug-statements - id: end-of-file-fixer exclude: \.txt$ - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] - id: trailing-whitespace - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] - repo: https://github.com/cpplint/cpplint rev: 1.6.1 hooks: @@ -45,7 +58,14 @@ repos: # this particular codebase (e.g. src/headers, src/klib). However, # we can lint all header files since they aren't "generated" like C files are. exclude: ^pandas/_libs/src/(klib|headers)/ - args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir'] + args: [ + --quiet, + '--extensions=c,h', + '--headers=h', + --recursive, + --linelength=88, + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' + ] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: @@ -106,6 +126,7 @@ repos: hooks: - id: yesqa additional_dependencies: *flake8_dependencies + stages: [manual] - repo: local hooks: # NOTE: we make `black` a local hook because if it's installed from @@ -213,7 +234,6 @@ repos: exclude: ^pandas/tests/extension/base/base\.py - id: pip-to-conda name: Generate pip dependency from conda - description: This hook checks if the conda environment.yml and requirements-dev.txt are equal language: python entry: python scripts/generate_pip_deps_from_conda.py files: ^(environment.yml|requirements-dev.txt)$ @@ -310,6 +330,16 @@ repos: files: ^pandas exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard) language: python + stages: [manual] additional_dependencies: - autotyping==22.9.0 - libcst==0.4.7 + - id: check-test-naming + name: check that test names start with 'test' + entry: python -m scripts.check_test_naming + types: [python] + files: ^pandas/tests + language: python + exclude: | + (?x) + ^pandas/tests/generic/test_generic.py # GH50380 diff --git a/pandas/_libs/tslibs/strptime.pxd b/pandas/_libs/tslibs/strptime.pxd new file mode 100644 index 0000000000000..175195d4362e4 --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pxd @@ -0,0 +1,4 @@ +from numpy cimport int64_t + + +cdef bint parse_today_now(str val, int64_t* iresult, bint utc) diff --git a/pandas/core/array_algos/masked_accumulations.py b/pandas/core/array_algos/masked_accumulations.py new file mode 100644 index 0000000000000..07113128e0947 --- /dev/null +++ b/pandas/core/array_algos/masked_accumulations.py @@ -0,0 +1,92 @@ +""" +masked_accumulations.py is for accumulation algorithms using a mask-based approach +for missing values. 
+""" + +from __future__ import annotations + +from typing import Callable + +import numpy as np + +from pandas._typing import npt + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_integer_dtype, +) + + +def _cum_func( + func: Callable, + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, +): + """ + Accumulations for 1D masked array. + + We will modify values in place to replace NAs with the appropriate fill value. + + Parameters + ---------- + func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate + values : np.ndarray + Numpy array with the values (can be of any dtype that support the + operation). + mask : np.ndarray + Boolean numpy array (True values indicate missing values). + skipna : bool, default True + Whether to skip NA. + """ + dtype_info: np.iinfo | np.finfo + if is_float_dtype(values): + dtype_info = np.finfo(values.dtype.type) + elif is_integer_dtype(values): + dtype_info = np.iinfo(values.dtype.type) + elif is_bool_dtype(values): + # Max value of bool is 1, but since we are setting into a boolean + # array, 255 is fine as well. Min value has to be 0 when setting + # into the boolean array. + dtype_info = np.iinfo(np.uint8) + else: + raise NotImplementedError( + f"No masked accumulation defined for dtype {values.dtype.type}" + ) + try: + fill_value = { + np.cumprod: 1, + np.maximum.accumulate: dtype_info.min, + np.cumsum: 0, + np.minimum.accumulate: dtype_info.max, + }[func] + except KeyError: + raise NotImplementedError( + f"No accumulation for {func} implemented on BaseMaskedArray" + ) + + values[mask] = fill_value + + if not skipna: + mask = np.maximum.accumulate(mask) + + values = func(values) + return values, mask + + +def cumsum(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.cumsum, values, mask, skipna=skipna) + + +def cumprod(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.cumprod, values, mask, skipna=skipna) + + +def cummin(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.minimum.accumulate, values, mask, skipna=skipna) + + +def cummax(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.maximum.accumulate, values, mask, skipna=skipna) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 084ebc04694db..ad3dac7ba9821 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -700,7 +700,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool_t = True, + copy: bool_t | None = None, ) -> NDFrameT: """ Assign desired index to given axis. @@ -724,7 +724,7 @@ def set_axis( Returns ------- - renamed : %(klass)s + %(klass)s An object of type %(klass)s. See Also @@ -734,14 +734,15 @@ def set_axis( return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): + def _set_axis_nocheck( + self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None + ): if inplace: setattr(self, self._get_axis_name(axis), labels) else: # With copy=False, we create a new object but don't copy the # underlying data. 
- if copy: - obj = self.copy(deep=None) + obj = self.copy(deep=copy) setattr(obj, obj._get_axis_name(axis), labels) return obj @@ -764,7 +765,7 @@ def swapaxes( Returns ------- - y : same as input + same as input """ i = self._get_axis_number(axis1) j = self._get_axis_number(axis2) @@ -849,7 +850,7 @@ def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: """ labels = self._get_axis(axis) new_labels = labels.droplevel(level) - return self.set_axis(new_labels, axis=axis) + return self.set_axis(new_labels, axis=axis, copy=None) def pop(self, item: Hashable) -> Series | Any: result = self[item] @@ -1759,9 +1760,6 @@ def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike if `key` matches neither a label nor a level ValueError if `key` matches multiple labels - FutureWarning - if `key` is ambiguous. This will become an ambiguity error in a - future version """ axis = self._get_axis_number(axis) other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] @@ -3713,7 +3711,7 @@ def take(self: NDFrameT, indices, axis: Axis = 0, **kwargs) -> NDFrameT: Returns ------- - taken : same type as caller + same type as caller An array-like containing the elements taken from the object. See Also @@ -4165,7 +4163,7 @@ def get(self, key, default=None): Returns ------- - value : same type as items contained in object + same type as items contained in object Examples -------- @@ -4222,7 +4220,7 @@ def reindex_like( self: NDFrameT, other, method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, - copy: bool_t = True, + copy: bool_t | None = None, limit=None, tolerance=None, ) -> NDFrameT: @@ -5258,7 +5256,7 @@ def _reindex_with_indexers( self: NDFrameT, reindexers, fill_value=None, - copy: bool_t = False, + copy: bool_t | None = False, allow_dups: bool_t = False, ) -> NDFrameT: """allow_dups indicates an internal call here""" @@ -5287,8 +5285,8 @@ def _reindex_with_indexers( # If we've made a copy once, no need to make another one copy = False - if copy and new_data is self._mgr: - new_data = new_data.copy() + if (copy or copy is None) and new_data is self._mgr: + new_data = new_data.copy(deep=copy) return self._constructor(new_data).__finalize__(self) @@ -5731,7 +5729,7 @@ def pipe( Returns ------- - object : the return type of ``func``. + the return type of ``func``. See Also -------- @@ -6025,7 +6023,7 @@ def astype( Returns ------- - casted : same type as caller + same type as caller See Also -------- @@ -6036,11 +6034,11 @@ def astype( Notes ----- - .. deprecated:: 1.3.0 + .. versionchanged:: 2.0.0 Using ``astype`` to convert from timezone-naive dtype to - timezone-aware dtype is deprecated and will raise in a - future version. Use :meth:`Series.dt.tz_localize` instead. + timezone-aware dtype will raise an exception. + Use :meth:`Series.dt.tz_localize` instead. Examples -------- @@ -6211,7 +6209,7 @@ def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: Returns ------- - copy : Series or DataFrame + Series or DataFrame Object type matches caller. Notes @@ -6317,43 +6315,7 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: return self.copy(deep=True) @final - def _convert( - self: NDFrameT, - datetime: bool_t = False, - numeric: bool_t = False, - timedelta: bool_t = False, - ) -> NDFrameT: - """ - Attempt to infer better dtype for object columns. - - Parameters - ---------- - datetime : bool, default False - If True, convert to date where possible. 
- numeric : bool, default False - If True, attempt to convert to numbers (including strings), with - unconvertible values becoming NaN. - timedelta : bool, default False - If True, convert to timedelta where possible. - - Returns - ------- - converted : same as input object - """ - validate_bool_kwarg(datetime, "datetime") - validate_bool_kwarg(numeric, "numeric") - validate_bool_kwarg(timedelta, "timedelta") - return self._constructor( - self._mgr.convert( - datetime=datetime, - numeric=numeric, - timedelta=timedelta, - copy=True, - ) - ).__finalize__(self) - - @final - def infer_objects(self: NDFrameT) -> NDFrameT: + def infer_objects(self: NDFrameT, copy: bool_t = True) -> NDFrameT: """ Attempt to infer better dtypes for object columns. @@ -6362,9 +6324,15 @@ def infer_objects(self: NDFrameT) -> NDFrameT: columns unchanged. The inference rules are the same as during normal Series/DataFrame construction. + Parameters + ---------- + copy : bool, default True + Whether to make a copy for non-object or non-inferrable columns + or Series. + Returns ------- - converted : same type as input object + same type as input object See Also -------- @@ -6391,12 +6359,8 @@ def infer_objects(self: NDFrameT) -> NDFrameT: A int64 dtype: object """ - # numeric=False necessary to only soft convert; - # python objects will still be converted to - # native numpy numeric types - return self._constructor( - self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) - ).__finalize__(self, method="infer_objects") + new_mgr = self._mgr.convert(copy=copy) + return self._constructor(new_mgr).__finalize__(self, method="infer_objects") @final def convert_dtypes( @@ -6446,7 +6410,7 @@ def convert_dtypes( By default, ``convert_dtypes`` will attempt to convert a Series (or each Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options ``convert_string``, ``convert_integer``, ``convert_boolean`` and - ``convert_boolean``, it is possible to turn off individual conversions + ``convert_floating``, it is possible to turn off individual conversions to ``StringDtype``, the integer extension types, ``BooleanDtype`` or floating extension types, respectively. @@ -6468,6 +6432,13 @@ def convert_dtypes( In the future, as new dtypes are added that support ``pd.NA``, the results of this method will change to support those new dtypes. + .. versionadded:: 2.0 + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + Examples -------- >>> df = pd.DataFrame( @@ -6508,12 +6479,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string - c boolean - d string - e Int64 - f Float64 + a Int32 + b string[python] + c boolean + d string[python] + e Int64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. 
@@ -6608,6 +6579,7 @@ def fillna( def fillna( self: NDFrameT, value: Hashable | Mapping | Series | DataFrame = None, + *, method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool_t = False, @@ -9088,7 +9060,7 @@ def align( join: AlignJoin = "outer", axis: Axis | None = None, level: Level = None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value: Hashable = None, method: FillnaOptions | None = None, limit: int | None = None, @@ -9281,7 +9253,7 @@ def _align_frame( join: AlignJoin = "outer", axis: Axis | None = None, level=None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value=None, method=None, limit=None, @@ -9345,7 +9317,7 @@ def _align_series( join: AlignJoin = "outer", axis: Axis | None = None, level=None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value=None, method=None, limit=None, @@ -9374,7 +9346,7 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - left = self.copy() if copy else self + left = self.copy(deep=copy) if copy or copy is None else self else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9403,7 +9375,7 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - right = other + right = other.copy(deep=copy) if copy or copy is None else other else: right = other.reindex(join_index, level=level) @@ -10665,7 +10637,7 @@ def pct_change( Returns ------- - chg : Series or DataFrame + Series or DataFrame The same type as the calling object. See Also @@ -10863,7 +10835,11 @@ def _accum_func( def block_accum_func(blk_values): values = blk_values.T if hasattr(blk_values, "T") else blk_values - result = nanops.na_accum_func(values, func, skipna=skipna) + result: np.ndarray | ExtensionArray + if isinstance(values, ExtensionArray): + result = values._accumulate(name, skipna=skipna, **kwargs) + else: + result = nanops.na_accum_func(values, func, skipna=skipna) result = result.T if hasattr(result, "T") else result return result @@ -11667,7 +11643,7 @@ def _find_valid_index(self, *, how: str) -> Hashable | None: ------- idx_first_valid : type of index """ - idxpos = find_valid_index(self._values, how=how) + idxpos = find_valid_index(self._values, how=how, is_valid=~isna(self._values)) if idxpos is None: return None return self.index[idxpos] @@ -11680,7 +11656,7 @@ def first_valid_index(self) -> Hashable | None: Returns ------- - scalar : type of index + type of index Notes ----- diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py new file mode 100644 index 0000000000000..19f4e5c23785b --- /dev/null +++ b/pandas/core/methods/to_dict.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +from typing import Literal +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import maybe_box_native +from pandas.core.dtypes.common import is_object_dtype + +from pandas import DataFrame +from pandas.core import common as com + + +def to_dict( + df: DataFrame, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + index: bool = True, +) -> dict | list[dict]: + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). 
+ + Parameters + ---------- + orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'} + Determines the type of the values of the dictionary. + + - 'dict' (default) : dict like {column -> {index -> value}} + - 'list' : dict like {column -> [values]} + - 'series' : dict like {column -> Series(values)} + - 'split' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} + - 'tight' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values], + 'index_names' -> [index.names], 'column_names' -> [column.names]} + - 'records' : list like + [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + into : class, default dict + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + index : bool, default True + Whether to include the index item (and index_names item if `orient` + is 'tight') in the returned dictionary. Can only be ``False`` + when `orient` is 'split' or 'tight'. + + .. versionadded:: 1.6.0 + + Returns + ------- + dict, list or collections.abc.Mapping + Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. + """ + if not df.columns.is_unique: + warnings.warn( + "DataFrame columns are not unique, some columns will be omitted.", + UserWarning, + stacklevel=find_stack_level(), + ) + # GH16122 + into_c = com.standardize_mapping(into) + + # error: Incompatible types in assignment (expression has type "str", + # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', + # 'records', 'index']") + orient = orient.lower() # type: ignore[assignment] + + if not index and orient not in ["split", "tight"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'tight'" + ) + + if orient == "series": + # GH46470 Return quickly if orient series to avoid creating dtype objects + return into_c((k, v) for k, v in df.items()) + + object_dtype_indices = [ + i for i, col_dtype in enumerate(df.dtypes.values) if is_object_dtype(col_dtype) + ] + are_all_object_dtype_cols = len(object_dtype_indices) == len(df.dtypes) + + if orient == "dict": + return into_c((k, v.to_dict(into)) for k, v in df.items()) + + elif orient == "list": + object_dtype_indices_as_set = set(object_dtype_indices) + return into_c( + ( + k, + list(map(maybe_box_native, v.tolist())) + if i in object_dtype_indices_as_set + else v.tolist(), + ) + for i, (k, v) in enumerate(df.items()) + ) + + elif orient == "split": + data = df._create_data_for_split_and_tight_to_dict( + are_all_object_dtype_cols, object_dtype_indices + ) + + return into_c( + ((("index", df.index.tolist()),) if index else ()) + + ( + ("columns", df.columns.tolist()), + ("data", data), + ) + ) + + elif orient == "tight": + data = df._create_data_for_split_and_tight_to_dict( + are_all_object_dtype_cols, object_dtype_indices + ) + + return into_c( + ((("index", df.index.tolist()),) if index else ()) + + ( + ("columns", df.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in df.itertuples(index=False, name=None) + ], + ), + ) + + ((("index_names", list(df.index.names)),) if index else ()) + + (("column_names", list(df.columns.names)),) + ) + + elif orient == "records": + 
columns = df.columns.tolist() + if are_all_object_dtype_cols: + rows = ( + dict(zip(columns, row)) for row in df.itertuples(index=False, name=None) + ) + return [ + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows + ] + else: + data = [ + into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None) + ] + if object_dtype_indices: + object_dtype_indices_as_set = set(object_dtype_indices) + object_dtype_cols = { + col + for i, col in enumerate(df.columns) + if i in object_dtype_indices_as_set + } + for row in data: + for col in object_dtype_cols: + row[col] = maybe_box_native(row[col]) + return data + + elif orient == "index": + if not df.index.is_unique: + raise ValueError("DataFrame index must be unique for orient='index'.") + columns = df.columns.tolist() + if are_all_object_dtype_cols: + return into_c( + (t[0], dict(zip(df.columns, map(maybe_box_native, t[1:])))) + for t in df.itertuples(name=None) + ) + elif object_dtype_indices: + object_dtype_indices_as_set = set(object_dtype_indices) + is_object_dtype_by_index = [ + i in object_dtype_indices_as_set for i in range(len(df.columns)) + ] + return into_c( + ( + t[0], + { + columns[i]: maybe_box_native(v) + if is_object_dtype_by_index[i] + else v + for i, v in enumerate(t[1:]) + }, + ) + for t in df.itertuples(name=None) + ) + else: + return into_c( + (t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None) + ) + + else: + raise ValueError(f"orient '{orient}' not understood") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 42fdc2d8353c2..534dfc1b26571 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + MultiIndex, Series, ) import pandas._testing as tm @@ -171,6 +172,53 @@ def test_select_dtypes(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "func", + [ + lambda x, y: x.align(y), + lambda x, y: x.align(y.a, axis=0), + lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1), + ], +) +def test_align_frame(using_copy_on_write, func): + df = DataFrame({"a": [1, 2, 3], "b": "a"}) + df_orig = df.copy() + df_changed = df[["b", "a"]].copy() + df2, _ = func(df, df_changed) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_align_series(using_copy_on_write): + ser = Series([1, 2]) + ser_orig = ser.copy() + ser_other = ser.copy() + ser2, ser_other_result = ser.align(ser_other) + + if using_copy_on_write: + assert np.shares_memory(ser2.values, ser.values) + assert np.shares_memory(ser_other_result.values, ser_other.values) + else: + assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser_other_result.values, ser_other.values) + + ser2.iloc[0] = 0 + ser_other_result.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser_other_result.values, ser_other.values) + tm.assert_series_equal(ser, ser_orig) + tm.assert_series_equal(ser_other, ser_orig) + + def test_to_frame(using_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) @@ -252,6 +300,45 @@ def test_set_index(using_copy_on_write): 
tm.assert_frame_equal(df, df_orig) +def test_add_prefix(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.add_prefix("CoW_") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + + assert not np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c")) + expected = DataFrame( + {"CoW_a": [0, 2, 3], "CoW_b": [4, 5, 6], "CoW_c": [0.1, 0.2, 0.3]} + ) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df, df_orig) + + +def test_add_suffix(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.add_suffix("_CoW") + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c")) + expected = DataFrame( + {"a_CoW": [0, 2, 3], "b_CoW": [4, 5, 6], "c_CoW": [0.1, 0.2, 0.3]} + ) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize( "method", [ @@ -281,6 +368,95 @@ def test_head_tail(method, using_copy_on_write): df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) + +@pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) +def test_assign_drop_duplicates(using_copy_on_write, method): + df = DataFrame({"a": [1, 2, 3]}) + df_orig = df.copy() + df2 = getattr(df, method)() + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_reindex_like(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": "a"}) + other = DataFrame({"b": "a", "a": [1, 2]}) + + df_orig = df.copy() + df2 = df.reindex_like(other) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 1] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_reorder_levels(using_copy_on_write): + index = MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] + ) + df = DataFrame({"a": [1, 2, 3, 4]}, index=index) + df_orig = df.copy() + df2 = df.reorder_levels(order=["two", "one"]) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_frame_set_axis(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not 
np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_series_set_axis(using_copy_on_write): + # GH 49473 + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(ser, ser2) + else: + assert not np.shares_memory(ser, ser2) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) + def test_droplevel(using_copy_on_write): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index(["a","b"]) @@ -293,7 +469,7 @@ def test_droplevel(using_copy_on_write): assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) # mutating df2 triggers a copy-on-write for that column / block - df2.loc["b","c"] = 1 + df2.loc[4,"c"] = 1 assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_util.py b/pandas/tests/copy_view/test_util.py new file mode 100644 index 0000000000000..ff55330d70b28 --- /dev/null +++ b/pandas/tests/copy_view/test_util.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas import DataFrame +from pandas.tests.copy_view.util import get_array + + +def test_get_array_numpy(): + df = DataFrame({"a": [1, 2, 3]}) + assert np.shares_memory(get_array(df, "a"), get_array(df, "a")) + + +def test_get_array_masked(): + df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + assert np.shares_memory(get_array(df, "a"), get_array(df, "a")) diff --git a/pandas/tests/extension/base/accumulate.py b/pandas/tests/extension/base/accumulate.py new file mode 100644 index 0000000000000..868172f930844 --- /dev/null +++ b/pandas/tests/extension/base/accumulate.py @@ -0,0 +1,37 @@ +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseAccumulateTests(BaseExtensionTests): + """ + Accumulation specific tests. Generally these only + make sense for numeric/boolean operations. + """ + + def check_accumulate(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + + if result.dtype == pd.Float32Dtype() and op_name == "cumprod" and skipna: + pytest.skip( + f"Float32 precision lead to large differences with op {op_name} " + f"and skipna={skipna}" + ) + + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + self.assert_series_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): + op_name = all_numeric_accumulations + ser = pd.Series(data) + + with pytest.raises(NotImplementedError): + getattr(ser, op_name)(skipna=skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_accumulate_series(self, data, all_numeric_accumulations, skipna): + op_name = all_numeric_accumulations + ser = pd.Series(data) + self.check_accumulate(ser, op_name, skipna) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py new file mode 100644 index 0000000000000..cc3f468349efb --- /dev/null +++ b/pandas/tests/groupby/test_raises.py @@ -0,0 +1,178 @@ +# Only tests that raise an error and have no better location should go here. 
+# Tests for specific groupby methods should go in their respective +# test file. + +import datetime + +import pytest + +from pandas import DataFrame +from pandas.tests.groupby import get_groupby_method_args + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_string(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "Could not convert"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (NotImplementedError, "function is not implemented for this dtype"), + "cummin": (NotImplementedError, "function is not implemented for this dtype"), + "cumprod": (NotImplementedError, "function is not implemented for this dtype"), + "cumsum": (NotImplementedError, "function is not implemented for this dtype"), + "diff": (TypeError, "unsupported operand type"), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (TypeError, "'argmax' not allowed for this dtype"), + "idxmin": (TypeError, "'argmin' not allowed for this dtype"), + "last": (None, ""), + "max": (None, ""), + "mean": (TypeError, "Could not convert xyz to numeric"), + "median": (TypeError, "could not convert string to float"), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "unsupported operand type"), + "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"), + "quantile": (TypeError, "cannot be performed against 'object' dtypes!"), + "rank": (None, ""), + "sem": (ValueError, "could not convert string to float"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, "could not convert string to float"), + "std": (ValueError, "could not convert string to float"), + "sum": (None, ""), + "var": (TypeError, "could not convert string to float"), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_string_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_datetime(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "cannot perform __mul__ with this index type"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (None, ""), + "cummin": (None, ""), + "cumprod": (TypeError, "datetime64 type does not support cumprod operations"), + "cumsum": (TypeError, 
"datetime64 type does not support cumsum operations"), + "diff": (None, ""), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (None, ""), + "idxmin": (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": (None, ""), + "median": (None, ""), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "cannot perform __truediv__ with this index type"), + "prod": (TypeError, "datetime64 type does not support prod"), + "quantile": (None, ""), + "rank": (None, ""), + "sem": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"), + "std": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "sum": (TypeError, "datetime64 type does not support sum operations"), + "var": (None, ""), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_datetime_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py new file mode 100644 index 0000000000000..33890feb8692d --- /dev/null +++ b/scripts/check_test_naming.py @@ -0,0 +1,152 @@ +""" +Check that test names start with `test`, and that test classes start with `Test`. + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run check-test-naming --all-files + +NOTE: if this finds a false positive, you can add the comment `# not a test` to the +class or function definition. Though hopefully that shouldn't be necessary. 
+""" +from __future__ import annotations + +import argparse +import ast +import os +from pathlib import Path +import sys +from typing import ( + Iterator, + Sequence, +) + +PRAGMA = "# not a test" + + +def _find_names(node: ast.Module) -> Iterator[str]: + for _node in ast.walk(node): + if isinstance(_node, ast.Name): + yield _node.id + elif isinstance(_node, ast.Attribute): + yield _node.attr + + +def _is_fixture(node: ast.expr) -> bool: + if isinstance(node, ast.Call): + node = node.func + return ( + isinstance(node, ast.Attribute) + and node.attr == "fixture" + and isinstance(node.value, ast.Name) + and node.value.id == "pytest" + ) + + +def _is_register_dtype(node): + return isinstance(node, ast.Name) and node.id == "register_extension_dtype" + + +def is_misnamed_test_func( + node: ast.expr | ast.stmt, names: Sequence[str], line: str +) -> bool: + return ( + isinstance(node, ast.FunctionDef) + and not node.name.startswith("test") + and names.count(node.name) == 0 + and not any(_is_fixture(decorator) for decorator in node.decorator_list) + and PRAGMA not in line + and node.name + not in ("teardown_method", "setup_method", "teardown_class", "setup_class") + ) + + +def is_misnamed_test_class( + node: ast.expr | ast.stmt, names: Sequence[str], line: str +) -> bool: + return ( + isinstance(node, ast.ClassDef) + and not node.name.startswith("Test") + and names.count(node.name) == 0 + and not any(_is_register_dtype(decorator) for decorator in node.decorator_list) + and PRAGMA not in line + ) + + +def main(content: str, file: str) -> int: + lines = content.splitlines() + tree = ast.parse(content) + names = list(_find_names(tree)) + ret = 0 + for node in tree.body: + if is_misnamed_test_func(node, names, lines[node.lineno - 1]): + print( + f"{file}:{node.lineno}:{node.col_offset} " + "found test function which does not start with 'test'" + ) + ret = 1 + elif is_misnamed_test_class(node, names, lines[node.lineno - 1]): + print( + f"{file}:{node.lineno}:{node.col_offset} " + "found test class which does not start with 'Test'" + ) + ret = 1 + if ( + isinstance(node, ast.ClassDef) + and names.count(node.name) == 0 + and not any( + _is_register_dtype(decorator) for decorator in node.decorator_list + ) + and PRAGMA not in lines[node.lineno - 1] + ): + for _node in node.body: + if is_misnamed_test_func(_node, names, lines[_node.lineno - 1]): + # It could be that this function is used somewhere by the + # parent class. For example, there might be a base class + # with + # + # class Foo: + # def foo(self): + # assert 1+1==2 + # def test_foo(self): + # self.foo() + # + # and then some subclass overwrites `foo`. So, we check that + # `self.foo` doesn't appear in any of the test classes. + # Note some false negatives might get through, but that's OK. + # This is good enough that has helped identify several examples + # of tests not being run. 
+ assert isinstance(_node, ast.FunctionDef) # help mypy + should_continue = False + for _file in (Path("pandas") / "tests").rglob("*.py"): + with open(os.path.join(_file)) as fd: + _content = fd.read() + if f"self.{_node.name}" in _content: + should_continue = True + break + if should_continue: + continue + + print( + f"{file}:{_node.lineno}:{_node.col_offset} " + "found test function which does not start with 'test'" + ) + ret = 1 + return ret + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args() + + ret = 0 + + for file in args.paths: + filename = os.path.basename(file) + if not (filename.startswith("test") and filename.endswith(".py")): + continue + with open(file, encoding="utf-8") as fd: + content = fd.read() + ret |= main(content, file) + + sys.exit(ret) diff --git a/scripts/tests/test_check_test_naming.py b/scripts/tests/test_check_test_naming.py new file mode 100644 index 0000000000000..9ddaf2fe2a97d --- /dev/null +++ b/scripts/tests/test_check_test_naming.py @@ -0,0 +1,54 @@ +import pytest + +from scripts.check_test_naming import main + + +@pytest.mark.parametrize( + "src, expected_out, expected_ret", + [ + ( + "def foo(): pass\n", + "t.py:1:0 found test function which does not start with 'test'\n", + 1, + ), + ( + "class Foo:\n def test_foo(): pass\n", + "t.py:1:0 found test class which does not start with 'Test'\n", + 1, + ), + ("def test_foo(): pass\n", "", 0), + ( + "class TestFoo:\n def foo(): pass\n", + "t.py:2:4 found test function which does not start with 'test'\n", + 1, + ), + ("class TestFoo:\n def test_foo(): pass\n", "", 0), + ( + "class Foo:\n def foo(): pass\n", + "t.py:1:0 found test class which does not start with 'Test'\n" + "t.py:2:4 found test function which does not start with 'test'\n", + 1, + ), + ( + "def foo():\n pass\ndef test_foo():\n foo()\n", + "", + 0, + ), + ( + "class Foo: # not a test\n" + " pass\n" + "def test_foo():\n" + " Class.foo()\n", + "", + 0, + ), + ("@pytest.fixture\ndef foo(): pass\n", "", 0), + ("@pytest.fixture()\ndef foo(): pass\n", "", 0), + ("@register_extension_dtype\nclass Foo: pass\n", "", 0), + ], +) +def test_main(capsys, src, expected_out, expected_ret): + ret = main(src, "t.py") + out, _ = capsys.readouterr() + assert out == expected_out + assert ret == expected_ret From bb1e8d6eca13aa74e18e0627d15219bcf5e35cad Mon Sep 17 00:00:00 2001 From: David Leon Date: Thu, 5 Jan 2023 11:34:21 +0100 Subject: [PATCH 3/3] implemented PR comments --- pandas/tests/copy_view/test_methods.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 56285031aca53..a06fc5b8ad5e5 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -598,9 +598,8 @@ def test_tz_convert_localize(using_copy_on_write, func, tz): def test_droplevel(using_copy_on_write): # GH 49473 - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( - ["a", "b"] - ) + index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index) df_orig = df.copy() df2 = df.droplevel(0) @@ -610,7 +609,7 @@ def test_droplevel(using_copy_on_write): assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) # mutating df2 triggers a copy-on-write for that column / block - df2.loc[4, "c"] = 1 + df2.iloc[0, 0] = 0 assert not 
np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
     tm.assert_frame_equal(df, df_orig)
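
The final form of test_droplevel above pins down the intended semantics. As a quick illustration outside the test suite, here is a minimal sketch. It assumes a pandas development build where the "mode.copy_on_write" option is available; the option call and the .values-based checks are illustrative only, not part of the patch:

    import numpy as np
    import pandas as pd

    pd.set_option("mode.copy_on_write", True)  # assumes a CoW-capable dev build

    index = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index)

    # droplevel now returns a new frame that still shares the column buffers
    df2 = df.droplevel(0)
    assert np.shares_memory(df2["c"].values, df["c"].values)

    # the first mutation copies the affected block; the parent is untouched
    df2.iloc[0, 0] = 0
    assert not np.shares_memory(df2["c"].values, df["c"].values)
    assert df.loc[(1, 1), "a"] == 1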
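
Patch 2 also pulls in pandas/core/array_algos/masked_accumulations.py, whose fill-value table is easiest to follow with the cumsum path spelled out. A self-contained re-derivation under local names (nothing here is imported from pandas):

    import numpy as np

    def masked_cumsum(values, mask, skipna=True):
        values = values.copy()
        # 0 is cumsum's identity; cumprod uses 1, cummin/cummax use dtype max/min
        values[mask] = 0
        if not skipna:
            # propagate NA to every position after the first masked one
            mask = np.maximum.accumulate(mask)
        return np.cumsum(values), mask

    vals = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, True, False, False])  # position 1 is NA

    out, out_mask = masked_cumsum(vals, mask)
    assert out.tolist() == [1, 1, 4, 8]
    assert out_mask.tolist() == [False, True, False, False]

    _, strict_mask = masked_cumsum(vals, mask, skipna=False)
    assert strict_mask.tolist() == [False, True, True, True]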
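
Likewise, the orient table in the new pandas/core/methods/to_dict.py maps onto concrete outputs as follows, shown through the public DataFrame.to_dict and assuming a build that already exposes the index keyword added alongside this refactor:

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2], "y": ["a", "b"]}, index=["r0", "r1"])

    df.to_dict("dict")     # {'x': {'r0': 1, 'r1': 2}, 'y': {'r0': 'a', 'r1': 'b'}}
    df.to_dict("list")     # {'x': [1, 2], 'y': ['a', 'b']}
    df.to_dict("records")  # [{'x': 1, 'y': 'a'}, {'x': 2, 'y': 'b'}]
    df.to_dict("split")    # {'index': ['r0', 'r1'], 'columns': ['x', 'y'],
                           #  'data': [[1, 'a'], [2, 'b']]}

    # index=False (new in this refactor) drops the index entries for split/tight:
    df.to_dict("tight", index=False)
    # {'columns': ['x', 'y'], 'data': [[1, 'a'], [2, 'b']], 'column_names': [None]}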