From b4b461e0c69e420b0b583bef15f4d740b276693e Mon Sep 17 00:00:00 2001 From: David Leon Date: Tue, 20 Dec 2022 19:02:33 +0100 Subject: [PATCH 1/3] added copy on write for droplevel --- pandas/core/generic.py | 3 ++- pandas/tests/copy_view/test_methods.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2de83bb7a4468..084ebc04694db 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -740,7 +740,8 @@ def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): else: # With copy=False, we create a new object but don't copy the # underlying data. - obj = self.copy(deep=copy) + if copy: + obj = self.copy(deep=None) setattr(obj, obj._get_axis_name(axis), labels) return obj diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index bf65f153b10dd..42fdc2d8353c2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -280,3 +280,20 @@ def test_head_tail(method, using_copy_on_write): # without CoW enabled, head and tail return views. Mutating df2 also mutates df. df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) + +def test_droplevel(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index(["a","b"]) + df_orig = df.copy() + df2 = df.droplevel(0) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + else: + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.loc["b","c"] = 1 + + assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) + tm.assert_frame_equal(df, df_orig) From 9fbc21b3ffa96cf0e112c86eb80a35046f558edc Mon Sep 17 00:00:00 2001 From: David Leon Date: Mon, 2 Jan 2023 16:33:49 +0100 Subject: [PATCH 2/3] fixed copy on write for droplevel --- .pre-commit-config.yaml | 42 +++- pandas/_libs/tslibs/strptime.pxd | 4 + .../core/array_algos/masked_accumulations.py | 92 ++++++++ pandas/core/generic.py | 140 +++++------- pandas/core/methods/to_dict.py | 202 ++++++++++++++++++ pandas/tests/copy_view/test_methods.py | 178 ++++++++++++++- pandas/tests/copy_view/test_util.py | 14 ++ pandas/tests/extension/base/accumulate.py | 37 ++++ pandas/tests/groupby/test_raises.py | 178 +++++++++++++++ scripts/check_test_naming.py | 152 +++++++++++++ scripts/tests/test_check_test_naming.py | 54 +++++ 11 files changed, 1004 insertions(+), 89 deletions(-) create mode 100644 pandas/_libs/tslibs/strptime.pxd create mode 100644 pandas/core/array_algos/masked_accumulations.py create mode 100644 pandas/core/methods/to_dict.py create mode 100644 pandas/tests/copy_view/test_util.py create mode 100644 pandas/tests/extension/base/accumulate.py create mode 100644 pandas/tests/groupby/test_raises.py create mode 100644 scripts/check_test_naming.py create mode 100644 scripts/tests/test_check_test_naming.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0779f9c95f7b4..f3158e64df8dd 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,7 +1,17 @@ minimum_pre_commit_version: 2.15.0 exclude: ^LICENSES/|\.(html|csv|svg)$ -# reserve "manual" for mypy and pyright -default_stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] +# reserve "manual" for relatively slow hooks which we still want to run in CI +default_stages: [ + 
commit, + merge-commit, + push, + prepare-commit-msg, + commit-msg, + post-checkout, + post-commit, + post-merge, + post-rewrite +] ci: autofix_prs: false repos: @@ -27,15 +37,18 @@ repos: rev: v0.9.1 hooks: - id: cython-lint + - id: double-quote-cython-strings - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: debug-statements - id: end-of-file-fixer exclude: \.txt$ - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] - id: trailing-whitespace - stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, post-checkout, post-commit, post-merge, post-rewrite] + stages: [commit, merge-commit, push, prepare-commit-msg, commit-msg, + post-checkout, post-commit, post-merge, post-rewrite] - repo: https://github.com/cpplint/cpplint rev: 1.6.1 hooks: @@ -45,7 +58,14 @@ repos: # this particular codebase (e.g. src/headers, src/klib). However, # we can lint all header files since they aren't "generated" like C files are. exclude: ^pandas/_libs/src/(klib|headers)/ - args: [--quiet, '--extensions=c,h', '--headers=h', --recursive, '--filter=-readability/casting,-runtime/int,-build/include_subdir'] + args: [ + --quiet, + '--extensions=c,h', + '--headers=h', + --recursive, + --linelength=88, + '--filter=-readability/casting,-runtime/int,-build/include_subdir,-readability/fn_size' + ] - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: @@ -106,6 +126,7 @@ repos: hooks: - id: yesqa additional_dependencies: *flake8_dependencies + stages: [manual] - repo: local hooks: # NOTE: we make `black` a local hook because if it's installed from @@ -213,7 +234,6 @@ repos: exclude: ^pandas/tests/extension/base/base\.py - id: pip-to-conda name: Generate pip dependency from conda - description: This hook checks if the conda environment.yml and requirements-dev.txt are equal language: python entry: python scripts/generate_pip_deps_from_conda.py files: ^(environment.yml|requirements-dev.txt)$ @@ -310,6 +330,16 @@ repos: files: ^pandas exclude: ^(pandas/tests|pandas/_version.py|pandas/io/clipboard) language: python + stages: [manual] additional_dependencies: - autotyping==22.9.0 - libcst==0.4.7 + - id: check-test-naming + name: check that test names start with 'test' + entry: python -m scripts.check_test_naming + types: [python] + files: ^pandas/tests + language: python + exclude: | + (?x) + ^pandas/tests/generic/test_generic.py # GH50380 diff --git a/pandas/_libs/tslibs/strptime.pxd b/pandas/_libs/tslibs/strptime.pxd new file mode 100644 index 0000000000000..175195d4362e4 --- /dev/null +++ b/pandas/_libs/tslibs/strptime.pxd @@ -0,0 +1,4 @@ +from numpy cimport int64_t + + +cdef bint parse_today_now(str val, int64_t* iresult, bint utc) diff --git a/pandas/core/array_algos/masked_accumulations.py b/pandas/core/array_algos/masked_accumulations.py new file mode 100644 index 0000000000000..07113128e0947 --- /dev/null +++ b/pandas/core/array_algos/masked_accumulations.py @@ -0,0 +1,92 @@ +""" +masked_accumulations.py is for accumulation algorithms using a mask-based approach +for missing values. 
+""" + +from __future__ import annotations + +from typing import Callable + +import numpy as np + +from pandas._typing import npt + +from pandas.core.dtypes.common import ( + is_bool_dtype, + is_float_dtype, + is_integer_dtype, +) + + +def _cum_func( + func: Callable, + values: np.ndarray, + mask: npt.NDArray[np.bool_], + *, + skipna: bool = True, +): + """ + Accumulations for 1D masked array. + + We will modify values in place to replace NAs with the appropriate fill value. + + Parameters + ---------- + func : np.cumsum, np.cumprod, np.maximum.accumulate, np.minimum.accumulate + values : np.ndarray + Numpy array with the values (can be of any dtype that support the + operation). + mask : np.ndarray + Boolean numpy array (True values indicate missing values). + skipna : bool, default True + Whether to skip NA. + """ + dtype_info: np.iinfo | np.finfo + if is_float_dtype(values): + dtype_info = np.finfo(values.dtype.type) + elif is_integer_dtype(values): + dtype_info = np.iinfo(values.dtype.type) + elif is_bool_dtype(values): + # Max value of bool is 1, but since we are setting into a boolean + # array, 255 is fine as well. Min value has to be 0 when setting + # into the boolean array. + dtype_info = np.iinfo(np.uint8) + else: + raise NotImplementedError( + f"No masked accumulation defined for dtype {values.dtype.type}" + ) + try: + fill_value = { + np.cumprod: 1, + np.maximum.accumulate: dtype_info.min, + np.cumsum: 0, + np.minimum.accumulate: dtype_info.max, + }[func] + except KeyError: + raise NotImplementedError( + f"No accumulation for {func} implemented on BaseMaskedArray" + ) + + values[mask] = fill_value + + if not skipna: + mask = np.maximum.accumulate(mask) + + values = func(values) + return values, mask + + +def cumsum(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.cumsum, values, mask, skipna=skipna) + + +def cumprod(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.cumprod, values, mask, skipna=skipna) + + +def cummin(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.minimum.accumulate, values, mask, skipna=skipna) + + +def cummax(values: np.ndarray, mask: npt.NDArray[np.bool_], *, skipna: bool = True): + return _cum_func(np.maximum.accumulate, values, mask, skipna=skipna) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 084ebc04694db..ad3dac7ba9821 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -700,7 +700,7 @@ def set_axis( labels, *, axis: Axis = 0, - copy: bool_t = True, + copy: bool_t | None = None, ) -> NDFrameT: """ Assign desired index to given axis. @@ -724,7 +724,7 @@ def set_axis( Returns ------- - renamed : %(klass)s + %(klass)s An object of type %(klass)s. See Also @@ -734,14 +734,15 @@ def set_axis( return self._set_axis_nocheck(labels, axis, inplace=False, copy=copy) @final - def _set_axis_nocheck(self, labels, axis: Axis, inplace: bool_t, copy: bool_t): + def _set_axis_nocheck( + self, labels, axis: Axis, inplace: bool_t, copy: bool_t | None + ): if inplace: setattr(self, self._get_axis_name(axis), labels) else: # With copy=False, we create a new object but don't copy the # underlying data. 
- if copy: - obj = self.copy(deep=None) + obj = self.copy(deep=copy) setattr(obj, obj._get_axis_name(axis), labels) return obj @@ -764,7 +765,7 @@ def swapaxes( Returns ------- - y : same as input + same as input """ i = self._get_axis_number(axis1) j = self._get_axis_number(axis2) @@ -849,7 +850,7 @@ def droplevel(self: NDFrameT, level: IndexLabel, axis: Axis = 0) -> NDFrameT: """ labels = self._get_axis(axis) new_labels = labels.droplevel(level) - return self.set_axis(new_labels, axis=axis) + return self.set_axis(new_labels, axis=axis, copy=None) def pop(self, item: Hashable) -> Series | Any: result = self[item] @@ -1759,9 +1760,6 @@ def _get_label_or_level_values(self, key: Level, axis: AxisInt = 0) -> ArrayLike if `key` matches neither a label nor a level ValueError if `key` matches multiple labels - FutureWarning - if `key` is ambiguous. This will become an ambiguity error in a - future version """ axis = self._get_axis_number(axis) other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis] @@ -3713,7 +3711,7 @@ def take(self: NDFrameT, indices, axis: Axis = 0, **kwargs) -> NDFrameT: Returns ------- - taken : same type as caller + same type as caller An array-like containing the elements taken from the object. See Also @@ -4165,7 +4163,7 @@ def get(self, key, default=None): Returns ------- - value : same type as items contained in object + same type as items contained in object Examples -------- @@ -4222,7 +4220,7 @@ def reindex_like( self: NDFrameT, other, method: Literal["backfill", "bfill", "pad", "ffill", "nearest"] | None = None, - copy: bool_t = True, + copy: bool_t | None = None, limit=None, tolerance=None, ) -> NDFrameT: @@ -5258,7 +5256,7 @@ def _reindex_with_indexers( self: NDFrameT, reindexers, fill_value=None, - copy: bool_t = False, + copy: bool_t | None = False, allow_dups: bool_t = False, ) -> NDFrameT: """allow_dups indicates an internal call here""" @@ -5287,8 +5285,8 @@ def _reindex_with_indexers( # If we've made a copy once, no need to make another one copy = False - if copy and new_data is self._mgr: - new_data = new_data.copy() + if (copy or copy is None) and new_data is self._mgr: + new_data = new_data.copy(deep=copy) return self._constructor(new_data).__finalize__(self) @@ -5731,7 +5729,7 @@ def pipe( Returns ------- - object : the return type of ``func``. + the return type of ``func``. See Also -------- @@ -6025,7 +6023,7 @@ def astype( Returns ------- - casted : same type as caller + same type as caller See Also -------- @@ -6036,11 +6034,11 @@ def astype( Notes ----- - .. deprecated:: 1.3.0 + .. versionchanged:: 2.0.0 Using ``astype`` to convert from timezone-naive dtype to - timezone-aware dtype is deprecated and will raise in a - future version. Use :meth:`Series.dt.tz_localize` instead. + timezone-aware dtype will raise an exception. + Use :meth:`Series.dt.tz_localize` instead. Examples -------- @@ -6211,7 +6209,7 @@ def copy(self: NDFrameT, deep: bool_t | None = True) -> NDFrameT: Returns ------- - copy : Series or DataFrame + Series or DataFrame Object type matches caller. Notes @@ -6317,43 +6315,7 @@ def __deepcopy__(self: NDFrameT, memo=None) -> NDFrameT: return self.copy(deep=True) @final - def _convert( - self: NDFrameT, - datetime: bool_t = False, - numeric: bool_t = False, - timedelta: bool_t = False, - ) -> NDFrameT: - """ - Attempt to infer better dtype for object columns. - - Parameters - ---------- - datetime : bool, default False - If True, convert to date where possible. 
- numeric : bool, default False - If True, attempt to convert to numbers (including strings), with - unconvertible values becoming NaN. - timedelta : bool, default False - If True, convert to timedelta where possible. - - Returns - ------- - converted : same as input object - """ - validate_bool_kwarg(datetime, "datetime") - validate_bool_kwarg(numeric, "numeric") - validate_bool_kwarg(timedelta, "timedelta") - return self._constructor( - self._mgr.convert( - datetime=datetime, - numeric=numeric, - timedelta=timedelta, - copy=True, - ) - ).__finalize__(self) - - @final - def infer_objects(self: NDFrameT) -> NDFrameT: + def infer_objects(self: NDFrameT, copy: bool_t = True) -> NDFrameT: """ Attempt to infer better dtypes for object columns. @@ -6362,9 +6324,15 @@ def infer_objects(self: NDFrameT) -> NDFrameT: columns unchanged. The inference rules are the same as during normal Series/DataFrame construction. + Parameters + ---------- + copy : bool, default True + Whether to make a copy for non-object or non-inferrable columns + or Series. + Returns ------- - converted : same type as input object + same type as input object See Also -------- @@ -6391,12 +6359,8 @@ def infer_objects(self: NDFrameT) -> NDFrameT: A int64 dtype: object """ - # numeric=False necessary to only soft convert; - # python objects will still be converted to - # native numpy numeric types - return self._constructor( - self._mgr.convert(datetime=True, numeric=False, timedelta=True, copy=True) - ).__finalize__(self, method="infer_objects") + new_mgr = self._mgr.convert(copy=copy) + return self._constructor(new_mgr).__finalize__(self, method="infer_objects") @final def convert_dtypes( @@ -6446,7 +6410,7 @@ def convert_dtypes( By default, ``convert_dtypes`` will attempt to convert a Series (or each Series in a DataFrame) to dtypes that support ``pd.NA``. By using the options ``convert_string``, ``convert_integer``, ``convert_boolean`` and - ``convert_boolean``, it is possible to turn off individual conversions + ``convert_floating``, it is possible to turn off individual conversions to ``StringDtype``, the integer extension types, ``BooleanDtype`` or floating extension types, respectively. @@ -6468,6 +6432,13 @@ def convert_dtypes( In the future, as new dtypes are added that support ``pd.NA``, the results of this method will change to support those new dtypes. + .. versionadded:: 2.0 + The nullable dtype implementation can be configured by calling + ``pd.set_option("mode.dtype_backend", "pandas")`` to use + numpy-backed nullable dtypes or + ``pd.set_option("mode.dtype_backend", "pyarrow")`` to use + pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``). + Examples -------- >>> df = pd.DataFrame( @@ -6508,12 +6479,12 @@ def convert_dtypes( 2 3 z 20 200.0 >>> dfn.dtypes - a Int32 - b string - c boolean - d string - e Int64 - f Float64 + a Int32 + b string[python] + c boolean + d string[python] + e Int64 + f Float64 dtype: object Start with a Series of strings and missing data represented by ``np.nan``. 
@@ -6608,6 +6579,7 @@ def fillna( def fillna( self: NDFrameT, value: Hashable | Mapping | Series | DataFrame = None, + *, method: FillnaOptions | None = None, axis: Axis | None = None, inplace: bool_t = False, @@ -9088,7 +9060,7 @@ def align( join: AlignJoin = "outer", axis: Axis | None = None, level: Level = None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value: Hashable = None, method: FillnaOptions | None = None, limit: int | None = None, @@ -9281,7 +9253,7 @@ def _align_frame( join: AlignJoin = "outer", axis: Axis | None = None, level=None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value=None, method=None, limit=None, @@ -9345,7 +9317,7 @@ def _align_series( join: AlignJoin = "outer", axis: Axis | None = None, level=None, - copy: bool_t = True, + copy: bool_t | None = None, fill_value=None, method=None, limit=None, @@ -9374,7 +9346,7 @@ def _align_series( if is_series: left = self._reindex_indexer(join_index, lidx, copy) elif lidx is None or join_index is None: - left = self.copy() if copy else self + left = self.copy(deep=copy) if copy or copy is None else self else: left = self._constructor( self._mgr.reindex_indexer(join_index, lidx, axis=1, copy=copy) @@ -9403,7 +9375,7 @@ def _align_series( left = self._constructor(fdata) if ridx is None: - right = other + right = other.copy(deep=copy) if copy or copy is None else other else: right = other.reindex(join_index, level=level) @@ -10665,7 +10637,7 @@ def pct_change( Returns ------- - chg : Series or DataFrame + Series or DataFrame The same type as the calling object. See Also @@ -10863,7 +10835,11 @@ def _accum_func( def block_accum_func(blk_values): values = blk_values.T if hasattr(blk_values, "T") else blk_values - result = nanops.na_accum_func(values, func, skipna=skipna) + result: np.ndarray | ExtensionArray + if isinstance(values, ExtensionArray): + result = values._accumulate(name, skipna=skipna, **kwargs) + else: + result = nanops.na_accum_func(values, func, skipna=skipna) result = result.T if hasattr(result, "T") else result return result @@ -11667,7 +11643,7 @@ def _find_valid_index(self, *, how: str) -> Hashable | None: ------- idx_first_valid : type of index """ - idxpos = find_valid_index(self._values, how=how) + idxpos = find_valid_index(self._values, how=how, is_valid=~isna(self._values)) if idxpos is None: return None return self.index[idxpos] @@ -11680,7 +11656,7 @@ def first_valid_index(self) -> Hashable | None: Returns ------- - scalar : type of index + type of index Notes ----- diff --git a/pandas/core/methods/to_dict.py b/pandas/core/methods/to_dict.py new file mode 100644 index 0000000000000..19f4e5c23785b --- /dev/null +++ b/pandas/core/methods/to_dict.py @@ -0,0 +1,202 @@ +from __future__ import annotations + +from typing import Literal +import warnings + +from pandas.util._exceptions import find_stack_level + +from pandas.core.dtypes.cast import maybe_box_native +from pandas.core.dtypes.common import is_object_dtype + +from pandas import DataFrame +from pandas.core import common as com + + +def to_dict( + df: DataFrame, + orient: Literal[ + "dict", "list", "series", "split", "tight", "records", "index" + ] = "dict", + into: type[dict] = dict, + index: bool = True, +) -> dict | list[dict]: + """ + Convert the DataFrame to a dictionary. + + The type of the key-value pairs can be customized with the parameters + (see below). 
+ + Parameters + ---------- + orient : str {'dict', 'list', 'series', 'split', 'tight', 'records', 'index'} + Determines the type of the values of the dictionary. + + - 'dict' (default) : dict like {column -> {index -> value}} + - 'list' : dict like {column -> [values]} + - 'series' : dict like {column -> Series(values)} + - 'split' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values]} + - 'tight' : dict like + {'index' -> [index], 'columns' -> [columns], 'data' -> [values], + 'index_names' -> [index.names], 'column_names' -> [column.names]} + - 'records' : list like + [{column -> value}, ... , {column -> value}] + - 'index' : dict like {index -> {column -> value}} + + .. versionadded:: 1.4.0 + 'tight' as an allowed value for the ``orient`` argument + + into : class, default dict + The collections.abc.Mapping subclass used for all Mappings + in the return value. Can be the actual class or an empty + instance of the mapping type you want. If you want a + collections.defaultdict, you must pass it initialized. + + index : bool, default True + Whether to include the index item (and index_names item if `orient` + is 'tight') in the returned dictionary. Can only be ``False`` + when `orient` is 'split' or 'tight'. + + .. versionadded:: 1.6.0 + + Returns + ------- + dict, list or collections.abc.Mapping + Return a collections.abc.Mapping object representing the DataFrame. + The resulting transformation depends on the `orient` parameter. + """ + if not df.columns.is_unique: + warnings.warn( + "DataFrame columns are not unique, some columns will be omitted.", + UserWarning, + stacklevel=find_stack_level(), + ) + # GH16122 + into_c = com.standardize_mapping(into) + + # error: Incompatible types in assignment (expression has type "str", + # variable has type "Literal['dict', 'list', 'series', 'split', 'tight', + # 'records', 'index']") + orient = orient.lower() # type: ignore[assignment] + + if not index and orient not in ["split", "tight"]: + raise ValueError( + "'index=False' is only valid when 'orient' is 'split' or 'tight'" + ) + + if orient == "series": + # GH46470 Return quickly if orient series to avoid creating dtype objects + return into_c((k, v) for k, v in df.items()) + + object_dtype_indices = [ + i for i, col_dtype in enumerate(df.dtypes.values) if is_object_dtype(col_dtype) + ] + are_all_object_dtype_cols = len(object_dtype_indices) == len(df.dtypes) + + if orient == "dict": + return into_c((k, v.to_dict(into)) for k, v in df.items()) + + elif orient == "list": + object_dtype_indices_as_set = set(object_dtype_indices) + return into_c( + ( + k, + list(map(maybe_box_native, v.tolist())) + if i in object_dtype_indices_as_set + else v.tolist(), + ) + for i, (k, v) in enumerate(df.items()) + ) + + elif orient == "split": + data = df._create_data_for_split_and_tight_to_dict( + are_all_object_dtype_cols, object_dtype_indices + ) + + return into_c( + ((("index", df.index.tolist()),) if index else ()) + + ( + ("columns", df.columns.tolist()), + ("data", data), + ) + ) + + elif orient == "tight": + data = df._create_data_for_split_and_tight_to_dict( + are_all_object_dtype_cols, object_dtype_indices + ) + + return into_c( + ((("index", df.index.tolist()),) if index else ()) + + ( + ("columns", df.columns.tolist()), + ( + "data", + [ + list(map(maybe_box_native, t)) + for t in df.itertuples(index=False, name=None) + ], + ), + ) + + ((("index_names", list(df.index.names)),) if index else ()) + + (("column_names", list(df.columns.names)),) + ) + + elif orient == "records": + 
columns = df.columns.tolist() + if are_all_object_dtype_cols: + rows = ( + dict(zip(columns, row)) for row in df.itertuples(index=False, name=None) + ) + return [ + into_c((k, maybe_box_native(v)) for k, v in row.items()) for row in rows + ] + else: + data = [ + into_c(zip(columns, t)) for t in df.itertuples(index=False, name=None) + ] + if object_dtype_indices: + object_dtype_indices_as_set = set(object_dtype_indices) + object_dtype_cols = { + col + for i, col in enumerate(df.columns) + if i in object_dtype_indices_as_set + } + for row in data: + for col in object_dtype_cols: + row[col] = maybe_box_native(row[col]) + return data + + elif orient == "index": + if not df.index.is_unique: + raise ValueError("DataFrame index must be unique for orient='index'.") + columns = df.columns.tolist() + if are_all_object_dtype_cols: + return into_c( + (t[0], dict(zip(df.columns, map(maybe_box_native, t[1:])))) + for t in df.itertuples(name=None) + ) + elif object_dtype_indices: + object_dtype_indices_as_set = set(object_dtype_indices) + is_object_dtype_by_index = [ + i in object_dtype_indices_as_set for i in range(len(df.columns)) + ] + return into_c( + ( + t[0], + { + columns[i]: maybe_box_native(v) + if is_object_dtype_by_index[i] + else v + for i, v in enumerate(t[1:]) + }, + ) + for t in df.itertuples(name=None) + ) + else: + return into_c( + (t[0], dict(zip(df.columns, t[1:]))) for t in df.itertuples(name=None) + ) + + else: + raise ValueError(f"orient '{orient}' not understood") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 42fdc2d8353c2..534dfc1b26571 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -3,6 +3,7 @@ from pandas import ( DataFrame, + MultiIndex, Series, ) import pandas._testing as tm @@ -171,6 +172,53 @@ def test_select_dtypes(using_copy_on_write): tm.assert_frame_equal(df, df_orig) +@pytest.mark.parametrize( + "func", + [ + lambda x, y: x.align(y), + lambda x, y: x.align(y.a, axis=0), + lambda x, y: x.align(y.a.iloc[slice(0, 1)], axis=1), + ], +) +def test_align_frame(using_copy_on_write, func): + df = DataFrame({"a": [1, 2, 3], "b": "a"}) + df_orig = df.copy() + df_changed = df[["b", "a"]].copy() + df2, _ = func(df, df_changed) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_align_series(using_copy_on_write): + ser = Series([1, 2]) + ser_orig = ser.copy() + ser_other = ser.copy() + ser2, ser_other_result = ser.align(ser_other) + + if using_copy_on_write: + assert np.shares_memory(ser2.values, ser.values) + assert np.shares_memory(ser_other_result.values, ser_other.values) + else: + assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser_other_result.values, ser_other.values) + + ser2.iloc[0] = 0 + ser_other_result.iloc[0] = 0 + if using_copy_on_write: + assert not np.shares_memory(ser2.values, ser.values) + assert not np.shares_memory(ser_other_result.values, ser_other.values) + tm.assert_series_equal(ser, ser_orig) + tm.assert_series_equal(ser_other, ser_orig) + + def test_to_frame(using_copy_on_write): # Case: converting a Series to a DataFrame with to_frame ser = Series([1, 2, 3]) @@ -252,6 +300,45 @@ def test_set_index(using_copy_on_write): 
tm.assert_frame_equal(df, df_orig) +def test_add_prefix(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.add_prefix("CoW_") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + + assert not np.shares_memory(get_array(df2, "CoW_a"), get_array(df, "a")) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "CoW_c"), get_array(df, "c")) + expected = DataFrame( + {"CoW_a": [0, 2, 3], "CoW_b": [4, 5, 6], "CoW_c": [0.1, 0.2, 0.3]} + ) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df, df_orig) + + +def test_add_suffix(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.add_suffix("_CoW") + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a_CoW"), get_array(df, "a")) + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "c_CoW"), get_array(df, "c")) + expected = DataFrame( + {"a_CoW": [0, 2, 3], "b_CoW": [4, 5, 6], "c_CoW": [0.1, 0.2, 0.3]} + ) + tm.assert_frame_equal(df2, expected) + tm.assert_frame_equal(df, df_orig) + + @pytest.mark.parametrize( "method", [ @@ -281,6 +368,95 @@ def test_head_tail(method, using_copy_on_write): df2.iloc[0, 0] = 1 tm.assert_frame_equal(df, df_orig) + +@pytest.mark.parametrize("method", ["assign", "drop_duplicates"]) +def test_assign_drop_duplicates(using_copy_on_write, method): + df = DataFrame({"a": [1, 2, 3]}) + df_orig = df.copy() + df2 = getattr(df, method)() + df2._mgr._verify_integrity() + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_reindex_like(using_copy_on_write): + df = DataFrame({"a": [1, 2], "b": "a"}) + other = DataFrame({"b": "a", "a": [1, 2]}) + + df_orig = df.copy() + df2 = df.reindex_like(other) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 1] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_reorder_levels(using_copy_on_write): + index = MultiIndex.from_tuples( + [(1, 1), (1, 2), (2, 1), (2, 2)], names=["one", "two"] + ) + df = DataFrame({"a": [1, 2, 3, 4]}, index=index) + df_orig = df.copy() + df2 = df.reorder_levels(order=["two", "one"]) + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + df2.iloc[0, 0] = 0 + if using_copy_on_write: + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_frame_set_axis(using_copy_on_write): + # GH 49473 + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0.1, 0.2, 0.3]}) + df_orig = df.copy() + df2 = df.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + else: + assert not 
np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + + # mutating df2 triggers a copy-on-write for that column / block + df2.iloc[0, 0] = 0 + assert not np.shares_memory(get_array(df2, "a"), get_array(df, "a")) + tm.assert_frame_equal(df, df_orig) + + +def test_series_set_axis(using_copy_on_write): + # GH 49473 + ser = Series([1, 2, 3]) + ser_orig = ser.copy() + ser2 = ser.set_axis(["a", "b", "c"], axis="index") + + if using_copy_on_write: + assert np.shares_memory(ser, ser2) + else: + assert not np.shares_memory(ser, ser2) + + # mutating ser triggers a copy-on-write for the column / block + ser2.iloc[0] = 0 + assert not np.shares_memory(ser2, ser) + tm.assert_series_equal(ser, ser_orig) + def test_droplevel(using_copy_on_write): # GH 49473 df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index(["a","b"]) @@ -293,7 +469,7 @@ def test_droplevel(using_copy_on_write): assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) # mutating df2 triggers a copy-on-write for that column / block - df2.loc["b","c"] = 1 + df2.loc[4,"c"] = 1 assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) tm.assert_frame_equal(df, df_orig) diff --git a/pandas/tests/copy_view/test_util.py b/pandas/tests/copy_view/test_util.py new file mode 100644 index 0000000000000..ff55330d70b28 --- /dev/null +++ b/pandas/tests/copy_view/test_util.py @@ -0,0 +1,14 @@ +import numpy as np + +from pandas import DataFrame +from pandas.tests.copy_view.util import get_array + + +def test_get_array_numpy(): + df = DataFrame({"a": [1, 2, 3]}) + assert np.shares_memory(get_array(df, "a"), get_array(df, "a")) + + +def test_get_array_masked(): + df = DataFrame({"a": [1, 2, 3]}, dtype="Int64") + assert np.shares_memory(get_array(df, "a"), get_array(df, "a")) diff --git a/pandas/tests/extension/base/accumulate.py b/pandas/tests/extension/base/accumulate.py new file mode 100644 index 0000000000000..868172f930844 --- /dev/null +++ b/pandas/tests/extension/base/accumulate.py @@ -0,0 +1,37 @@ +import pytest + +import pandas as pd +from pandas.tests.extension.base.base import BaseExtensionTests + + +class BaseAccumulateTests(BaseExtensionTests): + """ + Accumulation specific tests. Generally these only + make sense for numeric/boolean operations. + """ + + def check_accumulate(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + + if result.dtype == pd.Float32Dtype() and op_name == "cumprod" and skipna: + pytest.skip( + f"Float32 precision lead to large differences with op {op_name} " + f"and skipna={skipna}" + ) + + expected = getattr(s.astype("float64"), op_name)(skipna=skipna) + self.assert_series_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_accumulate_series_raises(self, data, all_numeric_accumulations, skipna): + op_name = all_numeric_accumulations + ser = pd.Series(data) + + with pytest.raises(NotImplementedError): + getattr(ser, op_name)(skipna=skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_accumulate_series(self, data, all_numeric_accumulations, skipna): + op_name = all_numeric_accumulations + ser = pd.Series(data) + self.check_accumulate(ser, op_name, skipna) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py new file mode 100644 index 0000000000000..cc3f468349efb --- /dev/null +++ b/pandas/tests/groupby/test_raises.py @@ -0,0 +1,178 @@ +# Only tests that raise an error and have no better location should go here. 
+# Tests for specific groupby methods should go in their respective +# test file. + +import datetime + +import pytest + +from pandas import DataFrame +from pandas.tests.groupby import get_groupby_method_args + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_string(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "Could not convert"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (NotImplementedError, "function is not implemented for this dtype"), + "cummin": (NotImplementedError, "function is not implemented for this dtype"), + "cumprod": (NotImplementedError, "function is not implemented for this dtype"), + "cumsum": (NotImplementedError, "function is not implemented for this dtype"), + "diff": (TypeError, "unsupported operand type"), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (TypeError, "'argmax' not allowed for this dtype"), + "idxmin": (TypeError, "'argmin' not allowed for this dtype"), + "last": (None, ""), + "max": (None, ""), + "mean": (TypeError, "Could not convert xyz to numeric"), + "median": (TypeError, "could not convert string to float"), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "unsupported operand type"), + "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"), + "quantile": (TypeError, "cannot be performed against 'object' dtypes!"), + "rank": (None, ""), + "sem": (ValueError, "could not convert string to float"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, "could not convert string to float"), + "std": (ValueError, "could not convert string to float"), + "sum": (None, ""), + "var": (TypeError, "could not convert string to float"), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_string_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_datetime(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "cannot perform __mul__ with this index type"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (None, ""), + "cummin": (None, ""), + "cumprod": (TypeError, "datetime64 type does not support cumprod operations"), + "cumsum": (TypeError, 
"datetime64 type does not support cumsum operations"), + "diff": (None, ""), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (None, ""), + "idxmin": (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": (None, ""), + "median": (None, ""), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "cannot perform __truediv__ with this index type"), + "prod": (TypeError, "datetime64 type does not support prod"), + "quantile": (None, ""), + "rank": (None, ""), + "sem": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"), + "std": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "sum": (TypeError, "datetime64 type does not support sum operations"), + "var": (None, ""), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_datetime_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py new file mode 100644 index 0000000000000..33890feb8692d --- /dev/null +++ b/scripts/check_test_naming.py @@ -0,0 +1,152 @@ +""" +Check that test names start with `test`, and that test classes start with `Test`. + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run check-test-naming --all-files + +NOTE: if this finds a false positive, you can add the comment `# not a test` to the +class or function definition. Though hopefully that shouldn't be necessary. 
+""" +from __future__ import annotations + +import argparse +import ast +import os +from pathlib import Path +import sys +from typing import ( + Iterator, + Sequence, +) + +PRAGMA = "# not a test" + + +def _find_names(node: ast.Module) -> Iterator[str]: + for _node in ast.walk(node): + if isinstance(_node, ast.Name): + yield _node.id + elif isinstance(_node, ast.Attribute): + yield _node.attr + + +def _is_fixture(node: ast.expr) -> bool: + if isinstance(node, ast.Call): + node = node.func + return ( + isinstance(node, ast.Attribute) + and node.attr == "fixture" + and isinstance(node.value, ast.Name) + and node.value.id == "pytest" + ) + + +def _is_register_dtype(node): + return isinstance(node, ast.Name) and node.id == "register_extension_dtype" + + +def is_misnamed_test_func( + node: ast.expr | ast.stmt, names: Sequence[str], line: str +) -> bool: + return ( + isinstance(node, ast.FunctionDef) + and not node.name.startswith("test") + and names.count(node.name) == 0 + and not any(_is_fixture(decorator) for decorator in node.decorator_list) + and PRAGMA not in line + and node.name + not in ("teardown_method", "setup_method", "teardown_class", "setup_class") + ) + + +def is_misnamed_test_class( + node: ast.expr | ast.stmt, names: Sequence[str], line: str +) -> bool: + return ( + isinstance(node, ast.ClassDef) + and not node.name.startswith("Test") + and names.count(node.name) == 0 + and not any(_is_register_dtype(decorator) for decorator in node.decorator_list) + and PRAGMA not in line + ) + + +def main(content: str, file: str) -> int: + lines = content.splitlines() + tree = ast.parse(content) + names = list(_find_names(tree)) + ret = 0 + for node in tree.body: + if is_misnamed_test_func(node, names, lines[node.lineno - 1]): + print( + f"{file}:{node.lineno}:{node.col_offset} " + "found test function which does not start with 'test'" + ) + ret = 1 + elif is_misnamed_test_class(node, names, lines[node.lineno - 1]): + print( + f"{file}:{node.lineno}:{node.col_offset} " + "found test class which does not start with 'Test'" + ) + ret = 1 + if ( + isinstance(node, ast.ClassDef) + and names.count(node.name) == 0 + and not any( + _is_register_dtype(decorator) for decorator in node.decorator_list + ) + and PRAGMA not in lines[node.lineno - 1] + ): + for _node in node.body: + if is_misnamed_test_func(_node, names, lines[_node.lineno - 1]): + # It could be that this function is used somewhere by the + # parent class. For example, there might be a base class + # with + # + # class Foo: + # def foo(self): + # assert 1+1==2 + # def test_foo(self): + # self.foo() + # + # and then some subclass overwrites `foo`. So, we check that + # `self.foo` doesn't appear in any of the test classes. + # Note some false negatives might get through, but that's OK. + # This is good enough that has helped identify several examples + # of tests not being run. 
+ assert isinstance(_node, ast.FunctionDef) # help mypy + should_continue = False + for _file in (Path("pandas") / "tests").rglob("*.py"): + with open(os.path.join(_file)) as fd: + _content = fd.read() + if f"self.{_node.name}" in _content: + should_continue = True + break + if should_continue: + continue + + print( + f"{file}:{_node.lineno}:{_node.col_offset} " + "found test function which does not start with 'test'" + ) + ret = 1 + return ret + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args() + + ret = 0 + + for file in args.paths: + filename = os.path.basename(file) + if not (filename.startswith("test") and filename.endswith(".py")): + continue + with open(file, encoding="utf-8") as fd: + content = fd.read() + ret |= main(content, file) + + sys.exit(ret) diff --git a/scripts/tests/test_check_test_naming.py b/scripts/tests/test_check_test_naming.py new file mode 100644 index 0000000000000..9ddaf2fe2a97d --- /dev/null +++ b/scripts/tests/test_check_test_naming.py @@ -0,0 +1,54 @@ +import pytest + +from scripts.check_test_naming import main + + +@pytest.mark.parametrize( + "src, expected_out, expected_ret", + [ + ( + "def foo(): pass\n", + "t.py:1:0 found test function which does not start with 'test'\n", + 1, + ), + ( + "class Foo:\n def test_foo(): pass\n", + "t.py:1:0 found test class which does not start with 'Test'\n", + 1, + ), + ("def test_foo(): pass\n", "", 0), + ( + "class TestFoo:\n def foo(): pass\n", + "t.py:2:4 found test function which does not start with 'test'\n", + 1, + ), + ("class TestFoo:\n def test_foo(): pass\n", "", 0), + ( + "class Foo:\n def foo(): pass\n", + "t.py:1:0 found test class which does not start with 'Test'\n" + "t.py:2:4 found test function which does not start with 'test'\n", + 1, + ), + ( + "def foo():\n pass\ndef test_foo():\n foo()\n", + "", + 0, + ), + ( + "class Foo: # not a test\n" + " pass\n" + "def test_foo():\n" + " Class.foo()\n", + "", + 0, + ), + ("@pytest.fixture\ndef foo(): pass\n", "", 0), + ("@pytest.fixture()\ndef foo(): pass\n", "", 0), + ("@register_extension_dtype\nclass Foo: pass\n", "", 0), + ], +) +def test_main(capsys, src, expected_out, expected_ret): + ret = main(src, "t.py") + out, _ = capsys.readouterr() + assert out == expected_out + assert ret == expected_ret From bb1e8d6eca13aa74e18e0627d15219bcf5e35cad Mon Sep 17 00:00:00 2001 From: David Leon Date: Thu, 5 Jan 2023 11:34:21 +0100 Subject: [PATCH 3/3] implemented PR comments --- pandas/tests/copy_view/test_methods.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 56285031aca53..a06fc5b8ad5e5 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -598,9 +598,8 @@ def test_tz_convert_localize(using_copy_on_write, func, tz): def test_droplevel(using_copy_on_write): # GH 49473 - df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}).set_index( - ["a", "b"] - ) + index = MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"]) + df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index) df_orig = df.copy() df2 = df.droplevel(0) @@ -610,7 +609,7 @@ def test_droplevel(using_copy_on_write): assert not np.shares_memory(get_array(df2, "c"), get_array(df, "c")) # mutating df2 triggers a copy-on-write for that column / block - df2.loc[4, "c"] = 1 + df2.iloc[0, 0] = 0 assert not 
np.shares_memory(get_array(df2, "c"), get_array(df, "c"))
     tm.assert_frame_equal(df, df_orig)
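
The final form of test_droplevel above pins down the intended semantics. As a quick illustration outside the test suite, here is a minimal sketch. It assumes a pandas development build where the "mode.copy_on_write" option is available; the option call and the .values-based checks are illustrative only, not part of the patch:

    import numpy as np
    import pandas as pd

    pd.set_option("mode.copy_on_write", True)  # assumes a CoW-capable dev build

    index = pd.MultiIndex.from_tuples([(1, 1), (1, 2), (2, 1)], names=["one", "two"])
    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}, index=index)

    # droplevel now returns a new frame that still shares the column buffers
    df2 = df.droplevel(0)
    assert np.shares_memory(df2["c"].values, df["c"].values)

    # the first mutation copies the affected block; the parent is untouched
    df2.iloc[0, 0] = 0
    assert not np.shares_memory(df2["c"].values, df["c"].values)
    assert df.loc[(1, 1), "a"] == 1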
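
Patch 2 also pulls in pandas/core/array_algos/masked_accumulations.py, whose fill-value table is easiest to follow with the cumsum path spelled out. A self-contained re-derivation under local names (nothing here is imported from pandas):

    import numpy as np

    def masked_cumsum(values, mask, skipna=True):
        values = values.copy()
        # 0 is cumsum's identity; cumprod uses 1, cummin/cummax use dtype max/min
        values[mask] = 0
        if not skipna:
            # propagate NA to every position after the first masked one
            mask = np.maximum.accumulate(mask)
        return np.cumsum(values), mask

    vals = np.array([1, 2, 3, 4], dtype="int64")
    mask = np.array([False, True, False, False])  # position 1 is NA

    out, out_mask = masked_cumsum(vals, mask)
    assert out.tolist() == [1, 1, 4, 8]
    assert out_mask.tolist() == [False, True, False, False]

    _, strict_mask = masked_cumsum(vals, mask, skipna=False)
    assert strict_mask.tolist() == [False, True, True, True]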
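
Likewise, the orient table in the new pandas/core/methods/to_dict.py maps onto concrete outputs as follows, shown through the public DataFrame.to_dict and assuming a build that already exposes the index keyword added alongside this refactor:

    import pandas as pd

    df = pd.DataFrame({"x": [1, 2], "y": ["a", "b"]}, index=["r0", "r1"])

    df.to_dict("dict")     # {'x': {'r0': 1, 'r1': 2}, 'y': {'r0': 'a', 'r1': 'b'}}
    df.to_dict("list")     # {'x': [1, 2], 'y': ['a', 'b']}
    df.to_dict("records")  # [{'x': 1, 'y': 'a'}, {'x': 2, 'y': 'b'}]
    df.to_dict("split")    # {'index': ['r0', 'r1'], 'columns': ['x', 'y'],
                           #  'data': [[1, 'a'], [2, 'b']]}

    # index=False (new in this refactor) drops the index entries for split/tight:
    df.to_dict("tight", index=False)
    # {'columns': ['x', 'y'], 'data': [[1, 'a'], [2, 'b']], 'column_names': [None]}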