diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2d8f6468aca83..e7d51bb99338a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -153,3 +153,5 @@ jobs: run: | source activate pandas-dev pytest pandas/tests/frame/methods --array-manager + pytest pandas/tests/frame/indexing --array-manager + pytest pandas/tests/indexing --array-manager diff --git a/pandas/conftest.py b/pandas/conftest.py index bc455092ebe86..bf425c63ba456 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -382,11 +382,14 @@ def __len__(self): # Indices # ---------------------------------------------------------------- @pytest.fixture -def multiindex_year_month_day_dataframe_random_data(): +def multiindex_year_month_day_dataframe_random_data(using_array_manager): """ DataFrame with 3 level MultiIndex (year, month, day) covering first 100 business days from 2000-01-01 with random data """ + if using_array_manager: + # TODO(ArrayManager) groupby + pytest.skip("Not yet implemented for ArrayManager") tdf = tm.makeTimeDataFrame(100) ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() # use Int64Index, to make sure things work diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 63d238da12101..6e51ccb3c9a0e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -621,6 +621,10 @@ def _as_manager(self, typ: str) -> DataFrame: # fastpath of passing a manager doesn't check the option/manager class return DataFrame(new_mgr) + @property + def _has_array_manager(self): + return isinstance(self._mgr, ArrayManager) + # ---------------------------------------------------------------------- @property @@ -3231,7 +3235,9 @@ def _setitem_array(self, key, value): key, axis=1, raise_missing=False )[1] self._check_setitem_copy() - self.iloc[:, indexer] = value + self.iloc._setitem_with_indexer( + (slice(None), indexer), value, name="setitem" + ) def _setitem_frame(self, key, value): # support boolean setting with DataFrame 
input, e.g. diff --git a/pandas/core/indexers.py b/pandas/core/indexers.py index c7011b4339fe7..47274797c2e4f 100644 --- a/pandas/core/indexers.py +++ b/pandas/core/indexers.py @@ -324,8 +324,8 @@ def length_of_indexer(indexer, target=None) -> int: start, stop = stop + 1, start + 1 step = -step return (stop - start + step - 1) // step - elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list)): - if isinstance(indexer, list): + elif isinstance(indexer, (ABCSeries, ABCIndex, np.ndarray, list, range)): + if isinstance(indexer, (list, range)): indexer = np.array(indexer) if indexer.dtype == bool: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cc7c5f666feda..4b6dec3bb0a87 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1684,7 +1684,9 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi): # We are setting multiple rows in a single column. - self._setitem_single_column(ilocs[0], value, pi) + self._setitem_single_column( + ilocs[0], value, pi, overwrite=name == "setitem" + ) elif len(ilocs) == 1 and 0 != lplane_indexer != len(value): # We are trying to set N values into M entries of a single @@ -1708,7 +1710,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): elif len(ilocs) == len(value): # We are setting multiple columns in a single row. 
for loc, v in zip(ilocs, value): - self._setitem_single_column(loc, v, pi) + self._setitem_single_column(loc, v, pi, overwrite=name == "setitem") elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: # This is a setitem-with-expansion, see @@ -1728,7 +1730,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): # scalar value for loc in ilocs: - self._setitem_single_column(loc, value, pi) + self._setitem_single_column(loc, value, pi, overwrite=name == "setitem") def _setitem_with_indexer_2d_value(self, indexer, value): # We get here with np.ndim(value) == 2, excluding DataFrame, @@ -1797,7 +1799,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str self._setitem_single_column(loc, val, pi) - def _setitem_single_column(self, loc: int, value, plane_indexer): + def _setitem_single_column(self, loc: int, value, plane_indexer, overwrite=True): """ Parameters @@ -1806,7 +1808,14 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): Indexer for column position plane_indexer : int, slice, listlike[int] The indexer we use for setitem along axis=0. 
+ overwrite : bool + Whether to overwrite the original column, or update the existing + column inplace """ + if not overwrite and self.obj._has_array_manager: + self.obj._mgr.setitem(plane_indexer, value, loc) + return + pi = plane_indexer ser = self.obj._ixs(loc, axis=1) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index a8493e647f39a..b4c916ae86b29 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -14,9 +14,11 @@ from pandas.core.dtypes.cast import find_common_type, infer_dtype_from_scalar from pandas.core.dtypes.common import ( is_bool_dtype, + is_datetime64_dtype, is_dtype_equal, is_extension_array_dtype, is_numeric_dtype, + is_scalar, ) from pandas.core.dtypes.dtypes import ExtensionDtype, PandasDtype from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries @@ -339,9 +341,24 @@ def where(self, other, cond, align: bool, errors: str, axis: int) -> ArrayManage axis=axis, ) - # TODO what is this used for? - # def setitem(self, indexer, value) -> ArrayManager: - # return self.apply_with_block("setitem", indexer=indexer, value=value) + def setitem(self, indexer, value, column_idx) -> ArrayManager: + """ + Set value for a single column and a given row indexer. 
For example, from + ``df.loc[indexer] = value`` + """ + arr = self.arrays[column_idx] + + # TODO this special case can be removed once we only store EAs + # special case to support setting np.nan in a non-float numpy array + if ( + isinstance(arr, np.ndarray) + and is_datetime64_dtype(arr.dtype) + and is_scalar(value) + and isna(value) + ): + value = np.datetime64("NaT", "ns") + + arr[indexer] = value def putmask(self, mask, new, align: bool = True): @@ -454,7 +471,7 @@ def is_mixed_type(self) -> bool: @property def is_numeric_mixed_type(self) -> bool: - return False + return all(is_numeric_dtype(t) for t in self.get_dtypes()) @property def any_extension_types(self) -> bool: @@ -625,7 +642,14 @@ def fast_xs(self, loc: int) -> ArrayLike: else: temp_dtype = dtype - result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype) + if dtype == "object": + # TODO properly test this, check + # pandas/tests/indexing/test_chaining_and_caching.py::TestChaining + # ::test_chained_getitem_with_lists + result = np.empty(self.shape_proper[1], dtype=dtype) + result[:] = [arr[loc] for arr in self.arrays] + else: + result = np.array([arr[loc] for arr in self.arrays], dtype=temp_dtype) if isinstance(dtype, ExtensionDtype): result = dtype.construct_array_type()._from_sequence(result, dtype=dtype) return result diff --git a/pandas/tests/frame/indexing/test_categorical.py b/pandas/tests/frame/indexing/test_categorical.py index b3e0783d7388f..b0ef6c1004bd4 100644 --- a/pandas/tests/frame/indexing/test_categorical.py +++ b/pandas/tests/frame/indexing/test_categorical.py @@ -243,16 +243,22 @@ def test_setitem_mask_categorical(self, exp_multi_row): # category c is kept in .categories tm.assert_frame_equal(df, exp_fancy) - def test_loc_setitem_categorical_values_partial_column_slice(self): + def test_loc_setitem_categorical_values_partial_column_slice( + self, using_array_manager + ): # Assigning a Category to parts of a int/... 
column uses the values of # the Categorical df = DataFrame({"a": [1, 1, 1, 1, 1], "b": list("aaaaa")}) exp = DataFrame({"a": [1, "b", "b", 1, 1], "b": list("aabba")}) - df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) - df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) - tm.assert_frame_equal(df, exp) - - def test_loc_setitem_single_row_categorical(self): + if using_array_manager: + with pytest.raises(ValueError, match=""): + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + else: + df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) + df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + tm.assert_frame_equal(df, exp) + + def test_loc_setitem_single_row_categorical(self, using_array_manager): # GH 25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) @@ -260,6 +266,9 @@ def test_loc_setitem_single_row_categorical(self): result = df["Alpha"] expected = Series(categories, index=df.index, name="Alpha") + if using_array_manager: + # with ArrayManager the object dtype is preserved + expected = expected.astype(object) tm.assert_series_equal(result, expected) def test_loc_indexing_preserves_index_category_dtype(self): diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 6808ffe65e561..843e87381a703 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -5,6 +5,7 @@ import pytest from pandas._libs import iNaT +import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_integer @@ -206,6 +207,8 @@ def test_setitem_list_missing_columns(self, columns, box, expected): df[columns] = box tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) iset with multiple elements not yet implemented + @td.skip_array_manager_not_yet_implemented def test_setitem_multi_index(self): # GH7655, test that 
assigning to a sub-frame of a frame # with multi-index columns aligns both rows and columns @@ -475,6 +478,8 @@ def test_setitem(self, float_frame): assert smaller["col10"].dtype == np.object_ assert (smaller["col10"] == ["1", "2"]).all() + @td.skip_array_manager_invalid_test # valid version below with nullable dtypes + def test_setitem_dtype_change(self): # dtype changing GH4204 df = DataFrame([[0, 0]]) df.iloc[0] = np.nan @@ -485,6 +490,18 @@ def test_setitem(self, float_frame): df.loc[0] = np.nan tm.assert_frame_equal(df, expected) + def test_setitem_nullable_nan(self): + # version of the test above using nullable dtypes (so that setting + # NaN can preserve the dtype) + df = DataFrame([[0, 0]], dtype="Int64") + df.iloc[0] = np.nan + expected = DataFrame([[np.nan, np.nan]], dtype="Int64") + tm.assert_frame_equal(df, expected) + + df = DataFrame([[0, 0]], dtype="Int64") + df.loc[0] = np.nan + tm.assert_frame_equal(df, expected) + def test_setitem_tuple(self, float_frame): float_frame["A", "B"] = float_frame["A"] assert ("A", "B") in float_frame.columns @@ -579,6 +596,8 @@ def test_setitem_cast(self, float_frame): float_frame["something"] = 2.5 assert float_frame["something"].dtype == np.float64 + @td.skip_array_manager_invalid_test + def test_setitem_cast_object(self): # GH 7704 # dtype conversion on setting df = DataFrame(np.random.rand(30, 3), columns=tuple("ABC")) @@ -591,6 +610,7 @@ def test_setitem_cast(self, float_frame): ) tm.assert_series_equal(result, expected) + def test_setitem_preserve_int8_dtype(self): # Test that data type is preserved . 
#5782 df = DataFrame({"one": np.arange(6, dtype=np.int8)}) df.loc[1, "one"] = 6 @@ -603,7 +623,10 @@ def test_setitem_boolean_column(self, float_frame): mask = float_frame["A"] > 0 float_frame.loc[mask, "B"] = 0 - expected.values[mask.values, 1] = 0 + + vals = float_frame.to_numpy() + vals[mask.values, 1] = 0 + expected = DataFrame(vals, index=expected.index, columns=expected.columns) tm.assert_frame_equal(float_frame, expected) @@ -693,7 +716,7 @@ def test_setitem_ambig(self): assert len(dm.columns) == 3 assert dm[2].dtype == np.object_ - def test_setitem_clear_caches(self): + def test_setitem_clear_caches(self, using_array_manager): # see gh-304 df = DataFrame( {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]}, index=[0, 1, 2, 3] @@ -706,7 +729,10 @@ def test_setitem_clear_caches(self): expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z") - assert df["z"] is not foo + if not using_array_manager: + # with ArrayManager, the values are updated in place in the array + # so the Series can be the same (? good explanation ?) 
+ assert df["z"] is not foo tm.assert_series_equal(df["z"], expected) def test_setitem_None(self, float_frame): @@ -799,6 +825,7 @@ def test_getitem_setitem_integer_slice_keyerrors(self): with pytest.raises(KeyError, match=r"^3$"): df2.loc[3:11] = 0 + @td.skip_array_manager_invalid_test # already covered in test_iloc_col_slice_view def test_fancy_getitem_slice_mixed(self, float_frame, float_string_frame): sliced = float_string_frame.iloc[:, -3:] assert sliced["D"].dtype == np.float64 @@ -857,6 +884,7 @@ def test_getitem_fancy_scalar(self, float_frame): for idx in f.index[::5]: assert ix[idx, col] == ts[idx] + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values def test_setitem_fancy_scalar(self, float_frame): f = float_frame expected = float_frame.copy() @@ -896,6 +924,7 @@ def test_getitem_fancy_boolean(self, float_frame): expected = f.reindex(index=f.index[boolvec], columns=["C", "D"]) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values def test_setitem_fancy_boolean(self, float_frame): # from 2d, set with booleans frame = float_frame.copy() @@ -1056,7 +1085,7 @@ def test_setitem_single_column_mixed(self): expected = np.array([np.nan, "qux", np.nan, "qux", np.nan], dtype=object) tm.assert_almost_equal(df["str"].values, expected) - def test_setitem_single_column_mixed_datetime(self): + def test_setitem_single_column_mixed_datetime(self, using_array_manager): df = DataFrame( np.random.randn(5, 3), index=["a", "b", "c", "d", "e"], @@ -1074,10 +1103,12 @@ def test_setitem_single_column_mixed_datetime(self): tm.assert_series_equal(result, expected) # GH#16674 iNaT is treated as an integer when given by the user - df.loc["b", "timestamp"] = iNaT - assert not isna(df.loc["b", "timestamp"]) - assert df["timestamp"].dtype == np.object_ - assert df.loc["b", "timestamp"] == iNaT + if not using_array_manager: + # TODO(ArrayManager) setting iNaT in DatetimeArray actually 
sets NaT + df.loc["b", "timestamp"] = iNaT + assert not isna(df.loc["b", "timestamp"]) + assert df["timestamp"].dtype == np.object_ + assert df.loc["b", "timestamp"] == iNaT # allow this syntax df.loc["c", "timestamp"] = np.nan @@ -1093,6 +1124,7 @@ def test_setitem_single_column_mixed_datetime(self): # pytest.raises( # Exception, df.loc.__setitem__, ('d', 'timestamp'), [np.nan]) + @td.skip_array_manager_invalid_test def test_setitem_mixed_datetime(self): # GH 9336 expected = DataFrame( @@ -1282,21 +1314,28 @@ def test_iloc_row(self): expected = df.loc[8:14] tm.assert_frame_equal(result, expected) + # list of integers + result = df.iloc[[1, 2, 4, 6]] + expected = df.reindex(df.index[[1, 2, 4, 6]]) + tm.assert_frame_equal(result, expected) + + def test_iloc_row_slice_view(self, using_array_manager): + df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + # verify slice is view # setting it makes it raise/warn + subset = df.iloc[slice(4, 8)] + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(com.SettingWithCopyError, match=msg): - result[2] = 0.0 + subset[2] = 0.0 exp_col = df[2].copy() - exp_col[4:8] = 0.0 + if not using_array_manager: + # TODO(ArrayManager) verify it is expected that the original didn't change + exp_col[4:8] = 0.0 tm.assert_series_equal(df[2], exp_col) - # list of integers - result = df.iloc[[1, 2, 4, 6]] - expected = df.reindex(df.index[[1, 2, 4, 6]]) - tm.assert_frame_equal(result, expected) - def test_iloc_col(self): df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) @@ -1314,19 +1353,32 @@ def test_iloc_col(self): expected = df.loc[:, 8:14] tm.assert_frame_equal(result, expected) - # verify slice is view - # and that we are setting a copy - msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): - result[8] = 0.0 - - assert (df[8] == 0).all() - # list of integers result = df.iloc[:, [1, 2, 4, 6]] 
expected = df.reindex(columns=df.columns[[1, 2, 4, 6]]) tm.assert_frame_equal(result, expected) + def test_iloc_col_slice_view(self, using_array_manager): + df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + original = df.copy() + subset = df.iloc[:, slice(4, 8)] + + if not using_array_manager: + # verify slice is view + # and that we are setting a copy + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + subset[8] = 0.0 + + assert (df[8] == 0).all() + else: + # TODO(ArrayManager) verify this is the desired behaviour + subset[8] = 0.0 + # subset changed + assert (subset[8] == 0).all() + # but df itself did not change (setitem replaces full column) + tm.assert_frame_equal(df, original) + def test_iloc_duplicates(self): df = DataFrame(np.random.rand(3, 3), columns=list("ABC"), index=list("aab")) @@ -1422,17 +1474,21 @@ def test_setitem_with_unaligned_tz_aware_datetime_column(self): df.loc[[0, 1, 2], "dates"] = column[[1, 0, 2]] tm.assert_series_equal(df["dates"], column) - def test_loc_setitem_datetime_coercion(self): + def test_loc_setitem_datetime_coercion(self, using_array_manager): # gh-1048 df = DataFrame({"c": [Timestamp("2010-10-01")] * 3}) df.loc[0:1, "c"] = np.datetime64("2008-08-08") assert Timestamp("2008-08-08") == df.loc[0, "c"] assert Timestamp("2008-08-08") == df.loc[1, "c"] - df.loc[2, "c"] = date(2005, 5, 5) - with tm.assert_produces_warning(FutureWarning): - # Comparing Timestamp to date obj is deprecated - assert Timestamp("2005-05-05") == df.loc[2, "c"] - assert Timestamp("2005-05-05").date() == df.loc[2, "c"] + if using_array_manager: + with pytest.raises(TypeError, match=""): + df.loc[2, "c"] = date(2005, 5, 5) + else: + df.loc[2, "c"] = date(2005, 5, 5) + with tm.assert_produces_warning(FutureWarning): + # Comparing Timestamp to date obj is deprecated + assert Timestamp("2005-05-05") == df.loc[2, "c"] + assert Timestamp("2005-05-05").date() == 
df.loc[2, "c"] def test_loc_setitem_datetimelike_with_inference(self): # GH 7592 @@ -1623,6 +1679,7 @@ def test_getitem_boolean_indexing_mixed(self): with pytest.raises(TypeError, match=msg): df[df > 0.3] = 1 + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_type_error_multiindex(self): # See gh-12218 df = DataFrame( @@ -1702,7 +1759,7 @@ def test_setitem_unsorted_multiindex_columns(self, indexer): class TestDataFrameIndexingUInt64: - def test_setitem(self, uint64_frame): + def test_setitem(self, uint64_frame, using_array_manager): df = uint64_frame idx = df["A"].rename("foo") @@ -1719,6 +1776,11 @@ def test_setitem(self, uint64_frame): # With NaN: because uint64 has no NaN element, # the column should be cast to object. df2 = df.copy() + if using_array_manager: + # with ArrayManager we raise in this case + with pytest.raises(TypeError, match=""): + df2.iloc[1, 1] = pd.NaT + return df2.iloc[1, 1] = pd.NaT df2.iloc[1, 2] = pd.NaT result = df2["B"] @@ -1732,7 +1794,7 @@ def test_setitem(self, uint64_frame): ) -def test_object_casting_indexing_wraps_datetimelike(): +def test_object_casting_indexing_wraps_datetimelike(using_array_manager): # GH#31649, check the indexing methods all the way down the stack df = DataFrame( { @@ -1754,6 +1816,10 @@ def test_object_casting_indexing_wraps_datetimelike(): assert isinstance(ser.values[1], Timestamp) assert isinstance(ser.values[2], pd.Timedelta) + if using_array_manager: + # remainder of the test checking BlockManager internals + return + mgr = df._mgr mgr._rebuild_blknos_and_blklocs() arr = mgr.fast_xs(0) diff --git a/pandas/tests/frame/indexing/test_insert.py b/pandas/tests/frame/indexing/test_insert.py index 6e4deb5469777..3cfcba3903187 100644 --- a/pandas/tests/frame/indexing/test_insert.py +++ b/pandas/tests/frame/indexing/test_insert.py @@ -69,11 +69,17 @@ def test_insert_with_columns_dups(self): ) tm.assert_frame_equal(df, exp) - def test_insert_item_cache(self): + def 
test_insert_item_cache(self, using_array_manager): df = DataFrame(np.random.randn(4, 3)) ser = df[0] - with tm.assert_produces_warning(PerformanceWarning): + if using_array_manager: + expected_warning = None + else: + # with BlockManager warn about high fragmentation of single dtype + expected_warning = PerformanceWarning + + with tm.assert_produces_warning(expected_warning): for n in range(100): df[n + 3] = df[1] * n diff --git a/pandas/tests/frame/indexing/test_set_value.py b/pandas/tests/frame/indexing/test_set_value.py index 84def57f6b6e0..746bad42d14ed 100644 --- a/pandas/tests/frame/indexing/test_set_value.py +++ b/pandas/tests/frame/indexing/test_set_value.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_float_dtype from pandas import DataFrame, isna @@ -13,6 +15,8 @@ def test_set_value(self, float_frame): float_frame._set_value(idx, col, 1) assert float_frame[col][idx] == 1 + # TODO(ArrayManager) set_value with resize should allow changing new float column + @td.skip_array_manager_not_yet_implemented def test_set_value_resize(self, float_frame): res = float_frame._set_value("foobar", "B", 0) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9318764a1b5ad..3ba0889526819 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.base import registry as ea_registry from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype, PeriodDtype @@ -35,6 +37,7 @@ def test_setitem_list_not_dataframe(self, float_frame): float_frame[["A", "B"]] = data tm.assert_almost_equal(float_frame[["A", "B"]].values, data) + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) groupby def test_setitem_error_msmgs(self): # GH 7432 @@ -264,6 +267,7 @@ def 
test_frame_setitem_existing_datetime64_col_other_units(self, unit): df["dates"] = vals assert (df["dates"].values == ex_vals).all() + @td.skip_array_manager_invalid_test def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame @@ -348,7 +352,9 @@ def test_setitem_frame_length_0_str_key(self, indexer): expected["A"] = expected["A"].astype("object") tm.assert_frame_equal(df, expected) - def test_setitem_frame_duplicate_columns(self): + # TODO(ArrayManager) iset with multiple elements not yet implemented + @td.skip_array_manager_not_yet_implemented + def test_setitem_frame_duplicate_columns(self, using_array_manager): # GH#15695 cols = ["A", "B", "C"] * 2 df = DataFrame(index=range(3), columns=cols) @@ -364,6 +370,8 @@ def test_setitem_frame_duplicate_columns(self): columns=cols, dtype="object", ) + if using_array_manager: + expected["C"] = expected["C"].astype("int64") tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]]) @@ -375,6 +383,8 @@ def test_setitem_df_wrong_column_number(self, cols): with pytest.raises(ValueError, match=msg): df["a"] = rhs + # TODO(ArrayManager) iset with multiple elements not yet implemented + @td.skip_array_manager_not_yet_implemented def test_setitem_listlike_indexer_duplicate_columns(self): # GH#38604 df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"]) @@ -452,6 +462,7 @@ def test_setitem_callable(self): class TestDataFrameSetItemBooleanMask: + @td.skip_array_manager_invalid_test # TODO(ArrayManager) rewrite not using .values @pytest.mark.parametrize( "mask_type", [lambda df: df > np.abs(df) / 2, lambda df: (df > np.abs(df) / 2).values], diff --git a/pandas/tests/frame/indexing/test_xs.py b/pandas/tests/frame/indexing/test_xs.py index 3be3ce15622b4..e10cef5ca6a9b 100644 --- a/pandas/tests/frame/indexing/test_xs.py +++ b/pandas/tests/frame/indexing/test_xs.py @@ -3,6 +3,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import 
DataFrame, Index, IndexSlice, MultiIndex, Series, concat import pandas._testing as tm import pandas.core.common as com @@ -102,14 +104,20 @@ def test_xs_keep_level(self): result = df.xs([2008, "sat"], level=["year", "day"], drop_level=False) tm.assert_frame_equal(result, expected) - def test_xs_view(self): + def test_xs_view(self, using_array_manager): # in 0.14 this will return a view if possible a copy otherwise, but # this is numpy dependent dm = DataFrame(np.arange(20.0).reshape(4, 5), index=range(4), columns=range(5)) - dm.xs(2)[:] = 10 - assert (dm.xs(2) == 10).all() + if using_array_manager: + msg = r"\nA value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + dm.xs(2)[:] = 20 + assert not (dm.xs(2) == 20).any() + else: + dm.xs(2)[:] = 20 + assert (dm.xs(2) == 20).all() class TestXSWithMultiIndex: @@ -320,6 +328,7 @@ def test_xs_droplevel_false(self): expected = DataFrame({"a": [1]}) tm.assert_frame_equal(result, expected) + @td.skip_array_manager_invalid_test def test_xs_droplevel_false_view(self): # GH#37832 df = DataFrame([[1, 2, 3]], columns=Index(["a", "b", "c"])) diff --git a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py index 62c0171fe641f..5ecd6b5ad665a 100644 --- a/pandas/tests/indexing/multiindex/test_chaining_and_caching.py +++ b/pandas/tests/indexing/multiindex/test_chaining_and_caching.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import DataFrame, MultiIndex, Series import pandas._testing as tm import pandas.core.common as com @@ -28,6 +30,7 @@ def test_detect_chained_assignment(): zed["eyes"]["right"].fillna(value=555, inplace=True) +@td.skip_array_manager_invalid_test # with ArrayManager df.loc[0] is not a view def test_cache_updating(): # 5216 # make sure that we don't try to set a dead cache diff --git 
a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index e5d114d5a9b18..4f8107b93d8df 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -1,6 +1,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range, isna, notna import pandas._testing as tm @@ -116,6 +118,9 @@ def check(target, indexers, value, compare_fn, expected=None): expected=copy, ) + # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in + # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) + @td.skip_array_manager_not_yet_implemented def test_multiindex_setitem(self): # GH 3738 @@ -170,7 +175,7 @@ def test_multiindex_setitem(self): df.loc[idx[:, :, "Stock"], "price"] *= 2 tm.assert_frame_equal(df, expected) - def test_multiindex_assignment(self): + def test_multiindex_assignment(self, using_array_manager): # GH3777 part 2 @@ -195,11 +200,15 @@ def test_multiindex_assignment(self): df.loc[4, "c"] = arr exp = Series(arr, index=[8, 10], name="c", dtype="float64") + if using_array_manager: + exp = exp.astype("int64") tm.assert_series_equal(df.loc[4, "c"], exp) # scalar ok df.loc[4, "c"] = 10 exp = Series(10, index=[8, 10], name="c", dtype="float64") + if using_array_manager: + exp = exp.astype("int64") tm.assert_series_equal(df.loc[4, "c"], exp) # invalid assignments @@ -312,6 +321,8 @@ def test_frame_getitem_setitem_multislice(self): df.loc[:, :] = 10 tm.assert_frame_equal(df, result) + # TODO(ArrayManager) iset with multiple elements not yet implemented + @td.skip_array_manager_not_yet_implemented def test_frame_setitem_multi_column(self): df = DataFrame( np.random.randn(10, 4), columns=[["a", "a", "b", "b"], [0, 1, 0, 1]] @@ -417,6 +428,8 @@ def test_nonunique_assignment_1750(self): assert (df.xs((1, 1))["C"] == 
"_").all() + # TODO(ArrayManager) iset with multiple elements not yet implemented + @td.skip_array_manager_not_yet_implemented def test_astype_assignment_with_dups(self): # GH 4686 @@ -439,6 +452,8 @@ def test_setitem_nonmonotonic(self): tm.assert_frame_equal(df, expected) +@td.skip_array_manager_invalid_test # df["foo"] select multiple columns -> .values +# is not a view def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index fbf33999386e6..27573411f6003 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -7,14 +7,19 @@ import pandas._testing as tm -def test_at_timezone(): +def test_at_timezone(using_array_manager): # https://github.com/pandas-dev/pandas/issues/33544 result = DataFrame({"foo": [datetime(2000, 1, 1)]}) - result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) - expected = DataFrame( - {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object - ) - tm.assert_frame_equal(result, expected) + if using_array_manager: + # TODO(ArrayManager) this should give a better error message + with pytest.raises(TypeError, match="tz-naive and tz-aware"): + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + else: + result.at[0, "foo"] = datetime(2000, 1, 2, tzinfo=timezone.utc) + expected = DataFrame( + {"foo": [datetime(2000, 1, 2, tzinfo=timezone.utc)]}, dtype=object + ) + tm.assert_frame_equal(result, expected) class TestAtSetItem: diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 1ac2a16660f93..8d91ee3b6acd5 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -163,24 +163,28 @@ def test_detect_chained_assignment(self): tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow - 
def test_detect_chained_assignment_raises(self): + def test_detect_chained_assignment_raises(self, using_array_manager): # test with the chaining df = DataFrame( { "A": Series(range(2), dtype="int64"), - "B": np.array(np.arange(2, 4), dtype=np.float64), + "B": np.array(np.arange(1, 4, 2), dtype=np.float64), } ) assert df._is_copy is None - with pytest.raises(com.SettingWithCopyError, match=msg): - df["A"][0] = -5 - - with pytest.raises(com.SettingWithCopyError, match=msg): - df["A"][1] = np.nan + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["A"][0] = -5 - assert df["A"]._is_copy is None + assert df["A"]._is_copy is None + else: + df["A"][0] = -5 + df["A"][1] = -6 + expected = DataFrame([[-5, 1], [-6, 3]], columns=list("AB")) + expected["B"] = expected["B"].astype("float64") + tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow def test_detect_chained_assignment_fails(self): @@ -213,18 +217,22 @@ def test_detect_chained_assignment_doc_example(self): df[indexer]["c"] = 42 @pytest.mark.arm_slow - def test_detect_chained_assignment_object_dtype(self): + def test_detect_chained_assignment_object_dtype(self, using_array_manager): expected = DataFrame({"A": [111, "bbb", "ccc"], "B": [1, 2, 3]}) df = DataFrame({"A": ["aaa", "bbb", "ccc"], "B": [1, 2, 3]}) - with pytest.raises(com.SettingWithCopyError, match=msg): + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["A"][0] = 111 + + df.loc[0, "A"] = 111 + else: df["A"][0] = 111 with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[0]["A"] = 111 - df.loc[0, "A"] = 111 tm.assert_frame_equal(df, expected) @pytest.mark.arm_slow @@ -273,7 +281,7 @@ def test_detect_chained_assignment_implicit_take(self): df["letters"] = df["letters"].apply(str.lower) @pytest.mark.arm_slow - def test_detect_chained_assignment_implicit_take2(self): + def test_detect_chained_assignment_implicit_take2(self, using_array_manager): # Implicitly take 
2 df = random_text(100000) @@ -284,10 +292,12 @@ def test_detect_chained_assignment_implicit_take2(self): df.loc[:, "letters"] = df["letters"].apply(str.lower) # Should be ok even though it's a copy! - assert df._is_copy is None + if not using_array_manager: + assert df._is_copy is None df["letters"] = df["letters"].apply(str.lower) - assert df._is_copy is None + if not using_array_manager: + assert df._is_copy is None @pytest.mark.arm_slow def test_detect_chained_assignment_str(self): @@ -341,7 +351,7 @@ def test_detect_chained_assignment_undefined_column(self): df.iloc[0:5]["group"] = "a" @pytest.mark.arm_slow - def test_detect_chained_assignment_changing_dtype(self): + def test_detect_chained_assignment_changing_dtype(self, using_array_manager): # Mixed type setting but same dtype & changing dtype df = DataFrame( @@ -359,8 +369,9 @@ def test_detect_chained_assignment_changing_dtype(self): with pytest.raises(com.SettingWithCopyError, match=msg): df.loc[2]["C"] = "foo" - with pytest.raises(com.SettingWithCopyError, match=msg): - df["C"][2] = "foo" + if not using_array_manager: + with pytest.raises(com.SettingWithCopyError, match=msg): + df["C"][2] = "foo" def test_setting_with_copy_bug(self): diff --git a/pandas/tests/indexing/test_datetime.py b/pandas/tests/indexing/test_datetime.py index 44a5e2ae6d9e9..eb1c2139d02ef 100644 --- a/pandas/tests/indexing/test_datetime.py +++ b/pandas/tests/indexing/test_datetime.py @@ -7,7 +7,7 @@ class TestDatetimeIndex: - def test_indexing_with_datetime_tz(self): + def test_indexing_with_datetime_tz(self, using_array_manager): # GH#8260 # support datetime64 with tz @@ -65,10 +65,14 @@ def test_indexing_with_datetime_tz(self): # trying to set a single element on a part of a different timezone # this converts to object df2 = df.copy() - df2.loc[df2.new_col == "new", "time"] = v - - expected = Series([v[0], df.loc[1, "time"]], name="time") - tm.assert_series_equal(df2.time, expected) + if using_array_manager: + with 
pytest.raises(ValueError, match="Timezones don't match"): + df2.loc[df2.new_col == "new", "time"] = v + else: + df2.loc[df2.new_col == "new", "time"] = v + + expected = Series([v[0], df.loc[1, "time"]], name="time") + tm.assert_series_equal(df2.time, expected) v = df.loc[df.new_col == "new", "time"] + pd.Timedelta("1s") df.loc[df.new_col == "new", "time"] = v diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 1668123e782ff..f8a8934ec5ebf 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -7,6 +7,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import ( Categorical, CategoricalDtype, @@ -77,7 +79,7 @@ class TestiLocBaseIndependent: ], ) @pytest.mark.parametrize("indexer", [tm.loc, tm.iloc]) - def test_iloc_setitem_fullcol_categorical(self, indexer, key): + def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manager): frame = DataFrame({0: range(3)}, dtype=object) cat = Categorical(["alpha", "beta", "gamma"]) @@ -85,8 +87,10 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key): # NB: pending GH#38896, the expected likely should become # expected= DataFrame({"A": cat.astype(object)}) # and should remain a view on the original values - - assert frame._mgr.blocks[0]._can_hold_element(cat) + if using_array_manager: + expected = expected.astype(object) + else: + assert frame._mgr.blocks[0]._can_hold_element(cat) df = frame.copy() orig_vals = df.values @@ -97,7 +101,7 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key): tm.assert_frame_equal(df, expected) # TODO: this inconsistency is likely undesired GH#39986 - if overwrite: + if overwrite and not using_array_manager: # check that we overwrote underlying tm.assert_numpy_array_equal(orig_vals, df.values) @@ -110,12 +114,15 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key): assert cat[0] != "gamma" @pytest.mark.parametrize("box", 
[pd_array, Series]) - def test_iloc_setitem_ea_inplace(self, frame_or_series, box): + def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_array_manager): # GH#38952 Case with not setting a full column # IntegerArray without NAs arr = pd_array([1, 2, 3, 4]) obj = frame_or_series(arr.to_numpy("i8")) - values = obj.values + if frame_or_series is Series or not using_array_manager: + values = obj.values + else: + values = obj[0].values obj.iloc[:2] = box(arr[2:]) expected = frame_or_series(np.array([3, 4, 3, 4], dtype="i8")) @@ -125,7 +132,10 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box): if frame_or_series is Series: assert obj.values is values else: - assert obj.values.base is values.base and values.base is not None + if using_array_manager: + assert obj[0].values is values + else: + assert obj.values.base is values.base and values.base is not None def test_is_scalar_access(self): # GH#32085 index with duplicates doesnt matter for _is_scalar_access @@ -497,13 +507,16 @@ def test_iloc_setitem_dups(self): df.iloc[[1, 0], [0, 1]] = df.iloc[[1, 0], [0, 1]].reset_index(drop=True) tm.assert_frame_equal(df, expected) - def test_iloc_setitem_frame_duplicate_columns_multiple_blocks(self): + def test_iloc_setitem_frame_duplicate_columns_multiple_blocks( + self, using_array_manager + ): # Same as the "assign back to self" check in test_iloc_setitem_dups # but on a DataFrame with multiple blocks df = DataFrame([[0, 1], [2, 3]], columns=["B", "B"]) df.iloc[:, 0] = df.iloc[:, 0].astype("f8") - assert len(df._mgr.blocks) == 2 + if not using_array_manager: + assert len(df._mgr.blocks) == 2 expected = df.copy() # assign back to self @@ -593,6 +606,7 @@ def test_iloc_getitem_labelled_frame(self): with pytest.raises(ValueError, match=msg): df.iloc["j", "D"] + @td.skip_array_manager_not_yet_implemented # TODO(ArrayManager) quantile/describe def test_iloc_getitem_doc_issue(self): # multi axis slicing issue with single block @@ -686,7 +700,7 @@ def 
test_iloc_setitem_list_of_lists(self): tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) - @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + @pytest.mark.parametrize("value", [[10], np.array([10])]) def test_iloc_setitem_with_scalar_index(self, indexer, value): # GH #19474 # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated @@ -696,6 +710,38 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value): df.iloc[0, indexer] = value result = df.iloc[0, 0] + assert is_scalar(result) and result == 10 + + @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_iloc_setitem_with_scalar_index_upcast( + self, indexer, value, using_array_manager + ): + # GH #19474 + # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". + + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + if using_array_manager: + with pytest.raises(ValueError, match="invalid literal"): + df.iloc[0, indexer] = value + else: + df.iloc[0, indexer] = value + result = df.iloc[0, 0] + + assert is_scalar(result) and result == "Z" + + @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])]) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_iloc_setitem_with_scalar_index_object(self, indexer, value): + # GH #19474 + # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". 
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"], dtype=object) + df.iloc[0, indexer] = value + result = df.iloc[0, 0] + assert is_scalar(result) and result == "Z" def test_iloc_mask(self): @@ -948,6 +994,9 @@ def test_iloc_getitem_readonly_key(self): expected = df["data"].loc[[1, 3, 6]] tm.assert_series_equal(result, expected) + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented def test_iloc_assign_series_to_df_cell(self): # GH 37593 df = DataFrame(columns=["a"], index=[0]) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index dcd073681cecf..aa58d7f783232 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -7,6 +7,8 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype import pandas as pd @@ -135,18 +137,25 @@ def test_inf_upcast(self): expected = pd.Float64Index([0, 1, np.inf]) tm.assert_index_equal(result, expected) - def test_setitem_dtype_upcast(self): + def test_setitem_dtype_upcast(self, using_array_manager): # GH3216 df = DataFrame([{"a": 1}, {"a": 3, "b": 2}]) df["c"] = np.nan assert df["c"].dtype == np.float64 - df.loc[0, "c"] = "foo" - expected = DataFrame( - [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] - ) - tm.assert_frame_equal(df, expected) + if using_array_manager: + with pytest.raises(ValueError, match="could not convert string to float"): + df.loc[0, "c"] = "foo" + # TODO(ArrayManager) also update the other cases below (depends on the + # exact setitem behaviour we want for integer/floats) + return + else: + df.loc[0, "c"] = "foo" + expected = DataFrame( + [{"a": 1, "b": np.nan, "c": "foo"}, {"a": 3, "b": 2, "c": np.nan}] + ) + tm.assert_frame_equal(df, expected) # GH10280 df = DataFrame( @@ -394,6 +403,8 @@ def test_set_index_nan(self): ) 
tm.assert_frame_equal(result, df) + # TODO(ArrayManager) update for setitem upcasting changes (pending discussions) + @td.skip_array_manager_not_yet_implemented def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df @@ -465,6 +476,9 @@ def test_multi_assign(self): df.loc[df["A"] == 0, ["A", "B"]] = df["D"] tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) setting single item with an iterable doesn't work yet + # in the "split" path + @td.skip_array_manager_not_yet_implemented def test_setitem_list(self): # GH 6043 @@ -531,6 +545,8 @@ def test_string_slice(self): with pytest.raises(KeyError, match="'2011'"): df.loc["2011", 0] + # TODO(ArrayManager) update for setitem upcasting changes (pending discussions) + @td.skip_array_manager_not_yet_implemented def test_astype_assignment(self): # GH4312 (iloc) @@ -852,36 +868,52 @@ class TestDataframeNoneCoercion: ] @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) - def test_coercion_with_loc(self, expected): + def test_coercion_with_loc(self, expected, using_array_manager): start_data, expected_result = expected start_dataframe = DataFrame({"foo": start_data}) + if using_array_manager and start_dataframe["foo"].dtype == "int64": + with pytest.raises(TypeError, match=""): + start_dataframe.loc[0, ["foo"]] = None + return start_dataframe.loc[0, ["foo"]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) - def test_coercion_with_setitem_and_dataframe(self, expected): + def test_coercion_with_setitem_and_dataframe(self, expected, using_array_manager): start_data, expected_result = expected start_dataframe = DataFrame({"foo": start_data}) + if using_array_manager and start_dataframe["foo"].dtype == "int64": + with pytest.raises(TypeError, match=""): + start_dataframe[ + start_dataframe["foo"] == start_dataframe["foo"][0] + ] = None + return 
start_dataframe[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) @pytest.mark.parametrize("expected", EXPECTED_SINGLE_ROW_RESULTS) - def test_none_coercion_loc_and_dataframe(self, expected): + def test_none_coercion_loc_and_dataframe(self, expected, using_array_manager): start_data, expected_result = expected start_dataframe = DataFrame({"foo": start_data}) + if using_array_manager and start_dataframe["foo"].dtype == "int64": + with pytest.raises(TypeError, match=""): + start_dataframe.loc[ + start_dataframe["foo"] == start_dataframe["foo"][0] + ] = None + return start_dataframe.loc[start_dataframe["foo"] == start_dataframe["foo"][0]] = None expected_dataframe = DataFrame({"foo": expected_result}) tm.assert_frame_equal(start_dataframe, expected_dataframe) - def test_none_coercion_mixed_dtypes(self): + def test_none_coercion_mixed_dtypes(self, using_array_manager): start_dataframe = DataFrame( { "a": [1, 2, 3], @@ -890,6 +922,10 @@ def test_none_coercion_mixed_dtypes(self): "d": ["a", "b", "c"], } ) + if using_array_manager: + with pytest.raises(TypeError, match=""): + start_dataframe.iloc[0] = None + return start_dataframe.iloc[0] = None exp = DataFrame( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 1cd352e4e0899..1eaf1e1dfecd1 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -404,6 +404,8 @@ def frame_for_consistency(self): } ) + # TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency(self, frame_for_consistency): # GH 6149 # coerce similarly for setitem and loc when rows have a null-slice @@ -425,6 +427,8 @@ def test_loc_setitem_consistency(self, frame_for_consistency): df.loc[:, "date"] = np.array([0, 0, 0, 0, 0], dtype=np.int64) tm.assert_frame_equal(df, expected) + # 
TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): # GH 6149 # coerce similarly for setitem and loc when rows have a null-slice @@ -439,6 +443,8 @@ def test_loc_setitem_consistency_dt64_to_str(self, frame_for_consistency): df.loc[:, "date"] = "foo" tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): # GH 6149 # coerce similarly for setitem and loc when rows have a null-slice @@ -452,6 +458,8 @@ def test_loc_setitem_consistency_dt64_to_float(self, frame_for_consistency): df.loc[:, "date"] = 1.0 tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency_single_row(self): # GH 15494 # setting on frame with single row @@ -460,6 +468,8 @@ def test_loc_setitem_consistency_single_row(self): expected = DataFrame({"date": Series(["string"])}) tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency_empty(self): # empty (essentially noops) expected = DataFrame(columns=["x", "y"]) @@ -472,6 +482,8 @@ def test_loc_setitem_consistency_empty(self): df["x"] = 1 tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) rewrite test to assert that it raises + @td.skip_array_manager_invalid_test def test_loc_setitem_consistency_slice_column_len(self): # .loc[:,column] setting with slice == len of the column # GH10408 @@ -547,7 +559,7 @@ def test_loc_modify_datetime(self): tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame(self): + def test_loc_setitem_frame(self, using_array_manager): df = DataFrame(np.random.randn(4, 4), index=list("abcd"), columns=list("ABCD")) result = 
df.iloc[0, 0] @@ -571,6 +583,8 @@ def test_loc_setitem_frame(self): expected = DataFrame({"A": Series([1, 2, 3], index=[4, 3, 5])}).reindex( index=[3, 5, 4] ) + if using_array_manager: + expected = expected.astype(object) tm.assert_frame_equal(df, expected) # GH 6252 @@ -592,6 +606,8 @@ def test_loc_setitem_frame(self): expected = DataFrame( {"A": Series(val1, index=keys1), "B": Series(val2, index=keys2)} ).reindex(index=index) + if using_array_manager: + expected = expected.astype(float) tm.assert_frame_equal(df, expected) # GH 8669 @@ -653,7 +669,7 @@ def test_loc_setitem_frame_multiples(self): @pytest.mark.parametrize( "indexer", [["A"], slice(None, "A", None), np.array(["A"])] ) - @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + @pytest.mark.parametrize("value", [[10], np.array([10])]) def test_loc_setitem_with_scalar_index(self, indexer, value): # GH #19474 # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated @@ -663,6 +679,44 @@ def test_loc_setitem_with_scalar_index(self, indexer, value): df.loc[0, indexer] = value result = df.loc[0, "A"] + assert is_scalar(result) and result == 10 + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "A", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_setitem_with_scalar_index_upcast( + self, indexer, value, using_array_manager + ): + # GH #19474 + # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". 
+ + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]) + if using_array_manager: + with pytest.raises(ValueError, match="invalid literal"): + df.loc[0, indexer] = value + else: + df.loc[0, indexer] = value + result = df.loc[0, "A"] + + assert is_scalar(result) and result == "Z" + + @pytest.mark.parametrize( + "indexer", [["A"], slice(None, "A", None), np.array(["A"])] + ) + @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])]) + def test_loc_setitem_with_scalar_index_object( + self, indexer, value, using_array_manager + ): + # GH #19474 + # assigning like "df.loc[0, ['A']] = ['Z']" should be evaluated + # elementwisely, not using "setter('A', ['Z'])". + + df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"], dtype=object) + df.loc[0, indexer] = value + result = df.loc[0, "A"] + assert is_scalar(result) and result == "Z" @pytest.mark.parametrize( @@ -977,6 +1031,9 @@ def test_loc_setitem_empty_append_single_value(self): df.loc[0, "x"] = expected.loc[0, "x"] tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) setting on zero dimension does not give correct + # error message for the "split" path (also for BlockManager actually) + @td.skip_array_manager_not_yet_implemented def test_loc_setitem_empty_append_raises(self): # GH6173, various appends to an empty dataframe @@ -1022,7 +1079,7 @@ def test_loc_reverse_assignment(self): tm.assert_series_equal(result, expected) - def test_loc_setitem_str_to_small_float_conversion_type(self): + def test_loc_setitem_str_to_small_float_conversion_type(self, using_array_manager): # GH#20388 np.random.seed(13) col_data = [str(np.random.random() * 1e-12) for _ in range(5)] @@ -1033,6 +1090,8 @@ def test_loc_setitem_str_to_small_float_conversion_type(self): # change the dtype of the elements from object to float one by one result.loc[result.index, "A"] = [float(x) for x in col_data] expected = DataFrame(col_data, columns=["A"], dtype=float) + if using_array_manager: + expected = expected.astype(object) 
tm.assert_frame_equal(result, expected) def test_loc_getitem_time_object(self, frame_or_series): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index ad2d7250d9d6c..541a0756d6047 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -7,13 +7,15 @@ import numpy as np import pytest +import pandas.util._test_decorators as td + import pandas as pd from pandas import DataFrame, Index, Period, Series, Timestamp, date_range, period_range import pandas._testing as tm class TestPartialSetting: - def test_partial_setting(self): + def test_partial_setting(self, using_array_manager): # GH2578, allow ix and friends to partially set @@ -96,6 +98,8 @@ def test_partial_setting(self): df = df_orig.copy() df["B"] = df["B"].astype(np.float64) df.loc[:, "B"] = df.loc[:, "A"] + if using_array_manager: + expected["B"] = expected["B"].astype("float64") tm.assert_frame_equal(df, expected) # single dtype frame, partial setting @@ -112,6 +116,9 @@ def test_partial_setting(self): df.loc[:, "C"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) concat with reindexing + @td.skip_array_manager_not_yet_implemented + def test_partial_setting2(self): # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame( @@ -138,6 +145,10 @@ def test_partial_setting(self): df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) + # TODO(ArrayManager) + # df.loc[0] = Series(1, index=range(4)) case creates float columns + # instead of object dtype + @td.skip_array_manager_not_yet_implemented def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes diff --git a/pandas/tests/indexing/test_scalar.py b/pandas/tests/indexing/test_scalar.py index ce48fd1e5c905..1f992256a5a8c 100644 --- a/pandas/tests/indexing/test_scalar.py +++ b/pandas/tests/indexing/test_scalar.py @@ -198,12 +198,16 @@ def
test_mixed_index_at_iat_loc_iloc_dataframe(self): with pytest.raises(KeyError, match="^3$"): df.loc[0, 3] - def test_iat_setter_incompatible_assignment(self): + def test_iat_setter_incompatible_assignment(self, using_array_manager): # GH 23236 result = DataFrame({"a": [0, 1], "b": [4, 5]}) - result.iat[0, 0] = None - expected = DataFrame({"a": [None, 1], "b": [4, 5]}) - tm.assert_frame_equal(result, expected) + if using_array_manager: + with pytest.raises(TypeError, match=""): + result.iat[0, 0] = None + else: + result.iat[0, 0] = None + expected = DataFrame({"a": [None, 1], "b": [4, 5]}) + tm.assert_frame_equal(result, expected) def test_getitem_zerodim_np_array(self): # GH24924