diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8c85c4e961d99..8db3ea6b1895e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5313,7 +5313,9 @@ def _replace_columnwise( target, value = mapping[ax[i]] newobj = ser.replace(target, value, regex=regex) - res.iloc[:, i] = newobj + # If we had unique columns, we could just do + # res[res.columns[i]] = newobj + res._iset_item_mgr(i, newobj._values) if inplace: return diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index fc2204724aceb..6c8ea2b9445ac 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -11,6 +11,7 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim +from pandas._typing import Shape from pandas.errors import ( AbstractMethodError, InvalidIndexError, @@ -634,12 +635,12 @@ def __call__(self, axis=None): new_self.axis = axis return new_self - def _get_setitem_indexer(self, key): + def _get_setitem_indexer(self, key, value): """ Convert a potentially-label-based key into a positional indexer. 
""" if self.name == "loc": - self._ensure_listlike_indexer(key) + self._ensure_listlike_indexer(key, value=value) if self.axis is not None: return self._convert_tuple(key) @@ -677,9 +678,11 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): if self.ndim != 2: return + pi = None if isinstance(key, tuple) and len(key) > 1: # key may be a tuple if we are .loc # if length of key is > 1 set key to column part + pi = key[0] key = key[column_axis] axis = column_axis @@ -693,9 +696,18 @@ def _ensure_listlike_indexer(self, key, axis=None, value=None): # GH#38148 keys = self.obj.columns.union(key, sort=False) - self.obj._mgr = self.obj._mgr.reindex_axis( - keys, axis=0, consolidate=False, only_slice=True - ) + if isinstance(value, ABCDataFrame) and com.is_null_slice(pi): + # We are setting obj.loc[:, new_keys] = newframe + # Setting these directly instead of reindexing keeps + # us from converting integer dtypes to floats + new_keys = keys.difference(self.obj.columns) + self.obj[new_keys] = value[new_keys] + + else: + + self.obj._mgr = self.obj._mgr.reindex_axis( + keys, axis=0, consolidate=False, only_slice=True + ) def __setitem__(self, key, value): if isinstance(key, tuple): @@ -703,7 +715,7 @@ def __setitem__(self, key, value): key = tuple(com.apply_if_callable(x, self.obj) for x in key) else: key = com.apply_if_callable(key, self.obj) - indexer = self._get_setitem_indexer(key) + indexer = self._get_setitem_indexer(key, value) self._has_valid_setitem_indexer(key) iloc = self if self.name == "iloc" else self.obj.iloc @@ -1271,9 +1283,10 @@ def _convert_to_indexer(self, key, axis: int): key = list(key) if com.is_bool_indexer(key): + # TODO: in this case should we do a .take on the value here? 
+ # test_loc_setitem_all_false_boolean_two_blocks key = check_bool_indexer(labels, key) - (inds,) = key.nonzero() - return inds + return key else: return self._get_listlike_indexer(key, axis)[1] else: @@ -1525,7 +1538,7 @@ def _convert_to_indexer(self, key, axis: int): """ return key - def _get_setitem_indexer(self, key): + def _get_setitem_indexer(self, key, value): # GH#32257 Fall through to let numpy do validation if is_iterator(key): return list(key) @@ -1547,32 +1560,6 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): """ info_axis = self.obj._info_axis_number - # maybe partial set - take_split_path = not self.obj._mgr.is_single_block - - # if there is only one block/type, still have to take split path - # unless the block is one-dimensional or it can hold the value - if ( - not take_split_path - and getattr(self.obj._mgr, "blocks", False) - and self.ndim > 1 - ): - # in case of dict, keys are indices - val = list(value.values()) if isinstance(value, dict) else value - blk = self.obj._mgr.blocks[0] - take_split_path = not blk._can_hold_element(val) - - # if we have any multi-indexes that have non-trivial slices - # (not null slices) then we must take the split path, xref - # GH 10360, GH 27841 - if isinstance(indexer, tuple) and len(indexer) == len(self.obj.axes): - for i, ax in zip(indexer, self.obj.axes): - if isinstance(ax, MultiIndex) and not ( - is_integer(i) or com.is_null_slice(i) - ): - take_split_path = True - break - if isinstance(indexer, tuple): nindexer = [] for i, idx in enumerate(indexer): @@ -1666,7 +1653,7 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): return # align and set the values - if take_split_path: + if self.ndim > 1: # We have to operate column-wise self._setitem_with_indexer_split_path(indexer, value, name) else: @@ -1679,23 +1666,65 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): # Above we only set take_split_path to True for 2D cases assert self.ndim == 2 + orig = indexer 
if not isinstance(indexer, tuple): indexer = _tuplify(self.ndim, indexer) if len(indexer) > self.ndim: raise IndexError("too many indices for array") if isinstance(indexer[0], np.ndarray) and indexer[0].ndim > 2: raise ValueError(r"Cannot set values with ndim > 2") - if (isinstance(value, ABCSeries) and name != "iloc") or isinstance(value, dict): from pandas import Series value = self._align_series(indexer, Series(value)) + info_idx = indexer[1] + pi = indexer[0] + if ( + isinstance(pi, ABCDataFrame) + and orig is pi + and hasattr(self.obj._mgr, "blocks") + and len(self.obj._mgr.blocks) == 1 + ): + # FIXME: kludge + return self._setitem_single_block(orig, value, name) + + from pandas.core.internals import ArrayManager + + if ( + com.is_null_slice(info_idx) + and is_scalar(value) + and not isinstance(pi, ABCDataFrame) + and not isinstance(self.obj._mgr, ArrayManager) + ): + # We can go directly through BlockManager.setitem without worrying + # about alignment. + # TODO: do we need to do some kind of copy_with_setting check? + self.obj._mgr = self.obj._mgr.setitem(indexer=indexer, value=value) + return + + if is_integer(info_idx) and not isinstance(self.obj._mgr, ArrayManager): + if is_integer(pi): + # We need to watch out for case where we are treating a listlike + # as a scalar, e.g. test_setitem_iloc_scalar_single for JSONArray + + mgr = self.obj._mgr + blkno = mgr.blknos[info_idx] + blkloc = mgr.blklocs[info_idx] + blk = mgr.blocks[blkno] + + if blk._can_hold_element(value): + # NB: we are assuming here that _can_hold_element is accurate + # TODO: do we need to do some kind of copy_with_setting check? 
+ self.obj._check_is_chained_assignment_possible() + blk.setitem_inplace((pi, blkloc), value) + self.obj._maybe_update_cacher(clear=True) + return + # Ensure we have something we can iterate over info_axis = indexer[1] ilocs = self._ensure_iterable_column_indexer(info_axis) - pi = indexer[0] lplane_indexer = length_of_indexer(pi, self.obj.index) # lplane_indexer gives the expected length of obj[indexer[0]] @@ -1711,7 +1740,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): elif len(ilocs) == 1 and lplane_indexer == len(value) and not is_scalar(pi): # We are setting multiple rows in a single column. - self._setitem_single_column(ilocs[0], value, pi) + self._setitem_iat_loc(ilocs[0], pi, value) elif len(ilocs) == 1 and 0 != lplane_indexer != len(value): # We are trying to set N values into M entries of a single @@ -1739,7 +1768,7 @@ def _setitem_with_indexer_split_path(self, indexer, value, name: str): elif len(ilocs) == len(value): # We are setting multiple columns in a single row. 
for loc, v in zip(ilocs, value): - self._setitem_single_column(loc, v, pi) + self._setitem_iat_loc(loc, pi, v) elif len(ilocs) == 1 and com.is_null_slice(pi) and len(self.obj) == 0: # This is a setitem-with-expansion, see @@ -1777,6 +1806,7 @@ def _setitem_with_indexer_2d_value(self, indexer, value): for i, loc in enumerate(ilocs): # setting with a list, re-coerces + # self._setitem_iat_loc(loc, pi, value[:, i].tolist()) self._setitem_single_column(loc, value[:, i].tolist(), pi) def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str): @@ -1793,7 +1823,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str if name == "iloc": for i, loc in enumerate(ilocs): val = value.iloc[:, i] - self._setitem_single_column(loc, val, pi) + self._setitem_iat_loc(loc, pi, val) elif not unique_cols and value.columns.equals(self.obj.columns): # We assume we are already aligned, see @@ -1810,12 +1840,21 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str else: val = np.nan - self._setitem_single_column(loc, val, pi) + self._setitem_iat_loc(loc, pi, val) elif not unique_cols: raise ValueError("Setting with non-unique columns is not allowed.") else: + # TODO: not totally clear why we are requiring this + # Need so that we raise in test_multiindex_setitem + self._align_frame(indexer[0], value) + + if com.is_bool_indexer(indexer[0]) and indexer[0].sum() == len(value): + # TODO: better place for this? 
+ pi = indexer[0].nonzero()[0] + sub_indexer[0] = pi + for loc in ilocs: item = self.obj.columns[loc] if item in value: @@ -1826,7 +1865,7 @@ def _setitem_with_indexer_frame_value(self, indexer, value: DataFrame, name: str else: val = np.nan - self._setitem_single_column(loc, val, pi) + self._setitem_iat_loc(loc, pi, val) def _setitem_single_column(self, loc: int, value, plane_indexer): """ @@ -1839,6 +1878,7 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): The indexer we use for setitem along axis=0. """ pi = plane_indexer + pi, value = mask_setitem_value(pi, value, (len(self.obj),)) ser = self.obj._ixs(loc, axis=1) @@ -1877,6 +1917,35 @@ def _setitem_single_column(self, loc: int, value, plane_indexer): # reset the sliced object if unique self.obj._iset_item(loc, ser) + def _setitem_iat_loc(self, loc: int, pi, value): + # TODO: likely a BM method? + from pandas.core.internals import ArrayManager + + if isinstance(self.obj._mgr, ArrayManager): + # TODO: implement this correctly for ArrayManager + return self._setitem_single_column(loc, value, pi) + + mgr = self.obj._mgr + blkno = mgr.blknos[loc] + blkloc = mgr.blklocs[loc] + blk = mgr.blocks[blkno] + assert blk.mgr_locs[blkloc] == loc + + if blk._can_hold_element(value): + # NB: we are assuming here that _can_hold_element is accurate + # TODO: do we need to do some kind of copy_with_setting check? + try: + self.obj._check_is_chained_assignment_possible() + blk.setitem_inplace((pi, blkloc), value) + self.obj._maybe_update_cacher(clear=True) + except ValueError: + if blk.is_extension: + # FIXME: kludge bc _can_hold_element is wrong for ExtensionBlock + return self._setitem_single_column(loc, value, pi) + raise + else: + self._setitem_single_column(loc, value, pi) + def _setitem_single_block(self, indexer, value, name: str): """ _setitem_with_indexer for the case when we have a single Block. 
@@ -2442,3 +2511,34 @@ def need_slice(obj: slice) -> bool: or obj.stop is not None or (obj.step is not None and obj.step != 1) ) + + +def mask_setitem_value(indexer, value, shape: Shape): + """ + Convert a boolean indexer to a positional indexer, masking `value` if necessary. + """ + if com.is_bool_indexer(indexer): + indexer = np.asarray(indexer).nonzero()[0] + if is_list_like(value) and len(value) == shape[0]: + if not is_array_like(value): + value = [value[n] for n in indexer] + else: + value = value[indexer] + + elif isinstance(indexer, tuple): + indexer = list(indexer) + for i, key in enumerate(indexer): + if com.is_bool_indexer(key): + new_key = np.asarray(key).nonzero()[0] + indexer[i] = new_key + + if is_list_like(value) and len(value) == shape[i]: + # FIXME: assuming value.ndim == 1 here? + # FIXME: assuming non-i tuple member is scalar? + if not is_array_like(value): + value = [value[n] for n in new_key] + else: + value = value[new_key] + + indexer = tuple(indexer) + return indexer, value diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3654f77825ab4..970b355f1424f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -38,6 +38,7 @@ from pandas.core.dtypes.cast import ( astype_array_safe, can_hold_element, + convert_scalar_for_putitemlike, find_common_type, infer_dtype_from, maybe_downcast_numeric, @@ -111,6 +112,7 @@ is_empty_indexer, is_scalar_indexer, ) +from pandas.core.indexing import mask_setitem_value import pandas.core.missing as missing if TYPE_CHECKING: @@ -923,6 +925,8 @@ def setitem(self, indexer, value): if transpose: values = values.T + indexer, value = mask_setitem_value(indexer, value, values.shape) + # length checking check_setitem_lengths(indexer, value, values) @@ -941,6 +945,34 @@ def setitem(self, indexer, value): return self + @final + def setitem_inplace(self, indexer, value) -> None: + """ + setitem but only inplace. 
+ + Notes + ----- + Assumes self is 2D and that indexer is a 2-tuple. + """ + if lib.is_scalar(value) and isinstance(self.dtype, np.dtype): + # Convert timedelta/datetime to timedelta64/datetime64 + value = convert_scalar_for_putitemlike(value, self.dtype) + + pi = indexer[0] + values = self.values + + indexer, value = mask_setitem_value(indexer, value, values.shape) + + if not isinstance(self, ExtensionBlock): + # includes DatetimeArray, TimedeltaArray + blkloc = indexer[1] + # error: Invalid index type "Tuple[Any, Any]" for "ExtensionArray"; + # expected type "Union[int, slice, ndarray]" + values[blkloc, pi] = value # type: ignore[index] + else: + # TODO(EA2D): special case not needed with 2D EAs + values[pi] = value + def putmask(self, mask, new) -> list[Block]: """ putmask the data to the block; it is possible that we may create a @@ -1854,6 +1886,7 @@ def convert( copy=copy, ) res_values = ensure_block_shape(res_values, self.ndim) + res_values = ensure_wrapped_if_datetimelike(res_values) return [self.make_block(res_values)] diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index d69709bf9d06c..6373aae6ac6fa 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -880,6 +880,7 @@ def take(self: T, indexer, axis: int = 1, verify: bool = True) -> T: BlockManager """ # We have 6 tests that get here with a slice + # TODO: should these be np.intp? 
indexer = ( np.arange(indexer.start, indexer.stop, indexer.step, dtype=np.intp) if isinstance(indexer, slice) diff --git a/pandas/tests/indexing/multiindex/test_setitem.py b/pandas/tests/indexing/multiindex/test_setitem.py index 2a12d690ff0bd..da2c1a9586de4 100644 --- a/pandas/tests/indexing/multiindex/test_setitem.py +++ b/pandas/tests/indexing/multiindex/test_setitem.py @@ -121,9 +121,6 @@ def test_setitem_multiindex3(self): expected=copy, ) - # TODO(ArrayManager) df.loc["bar"] *= 2 doesn't raise an error but results in - # all NaNs -> doesn't work in the "split" path (also for BlockManager actually) - @td.skip_array_manager_not_yet_implemented def test_multiindex_setitem(self): # GH 3738 diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index e088f1ce87a6a..5bedd87d1249c 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -79,18 +79,15 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage orig_vals = df.values indexer(df)[key, 0] = cat - overwrite = isinstance(key, slice) and key == slice(None) - - if overwrite or using_array_manager: + if not using_array_manager: + expected = DataFrame({0: cat.astype(object)}) + assert np.shares_memory(df.values, orig_vals) + else: # TODO(ArrayManager) we always overwrite because ArrayManager takes # the "split" path, which still overwrites # TODO: GH#39986 this probably shouldn't behave differently expected = DataFrame({0: cat}) assert not np.shares_memory(df.values, orig_vals) - else: - expected = DataFrame({0: cat}).astype(object) - if not using_array_manager: - assert np.shares_memory(df[0].values, orig_vals) tm.assert_frame_equal(df, expected) @@ -103,7 +100,10 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage df = frame.copy() orig_vals = df.values indexer(df)[key, 0] = cat - expected = DataFrame({0: cat, 1: range(3)}) + if using_array_manager: + expected = DataFrame({0: cat, 1: 
range(3)}) + else: + expected = DataFrame({0: cat.astype(object), 1: range(3)}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("box", [array, Series]) @@ -868,11 +868,17 @@ def test_series_indexing_zerodim_np_array(self): result = s.iloc[np.array(0)] assert result == 1 - @pytest.mark.xfail(reason="https://github.com/pandas-dev/pandas/issues/33457") - def test_iloc_setitem_categorical_updates_inplace(self): + def test_iloc_setitem_categorical_updates_inplace( + self, using_array_manager, request + ): # Mixed dtype ensures we go through take_split_path in setitem_with_indexer + if using_array_manager: + mark = pytest.mark.xfail( + reason="https://github.com/pandas-dev/pandas/issues/33457" + ) + request.node.add_marker(mark) cat = Categorical(["A", "B", "C"]) - df = DataFrame({1: cat, 2: [1, 2, 3]}) + df = DataFrame({1: cat, 2: [1, 2, 3]}, copy=False) # This should modify our original values in-place df.iloc[:, 0] = cat[::-1] @@ -1239,13 +1245,17 @@ def test_iloc_setitem_series_duplicate_columns(self): [("int64", "0", 0), ("float", "1.2", 1.2)], ) def test_iloc_setitem_dtypes_duplicate_columns( - self, dtypes, init_value, expected_value + self, dtypes, init_value, expected_value, using_array_manager, request ): # GH#22035 + if using_array_manager: + mark = pytest.mark.xfail(reason="incorrectly retains int64/float dtype") + request.node.add_marker(mark) + df = DataFrame([[init_value, "str", "str2"]], columns=["a", "b", "b"]) df.iloc[:, 0] = df.iloc[:, 0].astype(dtypes) expected_df = DataFrame( - [[expected_value, "str", "str2"]], columns=["a", "b", "b"] + [[expected_value, "str", "str2"]], columns=["a", "b", "b"], dtype=object ) tm.assert_frame_equal(df, expected_df) diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index 0f9612fa5c96c..90395e2312468 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -537,7 +537,7 @@ def test_string_slice_empty(self): with 
pytest.raises(KeyError, match="^0$"): df.loc["2011", 0] - def test_astype_assignment(self): + def test_astype_assignment(self, using_array_manager): # GH4312 (iloc) df_orig = DataFrame( @@ -549,6 +549,11 @@ def test_astype_assignment(self): expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + if not using_array_manager: + # TODO(ArrayManager): get behaviors to match + # original (object) array can hold new values, so setting is inplace + expected["A"] = expected["A"].astype(object) + expected["B"] = expected["B"].astype(object) tm.assert_frame_equal(df, expected) df = df_orig.copy() @@ -556,6 +561,11 @@ def test_astype_assignment(self): expected = DataFrame( [[1, 2, "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + if not using_array_manager: + # TODO(ArrayManager): get behaviors to match + # original (object) array can hold new values, so setting is inplace + expected["A"] = expected["A"].astype(object) + expected["B"] = expected["B"].astype(object) tm.assert_frame_equal(df, expected) # GH5702 (loc) @@ -564,6 +574,10 @@ def test_astype_assignment(self): expected = DataFrame( [[1, "2", "3", ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + if not using_array_manager: + # TODO(ArrayManager): get behaviors to match + # df["A"] can hold the RHS, so the assignment is inplace, remains object + expected["A"] = expected["A"].astype(object) tm.assert_frame_equal(df, expected) df = df_orig.copy() @@ -571,18 +585,34 @@ def test_astype_assignment(self): expected = DataFrame( [["1", 2, 3, ".4", 5, 6.0, "foo"]], columns=list("ABCDEFG") ) + if not using_array_manager: + # TODO(ArrayManager): get behaviors to match + # original (object) array can hold new values, so setting is inplace + expected["B"] = expected["B"].astype(object) + expected["C"] = expected["C"].astype(object) tm.assert_frame_equal(df, expected) - def test_astype_assignment_full_replacements(self): + def test_astype_assignment_full_replacements(self, using_array_manager): # 
full replacements / no nans - df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - df.iloc[:, 0] = df["A"].astype(np.int64) - expected = DataFrame({"A": [1, 2, 3, 4]}) + # the new values can all be held by the existing array, so the assignment + # is in-place + orig = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) + value = orig.astype(np.int64) + # expected = DataFrame({"A": [1, 2, 3, 4]}) + + df = orig.copy() + df.iloc[ + :, 0 + ] = value # <- not yet, bc value is a DataFrame; would work with value["A"] + if using_array_manager: + # TODO(ArrayManager): get behaviors to match + expected = DataFrame({"A": [1, 2, 3, 4]}) + else: + expected = orig tm.assert_frame_equal(df, expected) - df = DataFrame({"A": [1.0, 2.0, 3.0, 4.0]}) - df.loc[:, "A"] = df["A"].astype(np.int64) - expected = DataFrame({"A": [1, 2, 3, 4]}) + df = orig.copy() + df.loc[:, "A"] = value tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("indexer", [tm.getitem, tm.loc]) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 2a9ee81b7a23a..e97c5efd2b5b5 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -332,20 +332,32 @@ def test_loc_setitem_slice(self): expected = DataFrame({"a": [0, 1, 1], "b": [100, 200, 300]}, dtype="uint64") tm.assert_frame_equal(df2, expected) - def test_loc_setitem_dtype(self): + def test_loc_setitem_dtype(self, using_array_manager): # GH31340 df = DataFrame({"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}) + orig = df.copy() cols = ["a", "b", "c"] + df.loc[:, cols] = df.loc[:, cols].astype("float32") - expected = DataFrame( - { - "id": ["A"], - "a": np.array([1.2], dtype="float32"), - "b": np.array([0.0], dtype="float32"), - "c": np.array([-2.5], dtype="float32"), - } - ) # id is inferred as object + if using_array_manager: + # TODO(ArrayManager): get behaviors to match + expected = DataFrame( + {"id": ["A"], "a": [1.2], "b": [0.0], "c": [-2.5]}, dtype="float32" + ) # id is inferred as object + else: + # 
the float32 data (for columns "b" and "c") can be held in the existing + # float64 columns losslessly, so we keep the original underlying arrays + # and our dtypes are not changed. + expected = orig + # expected = DataFrame( + # { + # "id": ["A"], + # "a": np.array([1.2], dtype="float32"), + # "b": np.array([0.0], dtype="float32"), + # "c": np.array([-2.5], dtype="float32"), + # } + # ) # id is inferred as object tm.assert_frame_equal(df, expected) @@ -676,22 +688,31 @@ def test_loc_setitem_frame_with_reindex(self, using_array_manager): expected = DataFrame({"A": ser}) tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_reindex_mixed(self): + def test_loc_setitem_frame_with_reindex_mixed(self, using_array_manager, request): # GH#40480 + if using_array_manager: + mark = pytest.mark.xfail(reason="df.A stays int64") + request.node.add_marker(mark) + df = DataFrame(index=[3, 5, 4], columns=["A", "B"], dtype=float) df["B"] = "string" df.loc[[4, 3, 5], "A"] = np.array([1, 2, 3], dtype="int64") - ser = Series([2, 3, 1], index=[3, 5, 4], dtype="int64") + # setting int64 array into float64 column successfully casts so is inplace + ser = Series([2.0, 3.0, 1.0], index=[3, 5, 4], dtype="float64") expected = DataFrame({"A": ser}) expected["B"] = "string" tm.assert_frame_equal(df, expected) - def test_loc_setitem_frame_with_inverted_slice(self): + def test_loc_setitem_frame_with_inverted_slice(self, using_array_manager, request): # GH#40480 + if using_array_manager: + mark = pytest.mark.xfail(reason="df.A stays int64") + request.node.add_marker(mark) df = DataFrame(index=[1, 2, 3], columns=["A", "B"], dtype=float) df["B"] = "string" df.loc[slice(3, 0, -1), "A"] = np.array([1, 2, 3], dtype="int64") - expected = DataFrame({"A": [3, 2, 1], "B": "string"}, index=[1, 2, 3]) + # setting int64 array into float64 column successfully casts so is inplace + expected = DataFrame({"A": [3.0, 2.0, 1.0], "B": "string"}, index=[1, 2, 3]) tm.assert_frame_equal(df, expected) # 
TODO(ArrayManager) "split" path overwrites column and therefore don't take @@ -1363,7 +1384,7 @@ def test_loc_setitem_categorical_values_partial_column_slice(self): df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) tm.assert_frame_equal(df, exp) - def test_loc_setitem_single_row_categorical(self): + def test_loc_setitem_single_row_categorical(self, using_array_manager): # GH#25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) categories = Categorical(df["Alpha"], categories=["a", "b", "c"]) @@ -1371,6 +1392,9 @@ def test_loc_setitem_single_row_categorical(self): result = df["Alpha"] expected = Series(categories, index=df.index, name="Alpha") + if not using_array_manager: + # TODO(ArrayManager): get behavior to match + expected = expected.astype(object) tm.assert_series_equal(result, expected) def test_loc_setitem_datetime_coercion(self): diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index 95a9fd227c685..826392c75965c 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -228,7 +228,7 @@ def test_partial_set_empty_frame_empty_consistencies(self): class TestPartialSetting: - def test_partial_setting(self): + def test_partial_setting(self, using_array_manager): # GH2578, allow ix and friends to partially set @@ -266,7 +266,7 @@ def test_partial_setting(self): with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 - def test_partial_setting_frame(self): + def test_partial_setting_frame(self, using_array_manager): df_orig = DataFrame( np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64" ) @@ -306,7 +306,12 @@ def test_partial_setting_frame(self): tm.assert_frame_equal(df, expected) # mixed dtype frame, overwrite - expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + if using_array_manager: + # TODO(ArrayManager): get behavior to match + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) + else: + # float64 can hold 
df.loc[:, "A"], so setting is inplace + expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})) df = df_orig.copy() df["B"] = df["B"].astype(np.float64) df.loc[:, "B"] = df.loc[:, "A"] diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 5477559262cb8..fa9b0bb572000 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -33,7 +33,7 @@ def setup_method(self, datapath): for k in range(df.shape[1]): col = df.iloc[:, k] if col.dtype == np.int64: - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + df[df.columns[k]] = df.iloc[:, k].astype(np.float64) self.data.append(df) @pytest.mark.slow