From 6450a2c8b51f51bfaedf0787f124802f15cbe47b Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 23:33:39 +0100 Subject: [PATCH 01/29] BUG: Bug in loc did not change dtype when complete columne was assigned --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/core/indexing.py | 6 ++++++ pandas/tests/frame/indexing/test_setitem.py | 7 +++++++ pandas/tests/series/indexing/test_setitem.py | 7 +++++++ 4 files changed, 21 insertions(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index e488ca52be8a0..df229ff560a0e 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -469,6 +469,7 @@ Indexing - Bug in :meth:`Index.where` incorrectly casting numeric values to strings (:issue:`37591`) - Bug in :meth:`Series.loc` and :meth:`DataFrame.loc` raises when numeric label was given for object :class:`Index` although label was in :class:`Index` (:issue:`26491`) - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) +- Bug in :meth:`DataFrame.loc` did not preserve dtype of new values, when complete columns was assigned (:issue:`20635`) Missing ^^^^^^^ diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c5e331a104726..ea41db3818a70 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -21,6 +21,7 @@ is_object_dtype, is_scalar, is_sequence, + is_dtype_equal, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries @@ -1550,6 +1551,11 @@ def _setitem_with_indexer(self, indexer, value): val = list(value.values()) if isinstance(value, dict) else value blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) + if isinstance(value, ABCDataFrame): + dtypes = [dtype for dtype in value.dtypes.unique()] + take_split_path = not ( + len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) + ) # if we 
have any multi-indexes that have non-trivial slices # (not null slices) then we must take the split path, xref diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index cb04a61b9e1cb..7144b075ecc9b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -298,6 +298,13 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + def test_setitem_complete_columns_different_dtypes(self): + # GH: 20635 + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int") + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 7e25e5200d610..35fe03c313739 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -201,6 +201,13 @@ def test_setitem_nan_casts(self): ser[:5] = np.nan tm.assert_series_equal(ser, expected) + def test_setitem_assigning_different_dtype(self): + # GH: 20635 + ser = Series(["3", "4"], name="A") + ser.loc[:] = ser.loc[:].astype("int") + expected = Series([3, 4], name="A") + tm.assert_series_equal(ser, expected) + class TestSetitemWithExpansion: def test_setitem_empty_series(self): From 1599c5cca3d22c975976d5b455078ef711640f44 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 23:35:52 +0100 Subject: [PATCH 02/29] Fix list comprehension issue --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ea41db3818a70..e59fedd1d468f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1552,7 
+1552,7 @@ def _setitem_with_indexer(self, indexer, value): blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) if isinstance(value, ABCDataFrame): - dtypes = [dtype for dtype in value.dtypes.unique()] + dtypes = list(value.dtypes.unique()) take_split_path = not ( len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) ) From 4d396127aba9758093b1a1e6015fcbfdcbe384aa Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 10 Nov 2020 23:36:41 +0100 Subject: [PATCH 03/29] Fix import order --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e59fedd1d468f..29fa3ee3ce21b 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -13,6 +13,7 @@ from pandas.core.dtypes.common import ( is_array_like, + is_dtype_equal, is_hashable, is_integer, is_iterator, @@ -21,7 +22,6 @@ is_object_dtype, is_scalar, is_sequence, - is_dtype_equal, ) from pandas.core.dtypes.concat import concat_compat from pandas.core.dtypes.generic import ABCDataFrame, ABCMultiIndex, ABCSeries From f9f37cb8b2151fdaf8c77d86dbe7bdcfdaadb5ea Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 12:39:03 +0100 Subject: [PATCH 04/29] Add test --- pandas/tests/frame/indexing/test_setitem.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 7144b075ecc9b..853beea47fa28 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -16,6 +16,7 @@ date_range, notna, period_range, + to_datetime, ) import pandas._testing as tm from pandas.core.arrays import SparseArray @@ -305,6 +306,22 @@ def test_setitem_complete_columns_different_dtypes(self): expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}) tm.assert_frame_equal(df, expected) + def test_setitem_conversion_to_datetime(self): + # GH: 20511 + df = 
DataFrame( + [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]] + ).add_prefix("date") + df.iloc[:, [0]] = df.iloc[:, [0]].apply( + lambda x: to_datetime(x, errors="coerce") + ) + expected = DataFrame( + { + "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], + "date1": ["2016-01-01", "2015-01-01"], + } + ) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): From 8d203f9f9fb412858f1b24a4d2b21635caa04a52 Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 16:58:28 +0100 Subject: [PATCH 05/29] Change dtype for 32 bit --- pandas/tests/frame/indexing/test_setitem.py | 4 ++-- pandas/tests/series/indexing/test_setitem.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 853beea47fa28..93b00b8b798cb 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -302,8 +302,8 @@ def test_iloc_setitem_bool_indexer(self, klass): def test_setitem_complete_columns_different_dtypes(self): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) - df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int") - expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}) + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64") + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}, dtype="int64") tm.assert_frame_equal(df, expected) def test_setitem_conversion_to_datetime(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index a8af0633e8cab..ed9c426ec6de4 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -213,8 +213,8 @@ def test_setitem_nan_casts(self): def test_setitem_assigning_different_dtype(self): # GH: 20635 ser = Series(["3", "4"], name="A") - ser.loc[:] 
= ser.loc[:].astype("int") - expected = Series([3, 4], name="A") + ser.loc[:] = ser.loc[:].astype("int64") + expected = Series([3, 4], name="A", dtype="int64") tm.assert_series_equal(ser, expected) From e35e009852bbb0a7acd11205d06629470d236a8d Mon Sep 17 00:00:00 2001 From: phofl Date: Wed, 11 Nov 2020 17:11:16 +0100 Subject: [PATCH 06/29] Implement fix and add new test --- pandas/core/indexing.py | 4 +++- pandas/tests/frame/indexing/test_setitem.py | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 29fa3ee3ce21b..b80422a3c13f1 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1551,7 +1551,9 @@ def _setitem_with_indexer(self, indexer, value): val = list(value.values()) if isinstance(value, dict) else value blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) - if isinstance(value, ABCDataFrame): + if isinstance(value, ABCSeries): + take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) + elif isinstance(value, ABCDataFrame): dtypes = list(value.dtypes.unique()) take_split_path = not ( len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 93b00b8b798cb..ed421ef8f9c3f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -306,6 +306,13 @@ def test_setitem_complete_columns_different_dtypes(self): expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}, dtype="int64") tm.assert_frame_equal(df, expected) + def test_setitem_single_column_as_series_different_dtype(self): + # GH: 20635 + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) + df.loc[:, "C"] = df.loc[:, "C"].astype("int64") + expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) + tm.assert_frame_equal(df, expected) + def test_setitem_conversion_to_datetime(self): # GH: 
20511 df = DataFrame( From 71fbf9f46c84dc22b5a3195538c30a955f15ca8f Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 12:06:46 +0100 Subject: [PATCH 07/29] Add new column --- pandas/tests/frame/indexing/test_setitem.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index ed421ef8f9c3f..7701e420213ee 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -299,11 +299,11 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) - def test_setitem_complete_columns_different_dtypes(self): + def test_setitem_complete_columns_different_dtypes(self, dtype): # GH: 20635 - df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64") - expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4]}, dtype="int64") + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}, dtype="int64") tm.assert_frame_equal(df, expected) def test_setitem_single_column_as_series_different_dtype(self): From babcd38c7ad2d26fc832f840b51171525160e883 Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 12:08:56 +0100 Subject: [PATCH 08/29] Run black --- pandas/tests/frame/indexing/test_setitem.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 7701e420213ee..0176cc8dec6b1 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -303,7 +303,9 @@ def test_setitem_complete_columns_different_dtypes(self, dtype): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": 
["3", "4"], "D": [1, 2]}) df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64") - expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}, dtype="int64") + expected = DataFrame( + {"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}, dtype="int64" + ) tm.assert_frame_equal(df, expected) def test_setitem_single_column_as_series_different_dtype(self): From caa6046ce52cf944a5db510fef844f08ea618ece Mon Sep 17 00:00:00 2001 From: phofl Date: Fri, 13 Nov 2020 15:26:06 +0100 Subject: [PATCH 09/29] Parametrize tests --- pandas/tests/frame/indexing/test_setitem.py | 8 ++++---- pandas/tests/series/indexing/test_setitem.py | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0176cc8dec6b1..0fa3eda9cd156 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -299,13 +299,13 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) def test_setitem_complete_columns_different_dtypes(self, dtype): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) - df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64") - expected = DataFrame( - {"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}, dtype="int64" - ) + df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64").astype(dtype) + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) + expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) tm.assert_frame_equal(df, expected) def test_setitem_single_column_as_series_different_dtype(self): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index ed9c426ec6de4..e328e09db4a59 100644 --- 
a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -210,11 +210,12 @@ def test_setitem_nan_casts(self): ser[:5] = np.nan tm.assert_series_equal(ser, expected) - def test_setitem_assigning_different_dtype(self): + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_setitem_assigning_different_dtype(self, dtype): # GH: 20635 ser = Series(["3", "4"], name="A") - ser.loc[:] = ser.loc[:].astype("int64") - expected = Series([3, 4], name="A", dtype="int64") + ser.loc[:] = ser.loc[:].astype("int64").astype(dtype) + expected = Series([3, 4], name="A", dtype=dtype) tm.assert_series_equal(ser, expected) From 3b98ee0cee150f807e1deb0b929a222b579c16db Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 13:49:11 +0100 Subject: [PATCH 10/29] Adress review comments --- pandas/core/indexing.py | 15 ++++++++------- pandas/tests/frame/indexing/test_setitem.py | 8 +++++--- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index b80422a3c13f1..6fd3af1770789 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1551,13 +1551,14 @@ def _setitem_with_indexer(self, indexer, value): val = list(value.values()) if isinstance(value, dict) else value blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) - if isinstance(value, ABCSeries): - take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) - elif isinstance(value, ABCDataFrame): - dtypes = list(value.dtypes.unique()) - take_split_path = not ( - len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) - ) + if not take_split_path: + if isinstance(value, ABCSeries): + take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) + elif isinstance(value, ABCDataFrame): + dtypes = list(value.dtypes.unique()) + take_split_path = not ( + len(dtypes) == 1 and is_dtype_equal(dtypes[0], blk.dtype) + ) # if we have any multi-indexes that have non-trivial slices # (not 
null slices) then we must take the split path, xref diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 0fa3eda9cd156..9822f4d960b84 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -303,7 +303,8 @@ def test_iloc_setitem_bool_indexer(self, klass): def test_setitem_complete_columns_different_dtypes(self, dtype): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) - df.loc[:, ["B", "C"]] = df.loc[:, ["B", "C"]].astype("int64").astype(dtype) + rhs = df.loc[:, ["B", "C"]].astype("int64").astype(dtype) + df.loc[:, ["B", "C"]] = rhs expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) tm.assert_frame_equal(df, expected) @@ -318,8 +319,9 @@ def test_setitem_single_column_as_series_different_dtype(self): def test_setitem_conversion_to_datetime(self): # GH: 20511 df = DataFrame( - [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]] - ).add_prefix("date") + [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], + columns=["date0", "date1"], + ) df.iloc[:, [0]] = df.iloc[:, [0]].apply( lambda x: to_datetime(x, errors="coerce") ) From f9b8a598817e55134e79409164a8ada986824526 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 20:35:11 +0100 Subject: [PATCH 11/29] Change whatsnew wording --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 09e77160aae2c..ddc0273046a03 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -477,7 +477,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a 
:class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) -- Bug in :meth:`DataFrame.loc` did not preserve dtype of new values, when complete columns was assigned (:issue:`20635`) +- Bug in :meth:`DataFrame.loc` not preserving dtype of new values, when complete columns was assigned (:issue:`20635`) Missing ^^^^^^^ From 4bef38e98b73a0406c1d13786daa968a858f26f6 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 14 Nov 2020 20:39:14 +0100 Subject: [PATCH 12/29] Simplify tests --- pandas/tests/frame/indexing/test_setitem.py | 4 ++-- pandas/tests/series/indexing/test_setitem.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9822f4d960b84..501649a97ad6f 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -303,7 +303,7 @@ def test_iloc_setitem_bool_indexer(self, klass): def test_setitem_complete_columns_different_dtypes(self, dtype): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) - rhs = df.loc[:, ["B", "C"]].astype("int64").astype(dtype) + rhs = df[["B", "C"]].astype("int64").astype(dtype) df.loc[:, ["B", "C"]] = rhs expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) @@ -312,7 +312,7 @@ def test_setitem_complete_columns_different_dtypes(self, dtype): def test_setitem_single_column_as_series_different_dtype(self): # GH: 20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) - df.loc[:, "C"] = df.loc[:, "C"].astype("int64") + df.loc[:, "C"] = df["C"].astype("int64") expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) tm.assert_frame_equal(df, expected) diff --git a/pandas/tests/series/indexing/test_setitem.py 
b/pandas/tests/series/indexing/test_setitem.py index e328e09db4a59..81fbcb2c8593f 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -214,7 +214,7 @@ def test_setitem_nan_casts(self): def test_setitem_assigning_different_dtype(self, dtype): # GH: 20635 ser = Series(["3", "4"], name="A") - ser.loc[:] = ser.loc[:].astype("int64").astype(dtype) + ser.loc[:] = ser.astype("int64").astype(dtype) expected = Series([3, 4], name="A", dtype=dtype) tm.assert_series_equal(ser, expected) From 27ea3e29da04b767912965ed61a37d75e41b098a Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:22:42 +0100 Subject: [PATCH 13/29] Fix related issue --- pandas/core/indexing.py | 5 +++++ pandas/tests/frame/indexing/test_setitem.py | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 6fd3af1770789..8a4e6830378bf 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,6 +8,8 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim + +from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc @@ -1552,6 +1554,9 @@ def _setitem_with_indexer(self, indexer, value): blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) if not take_split_path: + if is_scalar(value): + dtype = infer_dtype_from_scalar(value) + take_split_path = not is_dtype_equal(dtype, blk.dtype) if isinstance(value, ABCSeries): take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) elif isinstance(value, ABCDataFrame): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 501649a97ad6f..9704d4cfdacfc 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -333,6 +333,18 @@ def 
test_setitem_conversion_to_datetime(self): ) tm.assert_frame_equal(df, expected) + def test_setitem_scalar_dtype_change(self): + # GH#27583 + df = DataFrame({"a": [0.0], "b": [0.0]}) + df[["a", "b"]] = 0 + expected = DataFrame({"a": [0], "b": [0]}) + tm.assert_frame_equal(df, expected) + + df = DataFrame({"a": [0.0], "b": [0.0]}) + df["b"] = 0 + expected = DataFrame({"a": [0.0], "b": [0]}) + tm.assert_frame_equal(df, expected) + class TestDataFrameSetItemSlicing: def test_setitem_slice_position(self): From f94277b05ddaca302d0acee116972459548066dd Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:23:47 +0100 Subject: [PATCH 14/29] Add issues --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ddc0273046a03..ad63928b7f679 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -477,7 +477,7 @@ Indexing - Bug in :meth:`DataFrame.loc` returned requested key plus missing values when ``loc`` was applied to single level from :class:`MultiIndex` (:issue:`27104`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`CategoricalIndex` using a listlike indexer containing NA values (:issue:`37722`) - Bug in :meth:`DataFrame.xs` ignored ``droplevel=False`` for columns (:issue:`19056`) -- Bug in :meth:`DataFrame.loc` not preserving dtype of new values, when complete columns was assigned (:issue:`20635`) +- Bug in :meth:`DataFrame.loc` not preserving dtype of new values, when complete columns was assigned (:issue:`20635`, :issue:`20511`, :issue:`27583`) Missing ^^^^^^^ From d5f6150d4dbc7ccb92b8a80f4e6108cfbe998d0d Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:31:14 +0100 Subject: [PATCH 15/29] Move import --- pandas/core/indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 85c3da25ccd34..cd5db0fb7d051 
100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -9,10 +9,10 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim -from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc +from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( is_array_like, is_dtype_equal, @@ -1557,7 +1557,7 @@ def _setitem_with_indexer(self, indexer, value): if is_scalar(value): dtype = infer_dtype_from_scalar(value) take_split_path = not is_dtype_equal(dtype, blk.dtype) - if isinstance(value, ABCSeries): + elif isinstance(value, ABCSeries): take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) elif isinstance(value, ABCDataFrame): dtypes = list(value.dtypes.unique()) From 706dc6a801d13d3cadef0fc050978b62a784d6a8 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 21:47:48 +0100 Subject: [PATCH 16/29] Delete line --- pandas/core/indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cd5db0fb7d051..d35cfd2cbed3f 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -8,7 +8,6 @@ from pandas._libs.indexing import NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim - from pandas.errors import AbstractMethodError, InvalidIndexError from pandas.util._decorators import doc From 66d4b4e835fda2335f3bfa04b21d920e12a68639 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 15 Nov 2020 22:22:08 +0100 Subject: [PATCH 17/29] Fix return value --- pandas/core/indexing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index d35cfd2cbed3f..866e1508612b2 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1554,7 +1554,7 @@ def _setitem_with_indexer(self, indexer, value): take_split_path = not 
blk._can_hold_element(val) if not take_split_path: if is_scalar(value): - dtype = infer_dtype_from_scalar(value) + dtype, _ = infer_dtype_from_scalar(value) take_split_path = not is_dtype_equal(dtype, blk.dtype) elif isinstance(value, ABCSeries): take_split_path = not (is_dtype_equal(value.dtype, blk.dtype)) From fa250752efaa9566c0cf280acedaaeaeae933073 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 21:56:40 +0100 Subject: [PATCH 18/29] Move and rename tests --- pandas/tests/frame/indexing/test_setitem.py | 34 -------------------- pandas/tests/indexing/test_iloc.py | 18 +++++++++++ pandas/tests/indexing/test_loc.py | 25 ++++++++++++++ pandas/tests/series/indexing/test_setitem.py | 8 ----- 4 files changed, 43 insertions(+), 42 deletions(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 9704d4cfdacfc..3df41c5692ef5 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -299,40 +299,6 @@ def test_iloc_setitem_bool_indexer(self, klass): expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) tm.assert_frame_equal(df, expected) - @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_setitem_complete_columns_different_dtypes(self, dtype): - # GH: 20635 - df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) - rhs = df[["B", "C"]].astype("int64").astype(dtype) - df.loc[:, ["B", "C"]] = rhs - expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) - expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) - tm.assert_frame_equal(df, expected) - - def test_setitem_single_column_as_series_different_dtype(self): - # GH: 20635 - df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) - df.loc[:, "C"] = df["C"].astype("int64") - expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) - tm.assert_frame_equal(df, expected) - - def 
test_setitem_conversion_to_datetime(self): - # GH: 20511 - df = DataFrame( - [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], - columns=["date0", "date1"], - ) - df.iloc[:, [0]] = df.iloc[:, [0]].apply( - lambda x: to_datetime(x, errors="coerce") - ) - expected = DataFrame( - { - "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], - "date1": ["2016-01-01", "2015-01-01"], - } - ) - tm.assert_frame_equal(df, expected) - def test_setitem_scalar_dtype_change(self): # GH#27583 df = DataFrame({"a": [0.0], "b": [0.0]}) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 0360d7e01e62d..203d0476065f0 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -17,6 +17,7 @@ concat, date_range, isna, + to_datetime, ) import pandas._testing as tm from pandas.api.types import is_scalar @@ -801,6 +802,23 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 + def test_setitem_conversion_to_datetime(self): + # GH: 20511 + df = DataFrame( + [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], + columns=["date0", "date1"], + ) + df.iloc[:, [0]] = df.iloc[:, [0]].apply( + lambda x: to_datetime(x, errors="coerce") + ) + expected = DataFrame( + { + "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], + "date1": ["2016-01-01", "2015-01-01"], + } + ) + tm.assert_frame_equal(df, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index b45eddc3ac49c..493e76e3b2231 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1127,6 +1127,23 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): tm.assert_frame_equal(expected, df) + def test_setitem_null_slice_single_column_series_value_different_dtype(self): + # GH: 20635 
+ df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) + df.loc[:, "C"] = df["C"].astype("int64") + expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) + tm.assert_frame_equal(df, expected) + + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_setitem_null_slice_different_dtypes(self, dtype): + # GH: 20635 + df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) + rhs = df[["B", "C"]].astype("int64").astype(dtype) + df.loc[:, ["B", "C"]] = rhs + expected = DataFrame({"A": ["a", "b"], "B": [1, 2], "C": [3, 4], "D": [1, 2]}) + expected[["B", "C"]] = expected[["B", "C"]].astype(dtype) + tm.assert_frame_equal(df, expected) + class TestLocWithMultiIndex: @pytest.mark.parametrize( @@ -1995,6 +2012,14 @@ def test_loc_setitem_dt64tz_values(self): result = s2["a"] assert result == expected + @pytest.mark.parametrize("dtype", ["int64", "Int64"]) + def test_setitem_series_null_slice_different_dtypes(self, dtype): + # GH: 20635 + ser = Series(["3", "4"], name="A") + ser.loc[:] = ser.astype("int64").astype(dtype) + expected = Series([3, 4], name="A", dtype=dtype) + tm.assert_series_equal(ser, expected) + @pytest.mark.parametrize("value", [1, 1.5]) def test_loc_int_in_object_index(frame_or_series, value): diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index dcd3284fff384..4ed7510a1d9e1 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -219,14 +219,6 @@ def test_setitem_nan_casts(self): ser[:5] = np.nan tm.assert_series_equal(ser, expected) - @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_setitem_assigning_different_dtype(self, dtype): - # GH: 20635 - ser = Series(["3", "4"], name="A") - ser.loc[:] = ser.astype("int64").astype(dtype) - expected = Series([3, 4], name="A", dtype=dtype) - tm.assert_series_equal(ser, expected) - class TestSetitemWithExpansion: def 
test_setitem_empty_series(self): From 3c06ba627644730e9ce14fc87cee6d3c1de67db8 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:01:57 +0100 Subject: [PATCH 19/29] Fix failing test --- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index ff9646d45c0ac..1808c8ce17b7a 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1471,7 +1471,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[akey] = 0 result = result.loc[akey] expected = df.loc[akey].copy() - expected.loc[:] = 0 + expected.loc[:] = 0. tm.assert_frame_equal(result, expected) result = df.copy() @@ -1483,7 +1483,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[bkey] = 0 result = result.loc[bkey] expected = df.loc[bkey].copy() - expected.loc[:] = 0 + expected.loc[:] = 0. tm.assert_frame_equal(result, expected) result = df.copy() From 0f556c407ff03c8dbf7b1d227c638e8e13a295f8 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:15:16 +0100 Subject: [PATCH 20/29] Fix pre commit --- pandas/tests/frame/indexing/test_indexing.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 1808c8ce17b7a..c11db6238f310 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -1471,7 +1471,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[akey] = 0 result = result.loc[akey] expected = df.loc[akey].copy() - expected.loc[:] = 0. 
+ expected.loc[:] = 0.0 tm.assert_frame_equal(result, expected) result = df.copy() @@ -1483,7 +1483,7 @@ def test_at_time_between_time_datetimeindex(self): result.loc[bkey] = 0 result = result.loc[bkey] expected = df.loc[bkey].copy() - expected.loc[:] = 0. + expected.loc[:] = 0.0 tm.assert_frame_equal(result, expected) result = df.copy() From b759ac964a7b20d0e3f8de9cf29e89338af44173 Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 22:41:14 +0100 Subject: [PATCH 21/29] Remove import --- pandas/tests/frame/indexing/test_setitem.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 3df41c5692ef5..1c36a9ab0ca25 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -16,7 +16,6 @@ date_range, notna, period_range, - to_datetime, ) import pandas._testing as tm from pandas.core.arrays import SparseArray From a353930a593010f8855f5d323b36213e06ac367e Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:41:59 +0100 Subject: [PATCH 22/29] Fix test --- pandas/tests/indexing/multiindex/test_partial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/multiindex/test_partial.py b/pandas/tests/indexing/multiindex/test_partial.py index 538aa1d3a1164..1acab95157419 100644 --- a/pandas/tests/indexing/multiindex/test_partial.py +++ b/pandas/tests/indexing/multiindex/test_partial.py @@ -120,7 +120,7 @@ def test_partial_set(self, multiindex_year_month_day_dataframe_random_data): exp["A"].loc[2000, 4].values[:] = 1 tm.assert_frame_equal(df, exp) - df.loc[2000] = 5 + df.loc[2000] = 5.0 exp.loc[2000].values[:] = 5 tm.assert_frame_equal(df, exp) From d28e1e10fd0114340c11de5b3218d4dcbc48972e Mon Sep 17 00:00:00 2001 From: phofl Date: Tue, 17 Nov 2020 23:47:01 +0100 Subject: [PATCH 23/29] Add test --- pandas/tests/indexing/test_iloc.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff 
--git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 203d0476065f0..4acdcd856d22f 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -819,6 +819,15 @@ def test_setitem_conversion_to_datetime(self): ) tm.assert_frame_equal(df, expected) + def test_iloc_conversion_to_float_32_for_columns_list(self): + # GH#33198 + arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64) + df = DataFrame(arr) + df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32) + result = df.dtypes.value_counts() + expected = Series([10, 10], index=[np.float64, np.float32]) + tm.assert_series_equal(result, expected) + class TestILocErrors: # NB: this test should work for _any_ Series we can pass as From 1aa8522a6e0094b6611f35b21f7390f5d08a3ddc Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 21 Nov 2020 00:54:45 +0100 Subject: [PATCH 24/29] Adress review comments --- pandas/tests/indexing/test_iloc.py | 4 ++-- pandas/tests/indexing/test_loc.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 4acdcd856d22f..643e59ea4548a 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -802,8 +802,8 @@ def test_iloc_setitem_empty_frame_raises_with_3d_ndarray(self): with pytest.raises(ValueError, match=msg): obj.iloc[nd3] = 0 - def test_setitem_conversion_to_datetime(self): - # GH: 20511 + def test_iloc_setitem_conversion_to_datetime(self): + # GH#20511 df = DataFrame( [["2015-01-01", "2016-01-01"], ["2016-01-01", "2015-01-01"]], columns=["date0", "date1"], diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 3b0c6cf711a5c..cf8c042ef38bc 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1127,16 +1127,16 @@ def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): tm.assert_frame_equal(expected, df) 
- def test_setitem_null_slice_single_column_series_value_different_dtype(self): - # GH: 20635 + def test_loc_setitem_null_slice_single_column_series_value_different_dtype(self): + # GH#20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"]}) df.loc[:, "C"] = df["C"].astype("int64") expected = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": [3, 4]}) tm.assert_frame_equal(df, expected) @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_setitem_null_slice_different_dtypes(self, dtype): - # GH: 20635 + def test_loc_setitem_null_slice_different_dtypes(self, dtype): + # GH#20635 df = DataFrame({"A": ["a", "b"], "B": ["1", "2"], "C": ["3", "4"], "D": [1, 2]}) rhs = df[["B", "C"]].astype("int64").astype(dtype) df.loc[:, ["B", "C"]] = rhs From 1bc0d46e7dbe07eb4116267f2d09c5143898fc9a Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 21 Nov 2020 01:02:20 +0100 Subject: [PATCH 25/29] Fix test --- pandas/tests/indexing/test_iloc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 643e59ea4548a..e3278e4e4cc7d 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -825,7 +825,7 @@ def test_iloc_conversion_to_float_32_for_columns_list(self): df = DataFrame(arr) df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32) result = df.dtypes.value_counts() - expected = Series([10, 10], index=[np.float64, np.float32]) + expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")]) tm.assert_series_equal(result, expected) From 14fe5a8146b44c2d3689a123204fd66ea58b6213 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 21 Nov 2020 02:29:23 +0100 Subject: [PATCH 26/29] Move test --- pandas/tests/frame/indexing/test_setitem.py | 9 --------- pandas/tests/indexing/test_iloc.py | 12 +++++++++++- pandas/tests/indexing/test_loc.py | 4 ++-- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git 
a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 1c36a9ab0ca25..d8785b77b77d7 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -289,15 +289,6 @@ def test_setitem_periodindex(self): assert isinstance(rs.index, PeriodIndex) tm.assert_index_equal(rs.index, rng) - @pytest.mark.parametrize("klass", [list, np.array]) - def test_iloc_setitem_bool_indexer(self, klass): - # GH: 36741 - df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) - indexer = klass([True, False, False]) - df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 - expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) - def test_setitem_scalar_dtype_change(self): # GH#27583 df = DataFrame({"a": [0.0], "b": [0.0]}) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 2a6eae5914f1a..6b7bb72e80cfd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -14,6 +14,7 @@ Index, NaT, Series, + Timestamp, concat, date_range, isna, @@ -846,7 +847,7 @@ def test_iloc_setitem_conversion_to_datetime(self): ) expected = DataFrame( { - "date0": [to_datetime("2015-01-01"), to_datetime("2016-01-01")], + "date0": [Timestamp("2015-01-01"), Timestamp("2016-01-01")], "date1": ["2016-01-01", "2015-01-01"], } ) @@ -861,6 +862,15 @@ def test_iloc_conversion_to_float_32_for_columns_list(self): expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")]) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("klass", [list, np.array]) + def test_iloc_setitem_bool_indexer(self, klass): + # GH#36741 + df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) + indexer = klass([True, False, False]) + df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 + expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) + tm.assert_frame_equal(df, expected) + class 
TestILocErrors: # NB: this test should work for _any_ Series we can pass as diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index cf8c042ef38bc..61e7bdf73d781 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2018,8 +2018,8 @@ def test_loc_setitem_dt64tz_values(self): assert result == expected @pytest.mark.parametrize("dtype", ["int64", "Int64"]) - def test_setitem_series_null_slice_different_dtypes(self, dtype): - # GH: 20635 + def test_loc_setitem_series_null_slice_different_dtypes(self, dtype): + # GH#20635 ser = Series(["3", "4"], name="A") ser.loc[:] = ser.astype("int64").astype(dtype) expected = Series([3, 4], name="A", dtype=dtype) From 26b5d6ff2d42b210bbeceb54a8bb850126dcb5b0 Mon Sep 17 00:00:00 2001 From: phofl Date: Sat, 21 Nov 2020 02:38:02 +0100 Subject: [PATCH 27/29] Fix test --- pandas/tests/indexing/test_iloc.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 6b7bb72e80cfd..6e6a98b155ffd 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -857,20 +857,11 @@ def test_iloc_conversion_to_float_32_for_columns_list(self): # GH#33198 arr = np.random.randn(10 ** 2).reshape(5, 20).astype(np.float64) df = DataFrame(arr) - df.iloc[:, 10:] = df.iloc[:, 10:].astype(np.float32) + df.iloc[:, 11:] = df.iloc[:, 11:].astype(np.float32) result = df.dtypes.value_counts() - expected = Series([10, 10], index=[np.dtype("float32"), np.dtype("float64")]) + expected = Series([11, 9], index=[np.dtype("float64"), np.dtype("float32")]) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("klass", [list, np.array]) - def test_iloc_setitem_bool_indexer(self, klass): - # GH#36741 - df = DataFrame({"flag": ["x", "y", "z"], "value": [1, 3, 4]}) - indexer = klass([True, False, False]) - df.iloc[indexer, 1] = df.iloc[indexer, 1] * 2 - expected = 
DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]}) - tm.assert_frame_equal(df, expected) - class TestILocErrors: # NB: this test should work for _any_ Series we can pass as From e6e22f335e3f3fd674a641a9f18367b75e928728 Mon Sep 17 00:00:00 2001 From: phofl Date: Sun, 22 Nov 2020 21:12:40 +0100 Subject: [PATCH 28/29] Fix bug with series to cell --- pandas/core/indexing.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index c33696418d808..3bcb8ab39a455 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1548,7 +1548,16 @@ def _setitem_with_indexer(self, indexer, value, name="iloc"): blk = self.obj._mgr.blocks[0] take_split_path = not blk._can_hold_element(val) if not take_split_path: - if is_scalar(value): + if ( + isinstance(indexer, tuple) + and is_integer(indexer[0]) + and is_integer(indexer[1]) + and not is_scalar(value) + ): + # GH#37749 this is for listlikes to be treated as scalars, can + # not take split path here + pass + elif is_scalar(value): dtype, _ = infer_dtype_from_scalar(value) take_split_path = not is_dtype_equal(dtype, blk.dtype) elif isinstance(value, ABCSeries): From f97a252c7943337ce8f2e98864b82012956568fc Mon Sep 17 00:00:00 2001 From: phofl Date: Thu, 24 Dec 2020 00:55:46 +0100 Subject: [PATCH 29/29] Move whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index c3d896166fabe..92102f48092e6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -224,7 +224,7 @@ Indexing - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`) - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`) - Bug in :meth:`DataFrame.iloc.__setitem__` and :meth:`DataFrame.loc.__setitem__` 
with mixed dtypes when setting with a dictionary value (:issue:`38335`) -- +- Bug in :meth:`DataFrame.loc` not preserving dtype of new values, when complete columns were assigned (:issue:`20635`, :issue:`20511`, :issue:`27583`) - Missing