From 2d126dd0c5fd9768a772ffefede956dfff827667 Mon Sep 17 00:00:00 2001 From: Wilson Date: Sun, 30 Oct 2022 05:22:34 +0000 Subject: [PATCH 01/10] Fixes DataFrame.update crashes when NaT present. GH16713 --- pandas/core/frame.py | 4 +++ pandas/tests/frame/methods/test_update.py | 32 +++++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index fe6fda34a89ef..730aa1238f277 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8195,6 +8195,10 @@ def update( for col in self.columns: this = self[col]._values that = other[col]._values + + if all(isna(that)): + continue + if filter_func is not None: with np.errstate(all="ignore"): mask = ~filter_func(this) | isna(that) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index a35530100a425..17201b27b149d 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -166,3 +166,35 @@ def test_update_modify_view(self, using_copy_on_write): tm.assert_frame_equal(result_view, df2_orig) else: tm.assert_frame_equal(result_view, expected) + + def test_update_dt_column_with_NaT_create_column(self): + df = DataFrame( + { + "A": [1, None], + "B": [ + pd.NaT, + pd.to_datetime("2016-01-01"), + ], + } + ) + df2 = DataFrame({"A": [2, 3]}) + df.update(df2, overwrite=False) + expected = DataFrame( + {"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]} + ) + + tm.assert_frame_equal(df, expected) + + def test_update_dt_column_with_NaT_create_row(self): + + df = DataFrame({"A": [1, None], "B": [pd.to_datetime("2017-1-1"), pd.NaT]}) + + df2 = DataFrame({"A": [2], "B": [pd.to_datetime("2016-01-01")]}) + + df.update(df2, overwrite=False) + + expected = DataFrame( + {"A": [1, None], "B": [pd.to_datetime("2017-1-1"), pd.NaT]} + ) + + tm.assert_frame_equal(df, expected) From b4c792afa4ba2801e24bd26507082643067b5dad Mon Sep 17 00:00:00 2001 From: Wilson Date: Sun, 30 Oct 2022 05:24:39 +0000 Subject: [PATCH 02/10] Formatting --- pandas/tests/frame/methods/test_update.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 17201b27b149d..198c09072d2df 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -178,7 +178,9 @@ def test_update_dt_column_with_NaT_create_column(self): } ) df2 = DataFrame({"A": [2, 3]}) + df.update(df2, overwrite=False) + expected = DataFrame( {"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]} ) From c63d1cc55d9984e16f73e8db94c4ddbbda8b3721 Mon Sep 17 00:00:00 2001 From: Wilson Date: Thu, 3 Nov 2022 00:38:11 +0000 Subject: [PATCH 03/10] No longer reindexing columns, skipping non-matching columns --- pandas/core/frame.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 730aa1238f277..690290ae2fb7c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8190,15 +8190,16 @@ def update( if not isinstance(other, DataFrame): other = DataFrame(other) - other = other.reindex_like(self) + # reindex rows, non-matching columns get skipped + other = other.reindex(self.index) for col in self.columns: + if col not in other.columns: + continue + this = self[col]._values that = other[col]._values - if all(isna(that)): - continue - if filter_func is not None: with np.errstate(all="ignore"): mask = ~filter_func(this) | isna(that) From 7bea232958aafae7e4c178907e2aa9adfa650801 Mon Sep 17 00:00:00 2001 From: Wilson Date: Thu, 3 Nov 2022 17:59:35 +0000 Subject: [PATCH 04/10] switching to using set intersection to find shared columns --- pandas/core/frame.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 690290ae2fb7c..b453abf30d625 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8193,10 +8193,9 @@ def update( # reindex rows, non-matching columns get skipped other = other.reindex(self.index) - for col in self.columns: - if col not in other.columns: - continue + shared_cols = set(self.columns) & set(other.columns) + for col in shared_cols: this = self[col]._values that = other[col]._values From 9ee432994180934400da13bbf3e1d905a0ba2ed4 Mon Sep 17 00:00:00 2001 From: Wilson Date: Thu, 3 Nov 2022 19:06:01 +0000 Subject: [PATCH 05/10] switching to using index intersection --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b453abf30d625..ade2b8abe0428 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8193,7 +8193,7 @@ def update( # reindex rows, non-matching columns get skipped other = other.reindex(self.index) - shared_cols = set(self.columns) & set(other.columns) + shared_cols = self.columns.intersection(other.columns) for col in shared_cols: this = self[col]._values From f19162900efc3199311c7f09cfa540b73dd9770b Mon Sep 17 00:00:00 2001 From: Wilson Date: Tue, 8 Nov 2022 22:41:39 +0000 Subject: [PATCH 06/10] Removing unnecessary variable creation --- pandas/core/frame.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ade2b8abe0428..ee0335cbfda41 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8190,12 +8190,9 @@ def update( if not isinstance(other, DataFrame): other = DataFrame(other) - # reindex rows, non-matching columns get skipped other = other.reindex(self.index) - shared_cols = self.columns.intersection(other.columns) - - for col in shared_cols: + for col in self.columns.intersection(other.columns): this = self[col]._values that = other[col]._values From 35fa0a801bd16c0630733a2c91a76b02ea33434e Mon Sep 17 00:00:00 2001 From: Wilson Date: Tue, 8 Nov 2022 22:43:38 +0000 Subject: [PATCH 07/10] Formatting and removing test --- pandas/tests/frame/methods/test_update.py | 28 ++--------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/pandas/tests/frame/methods/test_update.py b/pandas/tests/frame/methods/test_update.py index 198c09072d2df..40f87f1382625 100644 --- a/pandas/tests/frame/methods/test_update.py +++ b/pandas/tests/frame/methods/test_update.py @@ -168,35 +168,11 @@ def test_update_modify_view(self, using_copy_on_write): tm.assert_frame_equal(result_view, expected) def test_update_dt_column_with_NaT_create_column(self): - df = DataFrame( - { - "A": [1, None], - "B": [ - pd.NaT, - pd.to_datetime("2016-01-01"), - ], - } - ) + # GH#16713 + df = DataFrame({"A": [1, None], "B": [pd.NaT, pd.to_datetime("2016-01-01")]}) df2 = DataFrame({"A": [2, 3]}) - df.update(df2, overwrite=False) - expected = DataFrame( {"A": [1.0, 3.0], "B": [pd.NaT, pd.to_datetime("2016-01-01")]} ) - - tm.assert_frame_equal(df, expected) - - def test_update_dt_column_with_NaT_create_row(self): - - df = DataFrame({"A": [1, None], "B": [pd.to_datetime("2017-1-1"), pd.NaT]}) - - df2 = DataFrame({"A": [2], "B": [pd.to_datetime("2016-01-01")]}) - - df.update(df2, overwrite=False) - - expected = DataFrame( - {"A": [1, None], "B": [pd.to_datetime("2017-1-1"), pd.NaT]} - ) - tm.assert_frame_equal(df, expected) From ead244a766448d6498e7bcbdc78d682529ee7511 Mon Sep 17 00:00:00 2001 From: Wilson Date: Tue, 8 Nov 2022 23:09:48 +0000 Subject: [PATCH 08/10] Adding to whatsnew --- doc/source/whatsnew/v2.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 1f245b585df48..b2cfe5cadd256 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -376,6 +376,7 @@ Missing - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) - Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`) - Bug in :class:`NA` raising a ``TypeError`` instead of return :class:`NA` when performing a binary operation with a ``bytes`` object (:issue:`49108`) +- Bug in :meth:`Dataframe.update` raising ``TypeError`` when column has NaT values (:issue:`16713`) MultiIndex ^^^^^^^^^^ From bd07c5ac30d48d59a20d63f3760c79079079d622 Mon Sep 17 00:00:00 2001 From: Wilson Date: Sat, 12 Nov 2022 21:02:03 +0000 Subject: [PATCH 09/10] updating whatsnew message --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index b2cfe5cadd256..8d14e42a3d905 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -376,7 +376,7 @@ Missing - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) - Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`) - Bug in :class:`NA` raising a ``TypeError`` instead of return :class:`NA` when performing a binary operation with a ``bytes`` object (:issue:`49108`) -- Bug in :meth:`Dataframe.update` raising ``TypeError`` when column has NaT values (:issue:`16713`) +- Bug in :meth:`DataFrame.update` with overwrite=False raising ``TypeError`` when self has column with ``NaT`` values and column not present in other DataFrame (:issue:`16713`) MultiIndex ^^^^^^^^^^ From d8167ea053423a74880bb0a9dccbb4dffa2fef5b Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Tue, 15 Nov 2022 12:05:31 +0000 Subject: [PATCH 10/10] Update doc/source/whatsnew/v2.0.0.rst --- doc/source/whatsnew/v2.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 2740cba01bbf8..1ca513e8f5e6a 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -662,7 +662,7 @@ Missing - Bug in :meth:`Index.equals` raising ``TypeError`` when :class:`Index` consists of tuples that contain ``NA`` (:issue:`48446`) - Bug in :meth:`Series.map` caused incorrect result when data has NaNs and defaultdict mapping was used (:issue:`48813`) - Bug in :class:`NA` raising a ``TypeError`` instead of return :class:`NA` when performing a binary operation with a ``bytes`` object (:issue:`49108`) -- Bug in :meth:`DataFrame.update` with overwrite=False raising ``TypeError`` when self has column with ``NaT`` values and column not present in other DataFrame (:issue:`16713`) +- Bug in :meth:`DataFrame.update` with ``overwrite=False`` raising ``TypeError`` when ``self`` has column with ``NaT`` values and column not present in ``other`` (:issue:`16713`) MultiIndex ^^^^^^^^^^