From 0c1d126bfa316a985a6e38ddceb9c7a644561396 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Mon, 11 Jan 2021 10:05:06 +0200
Subject: [PATCH 01/13] ENH: add argument to preserve dtypes of common columns
 in combine_first

---
 pandas/core/frame.py                          | 20 ++++++++-
 .../tests/frame/methods/test_combine_first.py | 44 +++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e65e9302dd4d5..2d87bc325b9bb 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6425,7 +6425,7 @@ def combine(
         # convert_objects just in case
         return self._constructor(result, index=new_index, columns=new_columns)
 
-    def combine_first(self, other: DataFrame) -> DataFrame:
+    def combine_first(self, other: DataFrame, preserve_dtypes: bool = False) -> DataFrame:
         """
         Update null elements with value in the same location in `other`.
 
@@ -6438,6 +6438,11 @@ def combine_first(self, other: DataFrame) -> DataFrame:
         other : DataFrame
             Provided DataFrame to use to fill null values.
 
+        preserve_dtypes : bool, default False
+            try to preserve the column dtypes afetr combining
+
+            .. versionadded:: 1.2.1
+
         Returns
         -------
         DataFrame
@@ -6482,7 +6487,18 @@ def combiner(x, y):
 
             return expressions.where(mask, y_values, x_values)
 
-        return self.combine(other, combiner, overwrite=False)
+        combined = self.combine(other, combiner, overwrite=False)
+
+        if preserve_dtypes:
+            dtypes = {
+                col: find_common_type([self.dtypes[col], other.dtypes[col]])
+                for col in self.columns.intersection(other.columns)
+            }
+
+            if dtypes:
+                combined = combined.astype(dtypes)
+
+        return combined
 
     def update(
         self,
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 934ad9eb8213a..75bd5ac193916 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -24,6 +24,12 @@ def test_combine_first_mixed(self):
         combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
+        exp = DataFrame(
+            {"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6]
+        )
+        combined = f.combine_first(g, preserve_dtypes=True)
+        tm.assert_frame_equal(combined, exp)
+
     def test_combine_first(self, float_frame):
         # disjoint
         head, tail = float_frame[:5], float_frame[5:]
@@ -363,9 +369,16 @@ def test_combine_first_int(self):
         expected_12 = DataFrame({"a": [0, 1, 3, 5]}, dtype="float64")
         tm.assert_frame_equal(result_12, expected_12)
 
+        result_12 = df1.combine_first(df2, preserve_dtypes=True)
+        expected_12 = DataFrame({"a": [0, 1, 3, 5]})
+        tm.assert_frame_equal(result_12, expected_12)
+
         result_21 = df2.combine_first(df1)
         expected_21 = DataFrame({"a": [1, 4, 3, 5]}, dtype="float64")
+        tm.assert_frame_equal(result_21, expected_21)
 
+        result_21 = df2.combine_first(df1, preserve_dtypes=True)
+        expected_21 = DataFrame({"a": [1, 4, 3, 5]})
         tm.assert_frame_equal(result_21, expected_21)
 
     @pytest.mark.parametrize("val", [1, 1.0])
@@ -439,3 +452,34 @@ def test_combine_first_with_nan_multiindex():
         index=mi_expected,
     )
     tm.assert_frame_equal(res, expected)
+
+def test_combine_preserve_dtypes():
+    a = Series(["a", "b"], index=range(2))
+    b = Series(range(2), index=range(2))
+    f = DataFrame({"A": a, "B": b})
+
+    c = Series(["a", "b"], index=range(5, 7))
+    b = Series(range(-1, 1), index=range(5, 7))
+    g = DataFrame({"B": b, "C": c})
+
+    exp = DataFrame(
+        {
+            "A": ["a", "b", np.nan, np.nan],
+            "B": [0.0, 1.0, -1.0, 0.0],
+            "C": [np.nan, np.nan, "a", "b"]
+        },
+        index=[0, 1, 5, 6]
+    )
+    combined = f.combine_first(g)
+    tm.assert_frame_equal(combined, exp)
+
+    exp = DataFrame(
+        {
+            "A": ["a", "b", np.nan, np.nan],
+            "B": [0, 1, -1, 0],
+            "C": [np.nan, np.nan, "a", "b"]
+        },
+        index=[0, 1, 5, 6]
+    )
+    combined = f.combine_first(g, preserve_dtypes=True)
+    tm.assert_frame_equal(combined, exp)

From 1a5fe0feefab079b75f0f475bac09605a67b9e6d Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Mon, 11 Jan 2021 10:37:47 +0200
Subject: [PATCH 02/13] fix black code style

---
 pandas/core/frame.py                             |  4 +++-
 pandas/tests/frame/methods/test_combine_first.py | 13 ++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 2d87bc325b9bb..8fab19b652712 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6425,7 +6425,9 @@ def combine(
         # convert_objects just in case
         return self._constructor(result, index=new_index, columns=new_columns)
 
-    def combine_first(self, other: DataFrame, preserve_dtypes: bool = False) -> DataFrame:
+    def combine_first(
+        self, other: DataFrame, preserve_dtypes: bool = False
+    ) -> DataFrame:
         """
         Update null elements with value in the same location in `other`.
 
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 75bd5ac193916..03786ad997f75 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -24,9 +24,7 @@ def test_combine_first_mixed(self):
         combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
-        exp = DataFrame(
-            {"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6]
-        )
+        exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
         combined = f.combine_first(g, preserve_dtypes=True)
         tm.assert_frame_equal(combined, exp)
 
@@ -453,6 +451,7 @@ def test_combine_first_with_nan_multiindex():
     )
     tm.assert_frame_equal(res, expected)
 
+
 def test_combine_preserve_dtypes():
     a = Series(["a", "b"], index=range(2))
     b = Series(range(2), index=range(2))
@@ -466,9 +465,9 @@ def test_combine_preserve_dtypes():
         {
             "A": ["a", "b", np.nan, np.nan],
             "B": [0.0, 1.0, -1.0, 0.0],
-            "C": [np.nan, np.nan, "a", "b"]
+            "C": [np.nan, np.nan, "a", "b"],
         },
-        index=[0, 1, 5, 6]
+        index=[0, 1, 5, 6],
     )
     combined = f.combine_first(g)
     tm.assert_frame_equal(combined, exp)
@@ -477,9 +476,9 @@ def test_combine_preserve_dtypes():
         {
             "A": ["a", "b", np.nan, np.nan],
             "B": [0, 1, -1, 0],
-            "C": [np.nan, np.nan, "a", "b"]
+            "C": [np.nan, np.nan, "a", "b"],
         },
-        index=[0, 1, 5, 6]
+        index=[0, 1, 5, 6],
     )
     combined = f.combine_first(g, preserve_dtypes=True)
     tm.assert_frame_equal(combined, exp)

From 24f6ffcff8c750f76288921e48f113f56116d71c Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Mon, 11 Jan 2021 10:44:44 +0200
Subject: [PATCH 03/13] fix misspelled word in docstring

---
 pandas/core/frame.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 8fab19b652712..77220201ce8b3 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6441,7 +6441,7 @@ def combine_first(
             Provided DataFrame to use to fill null values.
 
         preserve_dtypes : bool, default False
-            try to preserve the column dtypes afetr combining
+            try to preserve the column dtypes after combining
 
             .. versionadded:: 1.2.1
 

From d0f9ed38cac7e26a6dd9ba85aa762cc922a9a77d Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Mon, 11 Jan 2021 20:24:06 +0200
Subject: [PATCH 04/13] update tests and remove preserve_dtypes argument from
 combine_first

---
 pandas/core/frame.py                          | 31 +++----
 .../tests/frame/methods/test_combine_first.py | 83 ++++++++++---------
 2 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 77220201ce8b3..cdfc9da5d6218 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6425,9 +6425,7 @@ def combine(
         # convert_objects just in case
         return self._constructor(result, index=new_index, columns=new_columns)
 
-    def combine_first(
-        self, other: DataFrame, preserve_dtypes: bool = False
-    ) -> DataFrame:
+    def combine_first(self, other: DataFrame) -> DataFrame:
         """
         Update null elements with value in the same location in `other`.
 
@@ -6440,11 +6438,6 @@ def combine_first(
         other : DataFrame
             Provided DataFrame to use to fill null values.
 
-        preserve_dtypes : bool, default False
-            try to preserve the column dtypes after combining
-
-            .. versionadded:: 1.2.1
-
         Returns
         -------
         DataFrame
@@ -6491,14 +6484,22 @@ def combiner(x, y):
 
         combined = self.combine(other, combiner, overwrite=False)
 
-        if preserve_dtypes:
-            dtypes = {
-                col: find_common_type([self.dtypes[col], other.dtypes[col]])
-                for col in self.columns.intersection(other.columns)
-            }
+        dtypes = {}
+
+        for col in self.columns.intersection(other.columns):
+            try:
+                if combined.dtypes[col] != self.dtypes[col]:
+                    dtypes[col] = find_common_type(
+                        [self.dtypes[col], other.dtypes[col]]
+                    )
+            except TypeError:
+                try:
+                    combined[col] = combined[col].astype(self.dtypes[col])
+                except:
+                    pass
 
-            if dtypes:
-                combined = combined.astype(dtypes)
+        if dtypes:
+            combined = combined.astype(dtypes)
 
         return combined
 
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 03786ad997f75..4e24bdbcc6df0 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -6,6 +6,7 @@
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series
 import pandas._testing as tm
+from pandas.core.dtypes.cast import find_common_type
 
 
 class TestDataFrameCombineFirst:
@@ -18,14 +19,8 @@ def test_combine_first_mixed(self):
         b = Series(range(2), index=range(5, 7))
         g = DataFrame({"A": a, "B": b})
 
-        exp = DataFrame(
-            {"A": list("abab"), "B": [0.0, 1.0, 0.0, 1.0]}, index=[0, 1, 5, 6]
-        )
-        combined = f.combine_first(g)
-        tm.assert_frame_equal(combined, exp)
-
         exp = DataFrame({"A": list("abab"), "B": [0, 1, 0, 1]}, index=[0, 1, 5, 6])
-        combined = f.combine_first(g, preserve_dtypes=True)
+        combined = f.combine_first(g)
         tm.assert_frame_equal(combined, exp)
 
     def test_combine_first(self, float_frame):
@@ -148,7 +143,7 @@ def test_combine_first_return_obj_type_with_bools(self):
         )
         df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2])
 
-        expected = Series([True, True, False], name=2, dtype=object)
+        expected = Series([True, True, False], name=2, dtype=bool)
 
         result_12 = df1.combine_first(df2)[2]
         tm.assert_series_equal(result_12, expected)
@@ -161,22 +156,22 @@ def test_combine_first_return_obj_type_with_bools(self):
         (
             (
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
-                [None, None, None],
+                [pd.NaT, pd.NaT, pd.NaT],
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
             ),
             (
-                [None, None, None],
+                [pd.NaT, pd.NaT, pd.NaT],
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
             ),
             (
-                [datetime(2000, 1, 2), None, None],
+                [datetime(2000, 1, 2), pd.NaT, pd.NaT],
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
                 [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)],
             ),
             (
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
-                [datetime(2000, 1, 2), None, None],
+                [datetime(2000, 1, 2), pd.NaT, pd.NaT],
                 [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
             ),
         ),
@@ -200,13 +195,13 @@ def test_combine_first_align_nan(self):
 
         res = dfa.combine_first(dfb)
         exp = DataFrame(
-            {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2.0, 5.0]},
+            {"a": [pd.Timestamp("2011-01-01"), pd.NaT], "b": [2, 5]},
             columns=["a", "b"],
         )
         tm.assert_frame_equal(res, exp)
         assert res["a"].dtype == "datetime64[ns]"
         # ToDo: this must be int64
-        assert res["b"].dtype == "float64"
+        assert res["b"].dtype == "int64"
 
         res = dfa.iloc[:0].combine_first(dfb)
         exp = DataFrame({"a": [np.nan, np.nan], "b": [4, 5]}, columns=["a", "b"])
@@ -223,14 +218,12 @@ def test_combine_first_timezone(self):
             columns=["UTCdatetime", "abc"],
             data=data1,
             index=pd.date_range("20140627", periods=1),
-            dtype="object",
         )
         data2 = pd.to_datetime("20121212 12:12").tz_localize("UTC")
         df2 = DataFrame(
             columns=["UTCdatetime", "xyz"],
             data=data2,
             index=pd.date_range("20140628", periods=1),
-            dtype="object",
         )
         res = df2[["UTCdatetime"]].combine_first(df1)
         exp = DataFrame(
@@ -243,13 +236,10 @@ def test_combine_first_timezone(self):
             },
             columns=["UTCdatetime", "abc"],
             index=pd.date_range("20140627", periods=2, freq="D"),
-            dtype="object",
         )
         assert res["UTCdatetime"].dtype == "datetime64[ns, UTC]"
         assert res["abc"].dtype == "datetime64[ns, UTC]"
-        # Need to cast all to "obejct" because combine_first does not retain dtypes:
-        # GH Issue 7509
-        res = res.astype("object")
+
         tm.assert_frame_equal(res, exp)
 
         # see gh-10567
@@ -364,18 +354,10 @@ def test_combine_first_int(self):
         df2 = DataFrame({"a": [1, 4]}, dtype="int64")
 
         result_12 = df1.combine_first(df2)
-        expected_12 = DataFrame({"a": [0, 1, 3, 5]}, dtype="float64")
-        tm.assert_frame_equal(result_12, expected_12)
-
-        result_12 = df1.combine_first(df2, preserve_dtypes=True)
         expected_12 = DataFrame({"a": [0, 1, 3, 5]})
         tm.assert_frame_equal(result_12, expected_12)
 
         result_21 = df2.combine_first(df1)
-        expected_21 = DataFrame({"a": [1, 4, 3, 5]}, dtype="float64")
-        tm.assert_frame_equal(result_21, expected_21)
-
-        result_21 = df2.combine_first(df1, preserve_dtypes=True)
         expected_21 = DataFrame({"a": [1, 4, 3, 5]})
         tm.assert_frame_equal(result_21, expected_21)
 
@@ -415,11 +397,41 @@ def test_combine_first_string_dtype_only_na(self):
 def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture):
     # GH28481
     na_value = nulls_fixture
+
     frame = DataFrame([[na_value, na_value]], columns=["a", "b"])
     other = DataFrame([[scalar1, scalar2]], columns=["b", "c"])
 
+    try:
+        common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]])
+    except TypeError:
+        common_dtype = "object"
+
+    if common_dtype == "object" or frame.dtypes["b"] == other.dtypes["b"]:
+        val = scalar1
+    else:
+        val = na_value
+
     result = frame.combine_first(other)
-    expected = DataFrame([[na_value, scalar1, scalar2]], columns=["a", "b", "c"])
+
+    expected = DataFrame([[na_value, val, scalar2]], columns=["a", "b", "c"])
+
+    expected["b"] = expected["b"].astype(common_dtype)
+
+    tm.assert_frame_equal(result, expected)
+
+
+def test_combine_first_timestamp_bug_NaT():
+    # GH28481
+    frame = DataFrame([[pd.NaT, pd.NaT]], columns=["a", "b"])
+    other = DataFrame(
+        [[datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["b", "c"]
+    )
+
+    result = frame.combine_first(other)
+    expected = DataFrame(
+        [[pd.NaT, datetime(2020, 1, 1), datetime(2020, 1, 2)]], columns=["a", "b", "c"]
+    )
+
     tm.assert_frame_equal(result, expected)
 
 
@@ -461,17 +473,6 @@ def test_combine_preserve_dtypes():
     b = Series(range(-1, 1), index=range(5, 7))
     g = DataFrame({"B": b, "C": c})
 
-    exp = DataFrame(
-        {
-            "A": ["a", "b", np.nan, np.nan],
-            "B": [0.0, 1.0, -1.0, 0.0],
-            "C": [np.nan, np.nan, "a", "b"],
-        },
-        index=[0, 1, 5, 6],
-    )
-    combined = f.combine_first(g)
-    tm.assert_frame_equal(combined, exp)
-
     exp = DataFrame(
         {
             "A": ["a", "b", np.nan, np.nan],
@@ -480,5 +481,5 @@ def test_combine_preserve_dtypes():
         },
         index=[0, 1, 5, 6],
     )
-    combined = f.combine_first(g, preserve_dtypes=True)
+    combined = f.combine_first(g)
     tm.assert_frame_equal(combined, exp)

From 198eaa4ad3b23810dbb5a0f78934e1365c66a983 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Mon, 11 Jan 2021 20:33:51 +0200
Subject: [PATCH 05/13] fix isort and flake8 errors

---
 pandas/core/frame.py                             | 8 +++++++-
 pandas/tests/frame/methods/test_combine_first.py | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index cdfc9da5d6218..eb675e6e29896 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6488,14 +6488,20 @@ def combiner(x, y):
 
         for col in self.columns.intersection(other.columns):
             try:
+                # if the column has different dtype in the
+                # DataFrame objects then add the common dtype
+                # to the columns dtype conversion dict
                 if combined.dtypes[col] != self.dtypes[col]:
                     dtypes[col] = find_common_type(
                         [self.dtypes[col], other.dtypes[col]]
                     )
             except TypeError:
+                # numpy dtype was compared with pandas dtype
                 try:
+                    # just try to apply the initial column dtype
                     combined[col] = combined[col].astype(self.dtypes[col])
-                except:
+                except ValueError:
+                    # could not apply the initial dtype, so skip
                     pass
 
         if dtypes:
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 4e24bdbcc6df0..350caf7776289 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -3,10 +3,11 @@
 import numpy as np
 import pytest
 
+from pandas.core.dtypes.cast import find_common_type
+
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series
 import pandas._testing as tm
-from pandas.core.dtypes.cast import find_common_type
 
 
 class TestDataFrameCombineFirst:

From f209590de4cdf766c16ef57a3f44084d48c10c41 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Tue, 12 Jan 2021 09:04:51 +0200
Subject: [PATCH 06/13] updates according to review

---
 pandas/core/frame.py                          | 25 +++++--------------
 .../tests/frame/methods/test_combine_first.py | 10 +++-----
 2 files changed, 10 insertions(+), 25 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index eb675e6e29896..6cc70f96e9d0f 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6484,25 +6484,12 @@ def combiner(x, y):
 
         combined = self.combine(other, combiner, overwrite=False)
 
-        dtypes = {}
-
-        for col in self.columns.intersection(other.columns):
-            try:
-                # if the column has different dtype in the
-                # DataFrame objects then add the common dtype
-                # to the columns dtype conversion dict
-                if combined.dtypes[col] != self.dtypes[col]:
-                    dtypes[col] = find_common_type(
-                        [self.dtypes[col], other.dtypes[col]]
-                    )
-            except TypeError:
-                # numpy dtype was compared with pandas dtype
-                try:
-                    # just try to apply the initial column dtype
-                    combined[col] = combined[col].astype(self.dtypes[col])
-                except ValueError:
-                    # could not apply the initial dtype, so skip
-                    pass
+        dtypes = {
+            col: find_common_type([self.dtypes[col], other.dtypes[col]])
+            for col in self.columns.intersection(other.columns)
+            if not is_dtype_equal(combined.dtypes[col], self.dtypes[col])
+            and not is_dtype_equal(combined.dtypes[col], find_common_type([self.dtypes[col], other.dtypes[col]]))
+        }
 
         if dtypes:
             combined = combined.astype(dtypes)
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 350caf7776289..e3a1d5792d24d 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -3,7 +3,7 @@
 import numpy as np
 import pytest
 
-from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.cast import find_common_type, is_dtype_equal
 
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series
@@ -402,12 +402,9 @@ def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture):
     frame = DataFrame([[na_value, na_value]], columns=["a", "b"])
     other = DataFrame([[scalar1, scalar2]], columns=["b", "c"])
 
-    try:
-        common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]])
-    except TypeError:
-        common_dtype = "object"
+    common_dtype = find_common_type([frame.dtypes["b"], other.dtypes["b"]])
 
-    if common_dtype == "object" or frame.dtypes["b"] == other.dtypes["b"]:
+    if is_dtype_equal(common_dtype, "object") or frame.dtypes["b"] == other.dtypes["b"]:
         val = scalar1
     else:
         val = na_value
@@ -466,6 +463,7 @@ def test_combine_first_with_nan_multiindex():
 
 
 def test_combine_preserve_dtypes():
+    # GH7509
     a = Series(["a", "b"], index=range(2))
     b = Series(range(2), index=range(2))
     f = DataFrame({"A": a, "B": b})

From 5e252d081146ddab89a88c612d7e9184288294ae Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Tue, 12 Jan 2021 09:49:24 +0200
Subject: [PATCH 07/13] update whatsnew with example code

---
 doc/source/whatsnew/v1.3.0.rst | 35 ++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index cb97fdeccd579..1853f25da0e34 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -65,6 +65,41 @@ Notable bug fixes
 These are bug fixes that might have notable behavior changes.
 
 
+Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+:meth:`~pandas.DataFrame.combine_first` will now preserve dtypes (:issue:`7509`)
+
+*pandas 1.2.x*
+
+.. code-block:: ipython
+
+   In [1]: df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
+   df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
+   combined = df1.combine_first(df2)
+   (combined, "---------------", combined.dtypes)
+   Out[2]:
+   (     A    B    C
+   0  1.0  1.0  NaN
+   1  2.0  2.0  NaN
+   2  3.0  3.0  1.0
+   3  NaN  5.0  2.0
+   4  NaN  6.0  3.0,
+   '---------------',
+   A    float64
+   B    float64
+   C    float64
+   dtype: object)
+
+*pandas 1.3.0*
+
+.. ipython:: python
+
+   df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
+   df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
+   combined = df1.combine_first(df2)
+   (combined, "---------------", combined.dtypes)
+
 
 .. _whatsnew_130.api_breaking.deps:
 

From 7c67e3cfb44b39da619235d1c04d8ca9aa1f1e37 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Tue, 12 Jan 2021 09:51:12 +0200
Subject: [PATCH 08/13] wrong header in documentation

---
 doc/source/whatsnew/v1.3.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 1853f25da0e34..7c53c567646ec 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -66,7 +66,7 @@ These are bug fixes that might have notable behavior changes.
 
 
 Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 :meth:`~pandas.DataFrame.combine_first` will now preserve dtypes (:issue:`7509`)
 

From 47d091195d45501ea8643712f0391c6efa66c330 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Tue, 12 Jan 2021 09:53:17 +0200
Subject: [PATCH 09/13] fix black code style

---
 pandas/core/frame.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 6cc70f96e9d0f..7db27c9c29ec8 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6488,7 +6488,10 @@ def combiner(x, y):
             col: find_common_type([self.dtypes[col], other.dtypes[col]])
             for col in self.columns.intersection(other.columns)
             if not is_dtype_equal(combined.dtypes[col], self.dtypes[col])
-            and not is_dtype_equal(combined.dtypes[col], find_common_type([self.dtypes[col], other.dtypes[col]]))
+            and not is_dtype_equal(
+                combined.dtypes[col],
+                find_common_type([self.dtypes[col], other.dtypes[col]]),
+            )
         }
 
         if dtypes:

From 1b5691cc7143b95e7a765471e1d136fdca66b566 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Wed, 13 Jan 2021 08:02:50 +0200
Subject: [PATCH 10/13] update whatsnew with example code as requested in the
 review

---
 doc/source/whatsnew/v1.3.0.rst | 28 ++++++++++++++++++++--------
 1 file changed, 20 insertions(+), 8 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index 7c53c567646ec..eba01f609ef0c 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -70,14 +70,17 @@ Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
 
 :meth:`~pandas.DataFrame.combine_first` will now preserve dtypes (:issue:`7509`)
 
+.. ipython:: python
+
+   df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
+   df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
+   combined = df1.combine_first(df2)
+
 *pandas 1.2.x*
 
 .. code-block:: ipython
 
-   In [1]: df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
-   df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
-   combined = df1.combine_first(df2)
-   (combined, "---------------", combined.dtypes)
+   In [1]: (combined, "---------------", combined.dtypes)
    Out[2]:
    (     A    B    C
    0  1.0  1.0  NaN
@@ -95,10 +98,19 @@ Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
 
 .. ipython:: python
 
-   df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
-   df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
-   combined = df1.combine_first(df2)
-   (combined, "---------------", combined.dtypes)
+   In [1]: (combined, "---------------", combined.dtypes)
+   Out[2]:
+   (     A  B    C
+   0  1.0  1  NaN
+   1  2.0  2  NaN
+   2  3.0  3  1.0
+   3  NaN  5  2.0
+   4  NaN  6  3.0, 
+   '---------------', 
+   A    float64
+   B      int64
+   C    float64
+   dtype: object)
 
 
 .. _whatsnew_130.api_breaking.deps:

From ba49f9cc3a3ae8c9947fb4eae696022f96378cfd Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Wed, 13 Jan 2021 15:54:16 +0200
Subject: [PATCH 11/13] remove redundant check

---
 pandas/core/frame.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 7db27c9c29ec8..897ff7e12cc9e 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -6488,10 +6488,6 @@ def combiner(x, y):
             col: find_common_type([self.dtypes[col], other.dtypes[col]])
             for col in self.columns.intersection(other.columns)
             if not is_dtype_equal(combined.dtypes[col], self.dtypes[col])
-            and not is_dtype_equal(
-                combined.dtypes[col],
-                find_common_type([self.dtypes[col], other.dtypes[col]]),
-            )
         }
 
         if dtypes:

From a2d4e383c6d9b6541bda10f65921b6a9595c94b6 Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Wed, 13 Jan 2021 16:40:06 +0200
Subject: [PATCH 12/13] further fix and polish the whatsnew entry

---
 doc/source/whatsnew/v1.3.0.rst | 27 +++++----------------------
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
index eba01f609ef0c..557663d10accf 100644
--- a/doc/source/whatsnew/v1.3.0.rst
+++ b/doc/source/whatsnew/v1.3.0.rst
@@ -73,44 +73,27 @@ Preserve dtypes in  :meth:`~pandas.DataFrame.combine_first`
 .. ipython:: python
 
    df1 = pd.DataFrame({"A": [1, 2, 3], "B": [1, 2, 3]}, index=[0, 1, 2])
+   df1
    df2 = pd.DataFrame({"B": [4, 5, 6], "C": [1, 2, 3]}, index=[2, 3, 4])
+   df2
    combined = df1.combine_first(df2)
 
 *pandas 1.2.x*
 
 .. code-block:: ipython
 
-   In [1]: (combined, "---------------", combined.dtypes)
+   In [1]: combined.dtypes
    Out[2]:
-   (     A    B    C
-   0  1.0  1.0  NaN
-   1  2.0  2.0  NaN
-   2  3.0  3.0  1.0
-   3  NaN  5.0  2.0
-   4  NaN  6.0  3.0,
-   '---------------',
    A    float64
    B    float64
    C    float64
-   dtype: object)
+   dtype: object
 
 *pandas 1.3.0*
 
 .. ipython:: python
 
-   In [1]: (combined, "---------------", combined.dtypes)
-   Out[2]:
-   (     A  B    C
-   0  1.0  1  NaN
-   1  2.0  2  NaN
-   2  3.0  3  1.0
-   3  NaN  5  2.0
-   4  NaN  6  3.0, 
-   '---------------', 
-   A    float64
-   B      int64
-   C    float64
-   dtype: object)
+   combined.dtypes
 
 
 .. _whatsnew_130.api_breaking.deps:

From f9379289ac6daf75ade5df90d5e81a0593f0745a Mon Sep 17 00:00:00 2001
From: danielhrisca <daniel.hrisca@gmail.com>
Date: Fri, 15 Jan 2021 16:43:18 +0200
Subject: [PATCH 13/13] fix single letter variable names

---
 .../tests/frame/methods/test_combine_first.py  | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index e3a1d5792d24d..1325bfbda24c6 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -464,15 +464,15 @@ def test_combine_first_with_nan_multiindex():
 
 def test_combine_preserve_dtypes():
     # GH7509
-    a = Series(["a", "b"], index=range(2))
-    b = Series(range(2), index=range(2))
-    f = DataFrame({"A": a, "B": b})
+    a_column = Series(["a", "b"], index=range(2))
+    b_column = Series(range(2), index=range(2))
+    df1 = DataFrame({"A": a_column, "B": b_column})
 
-    c = Series(["a", "b"], index=range(5, 7))
-    b = Series(range(-1, 1), index=range(5, 7))
-    g = DataFrame({"B": b, "C": c})
+    c_column = Series(["a", "b"], index=range(5, 7))
+    b_column = Series(range(-1, 1), index=range(5, 7))
+    df2 = DataFrame({"B": b_column, "C": c_column})
 
-    exp = DataFrame(
+    expected = DataFrame(
         {
             "A": ["a", "b", np.nan, np.nan],
             "B": [0, 1, -1, 0],
@@ -480,5 +480,5 @@ def test_combine_preserve_dtypes():
         },
         index=[0, 1, 5, 6],
     )
-    combined = f.combine_first(g)
-    tm.assert_frame_equal(combined, exp)
+    combined = df1.combine_first(df2)
+    tm.assert_frame_equal(combined, expected)