From 2046cb57a5871c618829e1772f35bc63147d8d13 Mon Sep 17 00:00:00 2001
From: sinhrks <sinhrks@gmail.com>
Date: Thu, 11 Aug 2016 23:00:12 +0900
Subject: [PATCH] BUG/DEPR: combine dtype fixes

---
 doc/source/whatsnew/v0.19.0.txt           |   2 +
 pandas/core/frame.py                      |  26 +-
 pandas/tests/frame/test_combine_concat.py | 461 ++++++++++++++--------
 pandas/tests/frame/test_operators.py      |  58 +--
 pandas/tests/types/test_cast.py           |  40 +-
 pandas/types/cast.py                      |  21 +-
 pandas/types/common.py                    |  29 +-
 7 files changed, 410 insertions(+), 227 deletions(-)

diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
index 5cbdbe6168bba..411b2b0abaf5a 100644
--- a/doc/source/whatsnew/v0.19.0.txt
+++ b/doc/source/whatsnew/v0.19.0.txt
@@ -788,6 +788,7 @@ Deprecations
 - ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use  ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`)
 - ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`)
 
+
 .. _whatsnew_0190.prior_deprecations:
 
 Removal of prior version deprecations/changes
@@ -939,6 +940,7 @@ Bug Fixes
 
 
 - Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
+- Bug in ``.combine_first`` where the result may have an incorrect ``dtype`` (:issue:`7630`, :issue:`10567`)
 - Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
 - Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
 - Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` silently ignore custom datetime formatter passed through the ``formatters`` key word (:issue:`10690`)
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 4416213817ab4..ea83200465582 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -31,20 +31,22 @@
                                _possibly_downcast_to_dtype,
                                _invalidate_string_dtypes,
                                _coerce_to_dtypes,
-                               _maybe_upcast_putmask)
+                               _maybe_upcast_putmask,
+                               _find_common_type)
 from pandas.types.common import (is_categorical_dtype,
                                  is_object_dtype,
                                  is_extension_type,
                                  is_datetimetz,
                                  is_datetime64_dtype,
+                                 is_datetime64tz_dtype,
                                  is_bool_dtype,
                                  is_integer_dtype,
                                  is_float_dtype,
                                  is_integer,
                                  is_scalar,
+                                 is_dtype_equal,
                                  needs_i8_conversion,
                                  _get_dtype_from_object,
-                                 _lcd_dtypes,
                                  _ensure_float,
                                  _ensure_float64,
                                  _ensure_int64,
@@ -3700,17 +3702,20 @@ def combine(self, other, func, fill_value=None, overwrite=True):
                 otherSeries[other_mask] = fill_value
 
             # if we have different dtypes, possibily promote
-            new_dtype = this_dtype
-            if this_dtype != other_dtype:
-                new_dtype = _lcd_dtypes(this_dtype, other_dtype)
-                series = series.astype(new_dtype)
+            if notnull(series).all():
+                new_dtype = this_dtype
                 otherSeries = otherSeries.astype(new_dtype)
+            else:
+                new_dtype = _find_common_type([this_dtype, other_dtype])
+                if not is_dtype_equal(this_dtype, new_dtype):
+                    series = series.astype(new_dtype)
+                if not is_dtype_equal(other_dtype, new_dtype):
+                    otherSeries = otherSeries.astype(new_dtype)
 
             # see if we need to be represented as i8 (datetimelike)
             # try to keep us at this dtype
             needs_i8_conversion_i = needs_i8_conversion(new_dtype)
             if needs_i8_conversion_i:
-                this_dtype = new_dtype
                 arr = func(series, otherSeries, True)
             else:
                 arr = func(series, otherSeries)
@@ -3721,7 +3726,12 @@ def combine(self, other, func, fill_value=None, overwrite=True):
 
             # try to downcast back to the original dtype
             if needs_i8_conversion_i:
-                arr = _possibly_cast_to_datetime(arr, this_dtype)
+                # TODO: this conversion belongs in _possibly_cast_to_datetime,
+                # but changing it there would affect a lot of other code
+                if is_datetime64tz_dtype(new_dtype):
+                    arr = DatetimeIndex._simple_new(arr, tz=new_dtype.tz)
+                else:
+                    arr = _possibly_cast_to_datetime(arr, new_dtype)
             else:
                 arr = _possibly_downcast_to_dtype(arr, this_dtype)
 
diff --git a/pandas/tests/frame/test_combine_concat.py b/pandas/tests/frame/test_combine_concat.py
index 7202915f13258..e5aaba26135e7 100644
--- a/pandas/tests/frame/test_combine_concat.py
+++ b/pandas/tests/frame/test_combine_concat.py
@@ -20,23 +20,11 @@
 from pandas.tests.frame.common import TestData
 
 
-class TestDataFrameCombineConcat(tm.TestCase, TestData):
+class TestDataFrameConcatCommon(tm.TestCase, TestData):
 
     _multiprocess_can_split_ = True
 
-    def test_combine_first_mixed(self):
-        a = Series(['a', 'b'], index=lrange(2))
-        b = Series(lrange(2), index=lrange(2))
-        f = DataFrame({'A': a, 'B': b})
-
-        a = Series(['a', 'b'], index=lrange(5, 7))
-        b = Series(lrange(2), index=lrange(5, 7))
-        g = DataFrame({'A': a, 'B': b})
-
-        # TODO(wesm): no verification?
-        combined = f.combine_first(g)  # noqa
-
-    def test_combine_multiple_frames_dtypes(self):
+    def test_concat_multiple_frames_dtypes(self):
 
         # GH 2759
         A = DataFrame(data=np.ones((10, 2)), columns=[
@@ -46,7 +34,7 @@ def test_combine_multiple_frames_dtypes(self):
         expected = Series(dict(float64=2, float32=2))
         assert_series_equal(results, expected)
 
-    def test_combine_multiple_tzs(self):
+    def test_concat_multiple_tzs(self):
         # GH 12467
         # combining datetime tz-aware and naive DataFrames
         ts1 = Timestamp('2015-01-01', tz=None)
@@ -194,147 +182,6 @@ def test_append_dtypes(self):
         expected = DataFrame({'bar': Series([Timestamp('20130101'), 1])})
         assert_frame_equal(result, expected)
 
-    def test_combine_first(self):
-        # disjoint
-        head, tail = self.frame[:5], self.frame[5:]
-
-        combined = head.combine_first(tail)
-        reordered_frame = self.frame.reindex(combined.index)
-        assert_frame_equal(combined, reordered_frame)
-        self.assertTrue(tm.equalContents(combined.columns, self.frame.columns))
-        assert_series_equal(combined['A'], reordered_frame['A'])
-
-        # same index
-        fcopy = self.frame.copy()
-        fcopy['A'] = 1
-        del fcopy['C']
-
-        fcopy2 = self.frame.copy()
-        fcopy2['B'] = 0
-        del fcopy2['D']
-
-        combined = fcopy.combine_first(fcopy2)
-
-        self.assertTrue((combined['A'] == 1).all())
-        assert_series_equal(combined['B'], fcopy['B'])
-        assert_series_equal(combined['C'], fcopy2['C'])
-        assert_series_equal(combined['D'], fcopy['D'])
-
-        # overlap
-        head, tail = reordered_frame[:10].copy(), reordered_frame
-        head['A'] = 1
-
-        combined = head.combine_first(tail)
-        self.assertTrue((combined['A'][:10] == 1).all())
-
-        # reverse overlap
-        tail['A'][:10] = 0
-        combined = tail.combine_first(head)
-        self.assertTrue((combined['A'][:10] == 0).all())
-
-        # no overlap
-        f = self.frame[:10]
-        g = self.frame[10:]
-        combined = f.combine_first(g)
-        assert_series_equal(combined['A'].reindex(f.index), f['A'])
-        assert_series_equal(combined['A'].reindex(g.index), g['A'])
-
-        # corner cases
-        comb = self.frame.combine_first(self.empty)
-        assert_frame_equal(comb, self.frame)
-
-        comb = self.empty.combine_first(self.frame)
-        assert_frame_equal(comb, self.frame)
-
-        comb = self.frame.combine_first(DataFrame(index=["faz", "boo"]))
-        self.assertTrue("faz" in comb.index)
-
-        # #2525
-        df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)])
-        df2 = DataFrame({}, columns=['b'])
-        result = df.combine_first(df2)
-        self.assertTrue('b' in result)
-
-    def test_combine_first_mixed_bug(self):
-        idx = Index(['a', 'b', 'c', 'e'])
-        ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx)
-        ser2 = Series(['a', 'b', 'c', 'e'], index=idx)
-        ser3 = Series([12, 4, 5, 97], index=idx)
-
-        frame1 = DataFrame({"col0": ser1,
-                            "col2": ser2,
-                            "col3": ser3})
-
-        idx = Index(['a', 'b', 'c', 'f'])
-        ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx)
-        ser2 = Series(['a', 'b', 'c', 'f'], index=idx)
-        ser3 = Series([12, 4, 5, 97], index=idx)
-
-        frame2 = DataFrame({"col1": ser1,
-                            "col2": ser2,
-                            "col5": ser3})
-
-        combined = frame1.combine_first(frame2)
-        self.assertEqual(len(combined.columns), 5)
-
-        # gh 3016 (same as in update)
-        df = DataFrame([[1., 2., False, True], [4., 5., True, False]],
-                       columns=['A', 'B', 'bool1', 'bool2'])
-
-        other = DataFrame([[45, 45]], index=[0], columns=['A', 'B'])
-        result = df.combine_first(other)
-        assert_frame_equal(result, df)
-
-        df.ix[0, 'A'] = np.nan
-        result = df.combine_first(other)
-        df.ix[0, 'A'] = 45
-        assert_frame_equal(result, df)
-
-        # doc example
-        df1 = DataFrame({'A': [1., np.nan, 3., 5., np.nan],
-                         'B': [np.nan, 2., 3., np.nan, 6.]})
-
-        df2 = DataFrame({'A': [5., 2., 4., np.nan, 3., 7.],
-                         'B': [np.nan, np.nan, 3., 4., 6., 8.]})
-
-        result = df1.combine_first(df2)
-        expected = DataFrame(
-            {'A': [1, 2, 3, 5, 3, 7.], 'B': [np.nan, 2, 3, 4, 6, 8]})
-        assert_frame_equal(result, expected)
-
-        # GH3552, return object dtype with bools
-        df1 = DataFrame(
-            [[np.nan, 3., True], [-4.6, np.nan, True], [np.nan, 7., False]])
-        df2 = DataFrame(
-            [[-42.6, np.nan, True], [-5., 1.6, False]], index=[1, 2])
-
-        result = df1.combine_first(df2)[2]
-        expected = Series([True, True, False], name=2)
-        assert_series_equal(result, expected)
-
-        # GH 3593, converting datetime64[ns] incorrecly
-        df0 = DataFrame({"a": [datetime(2000, 1, 1),
-                               datetime(2000, 1, 2),
-                               datetime(2000, 1, 3)]})
-        df1 = DataFrame({"a": [None, None, None]})
-        df2 = df1.combine_first(df0)
-        assert_frame_equal(df2, df0)
-
-        df2 = df0.combine_first(df1)
-        assert_frame_equal(df2, df0)
-
-        df0 = DataFrame({"a": [datetime(2000, 1, 1),
-                               datetime(2000, 1, 2),
-                               datetime(2000, 1, 3)]})
-        df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]})
-        df2 = df1.combine_first(df0)
-        result = df0.copy()
-        result.iloc[0, :] = df1.iloc[0, :]
-        assert_frame_equal(df2, result)
-
-        df2 = df0.combine_first(df1)
-        assert_frame_equal(df2, df0)
-
     def test_update(self):
         df = DataFrame([[1.5, nan, 3.],
                         [1.5, nan, 3.],
@@ -476,3 +323,305 @@ def test_join_multiindex_leftright(self):
         assert_frame_equal(df1.join(df2, how='right'), exp)
         assert_frame_equal(df2.join(df1, how='left'),
                            exp[['value2', 'value1']])
+
+
+class TestDataFrameCombineFirst(tm.TestCase, TestData):
+
+    _multiprocess_can_split_ = True
+
+    def test_combine_first_mixed(self):
+        a = Series(['a', 'b'], index=lrange(2))
+        b = Series(lrange(2), index=lrange(2))
+        f = DataFrame({'A': a, 'B': b})
+
+        a = Series(['a', 'b'], index=lrange(5, 7))
+        b = Series(lrange(2), index=lrange(5, 7))
+        g = DataFrame({'A': a, 'B': b})
+
+        exp = pd.DataFrame({'A': list('abab'), 'B': [0., 1., 0., 1.]},
+                           index=[0, 1, 5, 6])
+        combined = f.combine_first(g)
+        tm.assert_frame_equal(combined, exp)
+
+    def test_combine_first(self):
+        # disjoint
+        head, tail = self.frame[:5], self.frame[5:]
+
+        combined = head.combine_first(tail)
+        reordered_frame = self.frame.reindex(combined.index)
+        assert_frame_equal(combined, reordered_frame)
+        self.assertTrue(tm.equalContents(combined.columns, self.frame.columns))
+        assert_series_equal(combined['A'], reordered_frame['A'])
+
+        # same index
+        fcopy = self.frame.copy()
+        fcopy['A'] = 1
+        del fcopy['C']
+
+        fcopy2 = self.frame.copy()
+        fcopy2['B'] = 0
+        del fcopy2['D']
+
+        combined = fcopy.combine_first(fcopy2)
+
+        self.assertTrue((combined['A'] == 1).all())
+        assert_series_equal(combined['B'], fcopy['B'])
+        assert_series_equal(combined['C'], fcopy2['C'])
+        assert_series_equal(combined['D'], fcopy['D'])
+
+        # overlap
+        head, tail = reordered_frame[:10].copy(), reordered_frame
+        head['A'] = 1
+
+        combined = head.combine_first(tail)
+        self.assertTrue((combined['A'][:10] == 1).all())
+
+        # reverse overlap
+        tail['A'][:10] = 0
+        combined = tail.combine_first(head)
+        self.assertTrue((combined['A'][:10] == 0).all())
+
+        # no overlap
+        f = self.frame[:10]
+        g = self.frame[10:]
+        combined = f.combine_first(g)
+        assert_series_equal(combined['A'].reindex(f.index), f['A'])
+        assert_series_equal(combined['A'].reindex(g.index), g['A'])
+
+        # corner cases
+        comb = self.frame.combine_first(self.empty)
+        assert_frame_equal(comb, self.frame)
+
+        comb = self.empty.combine_first(self.frame)
+        assert_frame_equal(comb, self.frame)
+
+        comb = self.frame.combine_first(DataFrame(index=["faz", "boo"]))
+        self.assertTrue("faz" in comb.index)
+
+        # #2525
+        df = DataFrame({'a': [1]}, index=[datetime(2012, 1, 1)])
+        df2 = DataFrame({}, columns=['b'])
+        result = df.combine_first(df2)
+        self.assertTrue('b' in result)
+
+    def test_combine_first_mixed_bug(self):
+        idx = Index(['a', 'b', 'c', 'e'])
+        ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx)
+        ser2 = Series(['a', 'b', 'c', 'e'], index=idx)
+        ser3 = Series([12, 4, 5, 97], index=idx)
+
+        frame1 = DataFrame({"col0": ser1,
+                            "col2": ser2,
+                            "col3": ser3})
+
+        idx = Index(['a', 'b', 'c', 'f'])
+        ser1 = Series([5.0, -9.0, 4.0, 100.], index=idx)
+        ser2 = Series(['a', 'b', 'c', 'f'], index=idx)
+        ser3 = Series([12, 4, 5, 97], index=idx)
+
+        frame2 = DataFrame({"col1": ser1,
+                            "col2": ser2,
+                            "col5": ser3})
+
+        combined = frame1.combine_first(frame2)
+        self.assertEqual(len(combined.columns), 5)
+
+        # gh 3016 (same as in update)
+        df = DataFrame([[1., 2., False, True], [4., 5., True, False]],
+                       columns=['A', 'B', 'bool1', 'bool2'])
+
+        other = DataFrame([[45, 45]], index=[0], columns=['A', 'B'])
+        result = df.combine_first(other)
+        assert_frame_equal(result, df)
+
+        df.ix[0, 'A'] = np.nan
+        result = df.combine_first(other)
+        df.ix[0, 'A'] = 45
+        assert_frame_equal(result, df)
+
+        # doc example
+        df1 = DataFrame({'A': [1., np.nan, 3., 5., np.nan],
+                         'B': [np.nan, 2., 3., np.nan, 6.]})
+
+        df2 = DataFrame({'A': [5., 2., 4., np.nan, 3., 7.],
+                         'B': [np.nan, np.nan, 3., 4., 6., 8.]})
+
+        result = df1.combine_first(df2)
+        expected = DataFrame(
+            {'A': [1, 2, 3, 5, 3, 7.], 'B': [np.nan, 2, 3, 4, 6, 8]})
+        assert_frame_equal(result, expected)
+
+        # GH3552, return object dtype with bools
+        df1 = DataFrame(
+            [[np.nan, 3., True], [-4.6, np.nan, True], [np.nan, 7., False]])
+        df2 = DataFrame(
+            [[-42.6, np.nan, True], [-5., 1.6, False]], index=[1, 2])
+
+        result = df1.combine_first(df2)[2]
+        expected = Series([True, True, False], name=2)
+        assert_series_equal(result, expected)
+
+        # GH 3593, converting datetime64[ns] incorrectly
+        df0 = DataFrame({"a": [datetime(2000, 1, 1),
+                               datetime(2000, 1, 2),
+                               datetime(2000, 1, 3)]})
+        df1 = DataFrame({"a": [None, None, None]})
+        df2 = df1.combine_first(df0)
+        assert_frame_equal(df2, df0)
+
+        df2 = df0.combine_first(df1)
+        assert_frame_equal(df2, df0)
+
+        df0 = DataFrame({"a": [datetime(2000, 1, 1),
+                               datetime(2000, 1, 2),
+                               datetime(2000, 1, 3)]})
+        df1 = DataFrame({"a": [datetime(2000, 1, 2), None, None]})
+        df2 = df1.combine_first(df0)
+        result = df0.copy()
+        result.iloc[0, :] = df1.iloc[0, :]
+        assert_frame_equal(df2, result)
+
+        df2 = df0.combine_first(df1)
+        assert_frame_equal(df2, df0)
+
+    def test_combine_first_align_nan(self):
+        # GH 7509 (not fixed)
+        dfa = pd.DataFrame([[pd.Timestamp('2011-01-01'), 2]],
+                           columns=['a', 'b'])
+        dfb = pd.DataFrame([[4], [5]], columns=['b'])
+        self.assertEqual(dfa['a'].dtype, 'datetime64[ns]')
+        self.assertEqual(dfa['b'].dtype, 'int64')
+
+        res = dfa.combine_first(dfb)
+        exp = pd.DataFrame({'a': [pd.Timestamp('2011-01-01'), pd.NaT],
+                            'b': [2., 5.]}, columns=['a', 'b'])
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['a'].dtype, 'datetime64[ns]')
+        # ToDo: this must be int64
+        self.assertEqual(res['b'].dtype, 'float64')
+
+        res = dfa.iloc[:0].combine_first(dfb)
+        exp = pd.DataFrame({'a': [np.nan, np.nan],
+                            'b': [4, 5]}, columns=['a', 'b'])
+        tm.assert_frame_equal(res, exp)
+        # ToDo: this must be datetime64
+        self.assertEqual(res['a'].dtype, 'float64')
+        # 'b' keeps its int64 dtype here
+        self.assertEqual(res['b'].dtype, 'int64')
+
+    def test_combine_first_timezone(self):
+        # GH 7630
+        data1 = pd.to_datetime('20100101 01:01').tz_localize('UTC')
+        df1 = pd.DataFrame(columns=['UTCdatetime', 'abc'],
+                           data=data1,
+                           index=pd.date_range('20140627', periods=1))
+        data2 = pd.to_datetime('20121212 12:12').tz_localize('UTC')
+        df2 = pd.DataFrame(columns=['UTCdatetime', 'xyz'],
+                           data=data2,
+                           index=pd.date_range('20140628', periods=1))
+        res = df2[['UTCdatetime']].combine_first(df1)
+        exp = pd.DataFrame({'UTCdatetime': [pd.Timestamp('2010-01-01 01:01',
+                                                         tz='UTC'),
+                                            pd.Timestamp('2012-12-12 12:12',
+                                                         tz='UTC')],
+                            'abc': [pd.Timestamp('2010-01-01 01:01:00',
+                                                 tz='UTC'), pd.NaT]},
+                           columns=['UTCdatetime', 'abc'],
+                           index=pd.date_range('20140627', periods=2,
+                                               freq='D'))
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['UTCdatetime'].dtype, 'datetime64[ns, UTC]')
+        self.assertEqual(res['abc'].dtype, 'datetime64[ns, UTC]')
+
+        # GH 10567
+        dts1 = pd.date_range('2015-01-01', '2015-01-05', tz='UTC')
+        df1 = pd.DataFrame({'DATE': dts1})
+        dts2 = pd.date_range('2015-01-03', '2015-01-05', tz='UTC')
+        df2 = pd.DataFrame({'DATE': dts2})
+
+        res = df1.combine_first(df2)
+        tm.assert_frame_equal(res, df1)
+        self.assertEqual(res['DATE'].dtype, 'datetime64[ns, UTC]')
+
+        dts1 = pd.DatetimeIndex(['2011-01-01', 'NaT', '2011-01-03',
+                                 '2011-01-04'], tz='US/Eastern')
+        df1 = pd.DataFrame({'DATE': dts1}, index=[1, 3, 5, 7])
+        dts2 = pd.DatetimeIndex(['2012-01-01', '2012-01-02',
+                                 '2012-01-03'], tz='US/Eastern')
+        df2 = pd.DataFrame({'DATE': dts2}, index=[2, 4, 5])
+
+        res = df1.combine_first(df2)
+        exp_dts = pd.DatetimeIndex(['2011-01-01', '2012-01-01', 'NaT',
+                                    '2012-01-02', '2011-01-03', '2011-01-04'],
+                                   tz='US/Eastern')
+        exp = pd.DataFrame({'DATE': exp_dts}, index=[1, 2, 3, 4, 5, 7])
+        tm.assert_frame_equal(res, exp)
+
+        # different tz
+        dts1 = pd.date_range('2015-01-01', '2015-01-05', tz='US/Eastern')
+        df1 = pd.DataFrame({'DATE': dts1})
+        dts2 = pd.date_range('2015-01-03', '2015-01-05')
+        df2 = pd.DataFrame({'DATE': dts2})
+
+        # if df1 doesn't have NaN, keep its dtype
+        res = df1.combine_first(df2)
+        tm.assert_frame_equal(res, df1)
+        self.assertEqual(res['DATE'].dtype, 'datetime64[ns, US/Eastern]')
+
+        dts1 = pd.date_range('2015-01-01', '2015-01-02', tz='US/Eastern')
+        df1 = pd.DataFrame({'DATE': dts1})
+        dts2 = pd.date_range('2015-01-01', '2015-01-03')
+        df2 = pd.DataFrame({'DATE': dts2})
+
+        res = df1.combine_first(df2)
+        exp_dts = [pd.Timestamp('2015-01-01', tz='US/Eastern'),
+                   pd.Timestamp('2015-01-02', tz='US/Eastern'),
+                   pd.Timestamp('2015-01-03')]
+        exp = pd.DataFrame({'DATE': exp_dts})
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['DATE'].dtype, 'object')
+
+    def test_combine_first_timedelta(self):
+        data1 = pd.TimedeltaIndex(['1 day', 'NaT', '3 day', '4day'])
+        df1 = pd.DataFrame({'TD': data1}, index=[1, 3, 5, 7])
+        data2 = pd.TimedeltaIndex(['10 day', '11 day', '12 day'])
+        df2 = pd.DataFrame({'TD': data2}, index=[2, 4, 5])
+
+        res = df1.combine_first(df2)
+        exp_dts = pd.TimedeltaIndex(['1 day', '10 day', 'NaT',
+                                     '11 day', '3 day', '4 day'])
+        exp = pd.DataFrame({'TD': exp_dts}, index=[1, 2, 3, 4, 5, 7])
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['TD'].dtype, 'timedelta64[ns]')
+
+    def test_combine_first_period(self):
+        data1 = pd.PeriodIndex(['2011-01', 'NaT', '2011-03',
+                                '2011-04'], freq='M')
+        df1 = pd.DataFrame({'P': data1}, index=[1, 3, 5, 7])
+        data2 = pd.PeriodIndex(['2012-01-01', '2012-02',
+                                '2012-03'], freq='M')
+        df2 = pd.DataFrame({'P': data2}, index=[2, 4, 5])
+
+        res = df1.combine_first(df2)
+        exp_dts = pd.PeriodIndex(['2011-01', '2012-01', 'NaT',
+                                  '2012-02', '2011-03', '2011-04'],
+                                 freq='M')
+        exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7])
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['P'].dtype, 'object')
+
+        # different freq
+        dts2 = pd.PeriodIndex(['2012-01-01', '2012-01-02',
+                               '2012-01-03'], freq='D')
+        df2 = pd.DataFrame({'P': dts2}, index=[2, 4, 5])
+
+        res = df1.combine_first(df2)
+        exp_dts = [pd.Period('2011-01', freq='M'),
+                   pd.Period('2012-01-01', freq='D'),
+                   pd.NaT,
+                   pd.Period('2012-01-02', freq='D'),
+                   pd.Period('2011-03', freq='M'),
+                   pd.Period('2011-04', freq='M')]
+        exp = pd.DataFrame({'P': exp_dts}, index=[1, 2, 3, 4, 5, 7])
+        tm.assert_frame_equal(res, exp)
+        self.assertEqual(res['P'].dtype, 'object')
diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
index c91585a28d867..ce7af25eb0460 100644
--- a/pandas/tests/frame/test_operators.py
+++ b/pandas/tests/frame/test_operators.py
@@ -1013,44 +1013,52 @@ def test_combineAdd(self):
         with tm.assert_produces_warning(FutureWarning):
             # trivial
             comb = self.frame.combineAdd(self.frame)
-            assert_frame_equal(comb, self.frame * 2)
+        assert_frame_equal(comb, self.frame * 2)
 
-            # more rigorous
-            a = DataFrame([[1., nan, nan, 2., nan]],
-                          columns=np.arange(5))
-            b = DataFrame([[2., 3., nan, 2., 6., nan]],
-                          columns=np.arange(6))
-            expected = DataFrame([[3., 3., nan, 4., 6., nan]],
-                                 columns=np.arange(6))
+        # more rigorous
+        a = DataFrame([[1., nan, nan, 2., nan]],
+                      columns=np.arange(5))
+        b = DataFrame([[2., 3., nan, 2., 6., nan]],
+                      columns=np.arange(6))
+        expected = DataFrame([[3., 3., nan, 4., 6., nan]],
+                             columns=np.arange(6))
 
+        with tm.assert_produces_warning(FutureWarning):
             result = a.combineAdd(b)
-            assert_frame_equal(result, expected)
+        assert_frame_equal(result, expected)
+
+        with tm.assert_produces_warning(FutureWarning):
             result2 = a.T.combineAdd(b.T)
-            assert_frame_equal(result2, expected.T)
+        assert_frame_equal(result2, expected.T)
 
-            expected2 = a.combine(b, operator.add, fill_value=0.)
-            assert_frame_equal(expected, expected2)
+        expected2 = a.combine(b, operator.add, fill_value=0.)
+        assert_frame_equal(expected, expected2)
 
-            # corner cases
+        # corner cases
+        with tm.assert_produces_warning(FutureWarning):
             comb = self.frame.combineAdd(self.empty)
-            assert_frame_equal(comb, self.frame)
+        assert_frame_equal(comb, self.frame)
 
+        with tm.assert_produces_warning(FutureWarning):
             comb = self.empty.combineAdd(self.frame)
-            assert_frame_equal(comb, self.frame)
+        assert_frame_equal(comb, self.frame)
 
-            # integer corner case
-            df1 = DataFrame({'x': [5]})
-            df2 = DataFrame({'x': [1]})
-            df3 = DataFrame({'x': [6]})
+        # integer corner case
+        df1 = DataFrame({'x': [5]})
+        df2 = DataFrame({'x': [1]})
+        df3 = DataFrame({'x': [6]})
+
+        with tm.assert_produces_warning(FutureWarning):
             comb = df1.combineAdd(df2)
-            assert_frame_equal(comb, df3)
+        assert_frame_equal(comb, df3)
 
-            # mixed type GH2191
-            df1 = DataFrame({'A': [1, 2], 'B': [3, 4]})
-            df2 = DataFrame({'A': [1, 2], 'C': [5, 6]})
+        # mixed type GH2191
+        df1 = DataFrame({'A': [1, 2], 'B': [3, 4]})
+        df2 = DataFrame({'A': [1, 2], 'C': [5, 6]})
+        with tm.assert_produces_warning(FutureWarning):
             rs = df1.combineAdd(df2)
-            xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]})
-            assert_frame_equal(xp, rs)
+        xp = DataFrame({'A': [2, 4], 'B': [3, 4.], 'C': [5, 6.]})
+        assert_frame_equal(xp, rs)
 
         # TODO: test integer fill corner?
 
diff --git a/pandas/tests/types/test_cast.py b/pandas/tests/types/test_cast.py
index 3394974d833fb..46f37bf0ef8c2 100644
--- a/pandas/tests/types/test_cast.py
+++ b/pandas/tests/types/test_cast.py
@@ -192,6 +192,7 @@ def test_possibly_convert_objects_copy(self):
 
 
 class TestCommonTypes(tm.TestCase):
+
     def test_numpy_dtypes(self):
         # (source_types, destination_type)
         testcases = (
@@ -218,18 +219,43 @@ def test_numpy_dtypes(self):
             ((np.complex128, np.int32), np.complex128),
             ((np.object, np.float32), np.object),
             ((np.object, np.int16), np.object),
+
+            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ns]')),
+             np.dtype('datetime64[ns]')),
+            ((np.dtype('timedelta64[ns]'), np.dtype('timedelta64[ns]')),
+             np.dtype('timedelta64[ns]')),
+
+            ((np.dtype('datetime64[ns]'), np.dtype('datetime64[ms]')),
+             np.dtype('datetime64[ns]')),
+            ((np.dtype('timedelta64[ms]'), np.dtype('timedelta64[ns]')),
+             np.dtype('timedelta64[ns]')),
+
+            ((np.dtype('datetime64[ns]'), np.dtype('timedelta64[ns]')),
+             np.object),
+            ((np.dtype('datetime64[ns]'), np.int64), np.object)
         )
         for src, common in testcases:
             self.assertEqual(_find_common_type(src), common)
 
+        with tm.assertRaises(ValueError):
+            # empty
+            _find_common_type([])
+
     def test_pandas_dtypes(self):
-        # TODO: not implemented yet
-        with self.assertRaises(TypeError):
-            self.assertEqual(_find_common_type([CategoricalDtype()]),
-                             CategoricalDtype)
-        with self.assertRaises(TypeError):
-            self.assertEqual(_find_common_type([DatetimeTZDtype()]),
-                             DatetimeTZDtype)
+        dtype = CategoricalDtype()
+        self.assertEqual(_find_common_type([dtype]), 'category')
+        self.assertEqual(_find_common_type([dtype, dtype]), 'category')
+        self.assertEqual(_find_common_type([np.object, dtype]), np.object)
+
+        dtype = DatetimeTZDtype(unit='ns', tz='US/Eastern')
+        self.assertEqual(_find_common_type([dtype, dtype]),
+                         'datetime64[ns, US/Eastern]')
+
+        for dtype2 in [DatetimeTZDtype(unit='ns', tz='Asia/Tokyo'),
+                       np.dtype('datetime64[ns]'), np.object, np.int64]:
+            self.assertEqual(_find_common_type([dtype, dtype2]), np.object)
+            self.assertEqual(_find_common_type([dtype2, dtype]), np.object)
+
 
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
diff --git a/pandas/types/cast.py b/pandas/types/cast.py
index 93be926fe1eeb..59c939126d2a4 100644
--- a/pandas/types/cast.py
+++ b/pandas/types/cast.py
@@ -866,8 +866,23 @@ def _possibly_cast_to_datetime(value, dtype, errors='raise'):
 
 def _find_common_type(types):
     """Find a common data type among the given dtypes."""
-    # TODO: enable using pandas-specific types
+
+    if len(types) == 0:
+        raise ValueError('no types given')
+
+    first = types[0]
+    # workaround for find_common_type([np.dtype('datetime64[ns]')] * 2)
+    # => object
+    if all(is_dtype_equal(first, t) for t in types[1:]):
+        return first
+
     if any(isinstance(t, ExtensionDtype) for t in types):
-        raise TypeError("Common type discovery is currently only "
-                        "supported for pure numpy dtypes.")
+        return np.object
+
+    # same kind but mixed units: normalize to nanosecond resolution
+    if all(is_datetime64_dtype(t) for t in types):
+        return np.dtype('datetime64[ns]')
+    if all(is_timedelta64_dtype(t) for t in types):
+        return np.dtype('timedelta64[ns]')
+
     return np.find_common_type(types, [])
diff --git a/pandas/types/common.py b/pandas/types/common.py
index bffff0357f329..39db0be3e416e 100644
--- a/pandas/types/common.py
+++ b/pandas/types/common.py
@@ -9,7 +9,7 @@
 from .generic import (ABCCategorical, ABCPeriodIndex,
                       ABCDatetimeIndex, ABCSeries,
                       ABCSparseArray, ABCSparseSeries)
-from .inference import is_integer, is_string_like
+from .inference import is_string_like
 from .inference import *  # noqa
 
 
@@ -386,33 +386,6 @@ def _validate_date_like_dtype(dtype):
                          (dtype.name, dtype.type.__name__))
 
 
-def _lcd_dtypes(a_dtype, b_dtype):
-    """ return the lcd dtype to hold these types """
-
-    if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype):
-        return _NS_DTYPE
-    elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype):
-        return _TD_DTYPE
-    elif is_complex_dtype(a_dtype):
-        if is_complex_dtype(b_dtype):
-            return a_dtype
-        return np.float64
-    elif is_integer_dtype(a_dtype):
-        if is_integer_dtype(b_dtype):
-            if a_dtype.itemsize == b_dtype.itemsize:
-                return a_dtype
-            return np.int64
-        return np.float64
-    elif is_float_dtype(a_dtype):
-        if is_float_dtype(b_dtype):
-            if a_dtype.itemsize == b_dtype.itemsize:
-                return a_dtype
-            else:
-                return np.float64
-        elif is_integer(b_dtype):
-            return np.float64
-    return np.object
-
 _string_dtypes = frozenset(map(_get_dtype_from_object, (binary_type,
                                                         text_type)))