From 6b5ca31c53c0e9246a6441609e900b92664e79c1 Mon Sep 17 00:00:00 2001
From: jreback <jeff@reback.net>
Date: Mon, 13 May 2013 18:05:33 -0400
Subject: [PATCH] BUG: (GH3593) fixed a bug in the incorrect conversion of
 datetime64[ns] in combine_first

---
 RELEASE.rst                 |  2 ++
 pandas/core/common.py       | 34 +++++++++++++++++++++++++++
 pandas/core/frame.py        | 47 ++++++++++++++++++++++++++++++++-----
 pandas/core/internals.py    | 16 ++++++++++---
 pandas/core/series.py       | 13 ++++++----
 pandas/tests/test_frame.py  | 19 +++++++++++++++
 pandas/tests/test_series.py |  2 +-
 7 files changed, 119 insertions(+), 14 deletions(-)

diff --git a/RELEASE.rst b/RELEASE.rst
index 4085d350f3766..862d458f34e22 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -104,6 +104,7 @@ pandas 0.11.1
   - ``combine_first`` not returning the same dtype in cases where it can (GH3552_)
   - Fixed bug with ``Panel.transpose`` argument aliases (GH3556_)
   - Fixed platform bug in ``PeriodIndex.take`` (GH3579_)
+  - Fixed bug in incorrect conversion of datetime64[ns] in ``combine_first`` (GH3593_)
   - Fixed bug in reset_index with ``NaN`` in a multi-index (GH3586_)
 
 .. _GH3164: https://github.com/pydata/pandas/issues/3164
@@ -145,6 +146,7 @@ pandas 0.11.1
 .. _GH3586: https://github.com/pydata/pandas/issues/3586
 .. _GH3493: https://github.com/pydata/pandas/issues/3493
 .. _GH3579: https://github.com/pydata/pandas/issues/3579
+.. _GH3593: https://github.com/pydata/pandas/issues/3593
 .. _GH3556: https://github.com/pydata/pandas/issues/3556
 
 
diff --git a/pandas/core/common.py b/pandas/core/common.py
index f71627be1296d..2da2db052cb93 100644
--- a/pandas/core/common.py
+++ b/pandas/core/common.py
@@ -921,6 +921,33 @@ def _possibly_downcast_to_dtype(result, dtype):
 
     return result
 
+def _lcd_dtypes(a_dtype, b_dtype):
+    """
+    Return the lowest-common-denominator dtype able to hold both dtypes.
+
+    datetime64/timedelta64 on either side win; integer/float pairs keep
+    ``a_dtype`` when item sizes match, else go to 64 bits; fallback is object.
+    """
+    if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype):
+        return _NS_DTYPE
+    elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype):
+        return _TD_DTYPE
+    elif is_complex_dtype(a_dtype):
+        # NOTE(review): complex vs. non-complex yields float64 -- confirm
+        return a_dtype if is_complex_dtype(b_dtype) else np.float64
+    elif is_integer_dtype(a_dtype):
+        if not is_integer_dtype(b_dtype):
+            return np.float64
+        return a_dtype if a_dtype.itemsize == b_dtype.itemsize else np.int64
+    elif is_float_dtype(a_dtype):
+        if is_float_dtype(b_dtype):
+            same_size = a_dtype.itemsize == b_dtype.itemsize
+            return a_dtype if same_size else np.float64
+        elif is_integer_dtype(b_dtype):
+            # dtype predicate like every other branch (was is_integer)
+            return np.float64
+    return np.object
+
 def _interp_wrapper(f, wrap_dtype, na_override=None):
     def wrapper(arr, mask, limit=None):
         view = arr.view(wrap_dtype)
@@ -1524,6 +1551,13 @@ def is_float_dtype(arr_or_dtype):
         tipo = arr_or_dtype.dtype.type
     return issubclass(tipo, np.floating)
 
+def is_complex_dtype(arr_or_dtype):
+    """ True if the dtype (or the dtype of the array-like) is complex """
+    # a bare np.dtype is inspected directly; anything else via its .dtype
+    given_dtype = isinstance(arr_or_dtype, np.dtype)
+    kind = (arr_or_dtype if given_dtype else arr_or_dtype.dtype).type
+    return issubclass(kind, np.complexfloating)
+
 
 def is_list_like(arg):
     return hasattr(arg, '__iter__') and not isinstance(arg, basestring) or hasattr(arg,'len')
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 3df95b27f8736..1b01c92f03a32 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3738,8 +3738,11 @@ def combine(self, other, func, fill_value=None, overwrite=True):
 
         result = {}
         for col in new_columns:
-            series = this[col].values
-            otherSeries = other[col].values
+            series = this[col]
+            otherSeries = other[col]
+
+            this_dtype = series.dtype
+            other_dtype = otherSeries.dtype
 
             this_mask = isnull(series)
             other_mask = isnull(otherSeries)
@@ -3756,18 +3759,40 @@ def combine(self, other, func, fill_value=None, overwrite=True):
                 series[this_mask] = fill_value
                 otherSeries[other_mask] = fill_value
 
-            arr = func(series, otherSeries)
+            # if we have different dtypes, possibly promote
+            new_dtype = this_dtype
+            if this_dtype != other_dtype:
+                new_dtype = com._lcd_dtypes(this_dtype,other_dtype)
+                series = series.astype(new_dtype)
+                otherSeries = otherSeries.astype(new_dtype)
+
+            # see if we need to be represented as i8 (datetimelike)
+            # try to keep us at this dtype
+            needs_i8_conversion = com.needs_i8_conversion(new_dtype)
+            if needs_i8_conversion:
+                this_dtype = new_dtype
+                arr = func(series, otherSeries, True)
+            else:
+                arr = func(series, otherSeries)
 
             if do_fill:
                 arr = com.ensure_float(arr)
                 arr[this_mask & other_mask] = NA
 
+            # try to downcast back to the original dtype
+            if needs_i8_conversion:
+                arr = com._possibly_cast_to_datetime(arr, this_dtype)
+            else:
+                arr = com._possibly_downcast_to_dtype(arr, this_dtype)
+
             result[col] = arr
 
         # convert_objects just in case
         return self._constructor(result, 
                                  index=new_index, 
-                                 columns=new_columns).convert_objects(copy=False)
+                                 columns=new_columns).convert_objects(
+            convert_dates=True,
+            copy=False)
 
     def combine_first(self, other):
         """
@@ -3788,8 +3813,18 @@ def combine_first(self, other):
         -------
         combined : DataFrame
         """
-        def combiner(x, y):
-            return expressions.where(isnull(x), y, x, raise_on_error=True)
+        def combiner(x, y, needs_i8_conversion=False):
+            # operate on the underlying arrays when x / y expose ``.values``
+            lhs = x.values if hasattr(x, 'values') else x
+            rhs = y.values if hasattr(y, 'values') else y
+            if not needs_i8_conversion:
+                mask = isnull(lhs)
+            else:
+                # take the null mask from x before viewing both sides as i8
+                mask = isnull(x)
+                lhs, rhs = lhs.view('i8'), rhs.view('i8')
+            return expressions.where(mask, rhs, lhs, raise_on_error=True)
+
         return self.combine(other, combiner, overwrite=False)
 
     def update(self, other, join='left', overwrite=True, filter_func=None,
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
index b6459b0e461b4..d058d20427ad7 100644
--- a/pandas/core/internals.py
+++ b/pandas/core/internals.py
@@ -258,14 +258,15 @@ def downcast(self, dtypes = None):
 
         return blocks
 
-    def astype(self, dtype, copy = True, raise_on_error = True):
+    def astype(self, dtype, copy = True, raise_on_error = True, values = None):
         """
         Coerce to the new type (if copy=True, return a new copy)
         raise on an except if raise == True
         """
         try:
-            newb = make_block(com._astype_nansafe(self.values, dtype, copy = copy),
-                              self.items, self.ref_items, fastpath=True)
+            if values is None:
+                values = com._astype_nansafe(self.values, dtype, copy = copy)
+            newb = make_block(values, self.items, self.ref_items, fastpath=True)
         except:
             if raise_on_error is True:
                 raise
@@ -708,6 +709,15 @@ def is_bool(self):
         """ we can be a bool if we have only bool values but are of type object """
         return lib.is_bool_array(self.values.ravel())
 
+    def astype(self, dtype, copy=True, raise_on_error=True, values=None):
+        """ allow astypes to datetime64[ns],timedelta64[ns] with coercion """
+        dtype = np.dtype(dtype)
+        # the passed-in ``values`` is ignored; None defers to the base cast
+        values = None
+        if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
+            values = com._possibly_cast_to_datetime(self.values, dtype)
+        return super(ObjectBlock, self).astype(dtype=dtype,copy=copy,raise_on_error=raise_on_error,values=values)
+
     def convert(self, convert_dates = True, convert_numeric = True, copy = True):
         """ attempt to coerce any object types to better types
             return a copy of the block (if copy = True)
diff --git a/pandas/core/series.py b/pandas/core/series.py
index cebf2f4ef9d1f..8a3f353aa7c4a 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -17,7 +17,8 @@
 from pandas.core.common import (isnull, notnull, _is_bool_indexer,
                                 _default_index, _maybe_promote, _maybe_upcast,
                                 _asarray_tuplesafe, is_integer_dtype,
-                                _infer_dtype_from_scalar, is_list_like)
+                                _infer_dtype_from_scalar, is_list_like,
+                                _NS_DTYPE, _TD_DTYPE)
 from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
                                _ensure_index, _handle_legacy_indexes)
 from pandas.core.indexing import _SeriesIndexer, _check_bool_indexer, _check_slice_bounds
@@ -929,9 +930,13 @@ def astype(self, dtype):
         """
         See numpy.ndarray.astype
         """
-        casted = com._astype_nansafe(self.values, dtype)
-        return self._constructor(casted, index=self.index, name=self.name,
-                                 dtype=casted.dtype)
+        dtype = np.dtype(dtype)
+        if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
+            values = com._possibly_cast_to_datetime(self.values,dtype)
+        else:
+            values = com._astype_nansafe(self.values, dtype)
+        return self._constructor(values, index=self.index, name=self.name,
+                                 dtype=values.dtype)
 
     def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
         """
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 7e7813e048bd1..ce24c72f75882 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -7907,6 +7907,25 @@ def test_combine_first_mixed_bug(self):
         expected = Series([True,True,False])
         assert_series_equal(result,expected) 
 
+        # GH 3593, converting datetime64[ns] incorrectly
+        df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
+        df1 = DataFrame({"a":[None, None, None]})
+        df2 = df1.combine_first(df0)
+        assert_frame_equal(df2,df0)
+
+        df2 = df0.combine_first(df1)
+        assert_frame_equal(df2,df0)
+
+        df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
+        df1 = DataFrame({"a":[datetime(2000, 1, 2), None, None]})
+        df2 = df1.combine_first(df0)
+        result = df0.copy()
+        result.iloc[0,:] = df1.iloc[0,:]
+        assert_frame_equal(df2,result)
+
+        df2 = df0.combine_first(df1)
+        assert_frame_equal(df2,df0)
+
     def test_update(self):
         df = DataFrame([[1.5, nan, 3.],
                         [1.5, nan, 3.],
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
index 6fbce9df753d8..94d29e9233fb6 100644
--- a/pandas/tests/test_series.py
+++ b/pandas/tests/test_series.py
@@ -1856,7 +1856,7 @@ def test_operators_timedelta64(self):
         v1 = date_range('2012-1-1', periods=3, freq='D')
         v2 = date_range('2012-1-2', periods=3, freq='D')
         rs = Series(v2) - Series(v1)
-        xp = Series(1e9 * 3600 * 24, rs.index).astype('timedelta64[ns]')
+        xp = Series(1e9 * 3600 * 24, rs.index).astype('int64').astype('timedelta64[ns]')
         assert_series_equal(rs, xp)
         self.assert_(rs.dtype=='timedelta64[ns]')