Skip to content

BUG: (GH3593) fixed a bug in the incorrect conversion of datetime64[ns] in combine_first #3595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 13, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ pandas 0.11.1
- ``combine_first`` not returning the same dtype in cases where it can (GH3552_)
- Fixed bug with ``Panel.transpose`` argument aliases (GH3556_)
- Fixed platform bug in ``PeriodIndex.take`` (GH3579_)
- Fixed bug in incorrect conversion of datetime64[ns] in ``combine_first`` (GH3593_)
- Fixed bug in reset_index with ``NaN`` in a multi-index (GH3586_)

.. _GH3164: https://github.com/pydata/pandas/issues/3164
Expand Down Expand Up @@ -145,6 +146,7 @@ pandas 0.11.1
.. _GH3586: https://github.com/pydata/pandas/issues/3586
.. _GH3493: https://github.com/pydata/pandas/issues/3493
.. _GH3579: https://github.com/pydata/pandas/issues/3579
.. _GH3593: https://github.com/pydata/pandas/issues/3593
.. _GH3556: https://github.com/pydata/pandas/issues/3556


Expand Down
34 changes: 34 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -921,6 +921,33 @@ def _possibly_downcast_to_dtype(result, dtype):

return result

def _lcd_dtypes(a_dtype, b_dtype):
    """
    Return the lowest-common-denominator dtype able to hold both inputs.

    Parameters
    ----------
    a_dtype, b_dtype : numpy.dtype
        The two dtypes to reconcile (``.itemsize`` is read from both).

    Returns
    -------
    dtype-like
        * ``_NS_DTYPE`` if either input is datetime64
        * ``_TD_DTYPE`` if either input is timedelta64
        * ``a_dtype`` when both are complex, or when both are integer /
          both are float with equal item sizes
        * ``np.int64`` for two integer dtypes of different item size
        * ``np.float64`` for the remaining numeric pairings handled below
        * ``object`` when no branch applies
    """
    # datetime-like dtypes win regardless of which side carries them
    if is_datetime64_dtype(a_dtype) or is_datetime64_dtype(b_dtype):
        return _NS_DTYPE
    elif is_timedelta64_dtype(a_dtype) or is_timedelta64_dtype(b_dtype):
        return _TD_DTYPE
    elif is_complex_dtype(a_dtype):
        if is_complex_dtype(b_dtype):
            return a_dtype
        # NOTE(review): a complex/non-complex pair resolves to float64, which
        # cannot hold an imaginary part -- confirm this is intended.
        return np.float64
    elif is_integer_dtype(a_dtype):
        if is_integer_dtype(b_dtype):
            if a_dtype.itemsize == b_dtype.itemsize:
                return a_dtype
            return np.int64
        return np.float64
    elif is_float_dtype(a_dtype):
        if is_float_dtype(b_dtype):
            if a_dtype.itemsize == b_dtype.itemsize:
                return a_dtype
            return np.float64
        # b_dtype is a dtype, so the dtype predicate is required here; the
        # scalar check ``is_integer`` never matches a dtype object.
        elif is_integer_dtype(b_dtype):
            return np.float64
    # builtin ``object`` is what the ``np.object`` alias referred to, and it
    # keeps working on NumPy releases where that alias no longer exists
    return object

def _interp_wrapper(f, wrap_dtype, na_override=None):
def wrapper(arr, mask, limit=None):
view = arr.view(wrap_dtype)
Expand Down Expand Up @@ -1524,6 +1551,13 @@ def is_float_dtype(arr_or_dtype):
tipo = arr_or_dtype.dtype.type
return issubclass(tipo, np.floating)

def is_complex_dtype(arr_or_dtype):
    """
    Tell whether a dtype, or the dtype of an array-like, is complex.

    ``arr_or_dtype`` is either a ``numpy.dtype`` or any object exposing a
    ``.dtype`` attribute; the result is True when the dtype's scalar type
    derives from ``np.complexfloating``.
    """
    # a bare dtype is used directly, anything else is asked for its dtype
    dtype = (arr_or_dtype if isinstance(arr_or_dtype, np.dtype)
             else arr_or_dtype.dtype)
    return issubclass(dtype.type, np.complexfloating)


def is_list_like(arg):
    """
    Return True for iterable, non-string arguments.

    An object counts as list-like when it has ``__iter__`` and is not a
    ``basestring``; failing that, the result is whether it has an attribute
    literally named ``len``.
    """
    if hasattr(arg, '__iter__') and not isinstance(arg, basestring):
        return True
    # NOTE(review): this probes an attribute called 'len', not '__len__' --
    # possibly a typo, kept as-is to preserve behaviour; confirm intent.
    return hasattr(arg, 'len')
Expand Down
47 changes: 41 additions & 6 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3738,8 +3738,11 @@ def combine(self, other, func, fill_value=None, overwrite=True):

result = {}
for col in new_columns:
series = this[col].values
otherSeries = other[col].values
series = this[col]
otherSeries = other[col]

this_dtype = series.dtype
other_dtype = otherSeries.dtype

this_mask = isnull(series)
other_mask = isnull(otherSeries)
Expand All @@ -3756,18 +3759,40 @@ def combine(self, other, func, fill_value=None, overwrite=True):
series[this_mask] = fill_value
otherSeries[other_mask] = fill_value

arr = func(series, otherSeries)
# if we have different dtypes, possibly promote
new_dtype = this_dtype
if this_dtype != other_dtype:
new_dtype = com._lcd_dtypes(this_dtype,other_dtype)
series = series.astype(new_dtype)
otherSeries = otherSeries.astype(new_dtype)

# see if we need to be represented as i8 (datetimelike)
# try to keep us at this dtype
needs_i8_conversion = com.needs_i8_conversion(new_dtype)
if needs_i8_conversion:
this_dtype = new_dtype
arr = func(series, otherSeries, True)
else:
arr = func(series, otherSeries)

if do_fill:
arr = com.ensure_float(arr)
arr[this_mask & other_mask] = NA

# try to downcast back to the original dtype
if needs_i8_conversion:
arr = com._possibly_cast_to_datetime(arr, this_dtype)
else:
arr = com._possibly_downcast_to_dtype(arr, this_dtype)

result[col] = arr

# convert_objects just in case
return self._constructor(result,
index=new_index,
columns=new_columns).convert_objects(copy=False)
columns=new_columns).convert_objects(
convert_dates=True,
copy=False)

def combine_first(self, other):
"""
Expand All @@ -3788,8 +3813,18 @@ def combine_first(self, other):
-------
combined : DataFrame
"""
def combiner(x, y):
return expressions.where(isnull(x), y, x, raise_on_error=True)
def combiner(x, y, needs_i8_conversion=False):
    """
    Pick values from ``x``, falling back to ``y`` wherever ``x`` is null.

    When ``needs_i8_conversion`` is set, the null mask is taken from ``x``
    itself and both value arrays are then viewed as ``'i8'`` before the
    selection; otherwise the mask is taken from the unwrapped values.
    """
    # unwrap objects that expose ``.values``; anything else is used directly
    left = x.values if hasattr(x, 'values') else x
    right = y.values if hasattr(y, 'values') else y

    if needs_i8_conversion:
        # the mask is computed on the original ``x``, before the i8 view
        null_mask = isnull(x)
        left = left.view('i8')
        right = right.view('i8')
    else:
        null_mask = isnull(left)

    return expressions.where(null_mask, right, left, raise_on_error=True)

return self.combine(other, combiner, overwrite=False)

def update(self, other, join='left', overwrite=True, filter_func=None,
Expand Down
16 changes: 13 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,14 +258,15 @@ def downcast(self, dtypes = None):

return blocks

def astype(self, dtype, copy = True, raise_on_error = True):
def astype(self, dtype, copy = True, raise_on_error = True, values = None):
"""
Coerce to the new type (if copy=True, return a new copy)
raise on an exception if raise_on_error == True
"""
try:
newb = make_block(com._astype_nansafe(self.values, dtype, copy = copy),
self.items, self.ref_items, fastpath=True)
if values is None:
values = com._astype_nansafe(self.values, dtype, copy = copy)
newb = make_block(values, self.items, self.ref_items, fastpath=True)
except:
if raise_on_error is True:
raise
Expand Down Expand Up @@ -708,6 +709,15 @@ def is_bool(self):
""" we can be a bool if we have only bool values but are of type object """
return lib.is_bool_array(self.values.ravel())

def astype(self, dtype, copy=True, raise_on_error=True, values=None):
    """
    Cast this object block to ``dtype``, coercing the block's values first
    when the target is datetime64[ns] or timedelta64[ns].

    The ``values`` argument passed by the caller is not consulted: it is
    recomputed here (coerced values for the two datetime-like targets,
    ``None`` otherwise) and then forwarded to the parent ``astype``.
    """
    dtype = np.dtype(dtype)
    wants_datetimelike = dtype == _NS_DTYPE or dtype == _TD_DTYPE

    # NOTE(review): other call sites in this change use
    # ``com._possibly_cast_to_datetime``; confirm that
    # ``com._possibly_convert_datetime`` is the intended helper here.
    values = (com._possibly_convert_datetime(self.values, dtype)
              if wants_datetimelike else None)

    return super(ObjectBlock, self).astype(dtype=dtype, copy=copy,
                                           raise_on_error=raise_on_error,
                                           values=values)

def convert(self, convert_dates = True, convert_numeric = True, copy = True):
""" attempt to coerce any object types to better types
return a copy of the block (if copy = True)
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
from pandas.core.common import (isnull, notnull, _is_bool_indexer,
_default_index, _maybe_promote, _maybe_upcast,
_asarray_tuplesafe, is_integer_dtype,
_infer_dtype_from_scalar, is_list_like)
_infer_dtype_from_scalar, is_list_like,
_NS_DTYPE, _TD_DTYPE)
from pandas.core.index import (Index, MultiIndex, InvalidIndexError,
_ensure_index, _handle_legacy_indexes)
from pandas.core.indexing import _SeriesIndexer, _check_bool_indexer, _check_slice_bounds
Expand Down Expand Up @@ -929,9 +930,13 @@ def astype(self, dtype):
"""
See numpy.ndarray.astype
"""
casted = com._astype_nansafe(self.values, dtype)
return self._constructor(casted, index=self.index, name=self.name,
dtype=casted.dtype)
dtype = np.dtype(dtype)
if dtype == _NS_DTYPE or dtype == _TD_DTYPE:
values = com._possibly_cast_to_datetime(self.values,dtype)
else:
values = com._astype_nansafe(self.values, dtype)
return self._constructor(values, index=self.index, name=self.name,
dtype=values.dtype)

def convert_objects(self, convert_dates=True, convert_numeric=True, copy=True):
"""
Expand Down
19 changes: 19 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -7907,6 +7907,25 @@ def test_combine_first_mixed_bug(self):
expected = Series([True,True,False])
assert_series_equal(result,expected)

# GH 3593, converting datetime64[ns] incorrectly
df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
df1 = DataFrame({"a":[None, None, None]})
df2 = df1.combine_first(df0)
assert_frame_equal(df2,df0)

df2 = df0.combine_first(df1)
assert_frame_equal(df2,df0)

df0 = DataFrame({"a":[datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]})
df1 = DataFrame({"a":[datetime(2000, 1, 2), None, None]})
df2 = df1.combine_first(df0)
result = df0.copy()
result.iloc[0,:] = df1.iloc[0,:]
assert_frame_equal(df2,result)

df2 = df0.combine_first(df1)
assert_frame_equal(df2,df0)

def test_update(self):
df = DataFrame([[1.5, nan, 3.],
[1.5, nan, 3.],
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1856,7 +1856,7 @@ def test_operators_timedelta64(self):
v1 = date_range('2012-1-1', periods=3, freq='D')
v2 = date_range('2012-1-2', periods=3, freq='D')
rs = Series(v2) - Series(v1)
xp = Series(1e9 * 3600 * 24, rs.index).astype('timedelta64[ns]')
xp = Series(1e9 * 3600 * 24, rs.index).astype('int64').astype('timedelta64[ns]')
assert_series_equal(rs, xp)
self.assert_(rs.dtype=='timedelta64[ns]')

Expand Down