diff --git a/doc/source/release.rst b/doc/source/release.rst index 1ae70b1d93420..6e764e39b4db8 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -62,6 +62,8 @@ API Changes when detecting chained assignment, related (:issue:`5938`) - DataFrame.head(0) returns self instead of empty frame (:issue:`5846`) - ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`) + - ``convert_objects`` now accepts a ``convert_timedeltas='coerce'`` argument to allow forced dtype conversion of + timedeltas (:issue:`5458`, :issue:`5689`) Experimental Features ~~~~~~~~~~~~~~~~~~~~~ @@ -78,12 +80,13 @@ Improvements to existing features - support ``dtypes`` property on ``Series/Panel/Panel4D`` - extend ``Panel.apply`` to allow arbitrary functions (rather than only ufuncs) (:issue:`1148`) allow multiple axes to be used to operate on slabs of a ``Panel`` - - The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently + - The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently limit precision based on the values in the array (:issue:`3401`) - pd.show_versions() is now available for convenience when reporting issues. - perf improvements to Series.str.extract (:issue:`5944`) - perf improvments in ``dtypes/ftypes`` methods (:issue:`5968`) - perf improvments in indexing with object dtypes (:issue:`5968`) + - improved dtype inference for ``timedelta``-like objects passed to constructors (:issue:`5458`, :issue:`5689`) .. _release.bug_fixes-0.13.1: @@ -122,6 +125,7 @@ Bug Fixes - Recent changes in IPython cause warnings to be emitted when using previous versions of pandas in QTConsole, now fixed. If you're using an older version and need to supress the warnings, see (:issue:`5922`). 
+ - Bug in merging ``timedelta`` dtypes (:issue:`5695`) pandas 0.13.0 ------------- diff --git a/doc/source/v0.13.1.txt b/doc/source/v0.13.1.txt index 31004d24e56a6..08c8eb76caaf8 100644 --- a/doc/source/v0.13.1.txt +++ b/doc/source/v0.13.1.txt @@ -3,7 +3,7 @@ v0.13.1 (???) ------------- -This is a major release from 0.13.0 and includes a number of API changes, several new features and +This is a minor release from 0.13.0 and includes a number of API changes, several new features and enhancements along with a large number of bug fixes. Highlights include: @@ -29,6 +29,27 @@ Deprecations Enhancements ~~~~~~~~~~~~ +- The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently + limit precision based on the values in the array (:issue:`3401`) + + Previously output might look like: + + .. code-block:: python + + age today diff + 0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00 + 1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00 + + Now the output looks like: + + .. ipython:: python + + df = DataFrame([ Timestamp('20010101'), + Timestamp('20040601') ], columns=['age']) + df['today'] = Timestamp('20130419') + df['diff'] = df['today']-df['age'] + df + - ``Panel.apply`` will work on non-ufuncs. See :ref:`the docs`. .. ipython:: python @@ -83,27 +104,6 @@ Enhancements result result.loc[:,:,'ItemA'] -- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently - limit precision based on the values in the array (:issue:`3401`) - - Previously output might look like: - - .. code-block:: python - - age today diff - 0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00 - 1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00 - - Now the output looks like: - - .. 
ipython:: python - - df = DataFrame([ Timestamp('20010101'), - Timestamp('20040601') ], columns=['age']) - df['today'] = Timestamp('20130419') - df['diff'] = df['today']-df['age'] - df - Experimental ~~~~~~~~~~~~ diff --git a/pandas/core/common.py b/pandas/core/common.py index 774ef02b594f3..5b585c44ca3b8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1514,7 +1514,8 @@ def _values_from_object(o): def _possibly_convert_objects(values, convert_dates=True, - convert_numeric=True): + convert_numeric=True, + convert_timedeltas=True): """ if we have an object dtype, try to coerce dates and/or numbers """ # if we have passed in a list or scalar @@ -1539,6 +1540,22 @@ def _possibly_convert_objects(values, convert_dates=True, values = lib.maybe_convert_objects( values, convert_datetime=convert_dates) + # convert timedeltas + if convert_timedeltas and values.dtype == np.object_: + + if convert_timedeltas == 'coerce': + from pandas.tseries.timedeltas import \ + _possibly_cast_to_timedelta + new_values = _possibly_cast_to_timedelta(values, coerce=True) + + # if the coerced result is all-null, keep the original values + if not isnull(new_values).all(): + values = new_values + + else: + values = lib.maybe_convert_objects( + values, convert_timedelta=convert_timedeltas) + # convert to numeric if values.dtype == np.object_: if convert_numeric: @@ -1624,7 +1641,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): elif is_timedelta64: from pandas.tseries.timedeltas import \ _possibly_cast_to_timedelta - value = _possibly_cast_to_timedelta(value) + value = _possibly_cast_to_timedelta(value, coerce='compat') except: pass @@ -1655,7 +1672,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False): elif inferred_type in ['timedelta', 'timedelta64']: from pandas.tseries.timedeltas import \ _possibly_cast_to_timedelta - value = _possibly_cast_to_timedelta(value) + value = _possibly_cast_to_timedelta(value, coerce='compat') return value diff --git a/pandas/core/frame.py 
b/pandas/core/frame.py index 82bc3ac25f68a..fbd49dbe6eeaf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3626,7 +3626,7 @@ def append(self, other, ignore_index=False, verify_integrity=False): index = None if other.name is None else [other.name] other = other.reindex(self.columns, copy=False) other = DataFrame(other.values.reshape((1, len(other))), - index=index, columns=self.columns) + index=index, columns=self.columns).convert_objects() elif isinstance(other, list) and not isinstance(other[0], DataFrame): other = DataFrame(other) if (self.columns.get_indexer(other.columns) >= 0).all(): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 4bf27c08cb253..f1e890216830a 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1844,16 +1844,18 @@ def copy(self, deep=True): return self._constructor(data).__finalize__(self) def convert_objects(self, convert_dates=True, convert_numeric=False, - copy=True): + convert_timedeltas=True, copy=True): """ Attempt to infer better dtype for object columns Parameters ---------- - convert_dates : if True, attempt to soft convert_dates, if 'coerce', + convert_dates : if True, attempt to soft convert dates, if 'coerce', force conversion (and non-convertibles get NaT) convert_numeric : if True attempt to coerce to numbers (including strings), non-convertibles get NaN + convert_timedeltas : if True, attempt to soft convert timedeltas, if 'coerce', + force conversion (and non-convertibles get NaT) copy : Boolean, if True, return copy, default is True Returns @@ -1863,6 +1865,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False, return self._constructor( self._data.convert(convert_dates=convert_dates, convert_numeric=convert_numeric, + convert_timedeltas=convert_timedeltas, copy=copy)).__finalize__(self) #---------------------------------------------------------------------- @@ -3174,23 +3177,22 @@ def abs(self): ------- abs: type of caller """ - obj = np.abs(self) # suprimo 
numpy 1.6 hacking + # for timedeltas if _np_version_under1p7: + + def _convert_timedeltas(x): + if x.dtype.kind == 'm': + return np.abs(x.view('i8')).astype(x.dtype) + return np.abs(x) + if self.ndim == 1: - if obj.dtype == 'm8[us]': - obj = obj.astype('m8[ns]') + return _convert_timedeltas(self) elif self.ndim == 2: - def f(x): - if x.dtype == 'm8[us]': - x = x.astype('m8[ns]') - return x - - if 'm8[us]' in obj.dtypes.values: - obj = obj.apply(f) + return self.apply(_convert_timedeltas) - return obj + return np.abs(self) def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None, **kwds): diff --git a/pandas/core/internals.py b/pandas/core/internals.py index fbea40eb76a0d..0603746cf9dc5 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1315,8 +1315,8 @@ def is_bool(self): """ return lib.is_bool_array(self.values.ravel()) - def convert(self, convert_dates=True, convert_numeric=True, copy=True, - by_item=True): + def convert(self, convert_dates=True, convert_numeric=True, convert_timedeltas=True, + copy=True, by_item=True): """ attempt to coerce any object types to better types return a copy of the block (if copy = True) by definition we ARE an ObjectBlock!!!!! 
@@ -1334,7 +1334,8 @@ def convert(self, convert_dates=True, convert_numeric=True, copy=True, values = com._possibly_convert_objects( values.ravel(), convert_dates=convert_dates, - convert_numeric=convert_numeric + convert_numeric=convert_numeric, + convert_timedeltas=convert_timedeltas, ).reshape(values.shape) values = _block_shape(values, ndim=self.ndim) items = self.items.take([i]) diff --git a/pandas/lib.pyx b/pandas/lib.pyx index afd8ac87589be..ea5071eab976c 100644 --- a/pandas/lib.pyx +++ b/pandas/lib.pyx @@ -14,7 +14,7 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem, Py_INCREF, PyTuple_SET_ITEM, PyList_Check, PyFloat_Check, PyString_Check, - PyBytes_Check, + PyBytes_Check, PyTuple_SetItem, PyTuple_New, PyObject_SetAttrString) @@ -31,7 +31,7 @@ from datetime import datetime as pydatetime # this is our tseries.pxd from datetime cimport * -from tslib cimport convert_to_tsobject +from tslib cimport convert_to_tsobject, convert_to_timedelta64 import tslib from tslib import NaT, Timestamp, repr_timedelta64 diff --git a/pandas/src/datetime.pxd b/pandas/src/datetime.pxd index 3e11c9d20fb0d..abd3bc3333adb 100644 --- a/pandas/src/datetime.pxd +++ b/pandas/src/datetime.pxd @@ -37,6 +37,7 @@ cdef extern from "datetime.h": bint PyDateTime_Check(object o) bint PyDate_Check(object o) bint PyTime_Check(object o) + bint PyDelta_Check(object o) object PyDateTime_FromDateAndTime(int year, int month, int day, int hour, int minute, int second, int us) diff --git a/pandas/src/inference.pyx b/pandas/src/inference.pyx index 84f1f3cb4904d..e23afad278ee7 100644 --- a/pandas/src/inference.pyx +++ b/pandas/src/inference.pyx @@ -1,4 +1,7 @@ cimport util +from tslib import NaT +from datetime import datetime, timedelta +iNaT = util.get_nat() _TYPE_MAP = { np.int8: 'integer', @@ -55,15 +58,26 @@ def infer_dtype(object _values): val = util.get_value_1d(values, 0) - if util.is_datetime64_object(val): + if util.is_datetime64_object(val) or val is NaT: if 
is_datetime64_array(values): return 'datetime64' + elif is_timedelta_or_timedelta64_array(values): + return 'timedelta' + elif util.is_integer_object(val): + # a timedelta will show true here as well + if is_timedelta(val): + if is_timedelta_or_timedelta64_array(values): + return 'timedelta' + if is_integer_array(values): return 'integer' elif is_integer_float_array(values): return 'mixed-integer-float' + elif is_timedelta_or_timedelta64_array(values): + return 'timedelta' return 'mixed-integer' + elif is_datetime(val): if is_datetime_array(values): return 'datetime' @@ -115,6 +129,9 @@ def infer_dtype_list(list values): pass +cdef inline bint is_null_datetimelike(v): + return util._checknull(v) or (util.is_integer_object(v) and v == iNaT) or v is NaT + cdef inline bint is_datetime(object o): return PyDateTime_Check(o) @@ -124,6 +141,9 @@ cdef inline bint is_date(object o): cdef inline bint is_time(object o): return PyTime_Check(o) +cdef inline bint is_timedelta(object o): + return PyDelta_Check(o) or util.is_timedelta64_object(o) + def is_bool_array(ndarray values): cdef: Py_ssize_t i, n = len(values) @@ -258,53 +278,58 @@ def is_unicode_array(ndarray values): def is_datetime_array(ndarray[object] values): cdef int i, n = len(values) + cdef object v if n == 0: return False for i in range(n): - if not is_datetime(values[i]): + v = values[i] + if not (is_datetime(v) or is_null_datetimelike(v)): return False return True def is_datetime64_array(ndarray values): cdef int i, n = len(values) + cdef object v if n == 0: return False for i in range(n): - if not util.is_datetime64_object(values[i]): + v = values[i] + if not (util.is_datetime64_object(v) or is_null_datetimelike(v)): return False return True -def is_timedelta(object o): - import datetime - return isinstance(o,datetime.timedelta) or isinstance(o,np.timedelta64) - def is_timedelta_array(ndarray values): - import datetime cdef int i, n = len(values) + cdef object v if n == 0: return False for i in range(n): - if 
not isinstance(values[i],datetime.timedelta): + v = values[i] + if not (PyDelta_Check(v) or is_null_datetimelike(v)): return False return True def is_timedelta64_array(ndarray values): cdef int i, n = len(values) + cdef object v if n == 0: return False for i in range(n): - if not isinstance(values[i],np.timedelta64): + v = values[i] + if not (util.is_timedelta64_object(v) or is_null_datetimelike(v)): return False return True def is_timedelta_or_timedelta64_array(ndarray values): - import datetime + """ infer with timedeltas and/or nat/none """ cdef int i, n = len(values) + cdef object v if n == 0: return False for i in range(n): - if not (isinstance(values[i],datetime.timedelta) or isinstance(values[i],np.timedelta64)): + v = values[i] + if not (is_timedelta(v) or is_null_datetimelike(v)): return False return True @@ -427,7 +452,7 @@ def maybe_convert_numeric(ndarray[object] values, set na_values, return ints def maybe_convert_objects(ndarray[object] objects, bint try_float=0, - bint safe=0, bint convert_datetime=0): + bint safe=0, bint convert_datetime=0, bint convert_timedelta=0): ''' Type inference function-- convert object array to proper dtype ''' @@ -438,9 +463,11 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, ndarray[int64_t] ints ndarray[uint8_t] bools ndarray[int64_t] idatetimes + ndarray[int64_t] itimedeltas bint seen_float = 0 bint seen_complex = 0 bint seen_datetime = 0 + bint seen_timedelta = 0 bint seen_int = 0 bint seen_bool = 0 bint seen_object = 0 @@ -455,8 +482,14 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, complexes = np.empty(n, dtype='c16') ints = np.empty(n, dtype='i8') bools = np.empty(n, dtype=np.uint8) - datetimes = np.empty(n, dtype='M8[ns]') - idatetimes = datetimes.view(np.int64) + + if convert_datetime: + datetimes = np.empty(n, dtype='M8[ns]') + idatetimes = datetimes.view(np.int64) + + if convert_timedelta: + timedeltas = np.empty(n, dtype='m8[ns]') + itimedeltas = 
timedeltas.view(np.int64) onan = np.nan fnan = np.nan @@ -481,9 +514,13 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, seen_object = 1 # objects[i] = val.astype('O') break - elif util.is_timedelta64_object(val): - seen_object = 1 - break + elif is_timedelta(val): + if convert_timedelta: + itimedeltas[i] = convert_to_timedelta64(val, 'ns', False) + seen_timedelta = 1 + else: + seen_object = 1 + break elif util.is_integer_object(val): seen_int = 1 floats[i] = val @@ -523,7 +560,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if not safe: if seen_null: - if not seen_bool and not seen_datetime: + if not seen_bool and not seen_datetime and not seen_timedelta: if seen_complex: return complexes elif seen_float or seen_int: @@ -533,6 +570,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen_datetime: if not seen_numeric: return datetimes + elif seen_timedelta: + if not seen_numeric: + return timedeltas else: if seen_complex: return complexes @@ -540,13 +580,13 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return floats elif seen_int: return ints - elif not seen_datetime and not seen_numeric: + elif not seen_datetime and not seen_numeric and not seen_timedelta: return bools.view(np.bool_) else: # don't cast int to float, etc. 
if seen_null: - if not seen_bool and not seen_datetime: + if not seen_bool and not seen_datetime and not seen_timedelta: if seen_complex: if not seen_int: return complexes @@ -558,6 +598,9 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, if seen_datetime: if not seen_numeric: return datetimes + elif seen_timedelta: + if not seen_numeric: + return timedeltas else: if seen_complex: if not seen_int: @@ -567,7 +610,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=0, return floats elif seen_int: return ints - elif not seen_datetime and not seen_numeric: + elif not seen_datetime and not seen_numeric and not seen_timedelta: return bools.view(np.bool_) return objects @@ -582,8 +625,6 @@ def try_parse_dates(ndarray[object] values, parser=None, Py_ssize_t i, n ndarray[object] result - from datetime import datetime, timedelta - n = len(values) result = np.empty(n, dtype='O') @@ -841,8 +882,10 @@ def map_infer_mask(ndarray arr, object f, ndarray[uint8_t] mask, result[i] = val if convert: - return maybe_convert_objects(result, try_float=0, - convert_datetime=0) + return maybe_convert_objects(result, + try_float=0, + convert_datetime=0, + convert_timedelta=0) return result @@ -877,8 +920,10 @@ def map_infer(ndarray arr, object f, bint convert=1): result[i] = val if convert: - return maybe_convert_objects(result, try_float=0, - convert_datetime=0) + return maybe_convert_objects(result, + try_float=0, + convert_datetime=0, + convert_timedelta=0) return result diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 6b4a9a2bc4c22..f3f3127bbe875 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -2246,23 +2246,46 @@ def test_operators_empty_int_corner(self): def test_constructor_dtype_timedelta64(self): + # basic td = Series([timedelta(days=i) for i in range(3)]) self.assert_(td.dtype == 'timedelta64[ns]') + td = Series([timedelta(days=1)]) + self.assert_(td.dtype == 'timedelta64[ns]') + + if 
not _np_version_under1p7: + td = Series([timedelta(days=1),timedelta(days=2),np.timedelta64(1,'s')]) + self.assert_(td.dtype == 'timedelta64[ns]') + # mixed with NaT from pandas import tslib - td = Series([timedelta(days=i) - for i in range(3)] + [tslib.NaT ], dtype='m8[ns]' ) + td = Series([timedelta(days=1),tslib.NaT ], dtype='m8[ns]' ) + self.assert_(td.dtype == 'timedelta64[ns]') + + td = Series([timedelta(days=1),np.nan ], dtype='m8[ns]' ) self.assert_(td.dtype == 'timedelta64[ns]') - td = Series([timedelta(days=i) - for i in range(3)] + [tslib.iNaT ], dtype='m8[ns]' ) + td = Series([np.timedelta64(300000000), pd.NaT],dtype='m8[ns]') self.assert_(td.dtype == 'timedelta64[ns]') - td = Series([timedelta(days=i) - for i in range(3)] + [np.nan ], dtype='m8[ns]' ) + # improved inference + # GH5689 + td = Series([np.timedelta64(300000000), pd.NaT]) self.assert_(td.dtype == 'timedelta64[ns]') + td = Series([np.timedelta64(300000000), tslib.iNaT]) + self.assert_(td.dtype == 'timedelta64[ns]') + + td = Series([np.timedelta64(300000000), np.nan]) + self.assert_(td.dtype == 'timedelta64[ns]') + + td = Series([pd.NaT, np.timedelta64(300000000)]) + self.assert_(td.dtype == 'timedelta64[ns]') + + if not _np_version_under1p7: + td = Series([np.timedelta64(1,'s')]) + self.assert_(td.dtype == 'timedelta64[ns]') + # these are frequency conversion astypes #for t in ['s', 'D', 'us', 'ms']: # self.assertRaises(TypeError, td.astype, 'm8[%s]' % t) @@ -2270,11 +2293,14 @@ def test_constructor_dtype_timedelta64(self): # valid astype td.astype('int64') - # this is an invalid casting - self.assertRaises(Exception, Series, [timedelta(days=i) - for i in range(3)] + ['foo' ], dtype='m8[ns]' ) + # invalid casting self.assertRaises(TypeError, td.astype, 'int32') + # this is an invalid casting + def f(): + Series([timedelta(days=1), 'foo'],dtype='m8[ns]') + self.assertRaises(Exception, f) + # leave as object here td = Series([timedelta(days=i) for i in range(3)] + ['foo']) 
self.assert_(td.dtype == 'object') @@ -2854,6 +2880,9 @@ def test_bfill(self): assert_series_equal(ts.bfill(), ts.fillna(method='bfill')) def test_sub_of_datetime_from_TimeSeries(self): + if _np_version_under1p7: + raise nose.SkipTest("timedelta broken in np 1.6.1") + from pandas.tseries.timedeltas import _possibly_cast_to_timedelta from datetime import datetime a = Timestamp(datetime(1993, 0o1, 0o7, 13, 30, 00)) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index dd7ab65869303..41a4cf9984c14 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -14,7 +14,7 @@ from pandas.core.index import (Index, MultiIndex, _get_combined_index, _ensure_index, _get_consensus_names, _all_indexes_same) -from pandas.core.internals import (IntBlock, BoolBlock, BlockManager, +from pandas.core.internals import (TimeDeltaBlock, IntBlock, BoolBlock, BlockManager, make_block, _consolidate) from pandas.util.decorators import cache_readonly, Appender, Substitution from pandas.core.common import (PandasError, ABCSeries, @@ -816,7 +816,7 @@ def reindex_block(self, block, axis, ref_items, copy=True): def _may_need_upcasting(blocks): for block in blocks: - if isinstance(block, (IntBlock, BoolBlock)): + if isinstance(block, (IntBlock, BoolBlock)) and not isinstance(block, TimeDeltaBlock): return True return False @@ -827,7 +827,10 @@ def _upcast_blocks(blocks): """ new_blocks = [] for block in blocks: - if isinstance(block, IntBlock): + if isinstance(block, TimeDeltaBlock): + # these are int blocks underlying, but are ok + newb = block + elif isinstance(block, IntBlock): newb = make_block(block.values.astype(float), block.items, block.ref_items, placement=block._ref_locs) elif isinstance(block, BoolBlock): diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index e3b448b650767..a0d90ac0920eb 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,7 +9,7 @@ import random from pandas.compat import range, 
lrange, lzip, zip -from pandas import compat +from pandas import compat, _np_version_under1p7 from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, @@ -791,6 +791,34 @@ def test_append_dtype_coerce(self): result = df1.append(df2,ignore_index=True) assert_frame_equal(result, expected) + def test_join_append_timedeltas(self): + + import datetime as dt + from pandas import NaT + + # timedelta64 issues with join/merge + # GH 5695 + if _np_version_under1p7: + raise nose.SkipTest("numpy < 1.7") + + d = {'d': dt.datetime(2013, 11, 5, 5, 56), 't': dt.timedelta(0, 22500)} + df = DataFrame(columns=list('dt')) + df = df.append(d, ignore_index=True) + result = df.append(d, ignore_index=True) + expected = DataFrame({'d': [dt.datetime(2013, 11, 5, 5, 56), + dt.datetime(2013, 11, 5, 5, 56) ], + 't': [ dt.timedelta(0, 22500), + dt.timedelta(0, 22500) ]}) + assert_frame_equal(result, expected) + + td = np.timedelta64(300000000) + lhs = DataFrame(Series([td,td],index=["A","B"])) + rhs = DataFrame(Series([td],index=["A"])) + + from pandas import NaT + result = lhs.join(rhs,rsuffix='r', how="left") + expected = DataFrame({ '0' : Series([td,td],index=list('AB')), '0r' : Series([td,NaT],index=list('AB')) }) + assert_frame_equal(result, expected) def test_overlapping_columns_error_message(self): # #2649 @@ -1763,7 +1791,24 @@ def test_concat_datetime64_block(self): df = DataFrame({'time': rng}) result = concat([df, df]) - self.assert_((result[:10]['time'] == rng).all()) + self.assert_((result.iloc[:10]['time'] == rng).all()) + self.assert_((result.iloc[10:]['time'] == rng).all()) + + def test_concat_timedelta64_block(self): + + # not friendly for < 1.7 + if _np_version_under1p7: + raise nose.SkipTest("numpy < 1.7") + + from pandas import to_timedelta + + rng = to_timedelta(np.arange(10),unit='s') + + df = DataFrame({'time': rng}) + + result = 
concat([df, df]) + self.assert_((result.iloc[:10]['time'] == rng).all()) + self.assert_((result.iloc[10:]['time'] == rng).all()) def test_concat_keys_with_none(self): # #1649 diff --git a/pandas/tseries/tests/test_timedeltas.py b/pandas/tseries/tests/test_timedeltas.py index 1d34c5b91d5ed..3d8ee87f6c42f 100644 --- a/pandas/tseries/tests/test_timedeltas.py +++ b/pandas/tseries/tests/test_timedeltas.py @@ -165,11 +165,24 @@ def conv(v): # single element conversion v = timedelta(seconds=1) result = to_timedelta(v,box=False) - expected = to_timedelta([v]) + expected = np.timedelta64(timedelta(seconds=1)) + self.assert_(result == expected) v = np.timedelta64(timedelta(seconds=1)) result = to_timedelta(v,box=False) - expected = to_timedelta([v]) + expected = np.timedelta64(timedelta(seconds=1)) + self.assert_(result == expected) + + def test_to_timedelta_via_apply(self): + _skip_if_numpy_not_friendly() + + # GH 5458 + expected = Series([np.timedelta64(1,'s')]) + result = Series(['00:00:01']).apply(to_timedelta) + tm.assert_series_equal(result, expected) + + result = Series([to_timedelta('00:00:01')]) + tm.assert_series_equal(result, expected) def test_timedelta_ops(self): _skip_if_numpy_not_friendly() diff --git a/pandas/tseries/timedeltas.py b/pandas/tseries/timedeltas.py index 835401a13403f..4a522d9874c4f 100644 --- a/pandas/tseries/timedeltas.py +++ b/pandas/tseries/timedeltas.py @@ -70,42 +70,16 @@ def _convert_listlike(arg, box): _whitespace = re.compile('^\s*$') def _coerce_scalar_to_timedelta_type(r, unit='ns'): - # kludgy here until we have a timedelta scalar - # handle the numpy < 1.7 case - - def conv(v): - if _np_version_under1p7: - return timedelta(microseconds=v/1000.0) - return np.timedelta64(v) + """ convert strings to timedelta; coerce to np.timedelta64""" if isinstance(r, compat.string_types): + + # we are already converting to nanoseconds converter = _get_string_converter(r, unit=unit) r = converter() - r = conv(r) - elif r == tslib.iNaT: - return r - 
elif isnull(r): - return np.timedelta64('NaT') - elif isinstance(r, np.timedelta64): - r = r.astype("m8[{0}]".format(unit.lower())) - elif is_integer(r): - r = tslib.cast_from_unit(r, unit) - r = conv(r) + unit='ns' - if _np_version_under1p7: - if not isinstance(r, timedelta): - raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) - if compat.PY3: - # convert to microseconds in timedelta64 - r = np.timedelta64(int(r.total_seconds()*1e9 + r.microseconds*1000)) - else: - return r - - if isinstance(r, timedelta): - r = np.timedelta64(r) - elif not isinstance(r, np.timedelta64): - raise AssertionError("Invalid type for timedelta scalar: %s" % type(r)) - return r.astype('timedelta64[ns]') + return tslib.convert_to_timedelta(r,unit) def _get_string_converter(r, unit='ns'): """ return a string converter for r to process the timedelta format """ @@ -189,7 +163,7 @@ def convert(td, dtype): td *= 1000 return td - if td == tslib.compat_NaT: + if isnull(td) or td == tslib.compat_NaT or td == tslib.iNaT: return tslib.iNaT # convert td value to a nanosecond value diff --git a/pandas/tslib.pxd b/pandas/tslib.pxd index a70f9883c5bb1..1452dbdca03ee 100644 --- a/pandas/tslib.pxd +++ b/pandas/tslib.pxd @@ -1,3 +1,4 @@ from numpy cimport ndarray, int64_t cdef convert_to_tsobject(object, object, object) +cdef convert_to_timedelta64(object, object, object) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index e303df23003cb..c2a727d7d3394 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -38,6 +38,12 @@ from pandas.compat import parse_date from sys import version_info +# numpy compat +from distutils.version import LooseVersion +_np_version = np.version.short_version +_np_version_under1p6 = LooseVersion(_np_version) < '1.6' +_np_version_under1p7 = LooseVersion(_np_version) < '1.7' + # GH3363 cdef bint PY2 = version_info[0] == 2 @@ -1149,48 +1155,80 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, return oresult -def 
array_to_timedelta64(ndarray[object] values, coerce=True): +def array_to_timedelta64(ndarray[object] values, coerce=False): """ convert an ndarray to an array of ints that are timedeltas force conversion if coerce = True, - else return an object array """ + else will raise if cannot convert """ cdef: Py_ssize_t i, n - object val - ndarray[int64_t] result + ndarray[int64_t] iresult n = values.shape[0] - result = np.empty(n, dtype='i8') + result = np.empty(n, dtype='m8[ns]') + iresult = result.view('i8') + for i in range(n): - val = values[i] + result[i] = convert_to_timedelta64(values[i], 'ns', coerce) + return iresult - # in py3 this is already an int, don't convert - if is_integer_object(val): - result[i] = val +def convert_to_timedelta(object ts, object unit='ns', coerce=False): + return convert_to_timedelta64(ts, unit, coerce) - elif isinstance(val,timedelta) or isinstance(val,np.timedelta64): +cdef convert_to_timedelta64(object ts, object unit, object coerce): + """ + Convert an incoming object to a timedelta64 if possible - if isinstance(val, np.timedelta64): - if val.dtype != 'm8[ns]': - val = val.astype('m8[ns]') - val = val.item() - else: - val = _delta_to_nanoseconds(np.timedelta64(val).item()) + Handle these types of objects: + - timedelta + - timedelta64 + - np.int64 (with unit providing a possible modifier) + - None/NaT - result[i] = val + if coerce, set a non-valid value to NaT - elif _checknull_with_nat(val): - result[i] = iNaT + Return a ns based int64 + # kludgy here until we have a timedelta scalar + # handle the numpy < 1.7 case + """ + if _checknull_with_nat(ts): + ts = np.timedelta64(iNaT) + elif util.is_datetime64_object(ts): + # only accept a NaT here + if ts.astype('int64') == iNaT: + ts = np.timedelta64(iNaT) + elif isinstance(ts, np.timedelta64): + ts = ts.astype("m8[{0}]".format(unit.lower())) + elif is_integer_object(ts): + if ts == iNaT: + ts = np.timedelta64(iNaT) else: + if util.is_array(ts): + ts = ts.astype('int64').item() + ts = 
cast_from_unit(ts, unit) + if _np_version_under1p7: + ts = timedelta(microseconds=ts/1000.0) + else: + ts = np.timedelta64(ts) - # just return, don't convert - if not coerce: - return values.copy() - - result[i] = iNaT - - return result + if _np_version_under1p7: + if not isinstance(ts, timedelta): + if coerce: + return np.timedelta64(iNaT) + raise ValueError("Invalid type for timedelta scalar: %s" % type(ts)) + if not PY2: + # convert to microseconds in timedelta64 + ts = np.timedelta64(int(ts.total_seconds()*1e9 + ts.microseconds*1000)) + else: + return ts + if isinstance(ts, timedelta): + ts = np.timedelta64(ts) + elif not isinstance(ts, np.timedelta64): + if coerce: + return np.timedelta64(iNaT) + raise ValueError("Invalid type for timedelta scalar: %s" % type(ts)) + return ts.astype('timedelta64[ns]') def repr_timedelta64(object value, format=None): """ @@ -1206,6 +1244,7 @@ def repr_timedelta64(object value, format=None): converted : Timestamp """ + cdef object ivalue ivalue = value.view('i8') diff --git a/test.py b/test.py new file mode 100644 index 0000000000000..a1cbac87ab2b3 --- /dev/null +++ b/test.py @@ -0,0 +1,12 @@ +import numpy as np +import pandas +from pandas import Series,DataFrame + +print pandas.__version__ + +s = Series(np.arange(1028.)) + +df = DataFrame({ i:s for i in range(1028) }) + +import pdb; pdb.set_trace() +df.apply(lambda x: np.corrcoef(x,s)[0,1])