Skip to content

Commit bbb9e52

Browse files
committed
Merge pull request #5995 from jreback/timedelta_fixes
BUG/CLN: clarified timedelta inferences
2 parents e86e99c + e447c33 commit bbb9e52

17 files changed

+333
-147
lines changed

doc/source/release.rst

+5-1
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ API Changes
6262
when detecting chained assignment, related (:issue:`5938`)
6363
- DataFrame.head(0) returns self instead of empty frame (:issue:`5846`)
6464
- ``autocorrelation_plot`` now accepts ``**kwargs``. (:issue:`5623`)
65+
- ``convert_objects`` now accepts a ``convert_timedeltas='coerce'`` argument to allow forced dtype conversion of
66+
timedeltas (:issue:`5458`, :issue:`5689`)
6567

6668
Experimental Features
6769
~~~~~~~~~~~~~~~~~~~~~
@@ -78,12 +80,13 @@ Improvements to existing features
7880
- support ``dtypes`` property on ``Series/Panel/Panel4D``
7981
- extend ``Panel.apply`` to allow arbitrary functions (rather than only ufuncs) (:issue:`1148`)
8082
allow multiple axes to be used to operate on slabs of a ``Panel``
81-
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
83+
- The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently
8284
limits precision based on the values in the array (:issue:`3401`)
8385
- pd.show_versions() is now available for convenience when reporting issues.
8486
- perf improvements to Series.str.extract (:issue:`5944`)
8587
- perf improvements in ``dtypes/ftypes`` methods (:issue:`5968`)
8688
- perf improvements in indexing with object dtypes (:issue:`5968`)
89+
- improved dtype inference for ``timedelta``-like objects passed to constructors (:issue:`5458`, :issue:`5689`)
8790

8891
.. _release.bug_fixes-0.13.1:
8992

@@ -122,6 +125,7 @@ Bug Fixes
122125
- Recent changes in IPython cause warnings to be emitted when using previous versions
123126
of pandas in QTConsole, now fixed. If you're using an older version and
124127
need to suppress the warnings, see (:issue:`5922`).
128+
- Bug in merging ``timedelta`` dtypes (:issue:`5695`)
125129

126130
pandas 0.13.0
127131
-------------

doc/source/v0.13.1.txt

+22-22
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
v0.13.1 (???)
44
-------------
55

6-
This is a major release from 0.13.0 and includes a number of API changes, several new features and
6+
This is a minor release from 0.13.0 and includes a number of API changes, several new features and
77
enhancements along with a large number of bug fixes.
88

99
Highlights include:
@@ -29,6 +29,27 @@ Deprecations
2929
Enhancements
3030
~~~~~~~~~~~~
3131

32+
- The ``ArrayFormatter`` for ``datetime`` and ``timedelta64`` now intelligently
33+
limits precision based on the values in the array (:issue:`3401`)
34+
35+
Previously output might look like:
36+
37+
.. code-block:: python
38+
39+
age today diff
40+
0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00
41+
1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00
42+
43+
Now the output looks like:
44+
45+
.. ipython:: python
46+
47+
df = DataFrame([ Timestamp('20010101'),
48+
Timestamp('20040601') ], columns=['age'])
49+
df['today'] = Timestamp('20130419')
50+
df['diff'] = df['today']-df['age']
51+
df
52+
3253
- ``Panel.apply`` will work on non-ufuncs. See :ref:`the docs<basics.apply_panel>`.
3354

3455
.. ipython:: python
@@ -83,27 +104,6 @@ Enhancements
83104
result
84105
result.loc[:,:,'ItemA']
85106

86-
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
87-
limit precision based on the values in the array (:issue:`3401`)
88-
89-
Previously output might look like:
90-
91-
.. code-block:: python
92-
93-
age today diff
94-
0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00
95-
1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00
96-
97-
Now the output looks like:
98-
99-
.. ipython:: python
100-
101-
df = DataFrame([ Timestamp('20010101'),
102-
Timestamp('20040601') ], columns=['age'])
103-
df['today'] = Timestamp('20130419')
104-
df['diff'] = df['today']-df['age']
105-
df
106-
107107
Experimental
108108
~~~~~~~~~~~~
109109

pandas/core/common.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -1514,7 +1514,8 @@ def _values_from_object(o):
15141514

15151515

15161516
def _possibly_convert_objects(values, convert_dates=True,
1517-
convert_numeric=True):
1517+
convert_numeric=True,
1518+
convert_timedeltas=True):
15181519
""" if we have an object dtype, try to coerce dates and/or numbers """
15191520

15201521
# if we have passed in a list or scalar
@@ -1539,6 +1540,22 @@ def _possibly_convert_objects(values, convert_dates=True,
15391540
values = lib.maybe_convert_objects(
15401541
values, convert_datetime=convert_dates)
15411542

1543+
# convert timedeltas
1544+
if convert_timedeltas and values.dtype == np.object_:
1545+
1546+
if convert_timedeltas == 'coerce':
1547+
from pandas.tseries.timedeltas import \
1548+
_possibly_cast_to_timedelta
1549+
values = _possibly_cast_to_timedelta(values, coerce=True)
1550+
1551+
# if we are all nans then leave me alone
1552+
if not isnull(new_values).all():
1553+
values = new_values
1554+
1555+
else:
1556+
values = lib.maybe_convert_objects(
1557+
values, convert_timedelta=convert_timedeltas)
1558+
15421559
# convert to numeric
15431560
if values.dtype == np.object_:
15441561
if convert_numeric:
@@ -1624,7 +1641,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
16241641
elif is_timedelta64:
16251642
from pandas.tseries.timedeltas import \
16261643
_possibly_cast_to_timedelta
1627-
value = _possibly_cast_to_timedelta(value)
1644+
value = _possibly_cast_to_timedelta(value, coerce='compat')
16281645
except:
16291646
pass
16301647

@@ -1655,7 +1672,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce=False):
16551672
elif inferred_type in ['timedelta', 'timedelta64']:
16561673
from pandas.tseries.timedeltas import \
16571674
_possibly_cast_to_timedelta
1658-
value = _possibly_cast_to_timedelta(value)
1675+
value = _possibly_cast_to_timedelta(value, coerce='compat')
16591676

16601677
return value
16611678

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -3626,7 +3626,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
36263626
index = None if other.name is None else [other.name]
36273627
other = other.reindex(self.columns, copy=False)
36283628
other = DataFrame(other.values.reshape((1, len(other))),
3629-
index=index, columns=self.columns)
3629+
index=index, columns=self.columns).convert_objects()
36303630
elif isinstance(other, list) and not isinstance(other[0], DataFrame):
36313631
other = DataFrame(other)
36323632
if (self.columns.get_indexer(other.columns) >= 0).all():

pandas/core/generic.py

+15-13
Original file line numberDiff line numberDiff line change
@@ -1844,16 +1844,18 @@ def copy(self, deep=True):
18441844
return self._constructor(data).__finalize__(self)
18451845

18461846
def convert_objects(self, convert_dates=True, convert_numeric=False,
1847-
copy=True):
1847+
convert_timedeltas=True, copy=True):
18481848
"""
18491849
Attempt to infer better dtype for object columns
18501850
18511851
Parameters
18521852
----------
1853-
convert_dates : if True, attempt to soft convert_dates, if 'coerce',
1853+
convert_dates : if True, attempt to soft convert dates, if 'coerce',
18541854
force conversion (and non-convertibles get NaT)
18551855
convert_numeric : if True attempt to coerce to numbers (including
18561856
strings), non-convertibles get NaN
1857+
convert_timedeltas : if True, attempt to soft convert timedeltas, if 'coerce',
1858+
force conversion (and non-convertibles get NaT)
18571859
copy : Boolean, if True, return copy, default is True
18581860
18591861
Returns
@@ -1863,6 +1865,7 @@ def convert_objects(self, convert_dates=True, convert_numeric=False,
18631865
return self._constructor(
18641866
self._data.convert(convert_dates=convert_dates,
18651867
convert_numeric=convert_numeric,
1868+
convert_timedeltas=convert_timedeltas,
18661869
copy=copy)).__finalize__(self)
18671870

18681871
#----------------------------------------------------------------------
@@ -3174,23 +3177,22 @@ def abs(self):
31743177
-------
31753178
abs: type of caller
31763179
"""
3177-
obj = np.abs(self)
31783180

31793181
# suprimo numpy 1.6 hacking
3182+
# for timedeltas
31803183
if _np_version_under1p7:
3184+
3185+
def _convert_timedeltas(x):
3186+
if x.dtype.kind == 'm':
3187+
return np.abs(x.view('i8')).astype(x.dtype)
3188+
return np.abs(x)
3189+
31813190
if self.ndim == 1:
3182-
if obj.dtype == 'm8[us]':
3183-
obj = obj.astype('m8[ns]')
3191+
return _convert_timedeltas(self)
31843192
elif self.ndim == 2:
3185-
def f(x):
3186-
if x.dtype == 'm8[us]':
3187-
x = x.astype('m8[ns]')
3188-
return x
3189-
3190-
if 'm8[us]' in obj.dtypes.values:
3191-
obj = obj.apply(f)
3193+
return self.apply(_convert_timedeltas)
31923194

3193-
return obj
3195+
return np.abs(self)
31943196

31953197
def pct_change(self, periods=1, fill_method='pad', limit=None, freq=None,
31963198
**kwds):

pandas/core/internals.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1315,8 +1315,8 @@ def is_bool(self):
13151315
"""
13161316
return lib.is_bool_array(self.values.ravel())
13171317

1318-
def convert(self, convert_dates=True, convert_numeric=True, copy=True,
1319-
by_item=True):
1318+
def convert(self, convert_dates=True, convert_numeric=True, convert_timedeltas=True,
1319+
copy=True, by_item=True):
13201320
""" attempt to coerce any object types to better types
13211321
return a copy of the block (if copy = True)
13221322
by definition we ARE an ObjectBlock!!!!!
@@ -1334,7 +1334,8 @@ def convert(self, convert_dates=True, convert_numeric=True, copy=True,
13341334

13351335
values = com._possibly_convert_objects(
13361336
values.ravel(), convert_dates=convert_dates,
1337-
convert_numeric=convert_numeric
1337+
convert_numeric=convert_numeric,
1338+
convert_timedeltas=convert_timedeltas,
13381339
).reshape(values.shape)
13391340
values = _block_shape(values, ndim=self.ndim)
13401341
items = self.items.take([i])

pandas/lib.pyx

+2-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ from cpython cimport (PyDict_New, PyDict_GetItem, PyDict_SetItem,
1414
Py_INCREF, PyTuple_SET_ITEM,
1515
PyList_Check, PyFloat_Check,
1616
PyString_Check,
17-
PyBytes_Check,
17+
PyBytes_Check,
1818
PyTuple_SetItem,
1919
PyTuple_New,
2020
PyObject_SetAttrString)
@@ -31,7 +31,7 @@ from datetime import datetime as pydatetime
3131
# this is our tseries.pxd
3232
from datetime cimport *
3333

34-
from tslib cimport convert_to_tsobject
34+
from tslib cimport convert_to_tsobject, convert_to_timedelta64
3535
import tslib
3636
from tslib import NaT, Timestamp, repr_timedelta64
3737

pandas/src/datetime.pxd

+1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ cdef extern from "datetime.h":
3737
bint PyDateTime_Check(object o)
3838
bint PyDate_Check(object o)
3939
bint PyTime_Check(object o)
40+
bint PyDelta_Check(object o)
4041
object PyDateTime_FromDateAndTime(int year, int month, int day, int hour,
4142
int minute, int second, int us)
4243

0 commit comments

Comments
 (0)