From 42073a8ae3bbb7156af4862fec3e748c09bcf7c9 Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 7 Mar 2013 08:08:44 -0500 Subject: [PATCH 1/2] ENH: support min/max on timedelta64[ns] Series GH #2989 DOC: timedelta docs updates for min/max TST: python3 issues --- RELEASE.rst | 3 ++- doc/source/timeseries.rst | 24 ++++++++++++++++++++---- doc/source/v0.11.0.txt | 9 ++++----- pandas/core/nanops.py | 8 +++++++- pandas/tests/test_series.py | 9 +++++++++ 5 files changed, 42 insertions(+), 11 deletions(-) diff --git a/RELEASE.rst b/RELEASE.rst index 78e946006e1fb..1c6a9d4103ab8 100644 --- a/RELEASE.rst +++ b/RELEASE.rst @@ -131,7 +131,7 @@ pandas 0.11.0 - Support null checking on timedelta64, representing (and formatting) with NaT - Support setitem with np.nan value, converts to NaT - Support min/max ops in a Dataframe (abs not working, nor do we error on non-supported ops) - - Support idxmin/idxmax/abs in a Series (but with no NaT) + - Support idxmin/idxmax/abs/max/min in a Series (GH2989_, GH2982_) - Bug on in-place putmasking on an ``integer`` series that needs to be converted to ``float`` (GH2746_) - Bug in argsort of ``datetime64[ns]`` Series with ``NaT`` (GH2967_) @@ -160,6 +160,7 @@ pandas 0.11.0 .. _GH2973: https://github.com/pydata/pandas/issues/2973 .. _GH2967: https://github.com/pydata/pandas/issues/2967 .. _GH2982: https://github.com/pydata/pandas/issues/2982 +.. _GH2989: https://github.com/pydata/pandas/issues/2989 pandas 0.10.1 diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 78dd5cee9c8f9..1c1a0680e1f28 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -966,14 +966,30 @@ Some timedelta numeric like operations are supported. .. ipython:: python - s = Series(date_range('2012-1-1', periods=3, freq='D')) + td - timedelta(minutes=5,seconds=5,microseconds=5) + +``min, max`` and the corresponding ``idxmin, idxmax`` operations are supported on frames + +.. 
ipython:: python + df = DataFrame(dict(A = s - Timestamp('20120101')-timedelta(minutes=5,seconds=5), B = s - Series(date_range('2012-1-2', periods=3, freq='D')))) df - # timedelta arithmetic - td - timedelta(minutes=5,seconds=5,microseconds=5) - # min/max operations df.min() df.min(axis=1) + + df.idxmin() + df.idxmax() + +``min, max`` operations are supported on Series; these return a single-element ``timedelta64[ns]`` Series (this avoids +having to deal with numpy timedelta64 issues). ``idxmin, idxmax`` are supported as well. + +.. ipython:: python + + df.min().max() + df.min(axis=1).min() + + df.min().idxmax() + df.min(axis=1).idxmin() diff --git a/doc/source/v0.11.0.txt b/doc/source/v0.11.0.txt index f4c9d13c0d23e..ea174629c5fc9 100644 --- a/doc/source/v0.11.0.txt +++ b/doc/source/v0.11.0.txt @@ -258,8 +258,6 @@ Bug Fixes df = DataFrame(dict(A = s, B = td)) df s - s.max() - s - datetime(2011,1,1,3,5) - s + timedelta(minutes=5) df['C'] = df['A'] + df['B'] df df.dtypes @@ -274,10 +272,11 @@ Bug Fixes # works on lhs too s.max() - s - datetime(2011,1,1,3,5) - s - timedelta(minutes=5) + s - - Fix pretty-printing of infinite data structures, GH2978 + # some timedelta numeric operations are supported + td - timedelta(minutes=5,seconds=5,microseconds=5) + + - Fix pretty-printing of infinite data structures (closes GH2978_) See the `full release notes diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 93f06aae2b1b7..f841c0dbecd8e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -146,7 +146,13 @@ def _wrap_results(result,dtype): result = result.view(dtype) elif issubclass(dtype.type, np.timedelta64): if not isinstance(result, np.ndarray): - pass + + # this is a scalar timedelta result! 
+ # wrap the scalar in a single-element Series, + # as Series will do the right thing in py3 (and deals with the numpy 1.6.2 + # bug in which the result has dtype timedelta64[us]) + from pandas import Series + result = Series([result],dtype='timedelta64[ns]') else: result = result.view(dtype) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index ee288fda120d3..9ea5e59447475 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1838,6 +1838,15 @@ def test_timedelta64_functions(self): result = (s1-s2).abs() assert_series_equal(result,expected) + # max/min + result = td.max() + expected = Series([timedelta(2)],dtype='timedelta64[ns]') + assert_series_equal(result,expected) + + result = td.min() + expected = Series([timedelta(1)],dtype='timedelta64[ns]') + assert_series_equal(result,expected) + def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime From 1a70843ea4d891bdc887da627ce9da33686e902b Mon Sep 17 00:00:00 2001 From: jreback Date: Thu, 7 Mar 2013 13:08:46 -0500 Subject: [PATCH 2/2] ENH: optimization on possibily_convert_datetime to only try conversion in certain cases --- pandas/core/common.py | 43 +++++++++++++++++++++++++------------------ 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 97ab861d6a3a7..23c178ebb6e4f 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -955,10 +955,10 @@ def _possibly_cast_to_timedelta(value, coerce=True): def _possibly_cast_to_datetime(value, dtype, coerce = False): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ - if isinstance(dtype, basestring): - dtype = np.dtype(dtype) - if dtype is not None: + if isinstance(dtype, basestring): + dtype = np.dtype(dtype) + is_datetime64 = is_datetime64_dtype(dtype) is_timedelta64 = is_timedelta64_dtype(dtype) @@ -984,21 +984,28 @@ def 
_possibly_cast_to_datetime(value, dtype, coerce = False): except: pass - elif dtype is None: - # we might have a array (or single object) that is datetime like, and no dtype is passed - # don't change the value unless we find a datetime set - v = value - if not is_list_like(v): - v = [ v ] - if len(v): - inferred_type = lib.infer_dtype(v) - if inferred_type == 'datetime': - try: - value = tslib.array_to_datetime(np.array(v)) - except: - pass - elif inferred_type == 'timedelta': - value = _possibly_cast_to_timedelta(value) + else: + + # only do this if we have an array and the dtype of the array is not setup already + # we are not an integer/object, so don't bother with this conversion + if isinstance(value, np.ndarray) and not (issubclass(value.dtype.type, np.integer) or value.dtype == np.object_): + pass + + else: + # we might have a array (or single object) that is datetime like, and no dtype is passed + # don't change the value unless we find a datetime set + v = value + if not is_list_like(v): + v = [ v ] + if len(v): + inferred_type = lib.infer_dtype(v) + if inferred_type == 'datetime': + try: + value = tslib.array_to_datetime(np.array(v)) + except: + pass + elif inferred_type == 'timedelta': + value = _possibly_cast_to_timedelta(value) return value