INT: add TimeDeltaBlock support in internals

jreback · jreback · commit 36d49a954414 · 2013-09-07T17:03:20.000-04:00
ENH: GH3371 support timedelta fillna
diff --git a/doc/source/release.rst b/doc/source/release.rst
@@ -67,6 +67,8 @@ pandas 0.13
   - A Series of dtype ``timedelta64[ns]`` can now be divided by another
     ``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
     is frequency conversion.
+  - Timedeltas support ``fillna`` with an integer interpreted as seconds,
+    or a ``timedelta`` (:issue:`3371`)
   - Performance improvements with ``__getitem__`` on ``DataFrames`` with
     when the key is a column
   - Support for using a ``DatetimeIndex/PeriodsIndex`` directly in a datelike calculation
diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst
@@ -1195,6 +1195,15 @@ issues). ``idxmin, idxmax`` are supported as well.
    df.min().idxmax()
    df.min(axis=1).idxmin()
 
+You can use ``fillna`` on timedeltas. Integers will be interpreted as seconds.
+You can pass a ``timedelta`` to fill with a particular value.
+
+.. ipython:: python
+
+   y.fillna(0)
+   y.fillna(10)
+   y.fillna(timedelta(days=-1,seconds=5))
+
 .. _timeseries.timedeltas_convert:
 
 Time Deltas & Conversions
diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt
@@ -195,6 +195,7 @@ Enhancements
   - NaN handing in get_dummies (:issue:`4446`) with `dummy_na`
 
     .. ipython:: python
+
        # previously, nan was erroneously counted as 2 here
        # now it is not counted at all
        get_dummies([1, 2, np.nan])
@@ -237,10 +238,17 @@ Enhancements
          from pandas import offsets
          td + offsets.Minute(5) + offsets.Milli(5)
 
-    - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
-      ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
-      the bandwidth, and to gkde.evaluate() to specify the indicies at which it
-      is evaluated, respecttively. See scipy docs.
+    - ``fillna`` is now supported for timedeltas (:issue:`3371`)
+
+      .. ipython:: python
+
+         td.fillna(0)
+         td.fillna(timedelta(days=1,seconds=5))
+
+  - ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
+    ``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
+    the bandwidth, and to gkde.evaluate() to specify the indices at which it
+    is evaluated, respectively. See scipy docs.
 
 .. _whatsnew_0130.refactoring:
 
diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1,6 +1,6 @@
 import itertools
 import re
-from datetime import datetime
+from datetime import datetime, timedelta
 import copy
 from collections import defaultdict
 
@@ -41,6 +41,7 @@ class Block(PandasObject):
     is_integer = False
     is_complex = False
     is_datetime = False
+    is_timedelta = False
     is_bool = False
     is_object = False
     is_sparse = False
@@ -326,6 +327,8 @@ def _maybe_downcast(self, blocks, downcast=None):
         # unless indicated
         if downcast is None and self.is_float:
             return blocks
+        elif downcast is None and (self.is_timedelta or self.is_datetime):
+            return blocks
 
         result_blocks = []
         for b in blocks:
@@ -485,6 +488,10 @@ def _try_cast_result(self, result, dtype=None):
         # may need to change the dtype here
         return _possibly_downcast_to_dtype(result, dtype)
 
+    def _try_operate(self, values):
+        """ return a version to operate on as the input """
+        return values
+
     def _try_coerce_args(self, values, other):
         """ provide coercion to our input arguments """
         return values, other
@@ -703,8 +710,11 @@ def interpolate(self, method='pad', axis=0, inplace=False,
                 else:
                     return [self.copy()]
 
+        fill_value = self._try_fill(fill_value)
         values = self.values if inplace else self.values.copy()
+        values = self._try_operate(values)
         values = com.interpolate_2d(values, method, axis, limit, fill_value)
+        values = self._try_coerce_result(values)
 
         blocks = [ make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True) ]
         return self._maybe_downcast(blocks, downcast)
@@ -1008,6 +1018,55 @@ def _try_cast(self, element):
     def should_store(self, value):
         return com.is_integer_dtype(value) and value.dtype == self.dtype
 
+class TimeDeltaBlock(IntBlock):
+    is_timedelta = True
+    _can_hold_na = True
+
+    def _try_fill(self, value):
+        """ coerce fill value: null -> iNaT; int (seconds)/timedelta -> timedelta64 """
+        if isinstance(value, type(tslib.NaT)) or isnull(value):
+            value = tslib.iNaT
+        elif isinstance(value, np.timedelta64):
+            pass
+        elif com.is_integer(value):
+            # integers are interpreted as seconds; scale to nanoseconds
+            value = np.timedelta64(int(value*1e9))
+        elif isinstance(value, timedelta):
+            value = np.timedelta64(value)
+
+        return value
+
+    def _try_operate(self, values):
+        """ return a version to operate on """
+        return values.view('i8')
+
+    def _try_coerce_result(self, result):
+        """ reverse of try_coerce_args / try_operate """
+        if isinstance(result, np.ndarray):
+            result = result.astype('m8[ns]')
+        elif isinstance(result, np.integer):
+            result = np.timedelta64(result)
+        return result
+
+    def should_store(self, value):
+        return issubclass(value.dtype.type, np.timedelta64)
+
+    def to_native_types(self, slicer=None, na_rep=None, **kwargs):
+        """ convert to our native types format, slicing if desired """
+
+        values = self.values
+        if slicer is not None:
+            values = values[:, slicer]
+        mask = isnull(values)
+
+        rvalues = np.empty(values.shape, dtype=object)
+        if na_rep is None:
+            na_rep = 'NaT'
+        rvalues[mask] = na_rep
+        imask = (-mask).ravel()
+        rvalues.flat[imask] = np.array([lib.repr_timedelta64(val)
+                                        for val in values.ravel()[imask]], dtype=object)
+        return rvalues.tolist()
 
 class BoolBlock(NumericBlock):
     is_bool = True
@@ -1216,6 +1275,10 @@ def _try_cast(self, element):
         except:
             return element
 
+    def _try_operate(self, values):
+        """ return a version to operate on """
+        return values.view('i8')
+
     def _try_coerce_args(self, values, other):
         """ provide coercion to our input arguments
             we are going to compare vs i8, so coerce to integer
@@ -1242,11 +1305,12 @@ def _try_coerce_result(self, result):
 
     def _try_fill(self, value):
         """ if we are a NaT, return the actual fill value """
-        if isinstance(value, type(tslib.NaT)):
+        if isinstance(value, type(tslib.NaT)) or isnull(value):
             value = tslib.iNaT
         return value
 
     def fillna(self, value, inplace=False, downcast=None):
+        # straight putmask here
         values = self.values if inplace else self.values.copy()
         mask = com.isnull(self.values)
         value = self._try_fill(value)
@@ -1267,12 +1331,9 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
             na_rep = 'NaT'
         rvalues[mask] = na_rep
         imask = (-mask).ravel()
-        if self.dtype == 'datetime64[ns]':
-            rvalues.flat[imask] = np.array(
-                [Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
-        elif self.dtype == 'timedelta64[ns]':
-            rvalues.flat[imask] = np.array([lib.repr_timedelta64(val)
-                                           for val in values.ravel()[imask]], dtype=object)
+        rvalues.flat[imask] = np.array(
+            [Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
+
         return rvalues.tolist()
 
     def should_store(self, value):
@@ -1551,6 +1612,8 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast
             klass = SparseBlock
         elif issubclass(vtype, np.floating):
             klass = FloatBlock
+        elif issubclass(vtype, np.integer) and issubclass(vtype, np.timedelta64):
+            klass = TimeDeltaBlock
         elif issubclass(vtype, np.integer) and not issubclass(vtype, np.datetime64):
             klass = IntBlock
         elif dtype == np.bool_:
@@ -3404,12 +3467,13 @@ def _lcd_dtype(l):
     have_float = len(counts[FloatBlock]) > 0
     have_complex = len(counts[ComplexBlock]) > 0
     have_dt64 = len(counts[DatetimeBlock]) > 0
+    have_td64 = len(counts[TimeDeltaBlock]) > 0
     have_sparse = len(counts[SparseBlock]) > 0
     have_numeric = have_float or have_complex or have_int
 
     if (have_object or
         (have_bool and have_numeric) or
-            (have_numeric and have_dt64)):
+            (have_numeric and (have_dt64 or have_td64))):
         return np.dtype(object)
     elif have_bool:
         return np.dtype(bool)
@@ -3432,6 +3496,8 @@ def _lcd_dtype(l):
 
     elif have_dt64 and not have_float and not have_complex:
         return np.dtype('M8[ns]')
+    elif have_td64 and not have_float and not have_complex:
+        return np.dtype('m8[ns]')
     elif have_complex:
         return np.dtype('c16')
     else:
diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py
@@ -2405,6 +2405,46 @@ def test_timedelta64_functions(self):
         expected = Series([timedelta(1)], dtype='timedelta64[ns]')
         assert_series_equal(result, expected)
 
+    def test_timedelta_fillna(self):
+        if com._np_version_under1p7:
+            raise nose.SkipTest("timedelta broken in np 1.6.1")
+
+        #GH 3371
+        from datetime import timedelta
+
+        s = Series([Timestamp('20130101'),Timestamp('20130101'),Timestamp('20130102'),Timestamp('20130103 9:01:01')])
+        td = s.diff()
+
+        # reg fillna
+        result = td.fillna(0)
+        expected = Series([timedelta(0),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)])
+        assert_series_equal(result,expected)
+
+        # interpreted as seconds
+        result = td.fillna(1)
+        expected = Series([timedelta(seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)])
+        assert_series_equal(result,expected)
+
+        result = td.fillna(timedelta(days=1,seconds=1))
+        expected = Series([timedelta(days=1,seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)])
+        assert_series_equal(result,expected)
+
+        result = td.fillna(np.timedelta64(int(1e9)))
+        expected = Series([timedelta(seconds=1),timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)])
+        assert_series_equal(result,expected)
+
+        from pandas import tslib
+        result = td.fillna(tslib.NaT)
+        expected = Series([tslib.NaT,timedelta(0),timedelta(1),timedelta(days=1,seconds=9*3600+60+1)],dtype='m8[ns]')
+        assert_series_equal(result,expected)
+
+        # ffill
+        td[2] = np.nan
+        result = td.ffill()
+        expected = td.fillna(0)
+        expected[0] = np.nan
+        assert_series_equal(result,expected)
+
     def test_sub_of_datetime_from_TimeSeries(self):
         from pandas.core import common as com
         from datetime import datetime