Skip to content

Commit 36d49a9

Browse files
committed
INT: add TimeDeltaBlock support in internals
ENH: GH3371 support timedelta fillna
1 parent 38a87b6 commit 36d49a9

File tree

5 files changed

+138
-13
lines changed

5 files changed

+138
-13
lines changed

doc/source/release.rst

+2
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ pandas 0.13
6767
- A Series of dtype ``timedelta64[ns]`` can now be divided by another
6868
``timedelta64[ns]`` object to yield a ``float64`` dtyped Series. This
6969
is frequency conversion.
70+
- Timedeltas support ``fillna`` with an integer interpreted as seconds,
71+
or a ``timedelta`` (:issue:`3371`)
7072
- Performance improvements with ``__getitem__`` on ``DataFrames``
7173
when the key is a column
7274
- Support for using a ``DatetimeIndex/PeriodIndex`` directly in a datelike calculation

doc/source/timeseries.rst

+9
Original file line numberDiff line numberDiff line change
@@ -1195,6 +1195,15 @@ issues). ``idxmin, idxmax`` are supported as well.
11951195
df.min().idxmax()
11961196
df.min(axis=1).idxmin()
11971197
1198+
You can use ``fillna`` on timedeltas. Integers will be interpreted as seconds. You can
1199+
pass a timedelta to get a particular value.
1200+
1201+
.. ipython:: python
1202+
1203+
y.fillna(0)
1204+
y.fillna(10)
1205+
y.fillna(timedelta(days=-1,seconds=5))
1206+
11981207
.. _timeseries.timedeltas_convert:
11991208

12001209
Time Deltas & Conversions

doc/source/v0.13.0.txt

+12-4
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ Enhancements
195195
- NaN handling in get_dummies (:issue:`4446`) with `dummy_na`
196196

197197
.. ipython:: python
198+
198199
# previously, nan was erroneously counted as 2 here
199200
# now it is not counted at all
200201
get_dummies([1, 2, np.nan])
@@ -237,10 +238,17 @@ Enhancements
237238
from pandas import offsets
238239
td + offsets.Minute(5) + offsets.Milli(5)
239240

240-
- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
241-
``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
242-
the bandwidth, and to gkde.evaluate() to specify the indices at which it
243-
is evaluated, respectively. See scipy docs.
241+
- ``fillna`` is now supported for timedeltas
242+
243+
.. ipython:: python
244+
245+
td.fillna(0)
246+
td.fillna(timedelta(days=1,seconds=5))
247+
248+
- ``plot(kind='kde')`` now accepts the optional parameters ``bw_method`` and
249+
``ind``, passed to scipy.stats.gaussian_kde() (for scipy >= 0.11.0) to set
250+
the bandwidth, and to gkde.evaluate() to specify the indices at which it
251+
is evaluated, respectively. See scipy docs.
244252

245253
.. _whatsnew_0130.refactoring:
246254

pandas/core/internals.py

+75-9
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import itertools
22
import re
3-
from datetime import datetime
3+
from datetime import datetime, timedelta
44
import copy
55
from collections import defaultdict
66

@@ -41,6 +41,7 @@ class Block(PandasObject):
4141
is_integer = False
4242
is_complex = False
4343
is_datetime = False
44+
is_timedelta = False
4445
is_bool = False
4546
is_object = False
4647
is_sparse = False
@@ -326,6 +327,8 @@ def _maybe_downcast(self, blocks, downcast=None):
326327
# unless indicated
327328
if downcast is None and self.is_float:
328329
return blocks
330+
elif downcast is None and (self.is_timedelta or self.is_datetime):
331+
return blocks
329332

330333
result_blocks = []
331334
for b in blocks:
@@ -485,6 +488,10 @@ def _try_cast_result(self, result, dtype=None):
485488
# may need to change the dtype here
486489
return _possibly_downcast_to_dtype(result, dtype)
487490

491+
def _try_operate(self, values):
492+
""" return a version to operate on as the input """
493+
return values
494+
488495
def _try_coerce_args(self, values, other):
489496
""" provide coercion to our input arguments """
490497
return values, other
@@ -703,8 +710,11 @@ def interpolate(self, method='pad', axis=0, inplace=False,
703710
else:
704711
return [self.copy()]
705712

713+
fill_value = self._try_fill(fill_value)
706714
values = self.values if inplace else self.values.copy()
715+
values = self._try_operate(values)
707716
values = com.interpolate_2d(values, method, axis, limit, fill_value)
717+
values = self._try_coerce_result(values)
708718

709719
blocks = [ make_block(values, self.items, self.ref_items, ndim=self.ndim, klass=self.__class__, fastpath=True) ]
710720
return self._maybe_downcast(blocks, downcast)
@@ -1008,6 +1018,55 @@ def _try_cast(self, element):
10081018
def should_store(self, value):
10091019
return com.is_integer_dtype(value) and value.dtype == self.dtype
10101020

1021+
class TimeDeltaBlock(IntBlock):
    """Block specialised for ``timedelta64[ns]`` values.

    Values are operated on through an ``i8`` view (see ``_try_operate``)
    and converted back to ``m8[ns]`` afterwards (see
    ``_try_coerce_result``).  Missing entries are represented by
    ``tslib.iNaT``, which is why the block reports ``_can_hold_na``.
    """
    is_timedelta = True
    _can_hold_na = True

    def _try_fill(self, value):
        """Translate a user-supplied fill value into one usable for filling.

        Parameters
        ----------
        value : NaT / null, np.timedelta64, integer, or datetime.timedelta
            * ``NaT`` or any value ``isnull`` reports as null becomes
              ``tslib.iNaT``.
            * ``np.timedelta64`` is passed through untouched.
            * an integer is interpreted as a number of seconds.
            * ``datetime.timedelta`` is wrapped in ``np.timedelta64``.

        Returns
        -------
        The converted value; anything not matching the cases above is
        returned unchanged.
        """
        if isinstance(value, type(tslib.NaT)) or isnull(value):
            value = tslib.iNaT
        elif isinstance(value, np.timedelta64):
            # checked before the integer test so that an already-typed
            # value is never rescaled as if it were seconds
            pass
        elif com.is_integer(value):
            # coerce seconds to nanoseconds; pure integer arithmetic keeps
            # the result exact where ``value * 1e9`` would round through a
            # float once the product exceeds 2**53
            value = np.timedelta64(int(value) * 1000000000)
        elif isinstance(value, timedelta):
            value = np.timedelta64(value)

        return value

    def _try_operate(self, values):
        """Return ``values`` viewed as ``i8``, the form to operate on."""
        return values.view('i8')

    def _try_coerce_result(self, result):
        """Reverse of ``_try_coerce_args`` / ``_try_operate``.

        ndarrays are cast to ``m8[ns]``, numpy integer scalars are wrapped
        in ``np.timedelta64``; any other result is returned as-is.
        """
        if isinstance(result, np.ndarray):
            result = result.astype('m8[ns]')
        elif isinstance(result, np.integer):
            result = np.timedelta64(result)
        return result

    def should_store(self, value):
        """Return True when ``value`` has a timedelta64 dtype."""
        return issubclass(value.dtype.type, np.timedelta64)

    def to_native_types(self, slicer=None, na_rep=None, **kwargs):
        """Convert to our native types format, slicing if desired.

        Parameters
        ----------
        slicer : optional
            When given, applied to the second axis of the block values.
        na_rep : optional
            Representation for null entries; defaults to ``'NaT'``.
        **kwargs
            Accepted but not used here.

        Returns
        -------
        list
            ``tolist()`` of an object array holding ``na_rep`` for nulls
            and ``lib.repr_timedelta64(val)`` for every other entry.
        """
        values = self.values
        if slicer is not None:
            values = values[:, slicer]
        mask = isnull(values)

        rvalues = np.empty(values.shape, dtype=object)
        if na_rep is None:
            na_rep = 'NaT'
        rvalues[mask] = na_rep

        # ``~`` rather than unary minus: negating a boolean array with
        # ``-`` is rejected by current NumPy releases
        imask = (~mask).ravel()
        rvalues.flat[imask] = np.array(
            [lib.repr_timedelta64(val) for val in values.ravel()[imask]],
            dtype=object)
        return rvalues.tolist()
10111070

10121071
class BoolBlock(NumericBlock):
10131072
is_bool = True
@@ -1216,6 +1275,10 @@ def _try_cast(self, element):
12161275
except:
12171276
return element
12181277

1278+
def _try_operate(self, values):
1279+
""" return a version to operate on """
1280+
return values.view('i8')
1281+
12191282
def _try_coerce_args(self, values, other):
12201283
""" provide coercion to our input arguments
12211284
we are going to compare vs i8, so coerce to integer
@@ -1242,11 +1305,12 @@ def _try_coerce_result(self, result):
12421305

12431306
def _try_fill(self, value):
12441307
""" if we are a NaT, return the actual fill value """
1245-
if isinstance(value, type(tslib.NaT)):
1308+
if isinstance(value, type(tslib.NaT)) or isnull(value):
12461309
value = tslib.iNaT
12471310
return value
12481311

12491312
def fillna(self, value, inplace=False, downcast=None):
1313+
# straight putmask here
12501314
values = self.values if inplace else self.values.copy()
12511315
mask = com.isnull(self.values)
12521316
value = self._try_fill(value)
@@ -1267,12 +1331,9 @@ def to_native_types(self, slicer=None, na_rep=None, **kwargs):
12671331
na_rep = 'NaT'
12681332
rvalues[mask] = na_rep
12691333
imask = (-mask).ravel()
1270-
if self.dtype == 'datetime64[ns]':
1271-
rvalues.flat[imask] = np.array(
1272-
[Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
1273-
elif self.dtype == 'timedelta64[ns]':
1274-
rvalues.flat[imask] = np.array([lib.repr_timedelta64(val)
1275-
for val in values.ravel()[imask]], dtype=object)
1334+
rvalues.flat[imask] = np.array(
1335+
[Timestamp(val)._repr_base for val in values.ravel()[imask]], dtype=object)
1336+
12761337
return rvalues.tolist()
12771338

12781339
def should_store(self, value):
@@ -1551,6 +1612,8 @@ def make_block(values, items, ref_items, klass=None, ndim=None, dtype=None, fast
15511612
klass = SparseBlock
15521613
elif issubclass(vtype, np.floating):
15531614
klass = FloatBlock
1615+
elif issubclass(vtype, np.integer) and issubclass(vtype, np.timedelta64):
1616+
klass = TimeDeltaBlock
15541617
elif issubclass(vtype, np.integer) and not issubclass(vtype, np.datetime64):
15551618
klass = IntBlock
15561619
elif dtype == np.bool_:
@@ -3404,12 +3467,13 @@ def _lcd_dtype(l):
34043467
have_float = len(counts[FloatBlock]) > 0
34053468
have_complex = len(counts[ComplexBlock]) > 0
34063469
have_dt64 = len(counts[DatetimeBlock]) > 0
3470+
have_td64 = len(counts[TimeDeltaBlock]) > 0
34073471
have_sparse = len(counts[SparseBlock]) > 0
34083472
have_numeric = have_float or have_complex or have_int
34093473

34103474
if (have_object or
34113475
(have_bool and have_numeric) or
3412-
(have_numeric and have_dt64)):
3476+
(have_numeric and (have_dt64 or have_td64))):
34133477
return np.dtype(object)
34143478
elif have_bool:
34153479
return np.dtype(bool)
@@ -3432,6 +3496,8 @@ def _lcd_dtype(l):
34323496

34333497
elif have_dt64 and not have_float and not have_complex:
34343498
return np.dtype('M8[ns]')
3499+
elif have_td64 and not have_float and not have_complex:
3500+
return np.dtype('m8[ns]')
34353501
elif have_complex:
34363502
return np.dtype('c16')
34373503
else:

pandas/tests/test_series.py

+40
Original file line numberDiff line numberDiff line change
@@ -2405,6 +2405,46 @@ def test_timedelta64_functions(self):
24052405
expected = Series([timedelta(1)], dtype='timedelta64[ns]')
24062406
assert_series_equal(result, expected)
24072407

2408+
def test_timedelta_fillna(self):
    """fillna / ffill on a timedelta64[ns] Series (GH 3371)."""
    if com._np_version_under1p7:
        raise nose.SkipTest("timedelta broken in np 1.6.1")

    from datetime import timedelta
    from pandas import tslib

    stamps = Series([Timestamp('20130101'), Timestamp('20130101'),
                     Timestamp('20130102'), Timestamp('20130103 9:01:01')])
    td = stamps.diff()

    # diff() leaves only the first slot missing; everything after it is
    # the same in each expected result below
    tail = [timedelta(0), timedelta(1),
            timedelta(days=1, seconds=9*3600+60+1)]

    # (fill value, expected first element)
    cases = [
        # regular fillna
        (0, timedelta(0)),
        # an integer is interpreted as seconds
        (1, timedelta(seconds=1)),
        # a timedelta is used as given
        (timedelta(days=1, seconds=1), timedelta(days=1, seconds=1)),
        # so is a np.timedelta64
        (np.timedelta64(int(1e9)), timedelta(seconds=1)),
    ]
    for fill, first in cases:
        result = td.fillna(fill)
        expected = Series([first] + tail)
        assert_series_equal(result, expected)

    # filling with NaT leaves the missing slot missing
    result = td.fillna(tslib.NaT)
    expected = Series([tslib.NaT] + tail, dtype='m8[ns]')
    assert_series_equal(result, expected)

    # ffill
    td[2] = np.nan
    result = td.ffill()
    expected = td.fillna(0)
    expected[0] = np.nan
    assert_series_equal(result, expected)
2447+
24082448
def test_sub_of_datetime_from_TimeSeries(self):
24092449
from pandas.core import common as com
24102450
from datetime import datetime

0 commit comments

Comments
 (0)