Skip to content

Commit 273db34

Browse files
Ajay Saxena authored and committed
Initial iteration of enhancing pd.cut to support datetime and timedelta types
1 parent 75b606a commit 273db34

File tree

1 file changed

+27
-9
lines changed

pandas/tools/tile.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
import pandas.core.algorithms as algos
1212
import pandas.core.nanops as nanops
1313
from pandas.compat import zip
14-
14+
from pandas.tseries.timedeltas import to_timedelta
15+
from pandas.types.common import (needs_i8_conversion)
1516
import numpy as np
1617

1718

@@ -81,6 +82,13 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
8182
array([1, 1, 1, 1, 1], dtype=int64)
8283
"""
8384
# NOTE: this binning code is changed a bit from histogram for var(x) == 0
85+
# for handling the cut for datetime and timedelta objects
86+
if needs_i8_conversion(x):
87+
x = x.values.view('i8')
88+
time_data = True
89+
else:
90+
time_data = False
91+
8492
if not np.iterable(bins):
8593
if is_scalar(bins) and bins < 1:
8694
raise ValueError("`bins` should be a positive integer.")
@@ -116,7 +124,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
116124

117125
return _bins_to_cuts(x, bins, right=right, labels=labels,
118126
retbins=retbins, precision=precision,
119-
include_lowest=include_lowest)
127+
include_lowest=include_lowest, time_data=time_data)
120128

121129

122130
def qcut(x, q, labels=None, retbins=False, precision=3):
@@ -176,7 +184,8 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
176184

177185

178186
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
179-
precision=3, name=None, include_lowest=False):
187+
precision=3, name=None, include_lowest=False,
188+
time_data=False):
180189
x_is_series = isinstance(x, Series)
181190
series_index = None
182191

@@ -205,7 +214,8 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
205214
while True:
206215
try:
207216
levels = _format_levels(bins, precision, right=right,
208-
include_lowest=include_lowest)
217+
include_lowest=include_lowest,
218+
time_data=time_data)
209219
except ValueError:
210220
increases += 1
211221
precision += 1
@@ -239,7 +249,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
239249

240250

241251
def _format_levels(bins, prec, right=True,
242-
include_lowest=False):
252+
include_lowest=False, time_data=False):
243253
fmt = lambda v: _format_label(v, precision=prec)
244254
if right:
245255
levels = []
@@ -249,16 +259,24 @@ def _format_levels(bins, prec, right=True,
249259
if a != b and fa == fb:
250260
raise ValueError('precision too low')
251261

252-
formatted = '(%s, %s]' % (fa, fb)
262+
if time_data:
263+
formatted = '(%s, %s]' % (to_timedelta(float(fa), unit='ns'),
264+
to_timedelta(float(fb), unit='ns'))
265+
else:
266+
formatted = '(%s, %s]' % (fa, fb)
253267

254268
levels.append(formatted)
255269

256270
if include_lowest:
257271
levels[0] = '[' + levels[0][1:]
258272
else:
259-
levels = ['[%s, %s)' % (fmt(a), fmt(b))
260-
for a, b in zip(bins, bins[1:])]
261-
273+
if time_data:
274+
levels = ['[%s, %s)' % (to_timedelta(float(fmt(fa)), unit='ns'),
275+
to_timedelta(float(fmt(b)), unit='ns'))
276+
for a, b in zip(bins, bins[1:])]
277+
else:
278+
levels = ['[%s, %s)' % (fmt(a), fmt(b))
279+
for a, b in zip(bins, bins[1:])]
262280
return levels
263281

264282

0 commit comments

Comments
 (0)