From 355e569e5c9edd35d47cb7fe9a1b2c63ec410be7 Mon Sep 17 00:00:00 2001
From: Ajay Saxena
Date: Sun, 4 Dec 2016 17:44:21 -0500
Subject: [PATCH 1/3] allowing datetime and timedelta datatype in pd cut bins

---
 pandas/tools/tile.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py
index f62bac9e951a7..f28ff94cce318 100644
--- a/pandas/tools/tile.py
+++ b/pandas/tools/tile.py
@@ -116,6 +116,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
 
     else:
         bins = np.asarray(bins)
+        bins, bin_dtype = _coerce_to_type(bins)
         if (np.diff(bins) < 0).any():
             raise ValueError('bins must increase monotonically.')
 

From ac919cff0d31d047dd05a63d4ebb15280980f206 Mon Sep 17 00:00:00 2001
From: Ajay Saxena
Date: Mon, 5 Dec 2016 12:54:30 -0500
Subject: [PATCH 2/3] added test for datetime bin type

---
 pandas/tools/tests/test_tile.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py
index 33d2a01b1256e..3ea6e97cdef50 100644
--- a/pandas/tools/tests/test_tile.py
+++ b/pandas/tools/tests/test_tile.py
@@ -313,6 +313,18 @@ def test_datetime_cut(self):
         result, bins = cut(data, 3, retbins=True)
         tm.assert_series_equal(Series(result), expected)
 
+    def test_datetime_bin(self):
+        data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
+        bins = [np.datetime64('2012-12-12'), np.datetime64('2012-12-14'),
+                np.datetime64('2012-12-16')]
+        result = cut(data, bins=bins)
+
+        expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]',
+                          '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
+                          ).astype("category", ordered=True)
+
+        tm.assert_series_equal(Series(result), expected)
+
 
 def curpath():
     pth, _ = os.path.split(os.path.abspath(__file__))

From 82bffa139f110edc6852b57588074963d3275384 Mon Sep 17 00:00:00 2001
From: Ajay Saxena
Date: Thu, 22 Dec 2016 01:35:52 -0500
Subject: [PATCH 3/3] added method for time type bins in pd cut and modified tests

---
Review note: the last check in test_datetime_bin now passes the bins built
by to_datetime() to cut(); it previously re-used bin_pydatetime, so that
input type was never exercised. The duplicated Timestamp converter in the
loop was dropped. Added/removed line counts are unchanged; the blob id on
the test_tile.py "index" line predates this edit.

 pandas/tools/tests/test_tile.py | 20 ++++++++++++++------
 pandas/tools/tile.py            | 16 +++++++++++++++-
 2 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py
index 3ea6e97cdef50..c9a96d80f35ba 100644
--- a/pandas/tools/tests/test_tile.py
+++ b/pandas/tools/tests/test_tile.py
@@ -12,7 +12,7 @@
 from pandas.core.algorithms import quantile
 from pandas.tools.tile import cut, qcut
 import pandas.tools.tile as tmod
-from pandas import to_datetime, DatetimeIndex
+from pandas import to_datetime, DatetimeIndex, Timestamp
 
 
 class TestCut(tm.TestCase):
@@ -315,14 +315,22 @@ def test_datetime_cut(self):
 
     def test_datetime_bin(self):
         data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
-        bins = [np.datetime64('2012-12-12'), np.datetime64('2012-12-14'),
-                np.datetime64('2012-12-16')]
-        result = cut(data, bins=bins)
-
+        bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
         expected = Series(['(2012-12-12 00:00:00, 2012-12-14 00:00:00]',
-                          '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
+                           '(2012-12-14 00:00:00, 2012-12-16 00:00:00]'],
                           ).astype("category", ordered=True)
 
+        for conv in [Timestamp, np.datetime64]:
+            bins = [conv(v) for v in bin_data]
+            result = cut(data, bins=bins)
+            tm.assert_series_equal(Series(result), expected)
+
+        bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
+        result = cut(data, bins=bin_pydatetime)
+        tm.assert_series_equal(Series(result), expected)
+
+        bins = to_datetime(bin_data)  # bins produced by to_datetime
+        result = cut(data, bins=bins)
         tm.assert_series_equal(Series(result), expected)
 
 
diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py
index f28ff94cce318..a372e113f1d7e 100644
--- a/pandas/tools/tile.py
+++ b/pandas/tools/tile.py
@@ -13,6 +13,7 @@
 from pandas.compat import zip
 from pandas import to_timedelta, to_datetime
 from pandas.types.common import is_datetime64_dtype, is_timedelta64_dtype
+from pandas.lib import infer_dtype
 
 import numpy as np
 
@@ -116,7 
+117,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
 
     else:
         bins = np.asarray(bins)
-        bins, bin_dtype = _coerce_to_type(bins)
+        bins = _convert_bin_to_numeric_type(bins)
         if (np.diff(bins) < 0).any():
             raise ValueError('bins must increase monotonically.')
 
@@ -328,6 +329,19 @@ def _coerce_to_type(x):
     return x, dtype
 
 
+def _convert_bin_to_numeric_type(x):
+    """
+    Return datetime-like / timedelta-like bins as their int64 view;
+    bins of any other inferred dtype are handed back unchanged.
+    """
+    kind = infer_dtype(x)
+    if kind in ('timedelta', 'timedelta64'):
+        return to_timedelta(x).view(np.int64)
+    if kind in ('datetime', 'datetime64'):
+        return to_datetime(x).view(np.int64)
+    return x
+
+
 def _preprocess_for_cut(x):
     """
     handles preprocessing for cut where we convert passed