Skip to content

Commit aa31fd1

Browse files
author
Tom Augspurger
committed
Merge pull request #7093 from TomAugspurger/quantile-datetime
BUG: Let DataFrame.quantile() handle datetime
2 parents c3bebac + 193f238 commit aa31fd1

File tree

3 files changed

+52
-12
lines changed

3 files changed

+52
-12
lines changed

doc/source/release.rst

+1
Original file line number | Diff line number | Diff line change
@@ -491,6 +491,7 @@ Bug Fixes
491491
- Bug in ``unstack`` raises ``ValueError`` when ``MultiIndex`` contains ``PeriodIndex`` (:issue:`4342`)
492492
- Bug in ``boxplot`` and ``hist`` draws unnecessary axes (:issue:`6769`)
493493
- Regression in ``groupby.nth()`` for out-of-bounds indexers (:issue:`6621`)
494+
- Bug in ``DataFrame.quantile`` failing to handle datetime values (:issue:`6965`)
494495

495496
pandas 0.13.1
496497
-------------

pandas/core/frame.py

+32-12
Original file line number | Diff line number | Diff line change
@@ -4188,23 +4188,43 @@ def quantile(self, q=0.5, axis=0, numeric_only=True):
41884188
"""
41894189
per = np.asarray(q) * 100
41904190

4191+
if not com.is_list_like(per):
4192+
per = [per]
4193+
q = [q]
4194+
squeeze = True
4195+
else:
4196+
squeeze = False
4197+
41914198
def f(arr, per):
4192-
arr = arr.values
4193-
if arr.dtype != np.float_:
4194-
arr = arr.astype(float)
4195-
arr = arr[notnull(arr)]
4196-
if len(arr) == 0:
4199+
if arr._is_datelike_mixed_type:
4200+
values = _values_from_object(arr).view('i8')
4201+
else:
4202+
values = arr.astype(float)
4203+
values = values[notnull(values)]
4204+
if len(values) == 0:
41974205
return NA
41984206
else:
4199-
return _quantile(arr, per)
4207+
return _quantile(values, per)
42004208

42014209
data = self._get_numeric_data() if numeric_only else self
4202-
if com.is_list_like(per):
4203-
from pandas.tools.merge import concat
4204-
return concat([data.apply(f, axis=axis, args=(x,)) for x in per],
4205-
axis=1, keys=per/100.).T
4206-
else:
4207-
return data.apply(f, axis=axis, args=(per,))
4210+
4211+
# need to know which cols are timestamp going in so that we can
4212+
# map timestamp over them after getting the quantile.
4213+
is_dt_col = data.dtypes.map(com.is_datetime64_dtype)
4214+
is_dt_col = is_dt_col[is_dt_col].index
4215+
4216+
quantiles = [[f(vals, x) for x in per]
4217+
for (_, vals) in data.iteritems()]
4218+
result = DataFrame(quantiles, index=data._info_axis, columns=q).T
4219+
if len(is_dt_col) > 0:
4220+
result[is_dt_col] = result[is_dt_col].applymap(lib.Timestamp)
4221+
if squeeze:
4222+
if result.shape == (1, 1):
4223+
result = result.T.iloc[:, 0] # don't want scalar
4224+
else:
4225+
result = result.T.squeeze()
4226+
result.name = None # For groupby, so it can set an index name
4227+
return result
42084228

42094229
def rank(self, axis=0, numeric_only=None, method='average',
42104230
na_option='keep', ascending=True, pct=False):

pandas/tests/test_frame.py

+19
Original file line number | Diff line number | Diff line change
@@ -10994,6 +10994,25 @@ def test_quantile_multi(self):
1099410994
index=[.1, .9])
1099510995
assert_frame_equal(result, expected)
1099610996

10997+
def test_quantile_datetime(self):
    """Check ``DataFrame.quantile`` on a frame that has a datetime column.

    Three cases are exercised:

    * the default call, where ``numeric_only=True`` drops the datetime
      column and only the numeric column is summarised;
    * ``numeric_only=False`` with a scalar ``q``, where the result is a
      Series whose datetime entry is a ``Timestamp``;
    * ``numeric_only=False`` with a list-like ``q``, where the result is
      a DataFrame indexed by the requested quantiles.
    """
    df = DataFrame({'a': pd.to_datetime(['2010', '2011']), 'b': [0, 5]})

    # exclude datetime: the default numeric_only=True keeps only column 'b'
    result = df.quantile(.5)
    expected = Series([2.5], index=['b'])
    # Compare explicitly; without this the numeric-only case is computed
    # but never verified.
    assert_series_equal(result, expected)

    # datetime: the midpoint of the two dates comes back as a Timestamp
    result = df.quantile(.5, numeric_only=False)
    expected = Series([Timestamp('2010-07-02 12:00:00'), 2.5],
                      index=['a', 'b'])
    assert_series_equal(result, expected)

    # datetime w/ multi: a list-like q yields one row per quantile
    result = df.quantile([.5], numeric_only=False)
    expected = DataFrame([[Timestamp('2010-07-02 12:00:00'), 2.5]],
                         index=[.5], columns=['a', 'b'])
    assert_frame_equal(result, expected)
11015+
1099711016
def test_cumsum(self):
1099811017
self.tsframe.ix[5:10, 0] = nan
1099911018
self.tsframe.ix[10:15, 1] = nan

0 commit comments

Comments
 (0)