Skip to content

Smarter formatting of timedelta and datetime columns #5701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 15, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ Improvements to existing features
- support ``dtypes`` on ``Panel``
- extend ``Panel.apply`` to allow arbitrary functions (rather than only ufuncs) (:issue:`1148`)
allow multiple axes to be used to operate on slabs of a ``Panel``
- The ``ArrayFormatter``\ s for ``datetime`` and ``timedelta64`` now intelligently
limit precision based on the values in the array (:issue:`3401`)

.. _release.bug_fixes-0.13.1:

Expand All @@ -99,6 +101,8 @@ Bug Fixes
- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`)
- Bug in DataFrame.tail with empty frame (:issue:`5846`)
- Bug in propagating metadata on ``resample`` (:issue:`5862`)
- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`)
- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`)

pandas 0.13.0
-------------
Expand Down
21 changes: 21 additions & 0 deletions doc/source/v0.13.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,27 @@ Enhancements
result
result.loc[:,:,'ItemA']

- The ``ArrayFormatter``\ s for ``datetime`` and ``timedelta64`` now intelligently
limit precision based on the values in the array (:issue:`3401`)

Previously output might look like:

.. code-block:: python

age today diff
0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00
1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00

Now the output looks like:

.. ipython:: python

df = DataFrame([ Timestamp('20010101'),
Timestamp('20040601') ], columns=['age'])
df['today'] = Timestamp('20130419')
df['diff'] = df['today']-df['age']
df

Experimental
~~~~~~~~~~~~

Expand Down
126 changes: 94 additions & 32 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,13 @@
from pandas.core.config import get_option, set_option, reset_option
import pandas.core.common as com
import pandas.lib as lib
from pandas.tslib import iNaT

import numpy as np

import itertools
import csv
from datetime import time

from pandas.tseries.period import PeriodIndex, DatetimeIndex

Expand Down Expand Up @@ -1609,7 +1611,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
if digits is None:
digits = get_option("display.precision")

fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
float_format=float_format,
formatter=formatter, space=space,
justify=justify)
Expand Down Expand Up @@ -1704,7 +1706,7 @@ def _val(x, threshold):
fmt_values = [_val(x, threshold) for x in self.values]
return _trim_zeros(fmt_values, self.na_rep)

def get_result(self):
def _format_strings(self):
if self.formatter is not None:
fmt_values = [self.formatter(x) for x in self.values]
else:
Expand Down Expand Up @@ -1732,64 +1734,124 @@ def get_result(self):
fmt_str = '%% .%de' % (self.digits - 1)
fmt_values = self._format_with(fmt_str)

return _make_fixed_width(fmt_values, self.justify)
return fmt_values


class IntArrayFormatter(GenericArrayFormatter):

def get_result(self):
if self.formatter:
formatter = self.formatter
else:
formatter = lambda x: '% d' % x
def _format_strings(self):
formatter = self.formatter or (lambda x: '% d' % x)

fmt_values = [formatter(x) for x in self.values]

return _make_fixed_width(fmt_values, self.justify)
return fmt_values


class Datetime64Formatter(GenericArrayFormatter):
def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
super(Datetime64Formatter, self).__init__(values, **kwargs)
self.nat_rep = nat_rep
self.date_format = date_format

def get_result(self):
if self.formatter:
formatter = self.formatter
else:
formatter = _format_datetime64
def _format_strings(self):
formatter = self.formatter or _get_format_datetime64_from_values(
self.values,
nat_rep=self.nat_rep,
date_format=self.date_format)

fmt_values = [formatter(x) for x in self.values]
return _make_fixed_width(fmt_values, self.justify)

return fmt_values

def _format_datetime64(x, tz=None):
if isnull(x):
return 'NaT'

stamp = lib.Timestamp(x, tz=tz)
return stamp._repr_base
def _format_datetime64(x, tz=None, nat_rep='NaT'):
if x is None or lib.checknull(x):
return nat_rep

if tz is not None or not isinstance(x, lib.Timestamp):
x = lib.Timestamp(x, tz=tz)

class Timedelta64Formatter(Datetime64Formatter):
return str(x)

def get_result(self):
if self.formatter:
formatter = self.formatter
else:

formatter = _format_timedelta64
def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None):
    """
    Render a single datetime-like value without its time-of-day part.

    Parameters
    ----------
    x : value to render; ``None`` and anything ``lib.checknull`` reports
        as null are rendered as ``nat_rep``
    nat_rep : string returned for null values
    date_format : optional ``strftime`` pattern; when falsy the
        Timestamp's ``_date_repr`` is returned instead

    Returns
    -------
    formatted : string
    """
    if x is None or lib.checknull(x):
        return nat_rep

    # anything that is not already a Timestamp is coerced to one
    stamp = x if isinstance(x, lib.Timestamp) else lib.Timestamp(x)

    if not date_format:
        return stamp._date_repr
    return stamp.strftime(date_format)


def _is_dates_only(values):
    """
    Return True when no non-null entry of ``values`` has a time component.

    ``np.datetime64`` entries are converted to ``lib.Timestamp`` before the
    check; ``None`` and entries that ``lib.checknull`` reports as null are
    skipped. The scan stops at the first entry whose
    ``_has_time_component()`` is true.
    """
    for value in values:
        if isinstance(value, np.datetime64):
            stamp = lib.Timestamp(value)
        else:
            stamp = value

        # nulls carry no information about the needed precision
        if stamp is None or lib.checknull(stamp):
            continue

        if stamp._has_time_component():
            return False

    return True


def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None):
    """
    Build a one-value formatting callable for datetime-like values.

    Parameters
    ----------
    is_dates_only : when true, the returned callable delegates to
        ``_format_datetime64_dateonly``; otherwise to ``_format_datetime64``
    nat_rep : string the returned callable uses for null values
    date_format : forwarded to the date-only formatter only

    Returns
    -------
    formatter : callable taking ``(x, tz=None)``; the date-only variant
        accepts ``tz`` but does not use it
    """
    if not is_dates_only:
        return lambda x, tz=None: _format_datetime64(x, tz=tz,
                                                     nat_rep=nat_rep)

    return lambda x, tz=None: _format_datetime64_dateonly(
        x, nat_rep=nat_rep, date_format=date_format)


def _get_format_datetime64_from_values(values,
                                       nat_rep='NaT',
                                       date_format=None):
    """
    Choose a datetime formatter based on the contents of ``values``.

    ``values`` is inspected with ``_is_dates_only`` and the result, together
    with ``nat_rep`` and ``date_format``, is handed to
    ``_get_format_datetime64``, whose return value is returned unchanged.
    """
    dates_only = _is_dates_only(values)
    return _get_format_datetime64(dates_only,
                                  nat_rep=nat_rep,
                                  date_format=date_format)


class Timedelta64Formatter(GenericArrayFormatter):

def _format_strings(self):
formatter = self.formatter or _get_format_timedelta64(self.values)

fmt_values = [formatter(x) for x in self.values]
return _make_fixed_width(fmt_values, self.justify)

return fmt_values


def _get_format_timedelta64(values):
values_int = values.astype(np.int64)

def _format_timedelta64(x):
if isnull(x):
return 'NaT'
consider_values = values_int != iNaT

return lib.repr_timedelta64(x)
one_day_in_nanos = (86400 * 1e9)
even_days = np.logical_and(consider_values, values_int % one_day_in_nanos != 0).sum() == 0
all_sub_day = np.logical_and(consider_values, np.abs(values_int) >= one_day_in_nanos).sum() == 0

format_short = even_days or all_sub_day
format = "short" if format_short else "long"

def impl(x):
if x is None or lib.checknull(x):
return 'NaT'
elif format_short and x == 0:
return "0 days" if even_days else "00:00:00"
else:
return lib.repr_timedelta64(x, format=format)

return impl


def _make_fixed_width(strings, justify='right', minimum=None, truncated=False):
if len(strings) == 0:

if len(strings) == 0 or justify == 'all':
return strings

_strlen = _strlen_func()
Expand Down
Loading