Skip to content

Commit 7d0c26e

Browse files
committed
Merge pull request #5701 from cancan101/dt_formatting
Smarter formatting of timedelta and datetime columns
2 parents 78465c0 + cfb9c98 commit 7d0c26e

File tree

7 files changed

+439
-83
lines changed

7 files changed

+439
-83
lines changed

doc/source/release.rst

+4
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Improvements to existing features
7676
- support ``dtypes`` on ``Panel``
7777
- extend ``Panel.apply`` to allow arbitrary functions (rather than only ufuncs) (:issue:`1148`)
7878
allow multiple axes to be used to operate on slabs of a ``Panel``
79+
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
80+
limit precision based on the values in the array (:issue:`3401`)
7981

8082
.. _release.bug_fixes-0.13.1:
8183

@@ -99,6 +101,8 @@ Bug Fixes
99101
- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`)
100102
- Bug in DataFrame.tail with empty frame (:issue:`5846`)
101103
- Bug in propagating metadata on ``resample`` (:issue:`5862`)
104+
- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`)
105+
- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`)
102106

103107
pandas 0.13.0
104108
-------------

doc/source/v0.13.1.txt

+21
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,27 @@ Enhancements
8383
result
8484
result.loc[:,:,'ItemA']
8585

86+
- The ``ArrayFormatter``s for ``datetime`` and ``timedelta64`` now intelligently
87+
limit precision based on the values in the array (:issue:`3401`)
88+
89+
Previously output might look like:
90+
91+
.. code-block:: python
92+
93+
age today diff
94+
0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00
95+
1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00
96+
97+
Now the output looks like:
98+
99+
.. ipython:: python
100+
101+
df = DataFrame([ Timestamp('20010101'),
102+
Timestamp('20040601') ], columns=['age'])
103+
df['today'] = Timestamp('20130419')
104+
df['diff'] = df['today']-df['age']
105+
df
106+
86107
Experimental
87108
~~~~~~~~~~~~
88109

pandas/core/format.py

+94-32
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
from pandas.core.config import get_option, set_option, reset_option
1515
import pandas.core.common as com
1616
import pandas.lib as lib
17+
from pandas.tslib import iNaT
1718

1819
import numpy as np
1920

2021
import itertools
2122
import csv
23+
from datetime import time
2224

2325
from pandas.tseries.period import PeriodIndex, DatetimeIndex
2426

@@ -1609,7 +1611,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
16091611
if digits is None:
16101612
digits = get_option("display.precision")
16111613

1612-
fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
1614+
fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
16131615
float_format=float_format,
16141616
formatter=formatter, space=space,
16151617
justify=justify)
@@ -1704,7 +1706,7 @@ def _val(x, threshold):
17041706
fmt_values = [_val(x, threshold) for x in self.values]
17051707
return _trim_zeros(fmt_values, self.na_rep)
17061708

1707-
def get_result(self):
1709+
def _format_strings(self):
17081710
if self.formatter is not None:
17091711
fmt_values = [self.formatter(x) for x in self.values]
17101712
else:
@@ -1732,64 +1734,124 @@ def get_result(self):
17321734
fmt_str = '%% .%de' % (self.digits - 1)
17331735
fmt_values = self._format_with(fmt_str)
17341736

1735-
return _make_fixed_width(fmt_values, self.justify)
1737+
return fmt_values
17361738

17371739

17381740
class IntArrayFormatter(GenericArrayFormatter):
17391741

1740-
def get_result(self):
1741-
if self.formatter:
1742-
formatter = self.formatter
1743-
else:
1744-
formatter = lambda x: '% d' % x
1742+
def _format_strings(self):
1743+
formatter = self.formatter or (lambda x: '% d' % x)
17451744

17461745
fmt_values = [formatter(x) for x in self.values]
17471746

1748-
return _make_fixed_width(fmt_values, self.justify)
1747+
return fmt_values
17491748

17501749

17511750
class Datetime64Formatter(GenericArrayFormatter):
1751+
def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
1752+
super(Datetime64Formatter, self).__init__(values, **kwargs)
1753+
self.nat_rep = nat_rep
1754+
self.date_format = date_format
17521755

1753-
def get_result(self):
1754-
if self.formatter:
1755-
formatter = self.formatter
1756-
else:
1757-
formatter = _format_datetime64
1756+
def _format_strings(self):
1757+
formatter = self.formatter or _get_format_datetime64_from_values(
1758+
self.values,
1759+
nat_rep=self.nat_rep,
1760+
date_format=self.date_format)
17581761

17591762
fmt_values = [formatter(x) for x in self.values]
1760-
return _make_fixed_width(fmt_values, self.justify)
17611763

1764+
return fmt_values
17621765

1763-
def _format_datetime64(x, tz=None):
1764-
if isnull(x):
1765-
return 'NaT'
17661766

1767-
stamp = lib.Timestamp(x, tz=tz)
1768-
return stamp._repr_base
1767+
def _format_datetime64(x, tz=None, nat_rep='NaT'):
1768+
if x is None or lib.checknull(x):
1769+
return nat_rep
17691770

1771+
if tz is not None or not isinstance(x, lib.Timestamp):
1772+
x = lib.Timestamp(x, tz=tz)
17701773

1771-
class Timedelta64Formatter(Datetime64Formatter):
1774+
return str(x)
17721775

1773-
def get_result(self):
1774-
if self.formatter:
1775-
formatter = self.formatter
1776-
else:
17771776

1778-
formatter = _format_timedelta64
1777+
def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None):
    """
    Format one datetime-like value showing only its date part.

    Parameters
    ----------
    x : object
        Value to format. Anything that is not already a ``lib.Timestamp``
        is passed through ``lib.Timestamp(x)`` first.
    nat_rep : str, default 'NaT'
        Returned as-is when ``x`` is None or ``lib.checknull(x)`` is true.
    date_format : str, optional
        When truthy, handed to ``Timestamp.strftime``; otherwise the
        Timestamp's ``_date_repr`` is used.

    Returns
    -------
    str
    """
    missing = x is None or lib.checknull(x)
    if missing:
        return nat_rep

    stamp = x if isinstance(x, lib.Timestamp) else lib.Timestamp(x)

    return stamp.strftime(date_format) if date_format else stamp._date_repr
1788+
1789+
1790+
def _is_dates_only(values):
    """
    Return True when no non-null element of ``values`` has a time component.

    ``np.datetime64`` elements are wrapped in ``lib.Timestamp`` before being
    inspected. None and values for which ``lib.checknull`` is true are
    ignored. Iteration stops at the first element whose
    ``_has_time_component()`` is truthy.
    """
    def _carries_time(item):
        # Timestamp exposes _has_time_component; raw datetime64 does not
        if isinstance(item, np.datetime64):
            item = lib.Timestamp(item)

        if item is None or lib.checknull(item):
            return False
        return item._has_time_component()

    return not any(_carries_time(item) for item in values)
1798+
1799+
1800+
def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None):
    """
    Build a single-value datetime formatter.

    Parameters
    ----------
    is_dates_only : bool
        When truthy the returned callable delegates to
        ``_format_datetime64_dateonly``; otherwise to ``_format_datetime64``.
    nat_rep : str, default 'NaT'
        Forwarded to the underlying formatter.
    date_format : str, optional
        Forwarded to the date-only formatter; unused by the full formatter.

    Returns
    -------
    callable
        Accepts ``(x, tz=None)`` and returns the formatted string.
    """
    def _date_only(x, tz=None):
        # tz is accepted for a uniform call signature but is not forwarded
        return _format_datetime64_dateonly(x,
                                           nat_rep=nat_rep,
                                           date_format=date_format)

    def _full(x, tz=None):
        return _format_datetime64(x, tz=tz, nat_rep=nat_rep)

    if is_dates_only:
        return _date_only
    return _full
1808+
1809+
1810+
def _get_format_datetime64_from_values(values,
                                       nat_rep='NaT',
                                       date_format=None):
    """
    Choose a datetime formatter by inspecting ``values``.

    The result of ``_is_dates_only(values)`` selects which formatter
    ``_get_format_datetime64`` returns; ``nat_rep`` and ``date_format`` are
    forwarded unchanged.
    """
    return _get_format_datetime64(_is_dates_only(values),
                                  nat_rep=nat_rep,
                                  date_format=date_format)
1817+
1818+
1819+
class Timedelta64Formatter(GenericArrayFormatter):
1820+
1821+
def _format_strings(self):
1822+
formatter = self.formatter or _get_format_timedelta64(self.values)
17791823

17801824
fmt_values = [formatter(x) for x in self.values]
1781-
return _make_fixed_width(fmt_values, self.justify)
17821825

1826+
return fmt_values
1827+
1828+
1829+
def _get_format_timedelta64(values):
    """
    Build a single-value formatter suited to the timedelta64 ``values``.

    The values are viewed as int64 and compared against one day expressed
    in nanoseconds (assumes ``values`` is a NumPy array with nanosecond
    resolution -- confirm against callers). Entries equal to ``iNaT`` are
    ignored when choosing the format.

    The "short" format is used when every considered value is a whole
    multiple of one day, or when every considered value is smaller than one
    day in absolute terms; otherwise the "long" format is used.

    Parameters
    ----------
    values : ndarray
        Array supporting ``astype(np.int64)``.

    Returns
    -------
    callable
        Takes one value and returns its string form: 'NaT' for None/null,
        "0 days" or "00:00:00" for zero in short mode, otherwise the result
        of ``lib.repr_timedelta64(x, format=...)``.
    """
    values_int = values.astype(np.int64)

    # NaT carries no magnitude and must not influence the format choice
    consider_values = values_int != iNaT

    # Keep this an integer. A float constant (86400 * 1e9) promotes the int64
    # operands to float64, which cannot represent magnitudes beyond 2**53 ns
    # (roughly 104 days) exactly, so a small sub-day remainder could be
    # rounded away and the column wrongly treated as whole days.
    one_day_in_nanos = 86400 * 10 ** 9

    has_partial_day = np.logical_and(
        consider_values, values_int % one_day_in_nanos != 0)
    has_full_day = np.logical_and(
        consider_values, np.abs(values_int) >= one_day_in_nanos)

    even_days = not has_partial_day.any()
    all_sub_day = not has_full_day.any()

    format_short = even_days or all_sub_day
    # named fmt rather than format to avoid shadowing the builtin
    fmt = "short" if format_short else "long"

    def impl(x):
        if x is None or lib.checknull(x):
            return 'NaT'
        elif format_short and x == 0:
            return "0 days" if even_days else "00:00:00"
        else:
            return lib.repr_timedelta64(x, format=fmt)

    return impl
17891850

17901851

17911852
def _make_fixed_width(strings, justify='right', minimum=None, truncated=False):
1792-
if len(strings) == 0:
1853+
1854+
if len(strings) == 0 or justify == 'all':
17931855
return strings
17941856

17951857
_strlen = _strlen_func()

0 commit comments

Comments
 (0)