Skip to content

Commit cfb9c98

Browse files
committed
ENH Datetime64Formatter and Timedelta64Formatter now limit precision.
For Datetime, this means that only the date is shown when no value has a timezone and every time is midnight; for Timedelta, it means that only the days are shown when all the deltas are whole days (GH3401). str(NaT) is fixed to be "NaT" (GH5708).
1 parent 78465c0 commit cfb9c98

File tree

7 files changed

+439
-83
lines changed

7 files changed

+439
-83
lines changed

doc/source/release.rst

+4
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,8 @@ Improvements to existing features
7676
- support ``dtypes`` on ``Panel``
7777
- extend ``Panel.apply`` to allow arbitrary functions (rather than only ufuncs) (:issue:`1148`)
7878
allow multiple axes to be used to operate on slabs of a ``Panel``
79+
- The ``ArrayFormatter``\ s for ``datetime`` and ``timedelta64`` now intelligently
80+
limit precision based on the values in the array (:issue:`3401`)
7981

8082
.. _release.bug_fixes-0.13.1:
8183

@@ -99,6 +101,8 @@ Bug Fixes
99101
- Bug in creating an empty DataFrame, copying, then assigning (:issue:`5932`)
100102
- Bug in DataFrame.tail with empty frame (:issue:`5846`)
101103
- Bug in propagating metadata on ``resample`` (:issue:`5862`)
104+
- Fixed string-representation of ``NaT`` to be "NaT" (:issue:`5708`)
105+
- Fixed string-representation for Timestamp to show nanoseconds if present (:issue:`5912`)
102106

103107
pandas 0.13.0
104108
-------------

doc/source/v0.13.1.txt

+21
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,27 @@ Enhancements
8383
result
8484
result.loc[:,:,'ItemA']
8585

86+
- The ``ArrayFormatter``\ s for ``datetime`` and ``timedelta64`` now intelligently
87+
limit precision based on the values in the array (:issue:`3401`)
88+
89+
Previously output might look like:
90+
91+
.. code-block:: python
92+
93+
age today diff
94+
0 2001-01-01 00:00:00 2013-04-19 00:00:00 4491 days, 00:00:00
95+
1 2004-06-01 00:00:00 2013-04-19 00:00:00 3244 days, 00:00:00
96+
97+
Now the output looks like:
98+
99+
.. ipython:: python
100+
101+
df = DataFrame([ Timestamp('20010101'),
102+
Timestamp('20040601') ], columns=['age'])
103+
df['today'] = Timestamp('20130419')
104+
df['diff'] = df['today']-df['age']
105+
df
106+
86107
Experimental
87108
~~~~~~~~~~~~
88109

pandas/core/format.py

+94-32
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,13 @@
1414
from pandas.core.config import get_option, set_option, reset_option
1515
import pandas.core.common as com
1616
import pandas.lib as lib
17+
from pandas.tslib import iNaT
1718

1819
import numpy as np
1920

2021
import itertools
2122
import csv
23+
from datetime import time
2224

2325
from pandas.tseries.period import PeriodIndex, DatetimeIndex
2426

@@ -1609,7 +1611,7 @@ def format_array(values, formatter, float_format=None, na_rep='NaN',
16091611
if digits is None:
16101612
digits = get_option("display.precision")
16111613

1612-
fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
1614+
fmt_obj = fmt_klass(values, digits=digits, na_rep=na_rep,
16131615
float_format=float_format,
16141616
formatter=formatter, space=space,
16151617
justify=justify)
@@ -1704,7 +1706,7 @@ def _val(x, threshold):
17041706
fmt_values = [_val(x, threshold) for x in self.values]
17051707
return _trim_zeros(fmt_values, self.na_rep)
17061708

1707-
def get_result(self):
1709+
def _format_strings(self):
17081710
if self.formatter is not None:
17091711
fmt_values = [self.formatter(x) for x in self.values]
17101712
else:
@@ -1732,64 +1734,124 @@ def get_result(self):
17321734
fmt_str = '%% .%de' % (self.digits - 1)
17331735
fmt_values = self._format_with(fmt_str)
17341736

1735-
return _make_fixed_width(fmt_values, self.justify)
1737+
return fmt_values
17361738

17371739

17381740
class IntArrayFormatter(GenericArrayFormatter):
    """Array formatter that renders each value with an integer format."""

    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        ``self.formatter`` is used when it is truthy; otherwise every
        value is rendered with the ``'% d'`` format (a leading space is
        emitted in place of a plus sign).
        """
        render = self.formatter
        if not render:
            def render(value):
                return '% d' % value

        return [render(value) for value in self.values]
17491748

17501749

17511750
class Datetime64Formatter(GenericArrayFormatter):
    """
    Array formatter for datetime values.

    Parameters
    ----------
    values : array-like
        Values to format; forwarded to the base-class constructor.
    nat_rep : default 'NaT'
        Stored on the instance and handed to the default formatter as
        the text for missing values.
    date_format : optional
        Stored on the instance and handed to the default formatter.
    **kwargs
        Forwarded unchanged to the base-class constructor.
    """

    def __init__(self, values, nat_rep='NaT', date_format=None, **kwargs):
        super(Datetime64Formatter, self).__init__(values, **kwargs)
        self.date_format = date_format
        self.nat_rep = nat_rep

    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        A truthy ``self.formatter`` takes precedence; otherwise the
        formatter is derived from the values themselves via
        ``_get_format_datetime64_from_values``.
        """
        render = self.formatter
        if not render:
            # Only inspect the values when no explicit formatter is set.
            render = _get_format_datetime64_from_values(
                self.values,
                nat_rep=self.nat_rep,
                date_format=self.date_format)

        return [render(item) for item in self.values]
17621765

1763-
def _format_datetime64(x, tz=None):
1764-
if isnull(x):
1765-
return 'NaT'
17661766

1767-
stamp = lib.Timestamp(x, tz=tz)
1768-
return stamp._repr_base
1767+
def _format_datetime64(x, tz=None, nat_rep='NaT'):
1768+
if x is None or lib.checknull(x):
1769+
return nat_rep
17691770

1771+
if tz is not None or not isinstance(x, lib.Timestamp):
1772+
x = lib.Timestamp(x, tz=tz)
17701773

1771-
class Timedelta64Formatter(Datetime64Formatter):
1774+
return str(x)
17721775

1773-
def get_result(self):
1774-
if self.formatter:
1775-
formatter = self.formatter
1776-
else:
17771776

1778-
formatter = _format_timedelta64
1777+
def _format_datetime64_dateonly(x, nat_rep='NaT', date_format=None):
1778+
if x is None or lib.checknull(x):
1779+
return nat_rep
1780+
1781+
if not isinstance(x, lib.Timestamp):
1782+
x = lib.Timestamp(x)
1783+
1784+
if date_format:
1785+
return x.strftime(date_format)
1786+
else:
1787+
return x._date_repr
1788+
1789+
1790+
def _is_dates_only(values):
1791+
for d in values:
1792+
if isinstance(d, np.datetime64):
1793+
d = lib.Timestamp(d)
1794+
1795+
if d is not None and not lib.checknull(d) and d._has_time_component():
1796+
return False
1797+
return True
1798+
1799+
1800+
def _get_format_datetime64(is_dates_only, nat_rep='NaT', date_format=None):
1801+
1802+
if is_dates_only:
1803+
return lambda x, tz=None: _format_datetime64_dateonly(x,
1804+
nat_rep=nat_rep,
1805+
date_format=date_format)
1806+
else:
1807+
return lambda x, tz=None: _format_datetime64(x, tz=tz, nat_rep=nat_rep)
1808+
1809+
1810+
def _get_format_datetime64_from_values(values,
                                       nat_rep='NaT',
                                       date_format=None):
    """
    Pick a datetime formatter by inspecting ``values``.

    The result of ``_is_dates_only(values)`` is passed, together with
    ``nat_rep`` and ``date_format``, to ``_get_format_datetime64``, whose
    return value is returned unchanged.
    """
    return _get_format_datetime64(
        is_dates_only=_is_dates_only(values),
        nat_rep=nat_rep,
        date_format=date_format)
1817+
1818+
1819+
class Timedelta64Formatter(GenericArrayFormatter):
    """Array formatter for timedelta values."""

    def _format_strings(self):
        """
        Return one formatted string per element of ``self.values``.

        A truthy ``self.formatter`` takes precedence; otherwise the
        formatter is built from the values by ``_get_format_timedelta64``.
        """
        render = self.formatter
        if not render:
            # Only inspect the values when no explicit formatter is set.
            render = _get_format_timedelta64(self.values)

        return [render(item) for item in self.values]
1827+
1828+
1829+
def _get_format_timedelta64(values):
    """
    Build a formatter for timedelta values whose precision is chosen
    from the values themselves.

    Parameters
    ----------
    values : ndarray
        Array supporting ``astype(np.int64)``; the integers are assumed
        to be nanosecond counts (TODO confirm against callers). Entries
        equal to ``iNaT`` are ignored when choosing the precision.

    Returns
    -------
    callable
        ``f(x)`` returning 'NaT' when ``x`` is None or
        ``lib.checknull(x)`` is true, and otherwise the result of
        ``lib.repr_timedelta64`` in "short" format when every considered
        value is a whole number of days or every considered value is
        shorter than one day, else in "long" format. In short format a
        zero value is rendered as "0 days" (whole-day case) or
        "00:00:00" (sub-day case).
    """
    values_int = values.astype(np.int64)

    # NaT placeholders must not influence the chosen precision.
    consider_values = values_int != iNaT

    # Keep this an exact integer. A float constant (86400 * 1e9) promotes
    # the int64 data to float64 in the modulo below, and float64 cannot
    # represent every integer above 2**53 (about 104 days in
    # nanoseconds), so small sub-day remainders of long deltas were
    # rounded away and the values wrongly treated as whole days.
    one_day_in_nanos = 86400 * 10 ** 9

    has_partial_day = np.logical_and(
        consider_values, values_int % one_day_in_nanos != 0).any()
    has_day_or_more = np.logical_and(
        consider_values, np.abs(values_int) >= one_day_in_nanos).any()

    even_days = not has_partial_day
    all_sub_day = not has_day_or_more

    format_short = even_days or all_sub_day
    # Local name avoids shadowing the builtin ``format``.
    fmt_name = "short" if format_short else "long"

    def impl(x):
        if x is None or lib.checknull(x):
            return 'NaT'
        elif format_short and x == 0:
            return "0 days" if even_days else "00:00:00"
        else:
            return lib.repr_timedelta64(x, format=fmt_name)

    return impl
17891850

17901851

17911852
def _make_fixed_width(strings, justify='right', minimum=None, truncated=False):
1792-
if len(strings) == 0:
1853+
1854+
if len(strings) == 0 or justify == 'all':
17931855
return strings
17941856

17951857
_strlen = _strlen_func()

0 commit comments

Comments
 (0)