Skip to content

Commit 6cf5d0c

Browse files
mathauseshoyer
authored andcommitted
fix datetime issues (#984)
* fix datetime issues - pretty print when date is out of bounds - decode_cf_datetime when first date is in bound but later dates are not - work around pandas Overflow error (pandas-dev/pandas#14068) * correct indentation * revise fix datetime issues * update whats-new
1 parent 97e69dc commit 6cf5d0c

File tree

5 files changed

+85
-6
lines changed

5 files changed

+85
-6
lines changed

doc/whats-new.rst

+2
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@ Enhancements
2626

2727
Bug fixes
2828
~~~~~~~~~
29+
- Fix issues for dates outside the valid range of pandas timestamps
30+
(:issue:`975`). By `Mathias Hauser <https://github.com/mathause>`_.
2931

3032
.. _whats-new.0.8.2:
3133

xarray/conventions.py

+14-5
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from pandas.tslib import OutOfBoundsDatetime
1010

1111
from .core import indexing, ops, utils
12-
from .core.formatting import format_timestamp, first_n_items
12+
from .core.formatting import format_timestamp, first_n_items, last_item
1313
from .core.variable import as_variable, Variable
1414
from .core.pycompat import iteritems, OrderedDict, PY3, basestring
1515

@@ -142,6 +142,12 @@ def decode_cf_datetime(num_dates, units, calendar=None):
142142
# strings, in which case we fall back to using netCDF4
143143
raise OutOfBoundsDatetime
144144

145+
# fixes: https://github.com/pydata/pandas/issues/14068
146+
# these lines check if the lowest or the highest value in dates
147+
# causes an OutOfBoundsDatetime (Overflow) error
148+
pd.to_timedelta(flat_num_dates.min(), delta) + ref_date
149+
pd.to_timedelta(flat_num_dates.max(), delta) + ref_date
150+
145151
dates = (pd.to_timedelta(flat_num_dates, delta) + ref_date).values
146152

147153
except (OutOfBoundsDatetime, OverflowError):
@@ -369,10 +375,13 @@ def __init__(self, array, units, calendar=None):
369375
self.array = array
370376
self.units = units
371377
self.calendar = calendar
372-
# Verify at least one date can be decoded successfully.
373-
# Otherwise, tracebacks end up swallowed by Dataset.__repr__ when users
374-
# try to view their lazily decoded array.
375-
example_value = first_n_items(array, 1) or 0
378+
379+
# Verify that at least the first and last date can be decoded
380+
# successfully. Otherwise, tracebacks end up swallowed by
381+
# Dataset.__repr__ when users try to view their lazily decoded array.
382+
example_value = np.concatenate([first_n_items(array, 1),
383+
last_item(array), [0]])
384+
376385
try:
377386
result = decode_cf_datetime(example_value, units, calendar)
378387
except Exception:

xarray/core/formatting.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import numpy as np
1111
import pandas as pd
12+
from pandas.tslib import OutOfBoundsDatetime
1213

1314
from .options import OPTIONS
1415
from .pycompat import PY2, iteritems, unicode_type, bytes_type, dask_array_type
@@ -82,10 +83,23 @@ def first_n_items(x, n_desired):
8283
x = x[indexer]
8384
return np.asarray(x).flat[:n_desired]
8485

86+
def last_item(x):
    """Return the last item of an array as a one-element, 1-D array.

    Parameters
    ----------
    x : array-like
        Object exposing ``size``, ``ndim`` and integer/slice indexing.

    Returns
    -------
    numpy.ndarray or list
        A 1-D array of shape ``(1,)`` holding the element at the last
        position along every axis, or an empty list if ``x`` has no
        elements.

    Notes
    -----
    The result is always flattened to one dimension so that it can be
    concatenated with other 1-D arrays regardless of the dimensionality
    of ``x``. Slicing an N-D array with ``slice(-1, None)`` on each axis
    yields shape ``(1, ..., 1)``, which ``np.concatenate`` refuses to join
    with 1-D inputs.
    """
    if x.size == 0:
        # work around for https://github.com/numpy/numpy/issues/5195
        return []

    # Slice (rather than integer-index) every axis so the same expression
    # works for any number of dimensions, including zero.
    indexer = (slice(-1, None),) * x.ndim
    return np.array(x[indexer]).ravel()
8594

8695
def format_timestamp(t):
8796
"""Cast given object to a Timestamp and return a nicely formatted string"""
88-
datetime_str = unicode_type(pd.Timestamp(t))
97+
# Timestamp is only valid for 1678 to 2262
98+
try:
99+
datetime_str = unicode_type(pd.Timestamp(t))
100+
except OutOfBoundsDatetime:
101+
datetime_str = unicode_type(t)
102+
89103
try:
90104
date_str, time_str = datetime_str.split()
91105
except ValueError:

xarray/test/test_conventions.py

+32
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,37 @@ def test_cf_datetime(self):
186186
pd.Index(actual), units, calendar)
187187
self.assertArrayEqual(num_dates, np.around(encoded, 1))
188188

189+
@requires_netCDF4
def test_decode_cf_datetime_overflow(self):
    """Decode single offsets that land outside the pandas Timestamp range."""
    # regression checks for
    # https://github.com/pydata/pandas/issues/14068
    # https://github.com/pydata/xarray/issues/975
    from datetime import datetime

    units = 'days since 2000-01-01 00:00:00'

    # one offset that decodes to a date before 1678 and one that decodes
    # to a date after 2262, each paired with the expected datetime
    cases = [
        (-117608, datetime(1677, 12, 31)),
        (95795, datetime(2262, 4, 12)),
    ]

    for day, expected in cases:
        result = conventions.decode_cf_datetime(day, units)
        self.assertEqual(result, expected)
205+
206+
@requires_netCDF4
def test_decode_cf_datetime_transition_to_invalid(self):
    """Decode a time coordinate whose two offsets map to 2000 and 2265."""
    from datetime import datetime

    # manually create dataset with not-decoded date
    raw = Dataset(coords={'time': [0, 266 * 365]})
    raw.time.attrs = dict(units='days since 2000-01-01 00:00:00')

    decoded = conventions.decode_cf(raw)

    expected = [
        datetime(2000, 1, 1, 0, 0),
        datetime(2265, 10, 28, 0, 0),
    ]
    self.assertArrayEqual(decoded.time.values, expected)
219+
189220
def test_decoded_cf_datetime_array(self):
190221
actual = conventions.DecodedCFDatetimeArray(
191222
np.array([0, 1, 2]), 'days since 1900-01-01', 'standard')
@@ -340,6 +371,7 @@ def test_decode_non_standard_calendar_fallback(self):
340371
self.assertEqual(actual.dtype, np.dtype('O'))
341372
self.assertArrayEqual(actual, expected)
342373

374+
@requires_netCDF4
343375
def test_cf_datetime_nan(self):
344376
for num_dates, units, expected_list in [
345377
([np.nan], 'days since 2000-01-01', ['NaT']),

xarray/test/test_formatting.py

+22
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ def test_first_n_items(self):
3737
with self.assertRaisesRegexp(ValueError, 'at least one item'):
3838
formatting.first_n_items(array, 0)
3939

40+
def test_last_item(self):
    """Check that last_item yields 99 for several reshapes of arange(100)."""
    flat = np.arange(100)
    expected = np.array(99)

    # the same 100 values laid out in 2-D, degenerate 2-D and 4-D shapes
    for shape in [(10, 10), (1, 100), (2, 2, 5, 5)]:
        result = formatting.last_item(flat.reshape(shape))
        self.assertEqual(result, expected)
49+
4050
def test_format_item(self):
4151
cases = [
4252
(pd.Timestamp('2000-01-01T12'), '2000-01-01T12:00:00'),
@@ -106,3 +116,15 @@ def test_pretty_print(self):
106116

107117
def test_maybe_truncate(self):
108118
self.assertEqual(formatting.maybe_truncate(u'ß', 10), u'ß')
119+
120+
def test_format_timestamp_out_of_bounds(self):
    """Check format_timestamp output for dates in the years 1300 and 2300."""
    from datetime import datetime

    # (input date, expected formatted string)
    cases = [
        (datetime(1300, 12, 1), '1300-12-01'),
        (datetime(2300, 12, 1), '2300-12-01'),
    ]

    for date, expected in cases:
        result = formatting.format_timestamp(date)
        self.assertEqual(result, expected)

0 commit comments

Comments
 (0)