Skip to content

Commit 30a3d82

Browse files
committed
Merge branch 'fix-float-into-datetime' of https://github.com/danbirken/pandas into danbirken-fix-float-into-datetime
2 parents c133763 + f738712 commit 30a3d82

File tree

3 files changed

+165
-14
lines changed

3 files changed

+165
-14
lines changed

doc/source/release.rst

+1
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ Bug Fixes
433433
- Fix an issue in TextFileReader w/ Python engine (i.e. PythonParser)
434434
with thousands != "," (:issue:`4596`)
435435
- Bug in getitem with a duplicate index when using where (:issue:`4879`)
436+
- Fix bug where type inference code coerces a float column into datetime (:issue:`4601`)
436437

437438

438439
pandas 0.12.0

pandas/tseries/tests/test_tslib.py

+123
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
import unittest
2+
3+
import numpy as np
4+
5+
from pandas import tslib
6+
from datetime import datetime
7+
8+
class TestDatetimeParsingWrappers(unittest.TestCase):
    """Tests for the string/datetime helper functions exposed by ``tslib``."""

    def test_verify_datetime_bounds(self):
        """Out-of-range years raise ValueError; in-range years pass."""
        rejected_years = (1, 1000, 1677, 2262, 5000)
        accepted_years = (1678, 2000, 2261)

        for rejected_year in rejected_years:
            new_years_day = datetime(rejected_year, 1, 1)
            self.assertRaises(
                ValueError, tslib.verify_datetime_bounds, new_years_day
            )

        for accepted_year in accepted_years:
            # Passing means simply not raising.
            tslib.verify_datetime_bounds(datetime(accepted_year, 1, 1))

    def test_does_not_convert_mixed_integer(self):
        """Number-like and single-letter strings are not datetime-like."""
        looks_like_date = tslib._does_string_look_like_datetime

        not_dates = ('-50000', '999', '123.1234', 'm', 'T')
        for candidate in not_dates:
            self.assertFalse(looks_like_date(candidate))

        dates = (
            '2012-01-01',
            '01/01/2012',
            'Mon Sep 16, 2013',
            '01012012',
            '0101',
            '1-1',
        )
        for candidate in dates:
            self.assertTrue(looks_like_date(candidate))
48+
49+
class TestArrayToDatetime(unittest.TestCase):
    """Tests for ``tslib.array_to_datetime`` on object arrays of strings.

    Uses ``assertTrue`` rather than the deprecated ``assert_`` alias, which
    matches the other test class in this module.
    """

    def test_parsing_valid_dates(self):
        # Numeric month-day-year strings are converted to datetime64[ns].
        arr = np.array(['01-01-2013', '01-02-2013'], dtype=object)
        self.assertTrue(
            np.array_equal(
                tslib.array_to_datetime(arr),
                np.array(
                    [
                        '2013-01-01T00:00:00.000000000-0000',
                        '2013-01-02T00:00:00.000000000-0000'
                    ],
                    dtype='M8[ns]'
                )
            )
        )

        # Textual weekday/month strings are converted as well.
        arr = np.array(['Mon Sep 16 2013', 'Tue Sep 17 2013'], dtype=object)
        self.assertTrue(
            np.array_equal(
                tslib.array_to_datetime(arr),
                np.array(
                    [
                        '2013-09-16T00:00:00.000000000-0000',
                        '2013-09-17T00:00:00.000000000-0000'
                    ],
                    dtype='M8[ns]'
                )
            )
        )

    def test_number_looking_strings_not_into_datetime(self):
        # #4601
        # These strings don't look like datetimes, so no conversion
        # should be attempted and the input comes back unchanged.
        arr = np.array(['-352.737091', '183.575577'], dtype=object)
        self.assertTrue(np.array_equal(tslib.array_to_datetime(arr), arr))

        arr = np.array(['1', '2', '3', '4', '5'], dtype=object)
        self.assertTrue(np.array_equal(tslib.array_to_datetime(arr), arr))

    def test_dates_outside_of_datetime64_ns_bounds(self):
        # These datetimes are outside of the datetime64[ns] bounds, so
        # they cannot be converted to datetimes.
        arr = np.array(['1/1/1676', '1/2/1676'], dtype=object)
        self.assertTrue(np.array_equal(tslib.array_to_datetime(arr), arr))

        arr = np.array(['1/1/2263', '1/2/2263'], dtype=object)
        self.assertTrue(np.array_equal(tslib.array_to_datetime(arr), arr))

    def test_coerce_of_invalid_datetimes(self):
        arr = np.array(['01-01-2013', 'not_a_date', '1'], dtype=object)

        # Without coercing, the presence of any invalid date prevents
        # any values from being converted.
        self.assertTrue(np.array_equal(tslib.array_to_datetime(arr), arr))

        # With coercing, the invalid dates become iNaT.
        self.assertTrue(
            np.array_equal(
                tslib.array_to_datetime(arr, coerce=True),
                np.array(
                    [
                        '2013-01-01T00:00:00.000000000-0000',
                        tslib.iNaT,
                        tslib.iNaT
                    ],
                    dtype='M8[ns]'
                )
            )
        )
121+
if __name__ == '__main__':
    # ``nose`` is used below but is not imported at module level; import it
    # here so that running this file as a script does not raise NameError.
    import nose

    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)

pandas/tslib.pyx

+41-14
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ class Timestamp(_Timestamp):
317317

318318

319319
_nat_strings = set(['NaT','nat','NAT','nan','NaN','NAN'])
320-
_not_datelike_strings = set(['a','A','m','M','p','P','t','T'])
321320
class NaTType(_NaT):
322321
"""(N)ot-(A)-(T)ime, the time equivalent of NaN"""
323322

@@ -841,6 +840,43 @@ def datetime_to_datetime64(ndarray[object] values):
841840

842841
return result, inferred_tz
843842

843+
# Single-letter strings that must never be treated as dates: dateutil
# parses some length-1 strings into today's date, so they are excluded
# explicitly by _does_string_look_like_datetime.
_not_datelike_strings = set(['a','A','m','M','p','P','t','T'])
844+
845+
def verify_datetime_bounds(dt):
    """Check that a datetime.datetime fits inside datetime64[ns].

    The check is made at whole-year granularity: a value is accepted only
    when its year lies strictly between 1677 and 2262.

    Parameters
    ----------
    dt : datetime.datetime
        Value to validate; only its ``year`` attribute is inspected.

    Returns
    -------
    The same ``dt`` object, unchanged, when it passes the check.

    Raises
    ------
    ValueError
        If ``dt.year`` is 1677 or earlier, or 2262 or later.
    """
    year = dt.year
    if 1677 < year < 2262:
        return dt

    raise ValueError('Given datetime not within valid datetime64[ns] bounds')
852+
853+
def _does_string_look_like_datetime(date_string):
854+
if date_string.startswith('0'):
855+
# Strings starting with 0 are more consistent with a
856+
# date-like string than a number
857+
return True
858+
859+
try:
860+
if float(date_string) < 1000:
861+
return False
862+
except ValueError:
863+
pass
864+
865+
if date_string in _not_datelike_strings:
866+
return False
867+
868+
return True
869+
870+
def parse_datetime_string(date_string, verify_bounds=True, **kwargs):
    """Parse a string into a datetime, rejecting strings unlikely to be dates.

    Parameters
    ----------
    date_string : str
        Text to parse.
    verify_bounds : bool, default True
        When true, the parsed value is also passed through
        ``verify_datetime_bounds``.
    **kwargs
        Forwarded unchanged to ``parse_date``.

    Returns
    -------
    The object returned by ``parse_date``.

    Raises
    ------
    ValueError
        If ``_does_string_look_like_datetime`` rejects the string, or if
        the bounds check is enabled and fails.
    """
    looks_like_date = _does_string_look_like_datetime(date_string)
    if not looks_like_date:
        raise ValueError('Given date string not likely a datetime.')

    parsed = parse_date(date_string, **kwargs)
    if not verify_bounds:
        return parsed

    verify_datetime_bounds(parsed)
    return parsed
844880

845881
def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
846882
format=None, utc=None, coerce=False, unit=None):
@@ -908,24 +944,15 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
908944
&dts)
909945
_check_dts_bounds(iresult[i], &dts)
910946
except ValueError:
911-
912-
# for some reason, dateutil parses some single letter len-1 strings into today's date
913-
if len(val) == 1 and val in _not_datelike_strings:
914-
if coerce:
915-
iresult[i] = iNaT
916-
continue
917-
elif raise_:
918-
raise
919947
try:
920-
result[i] = parse_date(val, dayfirst=dayfirst)
948+
result[i] = parse_datetime_string(
949+
val, dayfirst=dayfirst
950+
)
921951
except Exception:
922952
if coerce:
923953
iresult[i] = iNaT
924954
continue
925955
raise TypeError
926-
pandas_datetime_to_datetimestruct(iresult[i], PANDAS_FR_ns,
927-
&dts)
928-
_check_dts_bounds(iresult[i], &dts)
929956
except:
930957
if coerce:
931958
iresult[i] = iNaT
@@ -946,7 +973,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False,
946973
oresult[i] = 'NaT'
947974
continue
948975
try:
949-
oresult[i] = parse_date(val, dayfirst=dayfirst)
976+
oresult[i] = parse_datetime_string(val, dayfirst=dayfirst)
950977
except Exception:
951978
if raise_:
952979
raise

0 commit comments

Comments
 (0)