8
8
from datetime import date , datetime
9
9
from io import StringIO
10
10
11
- from dateutil .parser import parse
11
+ from dateutil .parser import parse as du_parse
12
+ from hypothesis import given , settings , strategies as st
12
13
import numpy as np
13
14
import pytest
14
15
import pytz
15
16
16
17
from pandas ._libs .tslib import Timestamp
17
18
from pandas ._libs .tslibs import parsing
18
- from pandas .compat import lrange
19
+ from pandas ._libs .tslibs .parsing import parse_datetime_string
20
+ from pandas .compat import is_platform_windows , lrange
19
21
from pandas .compat .numpy import np_array_datetime64_compat
20
22
21
23
import pandas as pd
26
28
import pandas .io .date_converters as conv
27
29
import pandas .io .parsers as parsers
28
30
31
# Baseline datetime handed to dateutil's parser as ``default`` in the
# hypothesis-based test of this module.
_DEFAULT_DATETIME = datetime(1, 1, 1)

# Hypothesis strategy producing the datetimes used to build date strings.
# On Windows the generated values are bounded below by 1900-01-01.
# NOTE(review): presumably because formatting earlier years is unreliable
# on that platform -- confirm.
date_strategy = (
    st.datetimes(min_value=datetime(1900, 1, 1))
    if is_platform_windows()
    else st.datetimes()
)
39
+
29
40
30
41
def test_separator_date_conflict (all_parsers ):
31
42
# Regression test for gh-4678
@@ -439,7 +450,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
439
450
"""
440
451
if "dayfirst" in kwargs :
441
452
df = parser .read_csv (StringIO (data ), names = ["time" , "Q" , "NTU" ],
442
- date_parser = lambda d : parse (d , ** kwargs ),
453
+ date_parser = lambda d : du_parse (d , ** kwargs ),
443
454
header = 0 , index_col = 0 , parse_dates = True ,
444
455
na_values = ["NA" ])
445
456
exp_index = Index ([datetime (2010 , 1 , 31 ), datetime (2010 , 2 , 1 ),
@@ -451,7 +462,7 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs):
451
462
msg = "got an unexpected keyword argument 'day_first'"
452
463
with pytest .raises (TypeError , match = msg ):
453
464
parser .read_csv (StringIO (data ), names = ["time" , "Q" , "NTU" ],
454
- date_parser = lambda d : parse (d , ** kwargs ),
465
+ date_parser = lambda d : du_parse (d , ** kwargs ),
455
466
skiprows = [0 ], index_col = 0 , parse_dates = True ,
456
467
na_values = ["NA" ])
457
468
@@ -849,3 +860,82 @@ def test_parse_timezone(all_parsers):
849
860
850
861
expected = DataFrame (expected_data )
851
862
tm .assert_frame_equal (result , expected )
863
+
864
+
865
@pytest.mark.parametrize("date_string", [
    "32/32/2019",
    "02/30/2019",
    "13/13/2019",
    "13/2019",
    "a3/11/2018",
    "10/11/2o17"
])
def test_invalid_parse_delimited_date(all_parsers, date_string):
    """Check that an invalid delimited date is returned as the raw string.

    The single-cell input is read with ``parse_dates=[0]``; the resulting
    frame must still hold the original text in an object-dtype column.
    """
    buffer = StringIO(date_string)
    result = all_parsers.read_csv(buffer, header=None, parse_dates=[0])

    expected = DataFrame({0: [date_string]}, dtype="object")
    tm.assert_frame_equal(result, expected)
879
+
880
+
881
@pytest.mark.parametrize("date_string,dayfirst,expected", [
    # Leading field is 13, which cannot be a month: read as day/month/year
    # whatever ``dayfirst`` says.
    ("13/02/2019", False, datetime(2019, 2, 13)),
    ("13/02/2019", True, datetime(2019, 2, 13)),
    # Second field is 13, which cannot be a month: read as month/day/year
    # whatever ``dayfirst`` says.
    ("02/13/2019", False, datetime(2019, 2, 13)),
    ("02/13/2019", True, datetime(2019, 2, 13)),
    # Both fields could be a month; ``dayfirst=True`` decides day/month/year.
    ("04/02/2019", True, datetime(2019, 2, 4))
])
def test_parse_delimited_date_swap(all_parsers, date_string,
                                   dayfirst, expected):
    """Check day/month resolution of delimited dates read through read_csv.

    Each case feeds one date string to the parser together with a
    ``dayfirst`` flag and compares the parsed column with the expected
    datetime.
    """
    result = all_parsers.read_csv(StringIO(date_string), header=None,
                                  dayfirst=dayfirst, parse_dates=[0])

    expected_frame = DataFrame({0: [expected]}, dtype="datetime64[ns]")
    tm.assert_frame_equal(result, expected_frame)
898
+
899
+
900
+ def _helper_hypothesis_delimited_date (call , date_string , ** kwargs ):
901
+ msg , result = None , None
902
+ try :
903
+ result = call (date_string , ** kwargs )
904
+ except ValueError as er :
905
+ msg = str (er )
906
+ pass
907
+ return msg , result
908
+
909
+
910
@given(date_strategy)
@settings(deadline=None)
@pytest.mark.parametrize("delimiter", list(" -./"))
@pytest.mark.parametrize("dayfirst", [True, False])
@pytest.mark.parametrize("date_format", [
    "%d %m %Y",
    "%m %d %Y",
    "%m %Y",
    "%Y %m %d",
    "%y %m %d",
    "%Y%m%d",
    "%y%m%d",
])
def test_hypothesis_delimited_date(date_format, dayfirst,
                                   delimiter, test_datetime):
    """Compare ``parse_datetime_string`` with dateutil on generated dates.

    ``test_datetime`` is drawn from ``date_strategy`` by hypothesis. It is
    formatted with ``date_format`` (spaces replaced by ``delimiter``) and the
    resulting string is parsed by both ``parse_datetime_string`` and
    dateutil's parser. The two must agree on the parsed value and on the
    ValueError message, if one is raised.
    """
    if date_format == "%m %Y" and delimiter == ".":
        # Adjacent string literals keep the message free of the indentation
        # whitespace that a backslash continuation inside the literal would
        # embed into it.
        pytest.skip("parse_datetime_string cannot reliably tell whether "
                    "e.g. %m.%Y is a float or a date, thus we skip it")
    date_string = test_datetime.strftime(date_format.replace(' ', delimiter))

    # Parser under test.
    except_out_dateutil, result = _helper_hypothesis_delimited_date(
        parse_datetime_string, date_string,
        dayfirst=dayfirst)
    # Reference result from dateutil.
    except_in_dateutil, expected = _helper_hypothesis_delimited_date(
        du_parse, date_string,
        default=_DEFAULT_DATETIME,
        dayfirst=dayfirst, yearfirst=False)

    assert except_out_dateutil == except_in_dateutil
    assert result == expected
0 commit comments