8
8
datetime ,
9
9
)
10
10
from io import StringIO
11
+ import warnings
11
12
12
13
from dateutil .parser import parse as du_parse
13
14
from hypothesis import (
39
40
from pandas .core .indexes .datetimes import date_range
40
41
41
42
import pandas .io .date_converters as conv
43
+ from pandas .io .parsers import read_csv
42
44
43
45
# constant
44
46
_DEFAULT_DATETIME = datetime (1 , 1 , 1 )
@@ -1556,16 +1558,16 @@ def test_invalid_parse_delimited_date(all_parsers, date_string):
1556
1558
"date_string,dayfirst,expected" ,
1557
1559
[
1558
1560
# %d/%m/%Y; month > 12 thus replacement
1559
- ("13/02/2019" , False , datetime (2019 , 2 , 13 )),
1560
1561
("13/02/2019" , True , datetime (2019 , 2 , 13 )),
1561
1562
# %m/%d/%Y; day > 12 thus there will be no replacement
1562
1563
("02/13/2019" , False , datetime (2019 , 2 , 13 )),
1563
- ("02/13/2019" , True , datetime (2019 , 2 , 13 )),
1564
1564
# %d/%m/%Y; dayfirst==True thus replacement
1565
1565
("04/02/2019" , True , datetime (2019 , 2 , 4 )),
1566
1566
],
1567
1567
)
1568
- def test_parse_delimited_date_swap (all_parsers , date_string , dayfirst , expected ):
1568
+ def test_parse_delimited_date_swap_no_warning (
1569
+ all_parsers , date_string , dayfirst , expected
1570
+ ):
1569
1571
parser = all_parsers
1570
1572
expected = DataFrame ({0 : [expected ]}, dtype = "datetime64[ns]" )
1571
1573
result = parser .read_csv (
@@ -1574,6 +1576,30 @@ def test_parse_delimited_date_swap(all_parsers, date_string, dayfirst, expected)
1574
1576
tm .assert_frame_equal (result , expected )
1575
1577
1576
1578
1579
@pytest.mark.parametrize(
    "date_string,dayfirst,expected",
    [
        # written day-first but read with dayfirst=False; expected is 13 Feb
        ("13/02/2019", False, datetime(2019, 2, 13)),
        # written month-first but read with dayfirst=True; expected is 13 Feb
        ("02/13/2019", True, datetime(2019, 2, 13)),
    ],
)
def test_parse_delimited_date_swap_with_warning(
    all_parsers, date_string, dayfirst, expected
):
    """A date whose layout contradicts ``dayfirst`` parses but emits a UserWarning."""
    expected_warning = (
        "Provide format or specify infer_datetime_format=True for consistent parsing"
    )
    expected_frame = DataFrame({0: [expected]}, dtype="datetime64[ns]")
    buffer = StringIO(date_string)

    # The read must both warn and still produce the expected timestamp.
    with tm.assert_produces_warning(UserWarning, match=expected_warning):
        parsed = all_parsers.read_csv(
            buffer, header=None, dayfirst=dayfirst, parse_dates=[0]
        )

    tm.assert_frame_equal(parsed, expected_frame)
1601
+
1602
+
1577
1603
def _helper_hypothesis_delimited_date (call , date_string , ** kwargs ):
1578
1604
msg , result = None , None
1579
1605
try :
@@ -1602,9 +1628,11 @@ def test_hypothesis_delimited_date(date_format, dayfirst, delimiter, test_dateti
1602
1628
except_in_dateutil , except_out_dateutil = None , None
1603
1629
date_string = test_datetime .strftime (date_format .replace (" " , delimiter ))
1604
1630
1605
- except_out_dateutil , result = _helper_hypothesis_delimited_date (
1606
- parse_datetime_string , date_string , dayfirst = dayfirst
1607
- )
1631
+ with warnings .catch_warnings ():
1632
+ warnings .filterwarnings ("ignore" , category = UserWarning )
1633
+ except_out_dateutil , result = _helper_hypothesis_delimited_date (
1634
+ parse_datetime_string , date_string , dayfirst = dayfirst
1635
+ )
1608
1636
except_in_dateutil , expected = _helper_hypothesis_delimited_date (
1609
1637
du_parse ,
1610
1638
date_string ,
@@ -1674,3 +1702,95 @@ def test_date_parser_usecols_thousands(all_parsers):
1674
1702
)
1675
1703
expected = DataFrame ({"B" : [3 , 4 ], "C" : [Timestamp ("20-09-2001 01:00:00" )] * 2 })
1676
1704
tm .assert_frame_equal (result , expected )
1705
+
1706
+
1707
def test_dayfirst_warnings():
    """Check the ``dayfirst`` consistency warnings raised while reading CSV dates.

    CASE 1 uses a column where every value fits DD/MM/YYYY; CASE 2 mixes
    DD/MM/YYYY and MM/DD/YYYY so that no single ``dayfirst`` setting fits
    every row.  Each sub-case reads the same CSV text with different
    ``dayfirst`` / ``infer_datetime_format`` settings and compares the
    resulting index with the expected ``DatetimeIndex``.
    """
    # GH 12585
    warning_msg_day_first = (
        "Parsing '31/12/2014' in DD/MM/YYYY format. Provide "
        "format or specify infer_datetime_format=True for consistent parsing."
    )
    warning_msg_month_first = (
        "Parsing '03/30/2011' in MM/DD/YYYY format. Provide "
        "format or specify infer_datetime_format=True for consistent parsing."
    )

    def _read_date_index(csv_text, **read_kwargs):
        # Parse the "date" column, use it as the index and return that index.
        return read_csv(
            StringIO(csv_text), parse_dates=["date"], index_col="date", **read_kwargs
        ).index

    # CASE 1: valid input
    csv_data = "date\n31/12/2014\n10/03/2011"
    expected_consistent = DatetimeIndex(
        ["2014-12-31", "2011-03-10"], dtype="datetime64[ns]", freq=None, name="date"
    )
    expected_inconsistent = DatetimeIndex(
        ["2014-12-31", "2011-10-03"], dtype="datetime64[ns]", freq=None, name="date"
    )

    # A. dayfirst arg correct, no warning
    res1 = _read_date_index(csv_data, dayfirst=True)
    tm.assert_index_equal(expected_consistent, res1)

    # B. dayfirst arg incorrect, warning + incorrect output
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res2 = _read_date_index(csv_data, dayfirst=False)
    tm.assert_index_equal(expected_inconsistent, res2)

    # C. dayfirst default arg, same as B
    # dayfirst is deliberately omitted so the default value is exercised
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res3 = _read_date_index(csv_data)
    tm.assert_index_equal(expected_inconsistent, res3)

    # D. infer_datetime_format=True overrides dayfirst default
    # no warning + correct result
    res4 = _read_date_index(csv_data, infer_datetime_format=True)
    tm.assert_index_equal(expected_consistent, res4)

    # CASE 2: invalid input
    # cannot consistently process with single format
    # warnings *always* raised

    # first in DD/MM/YYYY, second in MM/DD/YYYY
    csv_data = "date\n31/12/2014\n03/30/2011"
    expected = DatetimeIndex(
        ["2014-12-31", "2011-03-30"], dtype="datetime64[ns]", freq=None, name="date"
    )

    # A. use dayfirst=True
    with tm.assert_produces_warning(UserWarning, match=warning_msg_month_first):
        res5 = _read_date_index(csv_data, dayfirst=True)
    tm.assert_index_equal(expected, res5)

    # B. use dayfirst=False
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res6 = _read_date_index(csv_data, dayfirst=False)
    tm.assert_index_equal(expected, res6)

    # C. use dayfirst default arg, same as B
    # dayfirst is deliberately omitted so the default value is exercised
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res7 = _read_date_index(csv_data)
    tm.assert_index_equal(expected, res7)

    # D. use infer_datetime_format=True
    with tm.assert_produces_warning(UserWarning, match=warning_msg_day_first):
        res8 = _read_date_index(csv_data, infer_datetime_format=True)
    tm.assert_index_equal(expected, res8)
0 commit comments