@@ -826,7 +826,13 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
826
826
090331,0830,5,6
827
827
"""
828
828
parser = all_parsers
829
- result = parser .read_csv (StringIO (data ), index_col = 0 , parse_dates = parse_dates )
829
+ result = parser .read_csv_check_warnings (
830
+ UserWarning ,
831
+ "Could not infer format" ,
832
+ StringIO (data ),
833
+ index_col = 0 ,
834
+ parse_dates = parse_dates ,
835
+ )
830
836
index = DatetimeIndex (
831
837
[
832
838
datetime (2009 , 1 , 31 , 0 , 10 , 0 ),
@@ -899,7 +905,13 @@ def test_multi_index_parse_dates(all_parsers, index_col):
899
905
columns = ["A" , "B" , "C" ],
900
906
index = index ,
901
907
)
902
- result = parser .read_csv (StringIO (data ), index_col = index_col , parse_dates = True )
908
+ result = parser .read_csv_check_warnings (
909
+ UserWarning ,
910
+ "Could not infer format" ,
911
+ StringIO (data ),
912
+ index_col = index_col ,
913
+ parse_dates = True ,
914
+ )
903
915
tm .assert_frame_equal (result , expected )
904
916
905
917
@@ -1232,19 +1244,55 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates):
1232
1244
1233
1245
1234
1246
@pytest .mark .parametrize ("cache_dates" , [True , False ])
1235
- @pytest .mark .parametrize ("value" , ["nan" , "0" , " " ])
1247
+ @pytest .mark .parametrize ("value" , ["nan" , "" ])
1236
1248
def test_bad_date_parse (all_parsers , cache_dates , value ):
1237
1249
# if we have an invalid date make sure that we handle this with
1238
1250
# and w/o the cache properly
1239
1251
parser = all_parsers
1240
1252
s = StringIO ((f"{ value } ,\n " ) * 50000 )
1241
1253
1242
- parser .read_csv (
1254
+ if parser .engine == "pyarrow" :
1255
+ # None in input gets converted to 'None', for which
1256
+ # pandas tries to guess the datetime format, triggering
1257
+ # the warning. TODO: parse dates directly in pyarrow, see
1258
+ # https://github.com/pandas-dev/pandas/issues/48017
1259
+ warn = UserWarning
1260
+ else :
1261
+ warn = None
1262
+ parser .read_csv_check_warnings (
1263
+ warn ,
1264
+ "Could not infer format" ,
1265
+ s ,
1266
+ header = None ,
1267
+ names = ["foo" , "bar" ],
1268
+ parse_dates = ["foo" ],
1269
+ cache_dates = cache_dates ,
1270
+ )
1271
+
1272
+
1273
+ @pytest .mark .parametrize ("cache_dates" , [True , False ])
1274
+ @pytest .mark .parametrize ("value" , ["0" ])
1275
+ def test_bad_date_parse_with_warning (all_parsers , cache_dates , value ):
1276
+ # if we have an invalid date, make sure that we handle it properly
1277
+ # both with and w/o the cache.
1278
+ parser = all_parsers
1279
+ s = StringIO ((f"{ value } ,\n " ) * 50000 )
1280
+
1281
+ if parser .engine == "pyarrow" :
1282
+ # pyarrow reads "0" as 0 (of type int64), and so
1283
+ # pandas doesn't try to guess the datetime format
1284
+ # TODO: parse dates directly in pyarrow, see
1285
+ # https://github.com/pandas-dev/pandas/issues/48017
1286
+ warn = None
1287
+ else :
1288
+ warn = UserWarning
1289
+ parser .read_csv_check_warnings (
1290
+ warn ,
1291
+ "Could not infer format" ,
1243
1292
s ,
1244
1293
header = None ,
1245
1294
names = ["foo" , "bar" ],
1246
1295
parse_dates = ["foo" ],
1247
- infer_datetime_format = False ,
1248
1296
cache_dates = cache_dates ,
1249
1297
)
1250
1298
@@ -1262,6 +1310,19 @@ def test_parse_dates_empty_string(all_parsers):
1262
1310
tm .assert_frame_equal (result , expected )
1263
1311
1264
1312
1313
+ def test_parse_dates_infer_datetime_format_warning (all_parsers ):
1314
+ # GH 49024
1315
+ parser = all_parsers
1316
+ data = "Date,test\n 2012-01-01,1\n ,2"
1317
+ parser .read_csv_check_warnings (
1318
+ UserWarning ,
1319
+ "The argument 'infer_datetime_format' is deprecated" ,
1320
+ StringIO (data ),
1321
+ parse_dates = ["Date" ],
1322
+ infer_datetime_format = True ,
1323
+ )
1324
+
1325
+
1265
1326
@xfail_pyarrow
1266
1327
@pytest .mark .parametrize (
1267
1328
"data,kwargs,expected" ,
@@ -1635,7 +1696,13 @@ def test_parse_timezone(all_parsers):
1635
1696
def test_invalid_parse_delimited_date (all_parsers , date_string ):
1636
1697
parser = all_parsers
1637
1698
expected = DataFrame ({0 : [date_string ]}, dtype = "object" )
1638
- result = parser .read_csv (StringIO (date_string ), header = None , parse_dates = [0 ])
1699
+ result = parser .read_csv_check_warnings (
1700
+ UserWarning ,
1701
+ "Could not infer format" ,
1702
+ StringIO (date_string ),
1703
+ header = None ,
1704
+ parse_dates = [0 ],
1705
+ )
1639
1706
tm .assert_frame_equal (result , expected )
1640
1707
1641
1708
@@ -1786,7 +1853,13 @@ def test_date_parser_and_names(all_parsers):
1786
1853
# GH#33699
1787
1854
parser = all_parsers
1788
1855
data = StringIO ("""x,y\n 1,2""" )
1789
- result = parser .read_csv (data , parse_dates = ["B" ], names = ["B" ])
1856
+ result = parser .read_csv_check_warnings (
1857
+ UserWarning ,
1858
+ "Could not infer format" ,
1859
+ data ,
1860
+ parse_dates = ["B" ],
1861
+ names = ["B" ],
1862
+ )
1790
1863
expected = DataFrame ({"B" : ["y" , "2" ]}, index = ["x" , "1" ])
1791
1864
tm .assert_frame_equal (result , expected )
1792
1865
@@ -1833,7 +1906,9 @@ def test_date_parser_usecols_thousands(all_parsers):
1833
1906
"""
1834
1907
1835
1908
parser = all_parsers
1836
- result = parser .read_csv (
1909
+ result = parser .read_csv_check_warnings (
1910
+ UserWarning ,
1911
+ "Could not infer format" ,
1837
1912
StringIO (data ),
1838
1913
parse_dates = [1 ],
1839
1914
usecols = [1 , 2 ],
@@ -1947,7 +2022,12 @@ def test_infer_first_column_as_index(all_parsers):
1947
2022
# GH#11019
1948
2023
parser = all_parsers
1949
2024
data = "a,b,c\n 1970-01-01,2,3,4"
1950
- result = parser .read_csv (StringIO (data ), parse_dates = ["a" ])
2025
+ result = parser .read_csv_check_warnings (
2026
+ UserWarning ,
2027
+ "Could not infer format" ,
2028
+ StringIO (data ),
2029
+ parse_dates = ["a" ],
2030
+ )
1951
2031
expected = DataFrame ({"a" : "2" , "b" : 3 , "c" : 4 }, index = ["1970-01-01" ])
1952
2032
tm .assert_frame_equal (result , expected )
1953
2033
0 commit comments