Skip to content

Commit dadb44b

Browse files
author
MarcoGorelli
committed
🥅 catch warnings
1 parent 632ea9d commit dadb44b

File tree

9 files changed (+309 -217 lines changed)

pandas/core/tools/datetimes.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1002,7 +1002,7 @@ def to_datetime(
10021002
are constant:
10031003
10041004
>>> from datetime import datetime
1005-
>>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
1005+
>>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)])
10061006
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
10071007
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
10081008

pandas/tests/frame/methods/test_to_csv.py

+4 -1
Original file line number | Diff line number | Diff line change
@@ -514,7 +514,10 @@ def test_to_csv_multiindex(self, float_frame, datetime_frame):
514514
tsframe.index = MultiIndex.from_arrays(new_index)
515515

516516
tsframe.to_csv(path, index_label=["time", "foo"])
517-
recons = self.read_csv(path, index_col=[0, 1])
517+
with tm.assert_produces_warning(
518+
UserWarning, match="Could not infer format"
519+
):
520+
recons = self.read_csv(path, index_col=[0, 1], parse_dates=True)
518521

519522
# TODO to_csv drops column name
520523
tm.assert_frame_equal(tsframe, recons, check_names=False)

pandas/tests/groupby/test_function.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -717,7 +717,8 @@ def test_max_nan_bug():
717717
-05-06,2013-05-06 00:00:00,,log.log
718718
-05-07,2013-05-07 00:00:00,OE,xlsx"""
719719

720-
df = pd.read_csv(StringIO(raw), parse_dates=[0])
720+
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
721+
df = pd.read_csv(StringIO(raw), parse_dates=[0])
721722
gb = df.groupby("Date")
722723
r = gb[["File"]].max()
723724
e = gb["File"].max().to_frame()

pandas/tests/groupby/transform/test_transform.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1070,7 +1070,8 @@ def demean_rename(x):
10701070
@pytest.mark.parametrize("func", [min, max, np.min, np.max, "first", "last"])
10711071
def test_groupby_transform_timezone_column(func):
10721072
# GH 24198
1073-
ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
1073+
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
1074+
ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore")
10741075
result = DataFrame({"end_time": [ts], "id": [1]})
10751076
result["max_end_time"] = result.groupby("id").end_time.transform(func)
10761077
expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"])

pandas/tests/io/excel/test_readers.py

+10 -3
Original file line number | Diff line number | Diff line change
@@ -888,11 +888,18 @@ def test_reader_seconds(self, request, engine, read_ext):
888888
]
889889
}
890890
)
891-
892-
actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
891+
if engine == "odf":
892+
# odf recognises cell type as time (from its attribute)
893+
# so tries to parse it.
894+
warning = UserWarning
895+
else:
896+
warning = None
897+
with tm.assert_produces_warning(warning, match="Could not infer format"):
898+
actual = pd.read_excel("times_1900" + read_ext, sheet_name="Sheet1")
893899
tm.assert_frame_equal(actual, expected)
894900

895-
actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
901+
with tm.assert_produces_warning(warning, match="Could not infer format"):
902+
actual = pd.read_excel("times_1904" + read_ext, sheet_name="Sheet1")
896903
tm.assert_frame_equal(actual, expected)
897904

898905
def test_read_excel_multiindex(self, request, read_ext):

pandas/tests/io/parser/test_parse_dates.py

+89 -9
Original file line number | Diff line number | Diff line change
@@ -826,7 +826,13 @@ def test_yy_format_with_year_first(all_parsers, parse_dates):
826826
090331,0830,5,6
827827
"""
828828
parser = all_parsers
829-
result = parser.read_csv(StringIO(data), index_col=0, parse_dates=parse_dates)
829+
result = parser.read_csv_check_warnings(
830+
UserWarning,
831+
"Could not infer format",
832+
StringIO(data),
833+
index_col=0,
834+
parse_dates=parse_dates,
835+
)
830836
index = DatetimeIndex(
831837
[
832838
datetime(2009, 1, 31, 0, 10, 0),
@@ -899,7 +905,13 @@ def test_multi_index_parse_dates(all_parsers, index_col):
899905
columns=["A", "B", "C"],
900906
index=index,
901907
)
902-
result = parser.read_csv(StringIO(data), index_col=index_col, parse_dates=True)
908+
result = parser.read_csv_check_warnings(
909+
UserWarning,
910+
"Could not infer format",
911+
StringIO(data),
912+
index_col=index_col,
913+
parse_dates=True,
914+
)
903915
tm.assert_frame_equal(result, expected)
904916

905917

@@ -1232,19 +1244,55 @@ def test_read_with_parse_dates_invalid_type(all_parsers, parse_dates):
12321244

12331245

12341246
@pytest.mark.parametrize("cache_dates", [True, False])
1235-
@pytest.mark.parametrize("value", ["nan", "0", ""])
1247+
@pytest.mark.parametrize("value", ["nan", ""])
12361248
def test_bad_date_parse(all_parsers, cache_dates, value):
12371249
# if we have an invalid date make sure that we handle this with
12381250
# and w/o the cache properly
12391251
parser = all_parsers
12401252
s = StringIO((f"{value},\n") * 50000)
12411253

1242-
parser.read_csv(
1254+
if parser.engine == "pyarrow":
1255+
# None in input gets converted to 'None', for which
1256+
# pandas tries to guess the datetime format, triggering
1257+
# the warning. TODO: parse dates directly in pyarrow, see
1258+
# https://github.com/pandas-dev/pandas/issues/48017
1259+
warn = UserWarning
1260+
else:
1261+
warn = None
1262+
parser.read_csv_check_warnings(
1263+
warn,
1264+
"Could not infer format",
1265+
s,
1266+
header=None,
1267+
names=["foo", "bar"],
1268+
parse_dates=["foo"],
1269+
cache_dates=cache_dates,
1270+
)
1271+
1272+
1273+
@pytest.mark.parametrize("cache_dates", [True, False])
1274+
@pytest.mark.parametrize("value", ["0"])
1275+
def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
1276+
# if we have an invalid date make sure that we handle this with
1277+
# and w/o the cache properly.
1278+
parser = all_parsers
1279+
s = StringIO((f"{value},\n") * 50000)
1280+
1281+
if parser.engine == "pyarrow":
1282+
# pyarrow reads "0" as 0 (of type int64), and so
1283+
# pandas doesn't try to guess the datetime format
1284+
# TODO: parse dates directly in pyarrow, see
1285+
# https://github.com/pandas-dev/pandas/issues/48017
1286+
warn = None
1287+
else:
1288+
warn = UserWarning
1289+
parser.read_csv_check_warnings(
1290+
warn,
1291+
"Could not infer format",
12431292
s,
12441293
header=None,
12451294
names=["foo", "bar"],
12461295
parse_dates=["foo"],
1247-
infer_datetime_format=False,
12481296
cache_dates=cache_dates,
12491297
)
12501298

@@ -1262,6 +1310,19 @@ def test_parse_dates_empty_string(all_parsers):
12621310
tm.assert_frame_equal(result, expected)
12631311

12641312

1313+
def test_parse_dates_infer_datetime_format_warning(all_parsers):
1314+
# GH 49024
1315+
parser = all_parsers
1316+
data = "Date,test\n2012-01-01,1\n,2"
1317+
parser.read_csv_check_warnings(
1318+
UserWarning,
1319+
"The argument 'infer_datetime_format' is deprecated",
1320+
StringIO(data),
1321+
parse_dates=["Date"],
1322+
infer_datetime_format=True,
1323+
)
1324+
1325+
12651326
@xfail_pyarrow
12661327
@pytest.mark.parametrize(
12671328
"data,kwargs,expected",
@@ -1635,7 +1696,13 @@ def test_parse_timezone(all_parsers):
16351696
def test_invalid_parse_delimited_date(all_parsers, date_string):
16361697
parser = all_parsers
16371698
expected = DataFrame({0: [date_string]}, dtype="object")
1638-
result = parser.read_csv(StringIO(date_string), header=None, parse_dates=[0])
1699+
result = parser.read_csv_check_warnings(
1700+
UserWarning,
1701+
"Could not infer format",
1702+
StringIO(date_string),
1703+
header=None,
1704+
parse_dates=[0],
1705+
)
16391706
tm.assert_frame_equal(result, expected)
16401707

16411708

@@ -1786,7 +1853,13 @@ def test_date_parser_and_names(all_parsers):
17861853
# GH#33699
17871854
parser = all_parsers
17881855
data = StringIO("""x,y\n1,2""")
1789-
result = parser.read_csv(data, parse_dates=["B"], names=["B"])
1856+
result = parser.read_csv_check_warnings(
1857+
UserWarning,
1858+
"Could not infer format",
1859+
data,
1860+
parse_dates=["B"],
1861+
names=["B"],
1862+
)
17901863
expected = DataFrame({"B": ["y", "2"]}, index=["x", "1"])
17911864
tm.assert_frame_equal(result, expected)
17921865

@@ -1833,7 +1906,9 @@ def test_date_parser_usecols_thousands(all_parsers):
18331906
"""
18341907

18351908
parser = all_parsers
1836-
result = parser.read_csv(
1909+
result = parser.read_csv_check_warnings(
1910+
UserWarning,
1911+
"Could not infer format",
18371912
StringIO(data),
18381913
parse_dates=[1],
18391914
usecols=[1, 2],
@@ -1947,7 +2022,12 @@ def test_infer_first_column_as_index(all_parsers):
19472022
# GH#11019
19482023
parser = all_parsers
19492024
data = "a,b,c\n1970-01-01,2,3,4"
1950-
result = parser.read_csv(StringIO(data), parse_dates=["a"])
2025+
result = parser.read_csv_check_warnings(
2026+
UserWarning,
2027+
"Could not infer format",
2028+
StringIO(data),
2029+
parse_dates=["a"],
2030+
)
19512031
expected = DataFrame({"a": "2", "b": 3, "c": 4}, index=["1970-01-01"])
19522032
tm.assert_frame_equal(result, expected)
19532033

pandas/tests/io/parser/usecols/test_parse_dates.py

+7 -1
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,13 @@ def test_usecols_with_parse_dates4(all_parsers):
124124
}
125125
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
126126

127-
result = parser.read_csv(StringIO(data), usecols=usecols, parse_dates=parse_dates)
127+
result = parser.read_csv_check_warnings(
128+
UserWarning,
129+
"Could not infer format",
130+
StringIO(data),
131+
usecols=usecols,
132+
parse_dates=parse_dates,
133+
)
128134
tm.assert_frame_equal(result, expected)
129135

130136

pandas/tests/test_algos.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -1212,7 +1212,8 @@ def test_value_counts_datetime_outofbounds(self):
12121212
tm.assert_series_equal(res, exp)
12131213

12141214
# GH 12424
1215-
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
1215+
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
1216+
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
12161217
exp = Series(["2362-01-01", np.nan], dtype=object)
12171218
tm.assert_series_equal(res, exp)
12181219

0 commit comments

Comments
 (0)