Skip to content

Commit 2c21af5

Browse files
Backport PR #52195 on branch 2.0.x (WARN: Only warn about inconsistent parsing if there are multiple non-null elements) (#52242)
Backport PR #52195: WARN: Only warn about inconsistent parsing if there are multiple non-null elements

Co-authored-by: Marco Edward Gorelli <[email protected]>
1 parent 454e820 commit 2c21af5

File tree

5 files changed: +127 -158 lines changed

pandas/core/tools/datetimes.py

+10-7
Original file line number | Diff line number | Diff line change
@@ -139,13 +139,16 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
139139
)
140140
if guessed_format is not None:
141141
return guessed_format
142-
warnings.warn(
143-
"Could not infer format, so each element will be parsed "
144-
"individually, falling back to `dateutil`. To ensure parsing is "
145-
"consistent and as-expected, please specify a format.",
146-
UserWarning,
147-
stacklevel=find_stack_level(),
148-
)
142+
# If there are multiple non-null elements, warn about
143+
# how parsing might not be consistent
144+
if tslib.first_non_null(arr[first_non_null + 1 :]) != -1:
145+
warnings.warn(
146+
"Could not infer format, so each element will be parsed "
147+
"individually, falling back to `dateutil`. To ensure parsing is "
148+
"consistent and as-expected, please specify a format.",
149+
UserWarning,
150+
stacklevel=find_stack_level(),
151+
)
149152
return None
150153

151154

pandas/tests/io/parser/test_parse_dates.py

+9-7
Original file line number | Diff line number | Diff line change
@@ -1252,13 +1252,15 @@ def test_bad_date_parse(all_parsers, cache_dates, value):
12521252
parser = all_parsers
12531253
s = StringIO((f"{value},\n") * 50000)
12541254

1255-
if parser.engine == "pyarrow":
1255+
if parser.engine == "pyarrow" and not cache_dates:
12561256
# None in input gets converted to 'None', for which
12571257
# pandas tries to guess the datetime format, triggering
12581258
# the warning. TODO: parse dates directly in pyarrow, see
12591259
# https://github.com/pandas-dev/pandas/issues/48017
12601260
warn = UserWarning
12611261
else:
1262+
# Note: warning is not raised if 'cache_dates', because here there is only a
1263+
# single unique date and hence no risk of inconsistent parsing.
12621264
warn = None
12631265
parser.read_csv_check_warnings(
12641266
warn,
@@ -1285,6 +1287,10 @@ def test_bad_date_parse_with_warning(all_parsers, cache_dates, value):
12851287
# TODO: parse dates directly in pyarrow, see
12861288
# https://github.com/pandas-dev/pandas/issues/48017
12871289
warn = None
1290+
elif cache_dates:
1291+
# Note: warning is not raised if 'cache_dates', because here there is only a
1292+
# single unique date and hence no risk of inconsistent parsing.
1293+
warn = None
12881294
else:
12891295
warn = UserWarning
12901296
parser.read_csv_check_warnings(
@@ -1737,9 +1743,7 @@ def test_parse_timezone(all_parsers):
17371743
def test_invalid_parse_delimited_date(all_parsers, date_string):
17381744
parser = all_parsers
17391745
expected = DataFrame({0: [date_string]}, dtype="object")
1740-
result = parser.read_csv_check_warnings(
1741-
UserWarning,
1742-
"Could not infer format",
1746+
result = parser.read_csv(
17431747
StringIO(date_string),
17441748
header=None,
17451749
parse_dates=[0],
@@ -2063,9 +2067,7 @@ def test_infer_first_column_as_index(all_parsers):
20632067
# GH#11019
20642068
parser = all_parsers
20652069
data = "a,b,c\n1970-01-01,2,3,4"
2066-
result = parser.read_csv_check_warnings(
2067-
UserWarning,
2068-
"Could not infer format",
2070+
result = parser.read_csv(
20692071
StringIO(data),
20702072
parse_dates=["a"],
20712073
)

pandas/tests/io/parser/usecols/test_parse_dates.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -124,9 +124,7 @@ def test_usecols_with_parse_dates4(all_parsers):
124124
}
125125
expected = DataFrame(cols, columns=["a_b"] + list("cdefghij"))
126126

127-
result = parser.read_csv_check_warnings(
128-
UserWarning,
129-
"Could not infer format",
127+
result = parser.read_csv(
130128
StringIO(data),
131129
usecols=usecols,
132130
parse_dates=parse_dates,

pandas/tests/test_algos.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -1231,8 +1231,7 @@ def test_value_counts_datetime_outofbounds(self):
12311231
tm.assert_series_equal(res, exp)
12321232

12331233
# GH 12424
1234-
with tm.assert_produces_warning(UserWarning, match="Could not infer format"):
1235-
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
1234+
res = to_datetime(Series(["2362-01-01", np.nan]), errors="ignore")
12361235
exp = Series(["2362-01-01", np.nan], dtype=object)
12371236
tm.assert_series_equal(res, exp)
12381237

0 commit comments

Comments
 (0)