Skip to content

Commit b24ec6e

Browse files
author
MarcoGorelli
committed
remove ymd special-path
1 parent 1d5ce5b commit b24ec6e

File tree

6 files changed

+24
-128
lines changed

6 files changed

+24
-128
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -659,6 +659,7 @@ Datetimelike
659659
- Bug in ``pandas.tseries.holiday.Holiday`` where a half-open date interval causes inconsistent return types from :meth:`USFederalHolidayCalendar.holidays` (:issue:`49075`)
660660
- Bug in rendering :class:`DatetimeIndex` and :class:`Series` and :class:`DataFrame` with timezone-aware dtypes with ``dateutil`` or ``zoneinfo`` timezones near daylight-savings transitions (:issue:`49684`)
661661
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing :class:`Timestamp` or ``datetime`` objects with non-ISO8601 ``format`` (:issue:`49298`)
662+
- Bug in :func:`to_datetime` where passing ``format="%Y%m%d"`` together with ``exact`` did not raise when the input didn't match the format (:issue:`50051`)
662663
-
663664

664665
Timedelta

pandas/_libs/tslib.pyx

+8-12
Original file line numberDiff line numberDiff line change
@@ -546,17 +546,10 @@ cpdef array_to_datetime(
546546
seen_datetime = True
547547
iresult[i] = get_datetime64_nanos(val, NPY_FR_ns)
548548

549-
elif is_integer_object(val) or is_float_object(val):
550-
if require_iso8601:
551-
if is_coerce:
552-
iresult[i] = NPY_NAT
553-
continue
554-
elif is_raise:
555-
raise ValueError(
556-
f"time data \"{val}\" at position {i} doesn't "
557-
f"match format \"{format}\""
558-
)
559-
return values, tz_out
549+
elif (
550+
(is_integer_object(val) or is_float_object(val))
551+
and format is None
552+
):
560553
# these must be ns unit by-definition
561554
seen_integer = True
562555

@@ -575,7 +568,10 @@ cpdef array_to_datetime(
575568
except OverflowError:
576569
iresult[i] = NPY_NAT
577570

578-
elif isinstance(val, str):
571+
elif (
572+
(is_integer_object(val) or is_float_object(val))
573+
or isinstance(val, str)
574+
):
579575
# string
580576
if type(val) is not str:
581577
# GH#32264 np.str_ object

pandas/_libs/tslibs/parsing.pyi

-5
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,6 @@ def try_parse_dates(
2727
dayfirst: bool = ...,
2828
default: datetime | None = ...,
2929
) -> npt.NDArray[np.object_]: ...
30-
def try_parse_year_month_day(
31-
years: npt.NDArray[np.object_], # object[:]
32-
months: npt.NDArray[np.object_], # object[:]
33-
days: npt.NDArray[np.object_], # object[:]
34-
) -> npt.NDArray[np.object_]: ...
3530
def try_parse_datetime_components(
3631
years: npt.NDArray[np.object_], # object[:]
3732
months: npt.NDArray[np.object_], # object[:]

pandas/_libs/tslibs/parsing.pyx

+1-20
Original file line numberDiff line numberDiff line change
@@ -744,25 +744,6 @@ def try_parse_dates(
744744
return result.base # .base to access underlying ndarray
745745

746746

747-
def try_parse_year_month_day(
748-
object[:] years, object[:] months, object[:] days
749-
) -> np.ndarray:
750-
cdef:
751-
Py_ssize_t i, n
752-
object[::1] result
753-
754-
n = len(years)
755-
# TODO(cython3): Use len instead of `shape[0]`
756-
if months.shape[0] != n or days.shape[0] != n:
757-
raise ValueError("Length of years/months/days must all be equal")
758-
result = np.empty(n, dtype="O")
759-
760-
for i in range(n):
761-
result[i] = datetime(int(years[i]), int(months[i]), int(days[i]))
762-
763-
return result.base # .base to access underlying ndarray
764-
765-
766747
def try_parse_datetime_components(object[:] years,
767748
object[:] months,
768749
object[:] days,
@@ -890,7 +871,7 @@ def format_is_iso(f: str) -> bint:
890871
but must be consistent. Leading 0s in dates and times are optional.
891872
"""
892873
iso_template = "%Y{date_sep}%m{date_sep}%d{time_sep}%H:%M:%S{micro_or_tz}".format
893-
excluded_formats = ["%Y%m%d", "%Y%m", "%Y"]
874+
excluded_formats = ["%Y%m", "%Y"]
894875

895876
for date_sep in [" ", "/", "\\", "-", ".", ""]:
896877
for time_sep in [" ", "T"]:

pandas/core/tools/datetimes.py

+2-79
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@
2424
Timedelta,
2525
Timestamp,
2626
iNaT,
27-
nat_strings,
28-
parsing,
2927
timezones as libtimezones,
3028
)
3129
from pandas._libs.tslibs.parsing import (
@@ -38,7 +36,6 @@
3836
AnyArrayLike,
3937
ArrayLike,
4038
DateTimeErrorChoices,
41-
npt,
4239
)
4340

4441
from pandas.core.dtypes.common import (
@@ -57,13 +54,11 @@
5754
ABCDataFrame,
5855
ABCSeries,
5956
)
60-
from pandas.core.dtypes.missing import notna
6157

6258
from pandas.arrays import (
6359
DatetimeArray,
6460
IntegerArray,
6561
)
66-
from pandas.core import algorithms
6762
from pandas.core.algorithms import unique
6863
from pandas.core.arrays.base import ExtensionArray
6964
from pandas.core.arrays.datetimes import (
@@ -407,7 +402,6 @@ def _convert_listlike_datetimes(
407402

408403
# warn if passing timedelta64, raise for PeriodDtype
409404
# NB: this must come after unit transformation
410-
orig_arg = arg
411405
try:
412406
arg, _ = maybe_convert_dtype(arg, copy=False, tz=libtimezones.maybe_get_tz(tz))
413407
except TypeError:
@@ -435,8 +429,8 @@ def _convert_listlike_datetimes(
435429
require_iso8601 = not infer_datetime_format
436430

437431
if format is not None and not require_iso8601:
438-
res = _to_datetime_with_format(
439-
arg, orig_arg, name, utc, format, exact, errors, infer_datetime_format
432+
res = _array_strptime_with_fallback(
433+
arg, name, utc, format, exact, errors, infer_datetime_format
440434
)
441435
if res is not None:
442436
return res
@@ -523,23 +517,6 @@ def _to_datetime_with_format(
523517
"""
524518
Try parsing with the given format, returning None on failure.
525519
"""
526-
result = None
527-
528-
# shortcut formatting here
529-
if fmt == "%Y%m%d":
530-
# pass orig_arg as float-dtype may have been converted to
531-
# datetime64[ns]
532-
orig_arg = ensure_object(orig_arg)
533-
try:
534-
# may return None without raising
535-
result = _attempt_YYYYMMDD(orig_arg, errors=errors)
536-
except (ValueError, TypeError, OutOfBoundsDatetime) as err:
537-
raise ValueError(
538-
"cannot convert the input to '%Y%m%d' date format"
539-
) from err
540-
if result is not None:
541-
return _box_as_indexlike(result, utc=utc, name=name)
542-
543520
# fallback
544521
res = _array_strptime_with_fallback(
545522
arg, name, utc, fmt, exact, errors, infer_datetime_format
@@ -1244,60 +1221,6 @@ def coerce(values):
12441221
return values
12451222

12461223

1247-
def _attempt_YYYYMMDD(arg: npt.NDArray[np.object_], errors: str) -> np.ndarray | None:
1248-
"""
1249-
try to parse the YYYYMMDD/%Y%m%d format, try to deal with NaT-like,
1250-
arg is a passed in as an object dtype, but could really be ints/strings
1251-
with nan-like/or floats (e.g. with nan)
1252-
1253-
Parameters
1254-
----------
1255-
arg : np.ndarray[object]
1256-
errors : {'raise','ignore','coerce'}
1257-
"""
1258-
1259-
def calc(carg):
1260-
# calculate the actual result
1261-
carg = carg.astype(object, copy=False)
1262-
parsed = parsing.try_parse_year_month_day(
1263-
carg / 10000, carg / 100 % 100, carg % 100
1264-
)
1265-
return tslib.array_to_datetime(parsed, errors=errors)[0]
1266-
1267-
def calc_with_mask(carg, mask):
1268-
result = np.empty(carg.shape, dtype="M8[ns]")
1269-
iresult = result.view("i8")
1270-
iresult[~mask] = iNaT
1271-
1272-
masked_result = calc(carg[mask].astype(np.float64).astype(np.int64))
1273-
result[mask] = masked_result.astype("M8[ns]")
1274-
return result
1275-
1276-
# try intlike / strings that are ints
1277-
try:
1278-
return calc(arg.astype(np.int64))
1279-
except (ValueError, OverflowError, TypeError):
1280-
pass
1281-
1282-
# a float with actual np.nan
1283-
try:
1284-
carg = arg.astype(np.float64)
1285-
return calc_with_mask(carg, notna(carg))
1286-
except (ValueError, OverflowError, TypeError):
1287-
pass
1288-
1289-
# string with NaN-like
1290-
try:
1291-
# error: Argument 2 to "isin" has incompatible type "List[Any]"; expected
1292-
# "Union[Union[ExtensionArray, ndarray], Index, Series]"
1293-
mask = ~algorithms.isin(arg, list(nat_strings)) # type: ignore[arg-type]
1294-
return calc_with_mask(arg, mask)
1295-
except (ValueError, OverflowError, TypeError):
1296-
pass
1297-
1298-
return None
1299-
1300-
13011224
__all__ = [
13021225
"DateParseError",
13031226
"should_cache",

pandas/tests/tools/test_to_datetime.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -125,24 +125,21 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
125125
expected[2] = np.nan
126126
ser[2] = np.nan
127127

128-
result = to_datetime(ser, format="%Y%m%d", cache=cache)
129-
tm.assert_series_equal(result, expected)
128+
with pytest.raises(ValueError, match=None):
129+
to_datetime(ser, format="%Y%m%d", cache=cache)
130130

131131
# string with NaT
132132
ser2 = ser.apply(str)
133133
ser2[2] = "nat"
134-
result = to_datetime(ser2, format="%Y%m%d", cache=cache)
135-
tm.assert_series_equal(result, expected)
134+
with pytest.raises(ValueError, match=None):
135+
to_datetime(ser2, format="%Y%m%d", cache=cache)
136136

137137
def test_to_datetime_format_YYYYMMDD_ignore(self, cache):
138138
# coercion
139139
# GH 7930
140140
ser = Series([20121231, 20141231, 99991231])
141+
expected = Series([20121231, 20141231, 99991231], dtype=object)
141142
result = to_datetime(ser, format="%Y%m%d", errors="ignore", cache=cache)
142-
expected = Series(
143-
[datetime(2012, 12, 31), datetime(2014, 12, 31), datetime(9999, 12, 31)],
144-
dtype=object,
145-
)
146143
tm.assert_series_equal(result, expected)
147144

148145
def test_to_datetime_format_YYYYMMDD_coercion(self, cache):
@@ -249,7 +246,7 @@ def test_to_datetime_format_integer(self, cache):
249246
# valid date, length == 8
250247
[20121030, datetime(2012, 10, 30)],
251248
# short valid date, length == 6
252-
[199934, datetime(1999, 3, 4)],
249+
[199934, 199934],
253250
# long integer date partially parsed to datetime(2012,1,1), length > 8
254251
[2012010101, 2012010101],
255252
# invalid date partially parsed to datetime(2012,9,9), length == 8
@@ -1714,8 +1711,8 @@ def test_dataframe_coerce(self, cache):
17141711
df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
17151712

17161713
msg = (
1717-
"cannot assemble the datetimes: time data .+ does not "
1718-
r"match format '%Y%m%d' \(match\)"
1714+
r"cannot assemble the datetimes: time data .+ doesn't "
1715+
r'match format "%Y%m%d"'
17191716
)
17201717
with pytest.raises(ValueError, match=msg):
17211718
to_datetime(df2, cache=cache)
@@ -1791,7 +1788,10 @@ def test_dataframe_mixed(self, cache):
17911788
def test_dataframe_float(self, cache):
17921789
# float
17931790
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
1794-
msg = "cannot assemble the datetimes: unconverted data remains: 1"
1791+
msg = (
1792+
r'cannot assemble the datetimes: time data "20000151" at '
1793+
r'position 0 doesn\'t match format "%Y%m%d"'
1794+
)
17951795
with pytest.raises(ValueError, match=msg):
17961796
to_datetime(df, cache=cache)
17971797

0 commit comments

Comments
 (0)