19
19
20
20
import numpy as np
21
21
22
- from pandas ._libs import tslib
22
+ from pandas ._libs import (
23
+ lib ,
24
+ tslib ,
25
+ )
23
26
from pandas ._libs .tslibs import (
24
27
OutOfBoundsDatetime ,
25
28
Timedelta ,
@@ -129,7 +132,16 @@ def _guess_datetime_format_for_array(arr, dayfirst: bool | None = False) -> str
129
132
if (first_non_null := tslib .first_non_null (arr )) != - 1 :
130
133
if type (first_non_nan_element := arr [first_non_null ]) is str :
131
134
# GH#32264 np.str_ object
132
- return guess_datetime_format (first_non_nan_element , dayfirst = dayfirst )
135
+ guessed_format = guess_datetime_format (
136
+ first_non_nan_element , dayfirst = dayfirst
137
+ )
138
+ if guessed_format is not None :
139
+ return guessed_format
140
+ warnings .warn (
141
+ "Could not infer format - "
142
+ "to ensure consistent parsing, specify a format." ,
143
+ stacklevel = find_stack_level (),
144
+ )
133
145
return None
134
146
135
147
@@ -331,7 +343,6 @@ def _convert_listlike_datetimes(
331
343
tz : Timezone | None = None ,
332
344
unit : str | None = None ,
333
345
errors : DateTimeErrorChoices = "raise" ,
334
- infer_datetime_format : bool = False ,
335
346
dayfirst : bool | None = None ,
336
347
yearfirst : bool | None = None ,
337
348
exact : bool = True ,
@@ -415,27 +426,19 @@ def _convert_listlike_datetimes(
415
426
arg = ensure_object (arg )
416
427
require_iso8601 = False
417
428
418
- if infer_datetime_format and format is None :
429
+ if format is None :
419
430
format = _guess_datetime_format_for_array (arg , dayfirst = dayfirst )
420
431
432
+ # There is a special fast-path for iso8601 formatted
433
+ # datetime strings, so in those cases don't use the inferred
434
+ # format because this path makes the process slower in this
435
+ # special case
436
+ if format is not None and format_is_iso (format ):
437
+ require_iso8601 = True
438
+ format = None
421
439
if format is not None :
422
- # There is a special fast-path for iso8601 formatted
423
- # datetime strings, so in those cases don't use the inferred
424
- # format because this path makes process slower in this
425
- # special case
426
- format_is_iso8601 = format_is_iso (format )
427
- if format_is_iso8601 :
428
- require_iso8601 = not infer_datetime_format
429
- format = None
430
-
431
- if format is not None :
432
- res = _to_datetime_with_format (
433
- arg , orig_arg , name , tz , format , exact , errors , infer_datetime_format
434
- )
435
- if res is not None :
436
- return res
440
+ return _to_datetime_with_format (arg , orig_arg , name , tz , format , exact , errors )
437
441
438
- assert format is None or infer_datetime_format
439
442
utc = tz == "utc"
440
443
result , tz_parsed = objects_to_datetime64ns (
441
444
arg ,
@@ -464,8 +467,7 @@ def _array_strptime_with_fallback(
464
467
fmt : str ,
465
468
exact : bool ,
466
469
errors : str ,
467
- infer_datetime_format : bool ,
468
- ) -> Index | None :
470
+ ) -> Index :
469
471
"""
470
472
Call array_strptime, with fallback behavior depending on 'errors'.
471
473
"""
@@ -486,18 +488,14 @@ def _array_strptime_with_fallback(
486
488
# if fmt was inferred, try falling back
487
489
# to array_to_datetime - terminate here
488
490
# for specified formats
489
- if not infer_datetime_format :
490
- if errors == "raise" :
491
- raise
492
- elif errors == "coerce" :
493
- result = np .empty (arg .shape , dtype = "M8[ns]" )
494
- iresult = result .view ("i8" )
495
- iresult .fill (iNaT )
496
- else :
497
- result = arg
491
+ if errors == "raise" :
492
+ raise
493
+ elif errors == "coerce" :
494
+ result = np .empty (arg .shape , dtype = "M8[ns]" )
495
+ iresult = result .view ("i8" )
496
+ iresult .fill (iNaT )
498
497
else :
499
- # Indicates to the caller to fallback to objects_to_datetime64ns
500
- return None
498
+ result = arg
501
499
else :
502
500
if "%Z" in fmt or "%z" in fmt :
503
501
return _return_parsed_timezone_results (result , timezones , tz , name )
@@ -513,10 +511,9 @@ def _to_datetime_with_format(
513
511
fmt : str ,
514
512
exact : bool ,
515
513
errors : str ,
516
- infer_datetime_format : bool ,
517
- ) -> Index | None :
514
+ ) -> Index :
518
515
"""
519
- Try parsing with the given format, returning None on failure .
516
+ Try parsing with the given format.
520
517
"""
521
518
result = None
522
519
@@ -537,9 +534,7 @@ def _to_datetime_with_format(
537
534
return _box_as_indexlike (result , utc = utc , name = name )
538
535
539
536
# fallback
540
- res = _array_strptime_with_fallback (
541
- arg , name , tz , fmt , exact , errors , infer_datetime_format
542
- )
537
+ res = _array_strptime_with_fallback (arg , name , tz , fmt , exact , errors )
543
538
return res
544
539
545
540
@@ -713,7 +708,7 @@ def to_datetime(
713
708
format : str | None = None ,
714
709
exact : bool = True ,
715
710
unit : str | None = None ,
716
- infer_datetime_format : bool = False ,
711
+ infer_datetime_format : lib . NoDefault | bool = lib . no_default ,
717
712
origin : str = "unix" ,
718
713
cache : bool = True ,
719
714
) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None :
@@ -926,24 +921,6 @@ def to_datetime(
926
921
1 2016-03-05
927
922
dtype: datetime64[ns]
928
923
929
- Passing ``infer_datetime_format=True`` can often-times speedup a parsing
930
- if its not an ISO8601 format exactly, but in a regular format.
931
-
932
- >>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
933
- >>> s.head()
934
- 0 3/11/2000
935
- 1 3/12/2000
936
- 2 3/13/2000
937
- 3 3/11/2000
938
- 4 3/12/2000
939
- dtype: object
940
-
941
- >>> %timeit pd.to_datetime(s, infer_datetime_format=True) # doctest: +SKIP
942
- 100 loops, best of 3: 10.4 ms per loop
943
-
944
- >>> %timeit pd.to_datetime(s, infer_datetime_format=False) # doctest: +SKIP
945
- 1 loop, best of 3: 471 ms per loop
946
-
947
924
Using a unix epoch time
948
925
949
926
>>> pd.to_datetime(1490195805, unit='s')
@@ -1025,7 +1002,7 @@ def to_datetime(
1025
1002
are constant:
1026
1003
1027
1004
>>> from datetime import datetime
1028
- >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
1005
+ >>> pd.to_datetime(["2020-01-01 01:00:00 -01:00", datetime(2020, 1, 1, 3, 0)])
1029
1006
DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
1030
1007
dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
1031
1008
@@ -1060,6 +1037,14 @@ def to_datetime(
1060
1037
'2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
1061
1038
dtype='datetime64[ns, UTC]', freq=None)
1062
1039
"""
1040
+ if infer_datetime_format is not lib .no_default :
1041
+ # Kept for compatibility with old code - TODO remove
1042
+ warnings .warn (
1043
+ "The argument 'infer_datetime_format' has been removed - a strict version "
1044
+ "of it is now the default, see "
1045
+ "https://pandas.pydata.org/pdeps/0004-consistent-to-datetime-parsing.html" ,
1046
+ stacklevel = find_stack_level (),
1047
+ )
1063
1048
if arg is None :
1064
1049
return None
1065
1050
@@ -1075,7 +1060,6 @@ def to_datetime(
1075
1060
yearfirst = yearfirst ,
1076
1061
errors = errors ,
1077
1062
exact = exact ,
1078
- infer_datetime_format = infer_datetime_format ,
1079
1063
)
1080
1064
1081
1065
result : Timestamp | NaTType | Series | Index
0 commit comments