@@ -691,111 +691,192 @@ def to_datetime(
691
691
"""
692
692
Convert argument to datetime.
693
693
694
+ This function converts a scalar, array-like, :class:`Series` or
695
+ :class:`DataFrame`/dict-like to a pandas datetime object.
696
+
694
697
Parameters
695
698
----------
696
699
arg : int, float, str, datetime, list, tuple, 1-d array, Series, DataFrame/dict-like
697
- The object to convert to a datetime. If the DataFrame is provided, the method
698
- expects minimally the following columns: "year", "month", "day".
700
+ The object to convert to a datetime. If a :class:`DataFrame` is provided, the
701
+ method expects minimally the following columns: :const:`"year"`,
702
+ :const:`"month"`, :const:`"day"`.
699
703
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
700
- - If 'raise', then invalid parsing will raise an exception.
701
- - If 'coerce', then invalid parsing will be set as NaT.
702
- - If 'ignore', then invalid parsing will return the input.
704
+ - If :const:`'raise'`, then invalid parsing will raise an exception.
705
+ - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`.
706
+ - If :const:`'ignore'`, then invalid parsing will return the input.
703
707
dayfirst : bool, default False
704
- Specify a date parse order if `arg` is str or its list-likes .
705
- If True, parses dates with the day first, eg 10/11/12 is parsed as
706
- 2012-11-10.
708
+ Specify a date parse order if `arg` is str or is list-like.
709
+ If :const:`True`, parses dates with the day first, e.g. :const:`"10/11/12"`
710
+ is parsed as :const:`2012-11-10`.
707
711
708
712
.. warning::
709
713
710
- dayfirst=True is not strict, but will prefer to parse
714
+ ``dayfirst=True`` is not strict, but will prefer to parse
711
715
with day first. If a delimited date string cannot be parsed in
712
716
accordance with the given `dayfirst` option, e.g.
713
717
``to_datetime(['31-12-2021'])``, then a warning will be shown.
714
718
715
719
yearfirst : bool, default False
716
- Specify a date parse order if `arg` is str or its list-likes .
720
+ Specify a date parse order if `arg` is str or is list-like.
717
721
718
- - If True parses dates with the year first, eg 10/11/12 is parsed as
719
- 2010-11-12.
720
- - If both dayfirst and yearfirst are True, yearfirst is preceded (same
721
- as dateutil).
722
+ - If :const:`True` parses dates with the year first, e.g.
723
+ :const:`"10/11/12"` is parsed as :const:`2010-11-12`.
724
+ - If both `dayfirst` and `yearfirst` are :const:`True`, `yearfirst` is
725
+ preceded (same as :mod:`dateutil`).
722
726
723
727
.. warning::
724
728
725
- yearfirst=True is not strict, but will prefer to parse
729
+ ``yearfirst=True`` is not strict, but will prefer to parse
726
730
with year first.
727
731
728
732
utc : bool, default None
729
- Return UTC DatetimeIndex if True (converting any tz-aware
730
- datetime.datetime objects as well).
733
+ Control timezone-related parsing, localization and conversion.
734
+
735
+ - If :const:`True`, the function *always* returns a timezone-aware
736
+ UTC-localized :class:`Timestamp`, :class:`Series` or
737
+ :class:`DatetimeIndex`. To do this, timezone-naive inputs are
738
+ *localized* as UTC, while timezone-aware inputs are *converted* to UTC.
739
+
740
+ - If :const:`False` (default), inputs will not be coerced to UTC.
741
+ Timezone-naive inputs will remain naive, while timezone-aware ones
742
+ will keep their time offsets. Limitations exist for mixed
743
+ offsets (typically, daylight savings), see :ref:`Examples
744
+ <to_datetime_tz_examples>` section for details.
745
+
746
+ See also: pandas general documentation about `timezone conversion and
747
+ localization
748
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
749
+ #time-zone-handling>`_.
750
+
731
751
format : str, default None
732
- The strftime to parse time, eg "%d/%m/%Y", note that "%f" will parse
733
- all the way up to nanoseconds.
734
- See strftime documentation for more information on choices:
735
- https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior.
736
- exact : bool, True by default
737
- Behaves as:
738
- - If True, require an exact format match.
739
- - If False, allow the format to match anywhere in the target string.
752
+ The strftime to parse time, e.g. :const:`"%d/%m/%Y"`. Note that
753
+ :const:`"%f"` will parse all the way up to nanoseconds. See
754
+ `strftime documentation
755
+ <https://docs.python.org/3/library/datetime.html
756
+ #strftime-and-strptime-behavior>`_ for more information on choices.
757
+ exact : bool, default True
758
+ Control how `format` is used:
759
+
760
+ - If :const:`True`, require an exact `format` match.
761
+ - If :const:`False`, allow the `format` to match anywhere in the target
762
+ string.
740
763
741
764
unit : str, default 'ns'
742
765
The unit of `arg` (D, s, ms, us, ns), where `arg` is an
743
766
integer or float number. This will be based off the origin.
744
- Example, with unit='ms' and origin='unix' (the default), this
767
+ Example, with ``unit='ms'`` and ``origin='unix'`` (the default), this
745
768
would calculate the number of milliseconds to the unix epoch start.
746
769
infer_datetime_format : bool, default False
747
- If True and no `format` is given, attempt to infer the format of the
748
- datetime strings based on the first non-NaN element,
770
+ If :const:`True` and no `format` is given, attempt to infer the format
771
+ of the datetime strings based on the first non-NaN element,
749
772
and if it can be inferred, switch to a faster method of parsing them.
750
773
In some cases this can increase the parsing speed by ~5-10x.
751
774
origin : scalar, default 'unix'
752
775
Define the reference date. The numeric values would be parsed as number
753
776
of units (defined by `unit`) since this reference date.
754
777
755
- - If 'unix' (or POSIX) time; origin is set to 1970-01-01.
756
- - If 'julian', unit must be 'D', and origin is set to beginning of
757
- Julian Calendar. Julian day number 0 is assigned to the day starting
758
- at noon on January 1, 4713 BC.
778
+ - If :const:`'unix'` (or POSIX) time; origin is set to 1970-01-01.
779
+ - If :const:`'julian'`, unit must be :const:`'D'`, and origin is set to
780
+ beginning of Julian Calendar. Julian day number :const:`0` is assigned
781
+ to the day starting at noon on January 1, 4713 BC.
759
782
- If Timestamp convertible, origin is set to Timestamp identified by
760
783
origin.
761
784
cache : bool, default True
762
- If True, use a cache of unique, converted dates to apply the datetime
763
- conversion. May produce significant speed-up when parsing duplicate
764
- date strings, especially ones with timezone offsets. The cache is only
765
- used when there are at least 50 values. The presence of out-of-bounds
766
- values will render the cache unusable and may slow down parsing.
785
+ If :const:`True`, use a cache of unique, converted dates to apply the
786
+ datetime conversion. May produce significant speed-up when parsing
787
+ duplicate date strings, especially ones with timezone offsets. The cache
788
+ is only used when there are at least 50 values. The presence of
789
+ out-of-bounds values will render the cache unusable and may slow down
790
+ parsing.
767
791
768
792
.. versionchanged:: 0.25.0
769
- - changed default value from False to True.
793
+ changed default value from :const:`False` to :const:`True`.
770
794
771
795
Returns
772
796
-------
773
797
datetime
774
798
If parsing succeeded.
775
- Return type depends on input:
776
-
777
- - list-like:
778
- - DatetimeIndex, if timezone naive or aware with the same timezone
779
- - Index of object dtype, if timezone aware with mixed time offsets
780
- - Series: Series of datetime64 dtype
781
- - DataFrame: Series of datetime64 dtype
782
- - scalar: Timestamp
783
-
784
- In case when it is not possible to return designated types (e.g. when
785
- any element of input is before Timestamp.min or after Timestamp.max)
786
- return will have datetime.datetime type (or corresponding
787
- array/Series).
799
+ Return type depends on input (types in parentheses correspond to
800
+ fallback in case of unsuccessful timezone or out-of-range timestamp
801
+ parsing):
802
+
803
+ - scalar: :class:`Timestamp` (or :class:`datetime.datetime`)
804
+ - array-like: :class:`DatetimeIndex` (or :class:`Series` with
805
+ :class:`object` dtype containing :class:`datetime.datetime`)
806
+ - Series: :class:`Series` of :class:`datetime64` dtype (or
807
+ :class:`Series` of :class:`object` dtype containing
808
+ :class:`datetime.datetime`)
809
+ - DataFrame: :class:`Series` of :class:`datetime64` dtype (or
810
+ :class:`Series` of :class:`object` dtype containing
811
+ :class:`datetime.datetime`)
812
+
813
+ Raises
814
+ ------
815
+ ParserError
816
+ When parsing a date from string fails.
817
+ ValueError
818
+ When another datetime conversion error happens. For example when one
819
+ of 'year', 'month', 'day' columns is missing in a :class:`DataFrame`, or
820
+ when a Timezone-aware :class:`datetime.datetime` is found in an array-like
821
+ of mixed time offsets, and ``utc=False``.
788
822
789
823
See Also
790
824
--------
791
825
DataFrame.astype : Cast argument to a specified dtype.
792
826
to_timedelta : Convert argument to timedelta.
793
827
convert_dtypes : Convert dtypes.
794
828
829
+ Notes
830
+ -----
831
+
832
+ Many input types are supported, and lead to different output types:
833
+
834
+ - **scalars** can be int, float, str, datetime object (from stdlib :mod:`datetime`
835
+ module or :mod:`numpy`). They are converted to :class:`Timestamp` when
836
+ possible, otherwise they are converted to :class:`datetime.datetime`.
837
+ None/NaN/null scalars are converted to :const:`NaT`.
838
+
839
+ - **array-like** can contain int, float, str, datetime objects. They are
840
+ converted to :class:`DatetimeIndex` when possible, otherwise they are
841
+ converted to :class:`Index` with :class:`object` dtype, containing
842
+ :class:`datetime.datetime`. None/NaN/null entries are converted to
843
+ :const:`NaT` in both cases.
844
+
845
+ - **Series** are converted to :class:`Series` with :class:`datetime64`
846
+ dtype when possible, otherwise they are converted to :class:`Series` with
847
+ :class:`object` dtype, containing :class:`datetime.datetime`. None/NaN/null
848
+ entries are converted to :const:`NaT` in both cases.
849
+
850
+ - **DataFrame/dict-like** are converted to :class:`Series` with
851
+ :class:`datetime64` dtype. For each row a datetime is created from assembling
852
+ the various dataframe columns. Column keys can be common abbreviations
853
+ like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns'] or
854
+ plurals of the same.
855
+
856
+ The following causes are responsible for :class:`datetime.datetime` objects
857
+ being returned (possibly inside an :class:`Index` or a :class:`Series` with
858
+ :class:`object` dtype) instead of a proper pandas designated type
859
+ (:class:`Timestamp`, :class:`DatetimeIndex` or :class:`Series`
860
+ with :class:`datetime64` dtype):
861
+
862
+ - when any input element is before :const:`Timestamp.min` or after
863
+ :const:`Timestamp.max`, see `timestamp limitations
864
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
865
+ #timeseries-timestamp-limits>`_.
866
+
867
+ - when ``utc=False`` (default) and the input is an array-like or
868
+ :class:`Series` containing mixed naive/aware datetime, or aware with mixed
869
+ time offsets. Note that this happens in the (quite frequent) situation when
870
+ the timezone has a daylight savings policy. In that case you may wish to
871
+ use ``utc=True``.
872
+
795
873
Examples
796
874
--------
797
- Assembling a datetime from multiple columns of a DataFrame. The keys can be
798
- common abbreviations like ['year', 'month', 'day', 'minute', 'second',
875
+
876
+ **Handling various input formats**
877
+
878
+ Assembling a datetime from multiple columns of a :class:`DataFrame`. The keys
879
+ can be common abbreviations like ['year', 'month', 'day', 'minute', 'second',
799
880
'ms', 'us', 'ns'] or plurals of the same.
800
881
801
882
>>> df = pd.DataFrame({'year': [2015, 2016],
@@ -806,20 +887,7 @@ def to_datetime(
806
887
1 2016-03-05
807
888
dtype: datetime64[ns]
808
889
809
- If a date does not meet the `timestamp limitations
810
- <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
811
- #timeseries-timestamp-limits>`_, passing errors='ignore'
812
- will return the original input instead of raising any exception.
813
-
814
- Passing errors='coerce' will force an out-of-bounds date to NaT,
815
- in addition to forcing non-dates (or non-parseable dates) to NaT.
816
-
817
- >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
818
- datetime.datetime(1300, 1, 1, 0, 0)
819
- >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
820
- NaT
821
-
822
- Passing infer_datetime_format=True can often-times speedup a parsing
890
+ Passing ``infer_datetime_format=True`` can often speed up parsing
823
891
if it's not an ISO8601 format exactly, but in a regular format.
824
892
825
893
>>> s = pd.Series(['3/11/2000', '3/12/2000', '3/13/2000'] * 1000)
@@ -854,16 +922,99 @@ def to_datetime(
854
922
DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'],
855
923
dtype='datetime64[ns]', freq=None)
856
924
857
- In case input is list-like and the elements of input are of mixed
858
- timezones, return will have object type Index if utc=False.
925
+ **Non-convertible date/times**
926
+
927
+ If a date does not meet the `timestamp limitations
928
+ <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html
929
+ #timeseries-timestamp-limits>`_, passing ``errors='ignore'``
930
+ will return the original input instead of raising any exception.
931
+
932
+ Passing ``errors='coerce'`` will force an out-of-bounds date to :const:`NaT`,
933
+ in addition to forcing non-dates (or non-parseable dates) to :const:`NaT`.
934
+
935
+ >>> pd.to_datetime('13000101', format='%Y%m%d', errors='ignore')
936
+ datetime.datetime(1300, 1, 1, 0, 0)
937
+ >>> pd.to_datetime('13000101', format='%Y%m%d', errors='coerce')
938
+ NaT
939
+
940
+ .. _to_datetime_tz_examples:
941
+
942
+ **Timezones and time offsets**
943
+
944
+ The default behaviour (``utc=False``) is as follows:
945
+
946
+ - Timezone-naive inputs are converted to timezone-naive :class:`DatetimeIndex`:
947
+
948
+ >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00:15'])
949
+ DatetimeIndex(['2018-10-26 12:00:00', '2018-10-26 13:00:15'],
950
+ dtype='datetime64[ns]', freq=None)
951
+
952
+ - Timezone-aware inputs *with constant time offset* are converted to
953
+ timezone-aware :class:`DatetimeIndex`:
954
+
955
+ >>> pd.to_datetime(['2018-10-26 12:00 -0500', '2018-10-26 13:00 -0500'])
956
+ DatetimeIndex(['2018-10-26 12:00:00-05:00', '2018-10-26 13:00:00-05:00'],
957
+ dtype='datetime64[ns, pytz.FixedOffset(-300)]', freq=None)
958
+
959
+ - However, timezone-aware inputs *with mixed time offsets* (for example
960
+ issued from a timezone with daylight savings, such as Europe/Paris)
961
+ are **not successfully converted** to a :class:`DatetimeIndex`. Instead a
962
+ simple :class:`Index` containing :class:`datetime.datetime` objects is
963
+ returned:
859
964
860
- >>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'])
861
- Index([2018-10-26 12:00:00-05:30, 2018-10-26 12:00:00-05:00], dtype='object')
965
+ >>> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100'])
966
+ Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00],
967
+ dtype='object')
968
+
969
+ - A mix of timezone-aware and timezone-naive inputs is converted to
970
+ a timezone-aware :class:`DatetimeIndex` if the offsets of the timezone-aware inputs
971
+ are constant:
972
+
973
+ >>> from datetime import datetime
974
+ >>> pd.to_datetime(["2020-01-01 01:00 -01:00", datetime(2020, 1, 1, 3, 0)])
975
+ DatetimeIndex(['2020-01-01 01:00:00-01:00', '2020-01-01 02:00:00-01:00'],
976
+ dtype='datetime64[ns, pytz.FixedOffset(-60)]', freq=None)
977
+
978
+ - Finally, mixing timezone-aware strings and :class:`datetime.datetime` always
979
+ raises an error, even if the elements all have the same time offset.
980
+
981
+ >>> from datetime import datetime, timezone, timedelta
982
+ >>> d = datetime(2020, 1, 1, 18, tzinfo=timezone(-timedelta(hours=1)))
983
+ >>> pd.to_datetime(["2020-01-01 17:00 -0100", d])
984
+ Traceback (most recent call last):
985
+ ...
986
+ ValueError: Tz-aware datetime.datetime cannot be converted to datetime64
987
+ unless utc=True
988
+
989
+ |
990
+
991
+ Setting ``utc=True`` solves most of the above issues:
992
+
993
+ - Timezone-naive inputs are *localized* as UTC
994
+
995
+ >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 13:00'], utc=True)
996
+ DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 13:00:00+00:00'],
997
+ dtype='datetime64[ns, UTC]', freq=None)
998
+
999
+ - Timezone-aware inputs are *converted* to UTC (the output represents the
1000
+ exact same datetime, but viewed from the UTC time offset `+00:00`).
862
1001
863
1002
>>> pd.to_datetime(['2018-10-26 12:00 -0530', '2018-10-26 12:00 -0500'],
864
1003
... utc=True)
865
1004
DatetimeIndex(['2018-10-26 17:30:00+00:00', '2018-10-26 17:00:00+00:00'],
866
1005
dtype='datetime64[ns, UTC]', freq=None)
1006
+
1007
+ - Inputs can contain both naive and aware, string or datetime, the above
1008
+ rules still apply
1009
+
1010
+ >>> pd.to_datetime(['2018-10-26 12:00', '2018-10-26 12:00 -0530',
1011
+ ... datetime(2020, 1, 1, 18),
1012
+ ... datetime(2020, 1, 1, 18,
1013
+ ... tzinfo=timezone(-timedelta(hours=1)))],
1014
+ ... utc=True)
1015
+ DatetimeIndex(['2018-10-26 12:00:00+00:00', '2018-10-26 17:30:00+00:00',
1016
+ '2020-01-01 18:00:00+00:00', '2020-01-01 19:00:00+00:00'],
1017
+ dtype='datetime64[ns, UTC]', freq=None)
867
1018
"""
868
1019
if arg is None :
869
1020
return None
@@ -968,9 +1119,9 @@ def _assemble_from_unit_mappings(arg, errors, tz):
968
1119
arg : DataFrame
969
1120
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
970
1121
971
- - If 'raise', then invalid parsing will raise an exception
972
- - If 'coerce', then invalid parsing will be set as NaT
973
- - If 'ignore', then invalid parsing will return the input
1122
+ - If :const:`'raise'`, then invalid parsing will raise an exception
1123
+ - If :const:`'coerce'`, then invalid parsing will be set as :const:`NaT`
1124
+ - If :const:`'ignore'`, then invalid parsing will return the input
974
1125
tz : None or 'utc'
975
1126
976
1127
Returns
0 commit comments