diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a12d5b64bb06c..2a7cd0eac04a6 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -8,6 +8,7 @@ from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib from pandas._libs.tslibs import timezones +from pandas._libs.tslibs.parsing import DateParseError from pandas._typing import Label from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError @@ -41,7 +42,6 @@ from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name from pandas.core.sorting import ensure_key_mapped -from pandas.core.tools.datetimes import DateParseError from pandas.core.tools.timedeltas import to_timedelta from pandas.tseries.offsets import DateOffset, Tick diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 9cdd27143d3cb..6f1614d050cad 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -27,7 +27,7 @@ from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin from pandas.core.indexes.extension import inherit_names -import pandas.core.tools.datetimes as tools +from pandas.core.tools.times import to_time from pandas.tseries.frequencies import to_offset from pandas.tseries.offsets import prefix_mapping @@ -778,8 +778,8 @@ def indexer_between_time( indexer_at_time : Get index locations of values at particular time of day. DataFrame.between_time : Select values between particular times of day. """ - start_time = tools.to_time(start_time) - end_time = tools.to_time(end_time) + start_time = to_time(start_time) + end_time = to_time(end_time) time_micros = self._get_time_micros() start_micros = _time_to_micros(start_time) end_micros = _time_to_micros(end_time) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 68fe709ba56d8..14922000c9707 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -7,7 +7,7 @@ from pandas._libs.lib import no_default from pandas._libs.tslibs import Period from pandas._libs.tslibs.frequencies import get_freq_group -from pandas._libs.tslibs.parsing import parse_time_string +from pandas._libs.tslibs.parsing import DateParseError, parse_time_string from pandas._typing import DtypeObj, Label from pandas.util._decorators import Appender, cache_readonly, doc @@ -43,7 +43,6 @@ from pandas.core.indexes.extension import inherit_names from pandas.core.indexes.numeric import Int64Index from pandas.core.ops import get_op_result_name -from pandas.core.tools.datetimes import DateParseError from pandas.tseries.offsets import DateOffset, Tick diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 829dd7f7b94c8..42bffa0374472 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -1,8 +1,9 @@ from collections import abc -from datetime import datetime, time +from datetime import datetime from functools import partial from itertools import islice -from typing import List, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Optional, TypeVar, Union +import warnings import numpy as np @@ -28,28 +29,31 @@ is_numeric_dtype, is_scalar, ) -from pandas.core.dtypes.generic import ( - ABCDataFrame, - ABCDatetimeIndex, - ABCIndex, - ABCIndexClass, - ABCSeries, -) +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries from pandas.core.dtypes.missing import notna from pandas.arrays import DatetimeArray, IntegerArray from pandas.core import algorithms from pandas.core.algorithms import unique -from pandas.core.arrays.datetimes import tz_to_dtype +from pandas.core.arrays.datetimes import ( + maybe_convert_dtype, + objects_to_datetime64ns, + tz_to_dtype, +) +from pandas.core.indexes.base import Index +from pandas.core.indexes.datetimes import DatetimeIndex + +if TYPE_CHECKING: + from pandas import Series # noqa:F401 # --------------------------------------------------------------------- # types used in annotations -ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries] +ArrayConvertible = Union[list, tuple, ArrayLike, "Series"] Scalar = Union[int, float, str] DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime) DatetimeScalarOrArrayConvertible = Union[ - DatetimeScalar, list, tuple, ArrayLike, ABCSeries + DatetimeScalar, list, tuple, ArrayLike, "Series" ] @@ -156,7 +160,7 @@ def _maybe_cache(arg, format, cache, convert_listlike): def _box_as_indexlike( dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None -) -> Union[ABCIndex, ABCDatetimeIndex]: +) -> Index: """ Properly boxes the ndarray of datetimes to DatetimeIndex if it is possible or to generic Index instead @@ -176,7 +180,6 @@ def _box_as_indexlike( - DatetimeIndex if convertible to sole datetime64 type - general Index otherwise """ - from pandas import DatetimeIndex, Index if is_datetime64_dtype(dt_array): tz = "utc" if utc else None @@ -186,9 +189,9 @@ def _box_as_indexlike( def _convert_and_box_cache( arg: DatetimeScalarOrArrayConvertible, - cache_array: ABCSeries, + cache_array: "Series", name: Optional[str] = None, -) -> ABCIndexClass: +) -> "Index": """ Convert array of dates with a cache and wrap the result in an Index. @@ -235,7 +238,6 @@ def _return_parsed_timezone_results(result, timezones, tz, name): if tz is not None: # Convert to the same tz tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results]) - from pandas import Index return Index(tz_results, name=name) @@ -281,11 +283,6 @@ def _convert_listlike_datetimes( ------- Index-like of parsed dates """ - from pandas import DatetimeIndex - from pandas.core.arrays.datetimes import ( - maybe_convert_dtype, - objects_to_datetime64ns, - ) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype="O") @@ -332,7 +329,6 @@ def _convert_listlike_datetimes( ) if errors == "ignore": - from pandas import Index result = Index(result, name=name) else: @@ -366,8 +362,6 @@ def _convert_listlike_datetimes( result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg)) return DatetimeIndex(result, name=name) elif errors == "ignore": - from pandas import Index - result = Index(arg, name=name) return result raise @@ -539,9 +533,7 @@ def _adjust_to_origin(arg, origin, unit): offset = offset // tslibs.Timedelta(1, unit=unit) # scalars & ndarray-like can handle the addition - if is_list_like(arg) and not isinstance( - arg, (ABCSeries, ABCIndexClass, np.ndarray) - ): + if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): arg = np.asarray(arg) arg = arg + offset return arg @@ -749,7 +741,7 @@ def to_datetime( result = arg._constructor(values, index=arg.index, name=arg.name) elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)): result = _assemble_from_unit_mappings(arg, errors, tz) - elif isinstance(arg, ABCIndexClass): + elif isinstance(arg, Index): cache_array = _maybe_cache(arg, format, cache, convert_listlike) if not cache_array.empty: result = _convert_and_box_cache(arg, cache_array, name=arg.name) @@ -944,131 +936,14 @@ def calc_with_mask(carg, mask): return None -# Fixed time formats for time parsing -_time_formats = [ - "%H:%M", - "%H%M", - "%I:%M%p", - "%I%M%p", - "%H:%M:%S", - "%H%M%S", - "%I:%M:%S%p", - "%I%M%S%p", -] - - -def _guess_time_format_for_array(arr): - # Try to guess the format based on the first non-NaN element - non_nan_elements = notna(arr).nonzero()[0] - if len(non_nan_elements): - element = arr[non_nan_elements[0]] - for time_format in _time_formats: - try: - datetime.strptime(element, time_format) - return time_format - except ValueError: - pass - - return None - - def to_time(arg, format=None, infer_time_format=False, errors="raise"): - """ - Parse time strings to time objects using fixed strptime formats ("%H:%M", - "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", - "%I%M%S%p") - - Use infer_time_format if all the strings are in the same format to speed - up conversion. - - Parameters - ---------- - arg : string in time format, datetime.time, list, tuple, 1-d array, Series - format : str, default None - Format used to convert arg into a time object. If None, fixed formats - are used. - infer_time_format: bool, default False - Infer the time format based on the first non-NaN element. If all - strings are in the same format, this will speed up conversion. - errors : {'ignore', 'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception - - If 'coerce', then invalid parsing will be set as None - - If 'ignore', then invalid parsing will return the input - - Returns - ------- - datetime.time - """ - - def _convert_listlike(arg, format): - - if isinstance(arg, (list, tuple)): - arg = np.array(arg, dtype="O") - - elif getattr(arg, "ndim", 1) > 1: - raise TypeError( - "arg must be a string, datetime, list, tuple, 1-d array, or Series" - ) - - arg = ensure_object(arg) - - if infer_time_format and format is None: - format = _guess_time_format_for_array(arg) - - times: List[Optional[time]] = [] - if format is not None: - for element in arg: - try: - times.append(datetime.strptime(element, format).time()) - except (ValueError, TypeError) as err: - if errors == "raise": - msg = ( - f"Cannot convert {element} to a time with given " - f"format {format}" - ) - raise ValueError(msg) from err - elif errors == "ignore": - return arg - else: - times.append(None) - else: - formats = _time_formats[:] - format_found = False - for element in arg: - time_object = None - for time_format in formats: - try: - time_object = datetime.strptime(element, time_format).time() - if not format_found: - # Put the found format in front - fmt = formats.pop(formats.index(time_format)) - formats.insert(0, fmt) - format_found = True - break - except (ValueError, TypeError): - continue - - if time_object is not None: - times.append(time_object) - elif errors == "raise": - raise ValueError(f"Cannot convert arg {arg} to a time") - elif errors == "ignore": - return arg - else: - times.append(None) - - return times - - if arg is None: - return arg - elif isinstance(arg, time): - return arg - elif isinstance(arg, ABCSeries): - values = _convert_listlike(arg._values, format) - return arg._constructor(values, index=arg.index, name=arg.name) - elif isinstance(arg, ABCIndexClass): - return _convert_listlike(arg, format) - elif is_list_like(arg): - return _convert_listlike(arg, format) + # GH#34145 + warnings.warn( + "`to_time` has been moved, should be imported from pandas.core.tools.times. " + "This alias will be removed in a future version.", + FutureWarning, + stacklevel=2, + ) + from pandas.core.tools.times import to_time - return _convert_listlike(np.array([arg]), format)[0] + return to_time(arg, format, infer_time_format, errors) diff --git a/pandas/core/tools/times.py b/pandas/core/tools/times.py new file mode 100644 index 0000000000000..3bac4cf0edb63 --- /dev/null +++ b/pandas/core/tools/times.py @@ -0,0 +1,141 @@ +from datetime import datetime, time +from typing import List, Optional + +import numpy as np + +from pandas._libs.lib import is_list_like + +from pandas.core.dtypes.generic import ABCSeries +from pandas.core.dtypes.missing import notna + +from pandas.core.indexes.base import Index + + +def to_time(arg, format=None, infer_time_format=False, errors="raise"): + """ + Parse time strings to time objects using fixed strptime formats ("%H:%M", + "%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p", + "%I%M%S%p") + + Use infer_time_format if all the strings are in the same format to speed + up conversion. + + Parameters + ---------- + arg : string in time format, datetime.time, list, tuple, 1-d array, Series + format : str, default None + Format used to convert arg into a time object. If None, fixed formats + are used. + infer_time_format: bool, default False + Infer the time format based on the first non-NaN element. If all + strings are in the same format, this will speed up conversion. + errors : {'ignore', 'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception + - If 'coerce', then invalid parsing will be set as None + - If 'ignore', then invalid parsing will return the input + + Returns + ------- + datetime.time + """ + + def _convert_listlike(arg, format): + + if isinstance(arg, (list, tuple)): + arg = np.array(arg, dtype="O") + + elif getattr(arg, "ndim", 1) > 1: + raise TypeError( + "arg must be a string, datetime, list, tuple, 1-d array, or Series" + ) + + arg = np.asarray(arg, dtype="O") + + if infer_time_format and format is None: + format = _guess_time_format_for_array(arg) + + times: List[Optional[time]] = [] + if format is not None: + for element in arg: + try: + times.append(datetime.strptime(element, format).time()) + except (ValueError, TypeError) as err: + if errors == "raise": + msg = ( + f"Cannot convert {element} to a time with given " + f"format {format}" + ) + raise ValueError(msg) from err + elif errors == "ignore": + return arg + else: + times.append(None) + else: + formats = _time_formats[:] + format_found = False + for element in arg: + time_object = None + for time_format in formats: + try: + time_object = datetime.strptime(element, time_format).time() + if not format_found: + # Put the found format in front + fmt = formats.pop(formats.index(time_format)) + formats.insert(0, fmt) + format_found = True + break + except (ValueError, TypeError): + continue + + if time_object is not None: + times.append(time_object) + elif errors == "raise": + raise ValueError(f"Cannot convert arg {arg} to a time") + elif errors == "ignore": + return arg + else: + times.append(None) + + return times + + if arg is None: + return arg + elif isinstance(arg, time): + return arg + elif isinstance(arg, ABCSeries): + values = _convert_listlike(arg._values, format) + return arg._constructor(values, index=arg.index, name=arg.name) + elif isinstance(arg, Index): + return _convert_listlike(arg, format) + elif is_list_like(arg): + return _convert_listlike(arg, format) + + return _convert_listlike(np.array([arg]), format)[0] + + +# Fixed time formats for time parsing +_time_formats = [ + "%H:%M", + "%H%M", + "%I:%M%p", + "%I%M%p", + "%H:%M:%S", + "%H%M%S", + "%I:%M:%S%p", + "%I%M%S%p", +] + + +def _guess_time_format_for_array(arr): + # Try to guess the format based on the first non-NaN element + non_nan_elements = notna(arr).nonzero()[0] + if len(non_nan_elements): + element = arr[non_nan_elements[0]] + for time_format in _time_formats: + try: + datetime.strptime(element, time_format) + return time_format + except ValueError: + pass + + return None diff --git a/pandas/tests/tools/test_to_time.py b/pandas/tests/tools/test_to_time.py index 937570d89fb77..bfd347fd122c3 100644 --- a/pandas/tests/tools/test_to_time.py +++ b/pandas/tests/tools/test_to_time.py @@ -7,7 +7,8 @@ from pandas import Series import pandas._testing as tm -from pandas.core.tools.datetimes import to_time +from pandas.core.tools.datetimes import to_time as to_time_alias +from pandas.core.tools.times import to_time class TestToTime: @@ -57,3 +58,12 @@ def test_parsers_time(self): res = to_time(np.array(arg)) assert isinstance(res, list) assert res == expected_arr + + +def test_to_time_alias(): + expected = time(14, 15) + + with tm.assert_produces_warning(FutureWarning): + result = to_time_alias(expected) + + assert result == expected