Skip to content

REF: separate to_time, avoid runtime imports #34145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 18, 2020
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from pandas._libs import NaT, Timedelta, iNaT, join as libjoin, lib
from pandas._libs.tslibs import timezones
from pandas._libs.tslibs.parsing import DateParseError
from pandas._typing import Label
from pandas.compat.numpy import function as nv
from pandas.errors import AbstractMethodError
Expand Down Expand Up @@ -41,7 +42,6 @@
from pandas.core.indexes.numeric import Int64Index
from pandas.core.ops import get_op_result_name
from pandas.core.sorting import ensure_key_mapped
from pandas.core.tools.datetimes import DateParseError
from pandas.core.tools.timedeltas import to_timedelta

from pandas.tseries.offsets import DateOffset, Tick
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from pandas.core.indexes.base import Index, InvalidIndexError, maybe_extract_name
from pandas.core.indexes.datetimelike import DatetimeTimedeltaMixin
from pandas.core.indexes.extension import inherit_names
import pandas.core.tools.datetimes as tools
from pandas.core.tools.times import to_time

from pandas.tseries.frequencies import Resolution, to_offset
from pandas.tseries.offsets import prefix_mapping
Expand Down Expand Up @@ -777,8 +777,8 @@ def indexer_between_time(
indexer_at_time : Get index locations of values at particular time of day.
DataFrame.between_time : Select values between particular times of day.
"""
start_time = tools.to_time(start_time)
end_time = tools.to_time(end_time)
start_time = to_time(start_time)
end_time = to_time(end_time)
time_micros = self._get_time_micros()
start_micros = _time_to_micros(start_time)
end_micros = _time_to_micros(end_time)
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from pandas._libs import index as libindex
from pandas._libs.lib import no_default
from pandas._libs.tslibs import Period, frequencies as libfrequencies, resolution
from pandas._libs.tslibs.parsing import parse_time_string
from pandas._libs.tslibs.parsing import DateParseError, parse_time_string
from pandas._typing import DtypeObj, Label
from pandas.util._decorators import Appender, cache_readonly, doc

Expand Down Expand Up @@ -42,7 +42,6 @@
from pandas.core.indexes.extension import inherit_names
from pandas.core.indexes.numeric import Int64Index
from pandas.core.ops import get_op_result_name
from pandas.core.tools.datetimes import DateParseError

from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick
Expand Down
185 changes: 30 additions & 155 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from collections import abc
from datetime import datetime, time
from datetime import datetime
from functools import partial
from itertools import islice
from typing import List, Optional, TypeVar, Union
from typing import TYPE_CHECKING, Optional, TypeVar, Union
import warnings

import numpy as np

Expand All @@ -28,28 +29,31 @@
is_numeric_dtype,
is_scalar,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
ABCDatetimeIndex,
ABCIndex,
ABCIndexClass,
ABCSeries,
)
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
from pandas.core.dtypes.missing import notna

from pandas.arrays import DatetimeArray, IntegerArray
from pandas.core import algorithms
from pandas.core.algorithms import unique
from pandas.core.arrays.datetimes import tz_to_dtype
from pandas.core.arrays.datetimes import (
maybe_convert_dtype,
objects_to_datetime64ns,
tz_to_dtype,
)
from pandas.core.indexes.base import Index
from pandas.core.indexes.datetimes import DatetimeIndex

if TYPE_CHECKING:
from pandas import Series # noqa:F401

# ---------------------------------------------------------------------
# types used in annotations

ArrayConvertible = Union[list, tuple, ArrayLike, ABCSeries]
ArrayConvertible = Union[list, tuple, ArrayLike, "Series"]
Scalar = Union[int, float, str]
DatetimeScalar = TypeVar("DatetimeScalar", Scalar, datetime)
DatetimeScalarOrArrayConvertible = Union[
DatetimeScalar, list, tuple, ArrayLike, ABCSeries
DatetimeScalar, list, tuple, ArrayLike, "Series"
]


Expand Down Expand Up @@ -156,7 +160,7 @@ def _maybe_cache(arg, format, cache, convert_listlike):

def _box_as_indexlike(
dt_array: ArrayLike, utc: Optional[bool] = None, name: Optional[str] = None
) -> Union[ABCIndex, ABCDatetimeIndex]:
) -> Index:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@simonjayhawkins do we have a way of annotating that a return type is either an Index or a DatetimeIndex, but no other Index subclass?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not that I'm aware of. If Index subclasses obey the Liskov substitution principle, it shouldn't be an issue. Why is this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this needed?

Not "needed", but I guess it's a more-specific-is-better thing.

"""
Properly boxes the ndarray of datetimes to DatetimeIndex
if it is possible or to generic Index instead
Expand All @@ -176,7 +180,6 @@ def _box_as_indexlike(
- DatetimeIndex if convertible to sole datetime64 type
- general Index otherwise
"""
from pandas import DatetimeIndex, Index

if is_datetime64_dtype(dt_array):
tz = "utc" if utc else None
Expand All @@ -186,9 +189,9 @@ def _box_as_indexlike(

def _convert_and_box_cache(
arg: DatetimeScalarOrArrayConvertible,
cache_array: ABCSeries,
cache_array: "Series",
name: Optional[str] = None,
) -> ABCIndexClass:
) -> "Index":
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are the quotes around "Index" still needed, now that Index is imported at module level?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch. will edit in follow-up

"""
Convert array of dates with a cache and wrap the result in an Index.

Expand Down Expand Up @@ -235,7 +238,6 @@ def _return_parsed_timezone_results(result, timezones, tz, name):
if tz is not None:
# Convert to the same tz
tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results])
from pandas import Index

return Index(tz_results, name=name)

Expand Down Expand Up @@ -281,11 +283,6 @@ def _convert_listlike_datetimes(
-------
Index-like of parsed dates
"""
from pandas import DatetimeIndex
from pandas.core.arrays.datetimes import (
maybe_convert_dtype,
objects_to_datetime64ns,
)

if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype="O")
Expand Down Expand Up @@ -332,7 +329,6 @@ def _convert_listlike_datetimes(
)

if errors == "ignore":
from pandas import Index

result = Index(result, name=name)
else:
Expand Down Expand Up @@ -366,8 +362,6 @@ def _convert_listlike_datetimes(
result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(result, name=name)
elif errors == "ignore":
from pandas import Index

result = Index(arg, name=name)
return result
raise
Expand Down Expand Up @@ -539,9 +533,7 @@ def _adjust_to_origin(arg, origin, unit):
offset = offset // tslibs.Timedelta(1, unit=unit)

# scalars & ndarray-like can handle the addition
if is_list_like(arg) and not isinstance(
arg, (ABCSeries, ABCIndexClass, np.ndarray)
):
if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)):
arg = np.asarray(arg)
arg = arg + offset
return arg
Expand Down Expand Up @@ -749,7 +741,7 @@ def to_datetime(
result = arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, abc.MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors, tz)
elif isinstance(arg, ABCIndexClass):
elif isinstance(arg, Index):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
result = _convert_and_box_cache(arg, cache_array, name=arg.name)
Expand Down Expand Up @@ -944,131 +936,14 @@ def calc_with_mask(carg, mask):
return None

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you leave to_time here and show a FutureWarning? (I know at least ibis is using this.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated per request


# Fixed time formats for time parsing.
# Order matters: candidates are tried from first to last and the first one
# that datetime.strptime accepts is used.
# %H is the 24-hour clock; %I together with %p is the 12-hour clock with an
# AM/PM marker. Each shape appears with and without ":" separators, and with
# and without a seconds field.
_time_formats = [
"%H:%M",
"%H%M",
"%I:%M%p",
"%I%M%p",
"%H:%M:%S",
"%H%M%S",
"%I:%M:%S%p",
"%I%M%S%p",
]


def _guess_time_format_for_array(arr):
    """
    Guess a strptime format from the first non-NaN element of ``arr``.

    Every entry of ``_time_formats`` is tried, in order, against that one
    element; the first format ``datetime.strptime`` accepts is returned.

    Parameters
    ----------
    arr : array-like
        Values to inspect. Only the first element that ``notna`` reports as
        present is looked at.

    Returns
    -------
    str or None
        The matching format string, or None when ``arr`` holds no non-NaN
        element or none of the fixed formats parses it.
    """
    valid_positions = notna(arr).nonzero()[0]
    if not len(valid_positions):
        # Nothing but missing values: there is no sample to guess from.
        return None

    sample = arr[valid_positions[0]]
    for candidate in _time_formats:
        try:
            datetime.strptime(sample, candidate)
        except ValueError:
            # This format does not fit the sample; move on to the next one.
            continue
        return candidate

    return None


def to_time(arg, format=None, infer_time_format=False, errors="raise"):
"""
Parse time strings to time objects using fixed strptime formats ("%H:%M",
"%H%M", "%I:%M%p", "%I%M%p", "%H:%M:%S", "%H%M%S", "%I:%M:%S%p",
"%I%M%S%p")

Use infer_time_format if all the strings are in the same format to speed
up conversion.

Parameters
----------
arg : string in time format, datetime.time, list, tuple, 1-d array, Series
format : str, default None
Format used to convert arg into a time object. If None, fixed formats
are used.
infer_time_format: bool, default False
Infer the time format based on the first non-NaN element. If all
strings are in the same format, this will speed up conversion.
errors : {'ignore', 'raise', 'coerce'}, default 'raise'
- If 'raise', then invalid parsing will raise an exception
- If 'coerce', then invalid parsing will be set as None
- If 'ignore', then invalid parsing will return the input

Returns
-------
datetime.time
"""

def _convert_listlike(arg, format):

if isinstance(arg, (list, tuple)):
arg = np.array(arg, dtype="O")

elif getattr(arg, "ndim", 1) > 1:
raise TypeError(
"arg must be a string, datetime, list, tuple, 1-d array, or Series"
)

arg = ensure_object(arg)

if infer_time_format and format is None:
format = _guess_time_format_for_array(arg)

times: List[Optional[time]] = []
if format is not None:
for element in arg:
try:
times.append(datetime.strptime(element, format).time())
except (ValueError, TypeError) as err:
if errors == "raise":
msg = (
f"Cannot convert {element} to a time with given "
f"format {format}"
)
raise ValueError(msg) from err
elif errors == "ignore":
return arg
else:
times.append(None)
else:
formats = _time_formats[:]
format_found = False
for element in arg:
time_object = None
for time_format in formats:
try:
time_object = datetime.strptime(element, time_format).time()
if not format_found:
# Put the found format in front
fmt = formats.pop(formats.index(time_format))
formats.insert(0, fmt)
format_found = True
break
except (ValueError, TypeError):
continue

if time_object is not None:
times.append(time_object)
elif errors == "raise":
raise ValueError(f"Cannot convert arg {arg} to a time")
elif errors == "ignore":
return arg
else:
times.append(None)

return times

if arg is None:
return arg
elif isinstance(arg, time):
return arg
elif isinstance(arg, ABCSeries):
values = _convert_listlike(arg._values, format)
return arg._constructor(values, index=arg.index, name=arg.name)
elif isinstance(arg, ABCIndexClass):
return _convert_listlike(arg, format)
elif is_list_like(arg):
return _convert_listlike(arg, format)
# GH#34145
warnings.warn(
"`to_time` has been moved, should be imported from pandas.core.tools.times. "
"This alias will be removed in a future version.",
FutureWarning,
stacklevel=2,
)
from pandas.core.tools.times import to_time

return _convert_listlike(np.array([arg]), format)[0]
return to_time(arg, format, infer_time_format, errors)
Loading