Skip to content

Handle utc and box parameters for to_datetime #24082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 18 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1125,6 +1125,7 @@ Deprecations
- :func:`pandas.types.is_period` is deprecated in favor of `pandas.types.is_period_dtype` (:issue:`23917`)
- :func:`pandas.types.is_datetimetz` is deprecated in favor of `pandas.types.is_datetime64tz` (:issue:`23917`)
- Creating a :class:`TimedeltaIndex` or :class:`DatetimeIndex` by passing range arguments `start`, `end`, and `periods` is deprecated in favor of :func:`timedelta_range` and :func:`date_range` (:issue:`23919`)
- Passing a string alias like ``'datetime64[ns, UTC]'`` as the `unit` parameter to :class:`DatetimeTZDtype` is deprecated. Use :class:`DatetimeTZDtype.construct_from_string` instead (:issue:`23990`).

.. _whatsnew_0240.deprecations.datetimelike_int_ops:

Expand Down
25 changes: 15 additions & 10 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1174,16 +1174,21 @@ def validate_tz_from_dtype(dtype, tz):
ValueError : on tzinfo mismatch
"""
if dtype is not None:
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
except TypeError:
pass
if isinstance(dtype, compat.string_types):
try:
dtype = DatetimeTZDtype.construct_from_string(dtype)
except TypeError:
# Things like `datetime64[ns]`, which is OK for the
# constructors, but also nonsense, which should be validated
# but not by us. We *do* allow non-existent tz errors to
# go through
pass
dtz = getattr(dtype, 'tz', None)
if dtz is not None:
if tz is not None and not timezones.tz_compare(tz, dtz):
raise ValueError("cannot supply both a tz and a dtype"
" with a tz")
tz = dtz
return tz


Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
resolution as libresolution, timezones)
import pandas.compat as compat
from pandas.errors import PerformanceWarning
from pandas.util._decorators import Appender, cache_readonly
from pandas.util._decorators import Appender

from pandas.core.dtypes.common import (
_NS_DTYPE, is_datetime64_dtype, is_datetime64tz_dtype, is_extension_type,
Expand Down Expand Up @@ -333,7 +333,7 @@ def _generate_range(cls, start, end, periods, freq, tz=None,
def _box_func(self):
return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)

@cache_readonly
@property
def dtype(self):
if self.tz is None:
return _NS_DTYPE
Expand Down
32 changes: 0 additions & 32 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1767,38 +1767,6 @@ def is_complex_dtype(arr_or_dtype):
return issubclass(tipo, np.complexfloating)


def _coerce_to_dtype(dtype):
    """
    Coerce a string or np.dtype to a pandas or numpy dtype if possible.

    The pandas dtypes are tried first, in the order categorical,
    tz-aware datetime, period, interval. Anything none of those checks
    recognise is handed to ``np.dtype``.

    Parameters
    ----------
    dtype : The dtype that we want to coerce.

    Returns
    -------
    pd_or_np_dtype : The coerced dtype.
    """
    if is_categorical_dtype(dtype):
        # Rebuild from the attributes when present; a plain alias has
        # neither, so the defaults (None / False) apply.
        return CategoricalDtype(
            categories=getattr(dtype, 'categories', None),
            ordered=getattr(dtype, 'ordered', False))
    if is_datetime64tz_dtype(dtype):
        return DatetimeTZDtype(dtype)
    if is_period_dtype(dtype):
        return PeriodDtype(dtype)
    if is_interval_dtype(dtype):
        return IntervalDtype(dtype)
    # Not matched by any of the checks above: fall back to numpy.
    return np.dtype(dtype)


def _get_dtype(arr_or_dtype):
"""
Get the dtype instance associated with an array
Expand Down
133 changes: 81 additions & 52 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
""" define extension dtypes """

import re
import warnings

import numpy as np
import pytz

from pandas._libs.interval import Interval
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones
Expand Down Expand Up @@ -491,64 +492,69 @@ class DatetimeTZDtype(PandasExtensionDtype):
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]")
_cache = {}

def __new__(cls, unit=None, tz=None):
def __init__(self, unit="ns", tz=None):
"""
Create a new unit if needed, otherwise return from the cache
An ExtensionDtype for timezone-aware datetime data.

Parameters
----------
unit : string unit that this represents, currently must be 'ns'
tz : string tz that this represents
"""

if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

elif unit is None:
# we are called as an empty constructor
# generally for pickle compat
return object.__new__(cls)
unit : str, default "ns"
The precision of the datetime data. Currently limited
to ``"ns"``.
tz : str, int, or datetime.tzinfo
The timezone.

elif tz is None:
Raises
------
pytz.UnknownTimeZoneError
When the requested timezone cannot be found.

# we were passed a string that we can construct
try:
m = cls._match.search(unit)
if m is not None:
unit = m.groupdict()['unit']
tz = timezones.maybe_get_tz(m.groupdict()['tz'])
except TypeError:
raise ValueError("could not construct DatetimeTZDtype")
Examples
--------
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC')
datetime64[ns, UTC]

elif isinstance(unit, compat.string_types):
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central')
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
"""
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

if unit != 'ns':
if unit != 'ns':
if isinstance(unit, compat.string_types) and tz is None:
# maybe a string like datetime64[ns, tz], which we support for
# now.
result = type(self).construct_from_string(unit)
unit = result.unit
tz = result.tz
msg = (
"Passing a dtype alias like 'datetime64[ns, {tz}]' "
"to DatetimeTZDtype is deprecated. Use "
"'DatetimeTZDtype.construct_from_string()' instead."
)
warnings.warn(msg.format(tz=tz), FutureWarning, stacklevel=2)
else:
raise ValueError("DatetimeTZDtype only supports ns units")

unit = unit
tz = tz
if tz:
tz = timezones.maybe_get_tz(tz)
elif tz is not None:
raise pytz.UnknownTimeZoneError(tz)
elif tz is None:
raise TypeError("A 'tz' is required.")

if tz is None:
raise ValueError("DatetimeTZDtype constructor must have a tz "
"supplied")
self._unit = unit
self._tz = tz

# hash with the actual tz if we can
# some cannot be hashed, so stringify
try:
key = (unit, tz)
hash(key)
except TypeError:
key = (unit, str(tz))
@property
def unit(self):
    """
    The precision of the datetime data.

    Returns the value stored in the private ``_unit`` attribute.
    """
    return self._unit

# set/retrieve from cache
try:
return cls._cache[key]
except KeyError:
u = object.__new__(cls)
u.unit = unit
u.tz = tz
cls._cache[key] = u
return u
@property
def tz(self):
    """
    The timezone.

    Returns the value stored in the private ``_tz`` attribute.
    """
    return self._tz

@classmethod
def construct_array_type(cls):
Expand All @@ -565,24 +571,42 @@ def construct_array_type(cls):
@classmethod
def construct_from_string(cls, string):
"""
attempt to construct this type from a string, raise a TypeError if
it's not possible
Construct a DatetimeTZDtype from a string.

Parameters
----------
string : str
The string alias for this DatetimeTZDtype.
Should be formatted like ``datetime64[ns, <tz>]``,
where ``<tz>`` is the timezone name.

Examples
--------
>>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]')
datetime64[ns, UTC]
"""
msg = "Could not construct DatetimeTZDtype from '{}'"
try:
return cls(unit=string)
except ValueError:
raise TypeError("could not construct DatetimeTZDtype")
match = cls._match.match(string)
if match:
d = match.groupdict()
return cls(unit=d['unit'], tz=d['tz'])
except Exception:
# TODO(py3): Change this pass to `raise TypeError(msg) from e`
pass
raise TypeError(msg.format(string))

def __unicode__(self):
    """
    Return the dtype alias, formatted as ``datetime64[<unit>, <tz>]``.

    ``<unit>`` and ``<tz>`` are filled from ``self.unit`` and ``self.tz``.
    """
    # format the tz
    return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz)

@property
def name(self):
    """
    A string representation of the dtype.

    Delegates to ``str(self)``.
    """
    return str(self)

def __hash__(self):
    """Hash the dtype through its string form, ``str(self)``."""
    # make myself hashable
    # TODO: update this.
    return hash(str(self))

def __eq__(self, other):
Expand All @@ -593,6 +617,11 @@ def __eq__(self, other):
self.unit == other.unit and
str(self.tz) == str(other.tz))

def __setstate__(self, state):
    """
    Restore the private ``_tz`` and ``_unit`` attributes from ``state``.

    Parameters
    ----------
    state : mapping
        Indexed with the keys ``'tz'`` and ``'unit'``; a missing key
        propagates as whatever error the lookup raises.
    """
    # for pickle compat.
    self._tz = state['tz']
    self._unit = state['unit']


class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
"""
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def _isna_ndarraylike(obj):
vec = libmissing.isnaobj(values.ravel())
result[...] = vec.reshape(shape)

elif needs_i8_conversion(obj):
elif needs_i8_conversion(dtype):
# this is the NaT pattern
result = values.view('i8') == iNaT
else:
Expand Down
3 changes: 1 addition & 2 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2669,11 +2669,10 @@ def _astype(self, dtype, **kwargs):
these automatically copy, so copy=True has no effect
raise on an exception if raise == True
"""
dtype = pandas_dtype(dtype)

# if we are passed a datetime64[ns, tz]
if is_datetime64tz_dtype(dtype):
dtype = DatetimeTZDtype(dtype)

values = self.values
if getattr(values, 'tz', None) is None:
values = DatetimeIndex(values).tz_localize('UTC')
Expand Down
8 changes: 6 additions & 2 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,12 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
values = convert_listlike(arg._values, True, format)
result = Series(values, index=arg.index, name=arg.name)
elif isinstance(arg, (ABCDataFrame, MutableMapping)):
result = _assemble_from_unit_mappings(arg, errors=errors)
if not box:
result = _assemble_from_unit_mappings(arg, errors=errors).values
else:
result = _assemble_from_unit_mappings(arg, errors=errors)
if utc:
result = result.dt.tz_localize('UTC')
elif isinstance(arg, ABCIndexClass):
cache_array = _maybe_cache(arg, format, cache, convert_listlike)
if not cache_array.empty:
Expand All @@ -588,7 +593,6 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
result = convert_listlike(arg, box, format)
else:
result = convert_listlike(np.array([arg]), box, format)[0]

return result


Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,29 @@
class TestDatetimeArrayComparisons(object):
# TODO: merge this into tests/arithmetic/test_datetime64 once it is
# sufficiently robust
def test_box(self):
    """``box`` decides whether DataFrame input comes back as an ndarray."""
    frame = pd.DataFrame({'year': [2015, 2016],
                          'month': [2, 3], 'day': [4, 5]})

    # box=False: the result must be an ndarray.
    unboxed = pd.to_datetime(frame, box=False)
    assert isinstance(unboxed, np.ndarray)

    # box=True: the result must not be an ndarray.
    boxed = pd.to_datetime(frame, box=True)
    assert not isinstance(boxed, np.ndarray)

def test_utc(self):
    """``utc`` controls whether DataFrame input is labelled as UTC."""
    frame = pd.DataFrame({'year': [2015, 2016],
                          'month': [2, 3], 'day': [4, 5]})

    # utc=True: the first element's tz renders as 'UTC'.
    converted = pd.to_datetime(frame, utc=True)
    tz_label = str(converted[0].tz)
    assert tz_label == 'UTC'

    # utc=False: the first element's tz must not render as 'UTC'.
    converted = pd.to_datetime(frame, utc=False)
    tz_label = str(converted[0].tz)
    assert tz_label != 'UTC'

def test_box_to_datetime_box_False(self):
    """
    With ``box=False`` the assembled dates match the expected ndarray.

    The year/month/day columns are converted with ``pd.to_datetime`` and
    compared against a ``datetime64[ns]`` array of the same two dates.
    """
    df = pd.DataFrame({'year': [2015, 2016],
                       'month': [2, 3], 'day': [4, 5]})
    res = pd.to_datetime(df, box=False)
    # Spell the constructor as ``np.array``: the sibling tests reach numpy
    # only through the ``np`` namespace, so a bare ``array`` is not known
    # to be in scope here.
    expected = np.array(['2015-02-04T00:00:00.000000000',
                         '2016-03-05T00:00:00.000000000'],
                        dtype='datetime64[ns]')
    # ``res == expected`` is an elementwise boolean array; asserting on it
    # raises "truth value of an array ... is ambiguous" for more than one
    # element. Compare the arrays as a whole instead.
    assert np.array_equal(res, expected)

def test_cmp_dt64_arraylike_tznaive(self, all_compare_operators):
# arbitrary tz-naive DatetimeIndex
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/dtypes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ def test_numpy_string_dtype(self):
'datetime64[ns, Asia/Tokyo]',
'datetime64[ns, UTC]'])
def test_datetimetz_dtype(self, dtype):
assert com.pandas_dtype(dtype) is DatetimeTZDtype(dtype)
assert com.pandas_dtype(dtype) == DatetimeTZDtype(dtype)
assert (com.pandas_dtype(dtype) ==
DatetimeTZDtype.construct_from_string(dtype))
assert com.pandas_dtype(dtype) == dtype

def test_categorical_dtype(self):
Expand Down
Loading