-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF/API: DatetimeTZDtype #23990
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
REF/API: DatetimeTZDtype #23990
Changes from 7 commits
1ca7fa4
2fa4bb0
7e6d8ea
9e4faf8
ad2723c
e0b7b77
6cc9ce5
7ab2a74
c14b45f
10d2c8a
50e1aeb
c23c9e2
e3616e9
6d626e6
42fe484
22699f1
d89a6cc
c82999d
a9b929a
5cde369
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,8 @@ | ||
""" define extension dtypes """ | ||
|
||
import re | ||
|
||
import numpy as np | ||
import pytz | ||
|
||
from pandas._libs.interval import Interval | ||
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones | ||
|
@@ -483,67 +483,63 @@ class DatetimeTZDtype(PandasExtensionDtype): | |
str = '|M8[ns]' | ||
num = 101 | ||
base = np.dtype('M8[ns]') | ||
na_value = NaT | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What is this needed for? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Part of the ExtensionDtype interface. Slipped in here. |
||
_metadata = ('unit', 'tz') | ||
_match = re.compile(r"(datetime64|M8)\[(?P<unit>.+), (?P<tz>.+)\]") | ||
_cache = {} | ||
# TODO: restore caching? who cares though? It seems needlessly complex. | ||
# np.dtype('datetime64[ns]') isn't a singleton | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is a huge performance penalty w/o caching. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Did you see the perf numbers I posted in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Try running a good part of the test suite. It's the repeated construction that's a problem, not the single construction, which is fine. W/o caching you end up creating a huge number of these. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I guess we could remove the comment now. |
||
|
||
def __new__(cls, unit=None, tz=None): | ||
""" Create a new unit if needed, otherwise return from the cache | ||
def __init__(self, unit="ns", tz=None): | ||
""" | ||
An ExtensionDtype for timezone-aware datetime data. | ||
|
||
Parameters | ||
---------- | ||
unit : string unit that this represents, currently must be 'ns' | ||
tz : string tz that this represents | ||
""" | ||
unit : str, default "ns" | ||
The precision of the datetime data. Currently limited | ||
to ``"ns"``. | ||
tz : str, int, or datetime.tzinfo | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
The timezone. | ||
|
||
Raises | ||
------ | ||
pytz.UnknownTimeZoneError | ||
When the requested timezone cannot be found. | ||
|
||
Examples | ||
-------- | ||
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC') | ||
datetime64[ns, UTC] | ||
|
||
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central') | ||
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')] | ||
""" | ||
if isinstance(unit, DatetimeTZDtype): | ||
unit, tz = unit.unit, unit.tz | ||
|
||
elif unit is None: | ||
# we are called as an empty constructor | ||
# generally for pickle compat | ||
return object.__new__(cls) | ||
if unit != 'ns': | ||
raise ValueError("DatetimeTZDtype only supports ns units") | ||
|
||
if tz: | ||
tz = timezones.maybe_get_tz(tz) | ||
elif tz is not None: | ||
raise pytz.UnknownTimeZoneError(tz) | ||
elif tz is None: | ||
raise TypeError("A 'tz' is required.") | ||
|
||
# we were passed a string that we can construct | ||
try: | ||
m = cls._match.search(unit) | ||
if m is not None: | ||
unit = m.groupdict()['unit'] | ||
tz = timezones.maybe_get_tz(m.groupdict()['tz']) | ||
except TypeError: | ||
raise ValueError("could not construct DatetimeTZDtype") | ||
|
||
elif isinstance(unit, compat.string_types): | ||
|
||
if unit != 'ns': | ||
raise ValueError("DatetimeTZDtype only supports ns units") | ||
|
||
unit = unit | ||
tz = tz | ||
|
||
if tz is None: | ||
raise ValueError("DatetimeTZDtype constructor must have a tz " | ||
"supplied") | ||
self._unit = unit | ||
self._tz = tz | ||
|
||
# hash with the actual tz if we can | ||
# some cannot be hashed, so stringify | ||
try: | ||
key = (unit, tz) | ||
hash(key) | ||
except TypeError: | ||
key = (unit, str(tz)) | ||
@property | ||
def unit(self): | ||
"""The precision of the datetime data.""" | ||
return self._unit | ||
|
||
# set/retrieve from cache | ||
try: | ||
return cls._cache[key] | ||
except KeyError: | ||
u = object.__new__(cls) | ||
u.unit = unit | ||
u.tz = tz | ||
cls._cache[key] = u | ||
return u | ||
@property | ||
def tz(self): | ||
"""The timezone.""" | ||
return self._tz | ||
|
||
@classmethod | ||
def construct_array_type(cls): | ||
|
@@ -558,24 +554,43 @@ def construct_array_type(cls): | |
|
||
@classmethod | ||
def construct_from_string(cls, string): | ||
""" attempt to construct this type from a string, raise a TypeError if | ||
it's not possible | ||
""" | ||
Construct a DatetimeTZDtype from a string. | ||
|
||
Parameters | ||
---------- | ||
string : str | ||
The string alias for this DatetimeTZDtype. | ||
Should be formatted like ``datetime64[ns, <tz>]``, | ||
where ``<tz>`` is the timezone name. | ||
|
||
Examples | ||
-------- | ||
>>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') | ||
datetime64[ns, UTC] | ||
""" | ||
msg = "could not construct DatetimeTZDtype""" | ||
try: | ||
return cls(unit=string) | ||
match = cls._match.match(string) | ||
if match: | ||
d = match.groupdict() | ||
return cls(unit=d['unit'], tz=d['tz']) | ||
else: | ||
raise TypeError(msg) | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
except ValueError: | ||
raise TypeError("could not construct DatetimeTZDtype") | ||
raise TypeError(msg) | ||
|
||
def __unicode__(self): | ||
# format the tz | ||
return "datetime64[{unit}, {tz}]".format(unit=self.unit, tz=self.tz) | ||
|
||
@property | ||
def name(self): | ||
"""A string representation of the dtype.""" | ||
return str(self) | ||
|
||
def __hash__(self): | ||
# make myself hashable | ||
# TODO: update this. | ||
return hash(str(self)) | ||
|
||
def __eq__(self, other): | ||
|
@@ -586,6 +601,10 @@ def __eq__(self, other): | |
self.unit == other.unit and | ||
str(self.tz) == str(other.tz)) | ||
|
||
def __getstate__(self): | ||
TomAugspurger marked this conversation as resolved.
Show resolved
Hide resolved
|
||
# for pickle compat. | ||
return self.__dict__ | ||
|
||
|
||
class PeriodDtype(ExtensionDtype, PandasExtensionDtype): | ||
""" | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why is this not cached?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very strange behavior I couldn't figure out.
I really don't know why, but that only occurred when the tz was cached.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Right, the tz itself can change depending on what date is being localized.