Skip to content

REF: mix PeriodPseudoDtype into PeriodDtype #34590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__all__ = [
"dtypes",
"localize_pydatetime",
"NaT",
"NaTType",
Expand All @@ -17,7 +18,7 @@
"to_offset",
]


from . import dtypes # type: ignore
from .conversion import localize_pydatetime
from .nattype import NaT, NaTType, iNaT, is_null_datetimelike, nat_strings
from .np_datetime import OutOfBoundsDatetime
Expand Down
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/dtypes.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ cdef enum PeriodDtypeCode:
U = 11000 # Microsecondly
N = 12000 # Nanosecondly

UNDEFINED = -10_000

cdef class PeriodPseudoDtype:

cdef class PeriodDtypeBase:
cdef readonly:
PeriodDtypeCode dtype_code
6 changes: 3 additions & 3 deletions pandas/_libs/tslibs/dtypes.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# originals


cdef class PeriodPseudoDtype:
cdef class PeriodDtypeBase:
"""
Similar to an actual dtype, this contains all of the information
describing a PeriodDtype in an integer code.
Expand All @@ -14,9 +14,9 @@ cdef class PeriodPseudoDtype:
self.dtype_code = code

def __eq__(self, other):
if not isinstance(other, PeriodPseudoDtype):
if not isinstance(other, PeriodDtypeBase):
return False
if not isinstance(self, PeriodPseudoDtype):
if not isinstance(self, PeriodDtypeBase):
# cython semantics, this is a reversed op
return False
return self.dtype_code == other.dtype_code
Expand Down
4 changes: 1 addition & 3 deletions pandas/_libs/tslibs/offsets.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2528,12 +2528,10 @@ cdef class Week(SingleConstructorOffset):
-------
result : DatetimeIndex
"""
from .frequencies import get_freq_code # TODO: avoid circular import

i8other = dtindex.asi8
off = (i8other % DAY_NANOS).view("timedelta64[ns]")

base, mult = get_freq_code(self.freqstr)
base = self._period_dtype_code
base_period = dtindex.to_period(base)

if self.n > 0:
Expand Down
8 changes: 4 additions & 4 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ from pandas._libs.tslibs.ccalendar cimport (
)
from pandas._libs.tslibs.ccalendar cimport c_MONTH_NUMBERS

from pandas._libs.tslibs.dtypes cimport PeriodPseudoDtype
from pandas._libs.tslibs.dtypes cimport PeriodDtypeBase

from pandas._libs.tslibs.frequencies cimport (
attrname_to_abbrevs,
Expand Down Expand Up @@ -1517,7 +1517,7 @@ cdef class _Period:

cdef readonly:
int64_t ordinal
PeriodPseudoDtype _dtype
PeriodDtypeBase _dtype
BaseOffset freq

def __cinit__(self, int64_t ordinal, BaseOffset freq):
Expand All @@ -1526,7 +1526,7 @@ cdef class _Period:
# Note: this is more performant than PeriodDtype.from_date_offset(freq)
# because from_date_offset cannot be made a cdef method (until cython
# supported cdef classmethods)
self._dtype = PeriodPseudoDtype(freq._period_dtype_code)
self._dtype = PeriodDtypeBase(freq._period_dtype_code)

@classmethod
def _maybe_convert_freq(cls, object freq) -> BaseOffset:
Expand Down Expand Up @@ -2463,7 +2463,7 @@ class Period(_Period):
raise ValueError(msg)

if ordinal is None:
base, mult = get_freq_code(freq)
base, _ = get_freq_code(freq)
ordinal = period_ordinal(dt.year, dt.month, dt.day,
dt.hour, dt.minute, dt.second,
dt.microsecond, 0, base)
Expand Down
16 changes: 8 additions & 8 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@

def _field_accessor(name: str, docstring=None):
def f(self):
base, _ = libfrequencies.get_freq_code(self.freq)
base = self.freq._period_dtype_code
result = get_period_field_arr(name, self.asi8, base)
return result

Expand Down Expand Up @@ -440,12 +440,12 @@ def to_timestamp(self, freq=None, how="start"):
return (self + self.freq).to_timestamp(how="start") - adjust

if freq is None:
base, mult = libfrequencies.get_freq_code(self.freq)
base = self.freq._period_dtype_code
freq = libfrequencies.get_to_timestamp_base(base)
else:
freq = Period._maybe_convert_freq(freq)

base, mult = libfrequencies.get_freq_code(freq)
base, _ = libfrequencies.get_freq_code(freq)
new_data = self.asfreq(freq, how=how)

new_data = libperiod.periodarr_to_dt64arr(new_data.asi8, base)
Expand Down Expand Up @@ -523,14 +523,14 @@ def asfreq(self, freq=None, how: str = "E") -> "PeriodArray":

freq = Period._maybe_convert_freq(freq)

base1, mult1 = libfrequencies.get_freq_code(self.freq)
base2, mult2 = libfrequencies.get_freq_code(freq)
base1 = self.freq._period_dtype_code
base2 = freq._period_dtype_code

asi8 = self.asi8
# mult1 can't be negative or 0
# self.freq.n can't be negative or 0
end = how == "E"
if end:
ordinal = asi8 + mult1 - 1
ordinal = asi8 + self.freq.n - 1
else:
ordinal = asi8

Expand Down Expand Up @@ -950,7 +950,7 @@ def dt64arr_to_periodarr(data, freq, tz=None):
if isinstance(data, (ABCIndexClass, ABCSeries)):
data = data._values

base, mult = libfrequencies.get_freq_code(freq)
base = freq._period_dtype_code
return libperiod.dt64arr_to_periodarr(data.view("i8"), base, tz), freq


Expand Down
15 changes: 10 additions & 5 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import pytz

from pandas._libs.interval import Interval
from pandas._libs.tslibs import NaT, Period, Timestamp, timezones, to_offset
from pandas._libs.tslibs import NaT, Period, Timestamp, dtypes, timezones, to_offset
from pandas._libs.tslibs.offsets import BaseOffset
from pandas._typing import DtypeObj, Ordered

Expand Down Expand Up @@ -848,7 +848,7 @@ def __setstate__(self, state) -> None:


@register_extension_dtype
class PeriodDtype(PandasExtensionDtype):
class PeriodDtype(dtypes.PeriodDtypeBase, PandasExtensionDtype):
"""
An ExtensionDtype for Period data.

Expand Down Expand Up @@ -896,7 +896,8 @@ def __new__(cls, freq=None):

elif freq is None:
# empty constructor for pickle compat
u = object.__new__(cls)
# -10_000 corresponds to PeriodDtypeCode.UNDEFINED
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this actually hit in tests? what is the point of UNDEFINED? I mean, when does this become defined

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. We have a test that constructs PeriodDtype() which goes through this path. the existing comment a couple lines up suggests this is for pickle compat

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, how does this get resolved though? do you see periods with undefined after this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think the idea is to use it as a placeholder until you can get a "real" PeriodDtype. e.g. if you know that you are parsing a csv column to Period but dont yet know the freq. just speculating here

u = dtypes.PeriodDtypeBase.__new__(cls, -10_000)
u._freq = None
return u

Expand All @@ -906,11 +907,15 @@ def __new__(cls, freq=None):
try:
return cls._cache[freq.freqstr]
except KeyError:
u = object.__new__(cls)
dtype_code = freq._period_dtype_code
u = dtypes.PeriodDtypeBase.__new__(cls, dtype_code)
u._freq = freq
cls._cache[freq.freqstr] = u
return u

def __reduce__(self):
return type(self), (self.freq,)

@property
def freq(self):
"""
Expand Down Expand Up @@ -977,7 +982,7 @@ def __eq__(self, other: Any) -> bool:
return isinstance(other, PeriodDtype) and self.freq == other.freq

def __setstate__(self, state):
# for pickle compat. __get_state__ is defined in the
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._freq = state["freq"]
Expand Down
4 changes: 2 additions & 2 deletions pandas/plotting/_matplotlib/timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np

from pandas._libs.tslibs import Period, to_offset
from pandas._libs.tslibs.frequencies import FreqGroup, base_and_stride, get_freq_code
from pandas._libs.tslibs.frequencies import FreqGroup, base_and_stride
from pandas._typing import FrameOrSeriesUnion

from pandas.core.dtypes.generic import (
Expand Down Expand Up @@ -213,7 +213,7 @@ def _use_dynamic_x(ax, data: "FrameOrSeriesUnion") -> bool:

# FIXME: hack this for 0.10.1, creating more technical debt...sigh
if isinstance(data.index, ABCDatetimeIndex):
base = get_freq_code(freq)[0]
base = to_offset(freq)._period_dtype_code
x = data.index
if base <= FreqGroup.FR_DAY:
return x[:1].is_normalized
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/dtypes/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,11 @@ def test_pickle(self, dtype):

# force back to the cache
result = tm.round_trip_pickle(dtype)
assert not len(dtype._cache)
if not isinstance(dtype, PeriodDtype):
# Because PeriodDtype has a cython class as a base class,
# it has different pickle semantics, and its cache is re-populated
# on un-pickling.
assert not len(dtype._cache)
assert result == dtype


Expand Down