Skip to content

DOC: better document Dtypes docstrings + avoid sphinx warnings #26067

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
39 changes: 39 additions & 0 deletions doc/source/reference/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@ If the data are tz-aware, then every value in the array must have the same timez
:toctree: api/

arrays.DatetimeArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

DatetimeTZDtype

.. _api.arrays.timedelta:
Expand Down Expand Up @@ -260,6 +265,11 @@ Every period in a ``PeriodArray`` must have the same ``freq``.
:toctree: api/

arrays.PeriodArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

PeriodDtype

.. _api.arrays.interval:
Expand Down Expand Up @@ -296,6 +306,11 @@ A collection of intervals may be stored in an :class:`arrays.IntervalArray`.
:toctree: api/

arrays.IntervalArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

IntervalDtype

.. _api.arrays.integer_na:
Expand All @@ -310,6 +325,11 @@ Pandas provides this through :class:`arrays.IntegerArray`.
:toctree: api/

arrays.IntegerArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

Int8Dtype
Int16Dtype
Int32Dtype
Expand Down Expand Up @@ -396,8 +416,27 @@ be stored efficiently as a :class:`SparseArray`.
:toctree: api/

SparseArray

.. autosummary::
:toctree: api/
:template: autosummary/class_without_autosummary.rst

SparseDtype

The ``Series.sparse`` accessor may be used to access sparse-specific attributes
and methods if the :class:`Series` contains sparse values. See
:ref:`api.series.sparse` for more.



.. Dtype attributes which are manually listed in their docstrings: they are
.. included here to make sure a docstring page is built for them

..
.. autosummary::
:toctree: api/

DatetimeTZDtype.unit
DatetimeTZDtype.tz
PeriodDtype.freq
IntervalDtype.subtype
14 changes: 13 additions & 1 deletion pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,6 +682,17 @@ def integer_arithmetic_method(self, other):

module = sys.modules[__name__]

_dtype_docstring = """
An ExtensionDtype for {dtype} integer data.

Attributes
----------
None

Methods
-------
None
"""

# create the Dtype
_dtypes = {}
Expand All @@ -695,7 +706,8 @@ def integer_arithmetic_method(self, other):
classname = "{}Dtype".format(name)
numpy_dtype = getattr(np, dtype)
attributes_dict = {'type': numpy_dtype,
'name': name}
'name': name,
'__doc__': _dtype_docstring.format(dtype=dtype)}
dtype_type = register_extension_dtype(
type(classname, (_IntegerDtype, ), attributes_dict)
)
Expand Down
8 changes: 8 additions & 0 deletions pandas/core/arrays/sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ class SparseDtype(ExtensionDtype):
=========== ==========

The default value may be overridden by specifying a `fill_value`.

Attributes
----------
None

Methods
-------
None
"""
# We include `_is_na_fill_value` in the metadata to avoid hash collisions
# between SparseDtype(float, 0.0) and SparseDtype(float, nan).
Expand Down
154 changes: 110 additions & 44 deletions pandas/core/dtypes/dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ class CategoricalDtypeType(type):
@register_extension_dtype
class CategoricalDtype(PandasExtensionDtype, ExtensionDtype):
"""
Type for categorical data with the categories and orderedness
Type for categorical data with the categories and orderedness.

.. versionchanged:: 0.21.0

Expand Down Expand Up @@ -334,6 +334,9 @@ def _finalize(self, categories, ordered, fastpath=False):
self._ordered = ordered

def __setstate__(self, state):
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._categories = state.pop('categories', None)
self._ordered = state.pop('ordered', False)

Expand Down Expand Up @@ -570,13 +573,40 @@ def _is_boolean(self):

@register_extension_dtype
class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):

"""
A np.dtype duck-typed class, suitable for holding a custom datetime with tz
dtype.
An ExtensionDtype for timezone-aware datetime data.

**This is not an actual numpy dtype**, but a duck type.

Parameters
----------
unit : str, default "ns"
The precision of the datetime data. Currently limited
to ``"ns"``.
tz : str, int, or datetime.tzinfo
The timezone.

Attributes
----------
unit
tz

Methods
-------
None

THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of
np.datetime64[ns]
Raises
------
pytz.UnknownTimeZoneError
When the requested timezone cannot be found.

Examples
--------
>>> pd.DatetimeTZDtype(tz='UTC')
datetime64[ns, UTC]

>>> pd.DatetimeTZDtype(tz='dateutil/US/Central')
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
"""
type = Timestamp # type: Type[Timestamp]
kind = 'M' # type: str_type
Expand All @@ -589,30 +619,6 @@ class DatetimeTZDtype(PandasExtensionDtype, ExtensionDtype):
_cache = {} # type: Dict[str_type, PandasExtensionDtype]

def __init__(self, unit="ns", tz=None):
"""
An ExtensionDtype for timezone-aware datetime data.

Parameters
----------
unit : str, default "ns"
The precision of the datetime data. Currently limited
to ``"ns"``.
tz : str, int, or datetime.tzinfo
The timezone.

Raises
------
pytz.UnknownTimeZoneError
When the requested timezone cannot be found.

Examples
--------
>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='UTC')
datetime64[ns, UTC]

>>> pd.core.dtypes.dtypes.DatetimeTZDtype(tz='dateutil/US/Central')
datetime64[ns, tzfile('/usr/share/zoneinfo/US/Central')]
"""
if isinstance(unit, DatetimeTZDtype):
unit, tz = unit.unit, unit.tz

Expand Down Expand Up @@ -718,17 +724,40 @@ def __eq__(self, other):
str(self.tz) == str(other.tz))

def __setstate__(self, state):
# for pickle compat.
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._tz = state['tz']
self._unit = state['unit']


@register_extension_dtype
class PeriodDtype(ExtensionDtype, PandasExtensionDtype):
"""
A Period duck-typed class, suitable for holding a period with freq dtype.
An ExtensionDtype for Period data.

**This is not an actual numpy dtype**, but a duck type.

Parameters
----------
freq : str or DateOffset
The frequency of this PeriodDtype.

Attributes
----------
freq

THIS IS NOT A REAL NUMPY DTYPE, but essentially a sub-class of np.int64.
Methods
-------
None

Examples
--------
>>> pd.PeriodDtype(freq='D')
period[D]

>>> pd.PeriodDtype(freq=pd.offsets.MonthEnd())
period[M]
"""
type = Period # type: Type[Period]
kind = 'O' # type: str_type
Expand All @@ -751,7 +780,9 @@ def __new__(cls, freq=None):

elif freq is None:
# empty constructor for pickle compat
return object.__new__(cls)
u = object.__new__(cls)
u._freq = None
return u

if not isinstance(freq, ABCDateOffset):
freq = cls._parse_dtype_strict(freq)
Expand All @@ -760,10 +791,15 @@ def __new__(cls, freq=None):
return cls._cache[freq.freqstr]
except KeyError:
u = object.__new__(cls)
u.freq = freq
u._freq = freq
cls._cache[freq.freqstr] = u
return u

@property
def freq(self):
"""The frequency object of this PeriodDtype."""
return self._freq

@classmethod
def _parse_dtype_strict(cls, freq):
if isinstance(freq, str):
Expand Down Expand Up @@ -817,6 +853,12 @@ def __eq__(self, other):

return isinstance(other, PeriodDtype) and self.freq == other.freq

def __setstate__(self, state):
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._freq = state['freq']

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a round-trip test for this? Shouldn't you also need __getstate__? Or maybe not, since you are setting _freq, which is now fine.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

__getstate__ is implemented in the super class:

def __getstate__(self):
# pickle support; we don't want to pickle the cache
return {k: getattr(self, k, None) for k in self._metadata}

Added in 154a647

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And it's the round trip test that was failing

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, what if you move _freq into _metadata then? (And likewise for the other subclasses.)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, I have been thinking about that as well. _metadata is also used for other things though (things that are part of the EA interface, whereas this __getstate__ is only for our internal dtypes), e.g. for equality. For equality it would not matter whether freq or _freq is included, but if we wanted to use it e.g. for a default repr as well, then it needs to be the public attributes.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, yeah they are coupled. i don't really like having to be explicit about the pickle compat here.....

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i don't really like having to be explicit about the pickle compat here.....

Yep, I agree. Although it is consistent with how it is already done in the other dtypes (Categorical actually has the same), it would be nice to clean it up. I could also add a different _metadata_pickles list, so we can handle both getstate and setstate in the PandasExtensionDtype superclass. Of course that doesn't change the implementation, but it does reduce some duplication in the code.

@classmethod
def is_dtype(cls, dtype):
"""
Expand Down Expand Up @@ -849,9 +891,27 @@ def construct_array_type(cls):
@register_extension_dtype
class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
"""
A Interval duck-typed class, suitable for holding an interval
An ExtensionDtype for Interval data.

THIS IS NOT A REAL NUMPY DTYPE
**This is not an actual numpy dtype**, but a duck type.

Parameters
----------
subtype : str, np.dtype
The dtype of the Interval bounds.

Attributes
----------
subtype

Methods
-------
None

Examples
--------
>>> pd.IntervalDtype(subtype='int64')
interval[int64]
"""
name = 'interval'
kind = None # type: Optional[str_type]
Expand All @@ -863,11 +923,6 @@ class IntervalDtype(PandasExtensionDtype, ExtensionDtype):
_cache = {} # type: Dict[str_type, PandasExtensionDtype]

def __new__(cls, subtype=None):
"""
Parameters
----------
subtype : the dtype of the Interval
"""
from pandas.core.dtypes.common import (
is_categorical_dtype, is_string_dtype, pandas_dtype)

Expand All @@ -877,7 +932,7 @@ def __new__(cls, subtype=None):
# we are called as an empty constructor
# generally for pickle compat
u = object.__new__(cls)
u.subtype = None
u._subtype = None
return u
elif (isinstance(subtype, str) and
subtype.lower() == 'interval'):
Expand All @@ -903,10 +958,15 @@ def __new__(cls, subtype=None):
return cls._cache[str(subtype)]
except KeyError:
u = object.__new__(cls)
u.subtype = subtype
u._subtype = subtype
cls._cache[str(subtype)] = u
return u

@property
def subtype(self):
"""The dtype of the Interval bounds."""
return self._subtype

@classmethod
def construct_array_type(cls):
"""
Expand Down Expand Up @@ -963,6 +1023,12 @@ def __eq__(self, other):
from pandas.core.dtypes.common import is_dtype_equal
return is_dtype_equal(self.subtype, other.subtype)

def __setstate__(self, state):
# for pickle compat. __getstate__ is defined in the
# PandasExtensionDtype superclass and uses the public properties to
# pickle -> need to set the settable private ones here (see GH26067)
self._subtype = state['subtype']

@classmethod
def is_dtype(cls, dtype):
"""
Expand Down