
WIP: prototype for unit support #10349 #17153


Closed
wants to merge 9 commits
18 changes: 18 additions & 0 deletions pandas/core/dtypes/common.py
@@ -15,6 +15,7 @@
ABCIndexClass)
from .inference import is_string_like
from .inference import * # noqa
from .costum_dtypes import NumpyDtypeWithMetadata, NumpyDtypeWithMetadataType


_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
@@ -508,6 +509,15 @@ def is_categorical_dtype(arr_or_dtype):
return CategoricalDtype.is_dtype(arr_or_dtype)


def is_numpy_dtype_with_metadata(arr_or_dtype):
"""
Check whether the array or dtype is an instance of NumpyDtypeWithMetadata.
"""
if arr_or_dtype is None:
return False
return NumpyDtypeWithMetadata.is_dtype(arr_or_dtype)


def is_string_dtype(arr_or_dtype):
"""
Check whether the provided array or dtype is of the string dtype.
@@ -1615,6 +1625,8 @@ def is_extension_type(arr):
return True
elif is_datetimetz(arr):
return True
elif is_numpy_dtype_with_metadata(arr):
return True
return False


@@ -1717,6 +1729,8 @@ def _get_dtype(arr_or_dtype):
return arr_or_dtype
elif isinstance(arr_or_dtype, IntervalDtype):
return arr_or_dtype
elif isinstance(arr_or_dtype, NumpyDtypeWithMetadata):
return arr_or_dtype
elif isinstance(arr_or_dtype, string_types):
if is_categorical_dtype(arr_or_dtype):
return CategoricalDtype.construct_from_string(arr_or_dtype)
@@ -1762,6 +1776,8 @@ def _get_dtype_type(arr_or_dtype):
return IntervalDtypeType
elif isinstance(arr_or_dtype, PeriodDtype):
return PeriodDtypeType
elif isinstance(arr_or_dtype, NumpyDtypeWithMetadata):
return NumpyDtypeWithMetadataType
elif isinstance(arr_or_dtype, string_types):
if is_categorical_dtype(arr_or_dtype):
return CategoricalDtypeType
@@ -1879,6 +1895,8 @@ def pandas_dtype(dtype):

if isinstance(dtype, DatetimeTZDtype):
return dtype
elif isinstance(dtype, NumpyDtypeWithMetadata):
return dtype
elif isinstance(dtype, PeriodDtype):
return dtype
elif isinstance(dtype, CategoricalDtype):
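
As a rough illustration of how the new predicate and the pandas_dtype() hook are meant to behave (AlwaysSame is the test dtype added further down in this diff; the expected results are read off the code above, not verified):

    from pandas.core.dtypes.common import (is_numpy_dtype_with_metadata,
                                           pandas_dtype)
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    dt = AlwaysSame(42)

    is_numpy_dtype_with_metadata(dt)          # expected: True
    is_numpy_dtype_with_metadata(None)        # expected: False (explicit guard)
    is_numpy_dtype_with_metadata('float64')   # expected: False (plain numpy dtype)

    # pandas_dtype() now passes NumpyDtypeWithMetadata instances through unchanged.
    pandas_dtype(dt) is dt                    # expected: True
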
187 changes: 187 additions & 0 deletions pandas/core/dtypes/costum_dtypes.py
@@ -0,0 +1,187 @@
"""
This module contains an interface that external libraries can use to define
their own dtypes that are compatible with pandas (but NOT with numpy).
"""

import numpy as np

from .dtypes import ExtensionDtype

class NumpyDtypeWithMetadataType(type): # Do we need this?
"""
The type of NumpyDtypeWithMetadata
"""
pass

class NumpyDtypeWithMetadata(ExtensionDtype):

"""
An ExtensionDtype for data that can be stored in a numpy dtype,
but where the dtype itself carries metadata and may redefine
arithmetic operations.

To properly implement caching behaviour,
you might have to implement a __new__ method.
"""
type = NumpyDtypeWithMetadataType
# What attributes should be stored during pickling?
# If this is provided, you usually do not have to
# override __getstate__
_metadata = []

def base(self):
"""
The numpy-compatible dtype in which the actual data is stored.

Example: np.dtype('f8')
"""
raise NotImplementedError("'base' must be implemented by subclass "
"(probably as class-level variable)")


@classmethod
def construct_from_string(cls, string):
""" attempt to construct this type from a string, raise a TypeError if
it's not possible """
raise NotImplementedError("'construct_from_string' must be implemented by subclass.")

def operation_typecompatible(self, operation_name, other_dtype, is_left=True):
"""
Is the desired operation possible between this dtype and other_dtype?

Parameters
----------
operation_name: The name of the desired operation, e.g. '__eq__'
other_dtype: The dtype of the other operand
is_left: Whether this dtype is on the left-hand side of the binary operation.

Returns
-------
Boolean or NotImplemented
"""
return False

def get_operation_wrapper(self):
"""
This is called by `pandas.ops._Op.get_op` to get an object
responsible for type-coercion (which should have the same interface as _Op)

Returns
-------
A class implementing the same interface as pandas.ops._Op, or None.
Return None if the default _Op class should be used.
"""
return None

def to_dtype(self, data):
"""
Convert arbitrary data to this dtype.

Override this, if you need any additional conversions.

Parameters
----------
data: array-like

Returns
-------
A numpy array with the same dtype as self.base.
"""
return np.asarray(data, dtype=self.base)

class AlwaysSame(NumpyDtypeWithMetadata):
"""
This is an example of how a library could implement a
subclass of NumpyDtypeWithMetadata; except for testing,
it is not useful for anything else.
"""
_metadata = ["_target_value", "base"]
def __new__(cls, target_value=None):
if target_value is None:
#We are unpickling
return object.__new__(cls)
try:
return cls._cache[target_value]
except KeyError:
d = object.__new__(cls)
d._target_value = target_value
# In this case, we set the base numpy dtype upon object construction.
d.base = np.dtype(type(target_value))  # raises if target_value is not a simple number
cls._cache[target_value] = d
return d

def __hash__(self):
return hash(self._target_value)

def __unicode__(self):
return "always[{}]".format(repr(self._target_value))

def __setstate__(self, state):
try:
self._target_value = state["_target_value"]
except KeyError:
print("state", state)
raise
self.base = np.dtype(type(self._target_value))

def __eq__(self, other):
if not isinstance(other, AlwaysSame):
return NotImplemented
return self._target_value == other._target_value

def to_dtype(self, data):
"""
Fill the array with the target value.
"""
# Since performance is irrelevant for this Test-dtype, we
# do not try to modify data in-place
data = np.ones(np.shape(data), dtype=self.base)
data = data*self._target_value
return data

def get_operation_wrapper(self):
"""
This is called by `pandas.ops._Op.get_op` to get an object
responsible for type-coercion (which should have the same interface as _Op)

Returns
-------
A class implementing the same interface as pandas.ops._Op, or None.
Return None if the default _Op class should be used.
"""
class AlwaysSameOp():
dtype = None
fill_value = self._target_value
def __init__(self, left, right, name, na_op):
self.left = left
self.right = right

self.name = name
self.na_op = na_op

# Here, a conversion of left and right to lvalues and rvalues could take place.
# lvalues must be a type that has the desired operator defined.
self.lvalues = left
self.rvalues = right
return None
def wrap_results(self, results):
print("l,r ", type(self.left), type(self.right))
# Comparison operators return dtype bool.
if self.name in ["__eq__", "__lt__", "__gt__", "__ge__", "__le__", "__ne__"]:
return results
# All other operators return dtype AlwaysSame
if isinstance(self.left.dtype, AlwaysSame):
target_dtype = self.left.dtype
else:
assert isinstance(self.right.dtype, AlwaysSame)
target_dtype = self.right.dtype
return target_dtype.to_dtype(results)
return AlwaysSameOp

def operation_typecompatible(self, name, other, is_left=True):
if isinstance(other, AlwaysSame):
if other._target_value != self._target_value:
if type(other) != AlwaysSame:
return NotImplemented  # defer: let the other (subclass) dtype decide
return False
return True
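
To illustrate the intended extension point, a minimal hypothetical third-party dtype built on this interface could look roughly like the following; the MetersDtype name and its behaviour are invented for illustration and are not part of this PR:

    import numpy as np
    from pandas.core.dtypes.costum_dtypes import NumpyDtypeWithMetadata

    class MetersDtype(NumpyDtypeWithMetadata):
        """Hypothetical unit-carrying dtype: float64 values labelled as meters."""
        base = np.dtype('f8')            # the numpy dtype used for storage
        _metadata = []                   # no extra state to pickle

        def __hash__(self):
            return hash('meters[f8]')

        def __eq__(self, other):
            return isinstance(other, MetersDtype)

        def __unicode__(self):
            return 'meters[f8]'

        @classmethod
        def construct_from_string(cls, string):
            if string == 'meters[f8]':
                return cls()
            raise TypeError("cannot construct a MetersDtype from '{}'".format(string))

        def operation_typecompatible(self, operation_name, other_dtype, is_left=True):
            # only allow operations against other meter-valued data
            return isinstance(other_dtype, MetersDtype)
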
28 changes: 27 additions & 1 deletion pandas/core/ops.py
@@ -29,6 +29,7 @@
is_datetimelike_v_numeric,
is_integer_dtype, is_categorical_dtype,
is_object_dtype, is_timedelta64_dtype,
is_numpy_dtype_with_metadata,
is_datetime64_dtype, is_datetime64tz_dtype,
is_bool_dtype, is_datetimetz,
is_list_like,
@@ -330,12 +331,37 @@ def get_op(cls, left, right, name, na_op):
is_datetime_lhs = (is_datetime64_dtype(left) or
is_datetime64tz_dtype(left))

if is_numpy_dtype_with_metadata(left):
left_compatible = left.dtype.operation_typecompatible(name, right.dtype, is_left=True)
if left_compatible is not NotImplemented:
if left_compatible:
op_class = left.dtype.get_operation_wrapper()
if op_class is not None:
return op_class(left, right, name, na_op)
else:
return _Op(left, right, name, na_op)
else:
raise TypeError("Operation {} not permitted between "
"dtype {} and type {}".format(name, left.dtype,
right.dtype))
# left is either not a NumpyDtypeWithMetadata or did not handle the operation.
if is_numpy_dtype_with_metadata(right):
if right.dtype.operation_typecompatible(name, left.dtype, is_left=False):
op_class = right.dtype.get_operation_wrapper()
if op_class is not None:
return op_class(left, right, name, na_op)
else:
return _Op(left, right, name, na_op)
else:
raise TypeError("Operation {} not permitted between "
"dtype {} and dtype {}".format(name, left.dtype,
right.dtype))
# No NumpyDtypeWithMetadata involved.
if not (is_datetime_lhs or is_timedelta_lhs):
return _Op(left, right, name, na_op)
else:
return _TimeOp(left, right, name, na_op)


class _TimeOp(_Op):
"""
Wrapper around Series datetime/time/timedelta arithmetic operations.
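
A rough sketch of how this dispatch is intended to behave with the AlwaysSame test dtype (this relies on the Series changes further down in the diff; the expected results are inferred from the code, not verified):

    import pandas as pd
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    s = pd.Series([1, 2, 3], dtype=AlwaysSame(7))
    t = pd.Series([1, 2, 3], dtype=AlwaysSame(8))

    # Compatible operands: get_op() returns AlwaysSameOp, whose wrap_results()
    # converts the result back to the AlwaysSame dtype.
    s + s     # expected: every element is 7 again

    # Comparison operators keep the plain boolean result.
    s == s    # expected: all True, dtype bool

    # Operands whose dtypes are declared incompatible raise in get_op().
    s + t     # expected: TypeError
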
27 changes: 21 additions & 6 deletions pandas/core/series.py
@@ -23,6 +23,7 @@
is_datetimelike,
is_datetime64tz_dtype,
is_timedelta64_dtype,
is_numpy_dtype_with_metadata,
is_list_like,
is_hashable,
is_iterator,
@@ -150,6 +151,7 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,

def __init__(self, data=None, index=None, dtype=None, name=None,
copy=False, fastpath=False):
print("__init__: data = {}, dtype = {}".format(repr(data), repr(dtype)))

# we are called internally, so short-circuit
if fastpath:
@@ -262,7 +264,10 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
data = SingleBlockManager(data, index, fastpath=True)

generic.NDFrame.__init__(self, data, fastpath=True)

if is_numpy_dtype_with_metadata(dtype):
self._extension_dtype = dtype
else:
self._extension_dtype = None
self.name = name
self._set_axis(0, index, fastpath=True)

@@ -343,7 +348,10 @@ def name(self, value):
@property
def dtype(self):
""" return the dtype object of the underlying data """
return self._data.dtype
if self._extension_dtype is None:
return self._data.dtype
else:
return self._extension_dtype

@property
def dtypes(self):
@@ -2990,7 +2998,6 @@ def _sanitize_array(data, index, dtype=None, copy=False,

if dtype is not None:
dtype = pandas_dtype(dtype)

if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
if mask.any():
@@ -3005,11 +3012,13 @@ def _try_cast(arr, take_fast_path):
if take_fast_path:
if maybe_castable(arr) and not copy and dtype is None:
return arr

try:
subarr = maybe_cast_to_datetime(arr, dtype)
if not is_extension_type(subarr):
if not is_extension_type(subarr) and not is_extension_type(dtype):
subarr = np.array(subarr, dtype=dtype, copy=copy)
else:
if is_numpy_dtype_with_metadata(dtype):
subarr = dtype.to_dtype(subarr)
except (ValueError, TypeError):
if is_categorical_dtype(dtype):
subarr = Categorical(arr)
@@ -3056,6 +3065,7 @@ def _try_cast(arr, take_fast_path):
if dtype is not None:
try:
subarr = _try_cast(data, False)

except Exception:
if raise_cast_failure: # pragma: no cover
raise
@@ -3077,16 +3087,21 @@ def create_from_value(value, index, dtype):
subarr = DatetimeIndex([value] * len(index), dtype=dtype)
elif is_categorical_dtype(dtype):
subarr = Categorical([value] * len(index))
elif is_numpy_dtype_with_metadata(dtype):
subarr = np.empty(len(index), dtype=dtype.base)
subarr.fill(value)
subarr = dtype.to_dtype(subarr)
else:
if not isinstance(dtype, (np.dtype, type(np.dtype))):
dtype = dtype.dtype
subarr = np.empty(len(index), dtype=dtype)
subarr.fill(value)

return subarr

# scalar like, GH
print(type(subarr), dir(subarr))
if getattr(subarr, 'ndim', 0) == 0:
print("subarr", type(subarr), subarr)
if isinstance(data, list): # pragma: no cover
subarr = np.array(data, dtype=object)
elif index is not None:
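
Taken together, the Series changes are meant to make constructions like the following work (a sketch of the intended behaviour of this prototype, not a tested example):

    import pandas as pd
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    # _try_cast()/create_from_value() route the data through AlwaysSame.to_dtype(),
    # so every element is replaced by the target value.
    s = pd.Series([1, 2, 3], dtype=AlwaysSame(5))
    list(s)          # expected: [5, 5, 5]

    # The extension dtype is remembered on the Series via _extension_dtype,
    # while the underlying block keeps plain numpy data.
    s.dtype          # expected: the AlwaysSame(5) instance, i.e. "always[5]"
    s.values.dtype   # expected: AlwaysSame(5).base (a plain numpy integer dtype)
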
2 changes: 2 additions & 0 deletions pandas/core/tools/datetimes.py
@@ -336,6 +336,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
2 1960-01-04

"""

print("to datetime. Box is {}".format(box))
from pandas.core.indexes.datetimes import DatetimeIndex

tz = 'utc' if utc else None