
WIP: prototype for unit support #10349 #17153


Closed
wants to merge 9 commits
18 changes: 18 additions & 0 deletions pandas/core/dtypes/common.py
@@ -15,6 +15,7 @@
ABCIndexClass)
from .inference import is_string_like
from .inference import * # noqa
from .costum_dtypes import NumpyDtypeWithMetadata, NumpyDtypeWithMetadataType


_POSSIBLY_CAST_DTYPES = set([np.dtype(t).name
@@ -508,6 +509,15 @@ def is_categorical_dtype(arr_or_dtype):
return CategoricalDtype.is_dtype(arr_or_dtype)


def is_numpy_dtype_with_metadata(arr_or_dtype):
"""
Check whether the array or dtype is an instance of NumpyDtypeWithMetadata.
"""
if arr_or_dtype is None:
return False
return NumpyDtypeWithMetadata.is_dtype(arr_or_dtype)


def is_string_dtype(arr_or_dtype):
"""
Check whether the provided array or dtype is of the string dtype.
@@ -1615,6 +1625,8 @@ def is_extension_type(arr):
return True
elif is_datetimetz(arr):
return True
elif is_numpy_dtype_with_metadata(arr):
return True
return False


@@ -1717,6 +1729,8 @@ def _get_dtype(arr_or_dtype):
return arr_or_dtype
elif isinstance(arr_or_dtype, IntervalDtype):
return arr_or_dtype
elif isinstance(arr_or_dtype, NumpyDtypeWithMetadata):
return arr_or_dtype
elif isinstance(arr_or_dtype, string_types):
if is_categorical_dtype(arr_or_dtype):
return CategoricalDtype.construct_from_string(arr_or_dtype)
@@ -1762,6 +1776,8 @@ def _get_dtype_type(arr_or_dtype):
return IntervalDtypeType
elif isinstance(arr_or_dtype, PeriodDtype):
return PeriodDtypeType
elif isinstance(arr_or_dtype, NumpyDtypeWithMetadata):
return NumpyDtypeWithMetadataType
elif isinstance(arr_or_dtype, string_types):
if is_categorical_dtype(arr_or_dtype):
return CategoricalDtypeType
@@ -1879,6 +1895,8 @@ def pandas_dtype(dtype):

if isinstance(dtype, DatetimeTZDtype):
return dtype
elif isinstance(dtype, NumpyDtypeWithMetadata):
return dtype
elif isinstance(dtype, PeriodDtype):
return dtype
elif isinstance(dtype, CategoricalDtype):
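
As a rough illustration of how the new predicate and the pandas_dtype() hook are meant to behave (AlwaysSame is the test dtype added further down in this diff; the expected results are read off the code above, not verified):

    from pandas.core.dtypes.common import (is_numpy_dtype_with_metadata,
                                           pandas_dtype)
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    dt = AlwaysSame(42)

    is_numpy_dtype_with_metadata(dt)          # expected: True
    is_numpy_dtype_with_metadata(None)        # expected: False (explicit guard)
    is_numpy_dtype_with_metadata('float64')   # expected: False (plain numpy dtype)

    # pandas_dtype() now passes NumpyDtypeWithMetadata instances through unchanged.
    pandas_dtype(dt) is dt                    # expected: True
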
187 changes: 187 additions & 0 deletions pandas/core/dtypes/costum_dtypes.py
@@ -0,0 +1,187 @@
"""
This module contains an interface that external libraries can use to define
their own dtypes that are compatible with pandas (but NOT with numpy).
"""

import numpy as np

from .dtypes import ExtensionDtype

class NumpyDtypeWithMetadataType(type): # Do we need this?
"""
The type of NumpyDtypeWithMetadata
"""
pass

class NumpyDtypeWithMetadata(ExtensionDtype):

"""
An ExtensionDtype for data that can be stored in a numpy dtype,
but where the dtype itself carries metadata and may redefine
arithmetic operations.

To properly implement caching behaviour,
you might have to implement a __new__ method.
"""
type = NumpyDtypeWithMetadataType
# What attributes should be stored during pickling?
# If this is provided, you usually do not have to
# override __getstate__
_metadata = []

def base(self):
"""
The numpy-compatible dtype in which the actual data is stored.

Example: np.dtype('f8')
"""
raise NotImplementedError("'base' must be implemented by subclass "
"(probably as class-level variable)")


@classmethod
def construct_from_string(cls, string):
""" attempt to construct this type from a string, raise a TypeError if
it's not possible """
raise NotImplementedError("'construct_from_string' must be implemented by subclass.")

def operation_typecompatible(self, operation_name, other_dtype, is_left=True):
"""
Is the desired operation possible between this dtype and other_dtype?

Parameters
----------
operation_name: The name of the desired operation, e.g. '__eq__'
other_dtype: The dtype of the other operand
is_left: Whether this dtype is on the left-hand side of the binary operation.

Returns
-------
Boolean or NotImplemented
"""
return False

def get_operation_wrapper(self):
"""
This is called by `pandas.ops._Op.get_op` to get an object
responsible for type-coercion (which should have the same interface as _Op)

Returns
-------
A class implementing the same interface as pandas.ops._Op, or None.
Return None if the default _Op class should be used.
"""
return None

def to_dtype(self, data):
"""
Convert arbitrary data to this dtype.

Override this, if you need any additional conversions.

Parameters
----------
data: array-like

Returns
-------
A numpy array with the same dtype as self.base.
"""
return np.asarray(data, dtype=self.base)

class AlwaysSame(NumpyDtypeWithMetadata):
"""
This is an example of how a library could implement a
subclass of NumpyDtypeWithMetadata; except for testing,
it is not useful for anything else.
"""
_metadata = ["_target_value", "base"]
def __new__(cls, target_value=None):
if target_value is None:
#We are unpickling
return object.__new__(cls)
try:
return cls._cache[target_value]
except KeyError:
d = object.__new__(cls)
d._target_value = target_value
# In this case, we set the base numpy dtype upon object construction.
d.base = np.dtype(type(target_value))  # raises if target_value is not a simple number
cls._cache[target_value] = d
return d

def __hash__(self):
return hash(self._target_value)

def __unicode__(self):
return "always[{}]".format(repr(self._target_value))

def __setstate__(self, state):
try:
self._target_value = state["_target_value"]
except KeyError:
print("state", state)
raise
self.base = np.dtype(type(self._target_value))

def __eq__(self, other):
if not isinstance(other, AlwaysSame):
return NotImplemented
return self._target_value == other._target_value

def to_dtype(self, data):
"""
Fill the array with the target value.
"""
# Since performance is irrelevant for this Test-dtype, we
# do not try to modify data in-place
data = np.ones(np.shape(data), dtype=self.base)
data = data*self._target_value
return data

def get_operation_wrapper(self):
"""
This is called by `pandas.ops._Op.get_op` to get an object
responsible for type-coercion (which should have the same interface as _Op)

Returns
-------
A class implementing the same interface as pandas.ops._Op, or None.
Return None if the default _Op class should be used.
"""
class AlwaysSameOp():
dtype = None
fill_value = self._target_value
def __init__(self, left, right, name, na_op):
self.left = left
self.right = right

self.name = name
self.na_op = na_op

# Here, a conversion of left and right to lvalues and rvalues could take place.
# lvalues must be a type that has the desired operator defined.
self.lvalues = left
self.rvalues = right
return None
def wrap_results(self, results):
print("l,r ", type(self.left), type(self.right))
# Comparison operators return dtype bool.
if self.name in ["__eq__", "__lt__", "__gt__", "__ge__", "__le__", "__ne__"]:
return results
# All other operators return dtype AlwaysSame
if isinstance(self.left.dtype, AlwaysSame):
target_dtype = self.left.dtype
else:
assert isinstance(self.right.dtype, AlwaysSame)
target_dtype = self.right.dtype
return target_dtype.to_dtype(results)
return AlwaysSameOp

def operation_typecompatible(self, name, other, is_left=True):
if isinstance(other, AlwaysSame):
if other._target_value != self._target_value:
if type(other) != AlwaysSame:
return NotImplemented  # defer: let the other (subclass) dtype decide
return False
return True
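
To illustrate the intended extension point, a minimal hypothetical third-party dtype built on this interface could look roughly like the following; the MetersDtype name and its behaviour are invented for illustration and are not part of this PR:

    import numpy as np
    from pandas.core.dtypes.costum_dtypes import NumpyDtypeWithMetadata

    class MetersDtype(NumpyDtypeWithMetadata):
        """Hypothetical unit-carrying dtype: float64 values labelled as meters."""
        base = np.dtype('f8')            # the numpy dtype used for storage
        _metadata = []                   # no extra state to pickle

        def __hash__(self):
            return hash('meters[f8]')

        def __eq__(self, other):
            return isinstance(other, MetersDtype)

        def __unicode__(self):
            return 'meters[f8]'

        @classmethod
        def construct_from_string(cls, string):
            if string == 'meters[f8]':
                return cls()
            raise TypeError("cannot construct a MetersDtype from '{}'".format(string))

        def operation_typecompatible(self, operation_name, other_dtype, is_left=True):
            # only allow operations against other meter-valued data
            return isinstance(other_dtype, MetersDtype)
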
28 changes: 27 additions & 1 deletion pandas/core/ops.py
@@ -29,6 +29,7 @@
is_datetimelike_v_numeric,
is_integer_dtype, is_categorical_dtype,
is_object_dtype, is_timedelta64_dtype,
is_numpy_dtype_with_metadata,
is_datetime64_dtype, is_datetime64tz_dtype,
is_bool_dtype, is_datetimetz,
is_list_like,
@@ -330,12 +331,37 @@ def get_op(cls, left, right, name, na_op):
is_datetime_lhs = (is_datetime64_dtype(left) or
is_datetime64tz_dtype(left))

if is_numpy_dtype_with_metadata(left):
left_compatible = left.dtype.operation_typecompatible(name, right.dtype, is_left=True)
if left_compatible is not NotImplemented:
if left_compatible:
op_class = left.dtype.get_operation_wrapper()
if op_class is not None:
return op_class(left, right, name, na_op)
else:
return _Op(left, right, name, na_op)
else:
raise TypeError("Operation {} not permitted between "
"dtype {} and type {}".format(name, left.dtype,
right.dtype))
# left is either not a NumpyDtypeWithMetadata or did not handle the operation.
if is_numpy_dtype_with_metadata(right):
if right.dtype.operation_typecompatible(name, left.dtype, is_left=False):
op_class = right.dtype.get_operation_wrapper()
if op_class is not None:
return op_class(left, right, name, na_op)
else:
return _Op(left, right, name, na_op)
else:
raise TypeError("Operation {} not permitted between "
"dtype {} and dtype {}".format(name, left.dtype,
right.dtype))
# No NumpyDtypeWithMetadata involved.
if not (is_datetime_lhs or is_timedelta_lhs):
return _Op(left, right, name, na_op)
else:
return _TimeOp(left, right, name, na_op)


class _TimeOp(_Op):
"""
Wrapper around Series datetime/time/timedelta arithmetic operations.
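
A rough sketch of how this dispatch is intended to behave with the AlwaysSame test dtype (this relies on the Series changes further down in the diff; the expected results are inferred from the code, not verified):

    import pandas as pd
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    s = pd.Series([1, 2, 3], dtype=AlwaysSame(7))
    t = pd.Series([1, 2, 3], dtype=AlwaysSame(8))

    # Compatible operands: get_op() returns AlwaysSameOp, whose wrap_results()
    # converts the result back to the AlwaysSame dtype.
    s + s     # expected: every element is 7 again

    # Comparison operators keep the plain boolean result.
    s == s    # expected: all True, dtype bool

    # Operands whose dtypes are declared incompatible raise in get_op().
    s + t     # expected: TypeError
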
27 changes: 21 additions & 6 deletions pandas/core/series.py
@@ -23,6 +23,7 @@
is_datetimelike,
is_datetime64tz_dtype,
is_timedelta64_dtype,
is_numpy_dtype_with_metadata,
is_list_like,
is_hashable,
is_iterator,
@@ -150,6 +151,7 @@ class Series(base.IndexOpsMixin, strings.StringAccessorMixin,

def __init__(self, data=None, index=None, dtype=None, name=None,
copy=False, fastpath=False):
print("__init__: data = {}, dtype = {}".format(repr(data), repr(dtype)))

# we are called internally, so short-circuit
if fastpath:
@@ -262,7 +264,10 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
data = SingleBlockManager(data, index, fastpath=True)

generic.NDFrame.__init__(self, data, fastpath=True)

if is_numpy_dtype_with_metadata(dtype):
self._extension_dtype = dtype
else:
self._extension_dtype = None
self.name = name
self._set_axis(0, index, fastpath=True)

@@ -343,7 +348,10 @@ def name(self, value):
@property
def dtype(self):
""" return the dtype object of the underlying data """
return self._data.dtype
if self._extension_dtype is None:
return self._data.dtype
else:
return self._extension_dtype

@property
def dtypes(self):
@@ -2990,7 +2998,6 @@ def _sanitize_array(data, index, dtype=None, copy=False,

if dtype is not None:
dtype = pandas_dtype(dtype)

if isinstance(data, ma.MaskedArray):
mask = ma.getmaskarray(data)
if mask.any():
@@ -3005,11 +3012,13 @@ def _try_cast(arr, take_fast_path):
if take_fast_path:
if maybe_castable(arr) and not copy and dtype is None:
return arr

try:
subarr = maybe_cast_to_datetime(arr, dtype)
if not is_extension_type(subarr):
if not is_extension_type(subarr) and not is_extension_type(dtype):
subarr = np.array(subarr, dtype=dtype, copy=copy)
else:
if is_numpy_dtype_with_metadata(dtype):
subarr = dtype.to_dtype(subarr)
except (ValueError, TypeError):
if is_categorical_dtype(dtype):
subarr = Categorical(arr)
@@ -3056,6 +3065,7 @@ def _try_cast(arr, take_fast_path):
if dtype is not None:
try:
subarr = _try_cast(data, False)

except Exception:
if raise_cast_failure: # pragma: no cover
raise
@@ -3077,16 +3087,21 @@ def create_from_value(value, index, dtype):
subarr = DatetimeIndex([value] * len(index), dtype=dtype)
elif is_categorical_dtype(dtype):
subarr = Categorical([value] * len(index))
elif is_numpy_dtype_with_metadata(dtype):
subarr = np.empty(len(index), dtype=dtype.base)
subarr.fill(value)
subarr = dtype.to_dtype(subarr)
else:
if not isinstance(dtype, (np.dtype, type(np.dtype))):
dtype = dtype.dtype
subarr = np.empty(len(index), dtype=dtype)
subarr.fill(value)

return subarr

# scalar like, GH
print(type(subarr), dir(subarr))
if getattr(subarr, 'ndim', 0) == 0:
print("subarr", type(subarr), subarr)
if isinstance(data, list): # pragma: no cover
subarr = np.array(data, dtype=object)
elif index is not None:
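
Taken together, the Series changes are meant to make constructions like the following work (a sketch of the intended behaviour of this prototype, not a tested example):

    import pandas as pd
    from pandas.core.dtypes.costum_dtypes import AlwaysSame

    # _try_cast()/create_from_value() route the data through AlwaysSame.to_dtype(),
    # so every element is replaced by the target value.
    s = pd.Series([1, 2, 3], dtype=AlwaysSame(5))
    list(s)          # expected: [5, 5, 5]

    # The extension dtype is remembered on the Series via _extension_dtype,
    # while the underlying block keeps plain numpy data.
    s.dtype          # expected: the AlwaysSame(5) instance, i.e. "always[5]"
    s.values.dtype   # expected: AlwaysSame(5).base (a plain numpy integer dtype)
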
2 changes: 2 additions & 0 deletions pandas/core/tools/datetimes.py
@@ -336,6 +336,8 @@ def to_datetime(arg, errors='raise', dayfirst=False, yearfirst=False,
2 1960-01-04

"""

print("to datetime. Box is {}".format(box))
from pandas.core.indexes.datetimes import DatetimeIndex

tz = 'utc' if utc else None