pandas-dev · jbrockmendel · Nov 18, 2020 · Nov 21, 2020 · Nov 21, 2020 · Nov 21, 2020
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -694,8 +694,8 @@ def float_frame():
 # ----------------------------------------------------------------
 @pytest.fixture(
     params=[
-        (Interval(left=0, right=5), IntervalDtype("int64")),
-        (Interval(left=0.1, right=0.5), IntervalDtype("float64")),
+        (Interval(left=0, right=5), IntervalDtype("int64", "right")),
+        (Interval(left=0.1, right=0.5), IntervalDtype("float64", "right")),
         (Period("2012-01", freq="M"), "period[M]"),
         (Period("2012-02-01", freq="D"), "period[D]"),
         (

diff --git a/pandas/core/arrays/_arrow_utils.py b/pandas/core/arrays/_arrow_utils.py
@@ -127,7 +127,7 @@ def __hash__(self):
         def to_pandas_dtype(self):
             import pandas as pd
 
-            return pd.IntervalDtype(self.subtype.to_pandas_dtype())
+            return pd.IntervalDtype(self.subtype.to_pandas_dtype(), self.closed)
 
     # register the type with a dummy instance
     _interval_type = ArrowIntervalType(pyarrow.int64(), "left")

diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -147,7 +147,7 @@
     >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
     <IntervalArray>
     [(0, 1], (1, 5]]
-    Length: 2, closed: right, dtype: interval[int64]
+    Length: 2, closed: right, dtype: interval[int64, right]
 
     It may also be constructed using one of the constructor
     methods: :meth:`IntervalArray.from_arrays`,
@@ -222,10 +222,12 @@ def __new__(
     def _simple_new(cls, data, closed="right"):
         result = IntervalMixin.__new__(cls)
 
+        dtype = IntervalDtype(data.dtype, closed=closed)
+        result._dtype = dtype
+
         result._combined = data
         result._left = data[:, 0]
         result._right = data[:, 1]
-        result._closed = closed
         return result
 
     @classmethod
@@ -283,7 +285,7 @@ def _from_factorized(cls, values, original):
         >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
-        Length: 3, closed: right, dtype: interval[int64]
+        Length: 3, closed: right, dtype: interval[int64, right]
         """
             ),
         )
@@ -352,7 +354,7 @@ def from_breaks(cls, breaks, closed="right", copy=False, dtype=None):
         >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
-        Length: 3, closed: right, dtype: interval[int64]
+        Length: 3, closed: right, dtype: interval[int64, right]
         """
             ),
         )
@@ -415,7 +417,7 @@ def from_arrays(cls, left, right, closed="right", copy=False, dtype=None):
         >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
         <IntervalArray>
         [(0, 1], (1, 2]]
-        Length: 2, closed: right, dtype: interval[int64]
+        Length: 2, closed: right, dtype: interval[int64, right]
         """
             ),
         )
@@ -480,7 +482,7 @@ def _validate(self):
 
     @property
     def dtype(self):
-        return IntervalDtype(self.left.dtype)
+        return self._dtype
 
     @property
     def nbytes(self) -> int:
@@ -1106,7 +1108,7 @@ def mid(self):
         >>> intervals
         <IntervalArray>
         [(0, 1], (1, 3], (2, 4]]
-        Length: 3, closed: right, dtype: interval[int64]
+        Length: 3, closed: right, dtype: interval[int64, right]
         """
             ),
         )
@@ -1135,7 +1137,7 @@ def closed(self):
         Whether the intervals are closed on the left-side, right-side, both or
         neither.
         """
-        return self._closed
+        return self.dtype.closed
 
     _interval_shared_docs["set_closed"] = textwrap.dedent(
         """
@@ -1170,11 +1172,11 @@ def closed(self):
         >>> index
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
-        Length: 3, closed: right, dtype: interval[int64]
+        Length: 3, closed: right, dtype: interval[int64, right]
         >>> index.set_closed('both')
         <IntervalArray>
         [[0, 1], [1, 2], [2, 3]]
-        Length: 3, closed: both, dtype: interval[int64]
+        Length: 3, closed: both, dtype: interval[int64, both]
         """
             ),
         )
@@ -1230,7 +1232,7 @@ def __array__(self, dtype=None) -> np.ndarray:
         left = self._left
         right = self._right
         mask = self.isna()
-        closed = self._closed
+        closed = self.closed
 
         result = np.empty(len(left), dtype=object)
         for i in range(len(left)):
@@ -1369,7 +1371,7 @@ def repeat(self, repeats, axis=None):
         >>> intervals
         <IntervalArray>
         [(0, 1], (1, 3], (2, 4]]
-        Length: 3, closed: right, dtype: interval[int64]
+        Length: 3, closed: right, dtype: interval[int64, right]
         """
             ),
         )

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -815,7 +815,7 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
             dtype = PeriodDtype(freq=val.freq)
         elif lib.is_interval(val):
             subtype = infer_dtype_from_scalar(val.left, pandas_dtype=True)[0]
-            dtype = IntervalDtype(subtype=subtype)
+            dtype = IntervalDtype(subtype=subtype, closed=val.closed)
 
     return dtype, val
 

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
@@ -998,8 +998,8 @@ class IntervalDtype(PandasExtensionDtype):
 
     Examples
     --------
-    >>> pd.IntervalDtype(subtype='int64')
-    interval[int64]
+    >>> pd.IntervalDtype(subtype='int64', closed='both')
+    interval[int64, both]
     """
 
     name = "interval"
@@ -1008,27 +1008,48 @@ class IntervalDtype(PandasExtensionDtype):
     base = np.dtype("O")
     num = 103
-    _metadata = ("subtype",)
-    _match = re.compile(r"(I|i)nterval\[(?P<subtype>.+)\]")
+    # NOTE(review): "closed" is listed so the superclass includes it in the
+    # pickled state that __setstate__ reads back -- confirm against the base
+    _metadata = ("subtype", "closed")
+    # the subtype may itself contain a comma, e.g. datetime64[ns, US/Eastern]
+    _match = re.compile(
+        r"(I|i)nterval\[(?P<subtype>[^,]+(\[.+\])?)"
+        r"(, (?P<closed>(right|left|both|neither)))?\]"
+    )
     _cache: Dict[str_type, PandasExtensionDtype] = {}
 
-    def __new__(cls, subtype=None):
+    def __new__(cls, subtype=None, closed: Optional[str_type] = None):
         from pandas.core.dtypes.common import is_string_dtype, pandas_dtype
 
         if isinstance(subtype, IntervalDtype):
+            if closed is not None and closed != subtype.closed:
+                raise ValueError(
+                    "dtype.closed and 'closed' do not match. "
+                    "Try IntervalDtype(dtype.subtype, closed) instead."
+                )
             return subtype
         elif subtype is None:
             # we are called as an empty constructor
             # generally for pickle compat
             u = object.__new__(cls)
             u._subtype = None
+            u._closed = closed
             return u
         elif isinstance(subtype, str) and subtype.lower() == "interval":
             subtype = None
         else:
             if isinstance(subtype, str):
                 m = cls._match.search(subtype)
                 if m is not None:
-                    subtype = m.group("subtype")
+                    gd = m.groupdict()
+                    subtype = gd["subtype"]
+                    # groupdict() maps an unmatched group to None, so the
+                    # keyword is only overridden when the string gives "closed"
+                    if gd["closed"] is not None:
+                        if closed is not None and closed != gd["closed"]:
+                            raise ValueError(
+                                "'closed' keyword does not match value "
+                                "specified in dtype string"
+                            )
+                        closed = gd["closed"]
 
             try:
                 subtype = pandas_dtype(subtype)
@@ -1043,14 +1053,20 @@ def __new__(cls, subtype=None):
             )
             raise TypeError(msg)
 
+        key = str(subtype) + str(closed)
         try:
-            return cls._cache[str(subtype)]
+            return cls._cache[key]
         except KeyError:
             u = object.__new__(cls)
             u._subtype = subtype
-            cls._cache[str(subtype)] = u
+            u._closed = closed
+            cls._cache[key] = u
             return u
 
+    @property
+    def closed(self):
+        return self._closed
+
     @property
     def subtype(self):
         """
@@ -1100,7 +1116,10 @@ def type(self):
     def __str__(self) -> str_type:
         if self.subtype is None:
             return "interval"
-        return f"interval[{self.subtype}]"
+        if self.closed is None:
+            # closed was never specified; do not render "interval[..., None]"
+            return f"interval[{self.subtype}]"
+        return f"interval[{self.subtype}, {self.closed}]"
 
     def __hash__(self) -> int:
         # make myself hashable
@@ -1114,6 +1130,8 @@ def __eq__(self, other: Any) -> bool:
         elif self.subtype is None or other.subtype is None:
             # None should match any subtype
             return True
+        elif self.closed != other.closed:
+            return False
         else:
             from pandas.core.dtypes.common import is_dtype_equal
 
@@ -1124,6 +1142,8 @@ def __setstate__(self, state):
         # PandasExtensionDtype superclass and uses the public properties to
         # pickle -> need to set the settable private ones here (see GH26067)
         self._subtype = state["subtype"]
+        # backward-compat older pickles won't have "closed" key
+        self._closed = state.pop("closed", None)
 
     @classmethod
     def is_dtype(cls, dtype: object) -> bool:

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -163,7 +163,7 @@ def wrapped(self, other, sort=False):
     >>> pd.interval_range(start=0, end=5)
     IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                   closed='right',
-                  dtype='interval[int64]')
+                  dtype='interval[int64, right]')
 
     It may also be constructed using one of the constructor
     methods: :meth:`IntervalIndex.from_arrays`,
@@ -248,7 +248,7 @@ def _simple_new(cls, array: IntervalArray, name: Label = None):
         >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
         IntervalIndex([(0, 1], (1, 2], (2, 3]],
                       closed='right',
-                      dtype='interval[int64]')
+                      dtype='interval[int64, right]')
         """
             ),
         )
@@ -274,7 +274,7 @@ def from_breaks(
         >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
         IntervalIndex([(0, 1], (1, 2], (2, 3]],
                       closed='right',
-                      dtype='interval[int64]')
+                      dtype='interval[int64, right]')
         """
             ),
         )
@@ -306,7 +306,7 @@ def from_arrays(
         >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
         IntervalIndex([(0, 1], (1, 2]],
                        closed='right',
-                       dtype='interval[int64]')
+                       dtype='interval[int64, right]')
         """
             ),
         )
@@ -448,7 +448,7 @@ def is_overlapping(self) -> bool:
         >>> index
         IntervalIndex([(0, 2], (1, 3], (4, 5]],
               closed='right',
-              dtype='interval[int64]')
+              dtype='interval[int64, right]')
         >>> index.is_overlapping
         True
 
@@ -458,7 +458,7 @@ def is_overlapping(self) -> bool:
         >>> index
         IntervalIndex([[0, 1], [1, 2], [2, 3]],
               closed='both',
-              dtype='interval[int64]')
+              dtype='interval[int64, both]')
         >>> index.is_overlapping
         True
 
@@ -468,7 +468,7 @@ def is_overlapping(self) -> bool:
         >>> index
         IntervalIndex([[0, 1), [1, 2), [2, 3)],
               closed='left',
-              dtype='interval[int64]')
+              dtype='interval[int64, left]')
         >>> index.is_overlapping
         False
         """
@@ -1134,23 +1134,23 @@ def interval_range(
 
     >>> pd.interval_range(start=0, end=5)
     IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
-                  closed='right', dtype='interval[int64]')
+                  closed='right', dtype='interval[int64, right]')
 
     Additionally, datetime-like input is also supported.
 
     >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
     ...                   end=pd.Timestamp('2017-01-04'))
     IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                    (2017-01-03, 2017-01-04]],
-                  closed='right', dtype='interval[datetime64[ns]]')
+                  closed='right', dtype='interval[datetime64[ns], right]')
 
     The ``freq`` parameter specifies the frequency between the left and right.
     endpoints of the individual intervals within the ``IntervalIndex``.  For
     numeric ``start`` and ``end``, the frequency must also be numeric.
 
     >>> pd.interval_range(start=0, periods=4, freq=1.5)
     IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
-                  closed='right', dtype='interval[float64]')
+                  closed='right', dtype='interval[float64, right]')
 
     Similarly, for datetime-like ``start`` and ``end``, the frequency must be
     convertible to a DateOffset.
@@ -1159,22 +1159,22 @@ def interval_range(
     ...                   periods=3, freq='MS')
     IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                    (2017-03-01, 2017-04-01]],
-                  closed='right', dtype='interval[datetime64[ns]]')
+                  closed='right', dtype='interval[datetime64[ns], right]')
 
     Specify ``start``, ``end``, and ``periods``; the frequency is generated
     automatically (linearly spaced).
 
     >>> pd.interval_range(start=0, end=6, periods=4)
     IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
               closed='right',
-              dtype='interval[float64]')
+              dtype='interval[float64, right]')
 
     The ``closed`` parameter specifies which endpoints of the individual
     intervals within the ``IntervalIndex`` are closed.
 
     >>> pd.interval_range(end=5, periods=4, closed='both')
     IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
-                  closed='both', dtype='interval[int64]')
+                  closed='both', dtype='interval[int64, both]')
     """
     start = maybe_box_datetimelike(start)
     end = maybe_box_datetimelike(end)

diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py
@@ -135,12 +135,12 @@ def cut(
     >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3)
     ... # doctest: +ELLIPSIS
     [(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
-    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
+    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
 
     >>> pd.cut(np.array([1, 7, 5, 4, 6, 3]), 3, retbins=True)
     ... # doctest: +ELLIPSIS
     ([(0.994, 3.0], (5.0, 7.0], (3.0, 5.0], (3.0, 5.0], (5.0, 7.0], ...
-    Categories (3, interval[float64]): [(0.994, 3.0] < (3.0, 5.0] ...
+    Categories (3, interval[float64, right]): [(0.994, 3.0] < (3.0, 5.0] ...
     array([0.994, 3.   , 5.   , 7.   ]))
 
     Discovers the same bins, but assign them specific labels. Notice that
@@ -176,7 +176,7 @@ def cut(
     d     (7.333, 10.0]
     e     (7.333, 10.0]
     dtype: category
-    Categories (3, interval[float64]): [(1.992, 4.667] < (4.667, ...
+    Categories (3, interval[float64, right]): [(1.992, 4.667] < (4.667, ...
 
     Passing a Series as an input returns a Series with mapping value.
     It is used to map numerically to intervals based on bins.
@@ -214,7 +214,7 @@ def cut(
     >>> bins = pd.IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)])
     >>> pd.cut([0, 0.5, 1.5, 2.5, 4.5], bins)
     [NaN, (0.0, 1.0], NaN, (2.0, 3.0], (4.0, 5.0]]
-    Categories (3, interval[int64]): [(0, 1] < (2, 3] < (4, 5]]
+    Categories (3, interval[int64, right]): [(0, 1] < (2, 3] < (4, 5]]
     """
     # NOTE: this binning code is changed a bit from histogram for var(x) == 0
 
@@ -336,7 +336,7 @@ def qcut(
     >>> pd.qcut(range(5), 4)
     ... # doctest: +ELLIPSIS
     [(-0.001, 1.0], (-0.001, 1.0], (1.0, 2.0], (2.0, 3.0], (3.0, 4.0]]
-    Categories (4, interval[float64]): [(-0.001, 1.0] < (1.0, 2.0] ...
+    Categories (4, interval[float64, right]): [(-0.001, 1.0] < (1.0, 2.0] ...
 
     >>> pd.qcut(range(5), 3, labels=["good", "medium", "bad"])
     ... # doctest: +SKIP