pandas-dev · erikmannerfelt · May 13, 2021 · May 13, 2021 · May 22, 2021 · May 22, 2021
diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py
@@ -5,6 +5,7 @@
     le,
     lt,
 )
+import re
 import textwrap
 from typing import (
     Sequence,
@@ -574,6 +575,129 @@ def from_tuples(
 
         return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
 
+    _interval_shared_docs["from_strings"] = textwrap.dedent(
+        """
+        Construct an %(klass)s from string representations of intervals.
+
+        Parameters
+        ----------
+        data : array-like (1-dimensional)
+            Strings to parse, each formatted like ``"(0, 1]"``: the brackets
+            must match `closed` and the bounds are separated by ``", "``.
+        closed : {'left', 'right', 'both', 'neither'}, default 'right'
+            Whether the intervals are closed on the left-side, right-side, both
+            or neither.
+        dtype : dtype, optional
+            If None, dtype will be inferred.
+
+        Returns
+        -------
+        %(klass)s
+
+        Raises
+        ------
+        ValueError
+            If a string lacks the brackets implied by `closed` or the ``", "``
+            delimiter, or its bounds are not numeric, Timestamp or Timedelta.
+
+        See Also
+        --------
+        interval_range : Function to create a fixed frequency IntervalIndex.
+        %(klass)s.from_breaks : Construct an %(klass)s from an array of
+            splits.
+        %(klass)s.from_tuples : Construct an %(klass)s from an
+            array-like of tuples.
+
+        %(examples)s\
+        """
+    )
+
+    @classmethod
+    @Appender(
+        _interval_shared_docs["from_strings"]
+        % {
+            "klass": "IntervalArray",
+            "examples": textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.arrays.IntervalArray.from_strings(["(0, 1]", "(1, 2]"])
+        <IntervalArray>
+        [(0, 1], (1, 2]]
+        Length: 2, dtype: interval[int64, right]
+        """
+            ),
+        }
+    )
+    def from_strings(
+        cls: type[IntervalArrayT],
+        data: Sequence[str],
+        closed: str = "right",
+        dtype: Dtype | None = None,
+    ) -> IntervalArrayT:
+        # Imported inside the method, presumably to avoid a circular import
+        # at module load time -- TODO confirm.
+        from pandas import (
+            to_datetime,
+            to_numeric,
+            to_timedelta,
+        )
+
+        # The bracket pair implied by `closed` defines the pattern to look for.
+        # NOTE: an unrecognised `closed` raises KeyError from this lookup.
+        brackets = {
+            "right": ("(", "]"),
+            "left": ("[", ")"),
+            "both": ("[", "]"),
+            "neither": ("(", ")"),
+        }
+        opening, closing = brackets[closed]
+        # Capture the text between the brackets; a comma must be present.
+        # re.escape keeps the brackets literal inside the pattern.
+        pattern = re.compile(re.escape(opening) + "(.*,.*)" + re.escape(closing))
+
+        left, right = [], []
+        for string in data:
+            # fullmatch anchors both ends, so text outside the brackets is
+            # rejected here rather than leaking into the parsed bounds.
+            breaks_match = pattern.fullmatch(string)
+            if breaks_match is None:
+                raise ValueError(
+                    f"Could not find opening '{opening}' "
+                    f"and closing '{closing}' "
+                    f"brackets in string: '{string}'"
+                )
+
+            # Split the captured text into 'left' and 'right' on the first ", ".
+            breaks = breaks_match.group(1).split(", ", 1)
+            if len(breaks) != 2:
+                raise ValueError(
+                    f"Delimiter ', ' (comma + space) not found in string: {string}"
+                )
+
+            # Try the parsers in succession: numeric (int, float etc.), then
+            # datetime, then timedelta; the first one that succeeds wins.
+            for parser in (to_numeric, to_datetime, to_timedelta):
+                try:
+                    newleft, newright = parser(breaks, errors="raise")
+                except ValueError:
+                    continue
+                break
+            else:
+                # Raised outside any `except` so no misleading chained traceback.
+                raise ValueError(
+                    "Could not parse string as numeric, Timedelta "
+                    f"or Timestamp Interval: {string}"
+                )
+            left.append(newleft)
+            right.append(newright)
+
+        # If dtype was not an IntervalDtype, try to parse it as such.
+        if dtype is not None and not isinstance(dtype, IntervalDtype):
+            dtype = IntervalDtype(subtype=dtype, closed=closed)
+
+        return cls.from_arrays(left, right, closed=closed, copy=False, dtype=dtype)
+
     def _validate(self):
         """
         Verify that the IntervalArray is valid.

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -9,6 +9,7 @@
 from typing import (
     Any,
     Hashable,
+    Sequence,
 )
 
 import numpy as np
@@ -317,6 +318,34 @@ def from_tuples(
             arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
         return cls._simple_new(arr, name=name)
 
+    @classmethod
+    @Appender(
+        _interval_shared_docs["from_strings"]
+        % {
+            "klass": "IntervalIndex",
+            "examples": textwrap.dedent(
+                """\
+        Examples
+        --------
+        >>> pd.IntervalIndex.from_strings(["(0, 1]", "(1, 2]"])
+        IntervalIndex([(0, 1], (1, 2]],
+                       dtype='interval[int64, right]')
+        """
+            ),
+        }
+    )
+    def from_strings(
+        cls,
+        data: Sequence[str],
+        closed: str = "right",
+        dtype: Dtype | None = None,
+        name: Hashable = None,
+    ) -> IntervalIndex:
+        # Delegate to IntervalArray; rewrite_exception presumably renames errors.
+        with rewrite_exception("IntervalArray", cls.__name__):
+            parsed = IntervalArray.from_strings(data, closed=closed, dtype=dtype)
+        return cls._simple_new(parsed, name=name)
+
     # --------------------------------------------------------------------
 
     @cache_readonly

diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py
@@ -927,6 +927,71 @@ def test_pickle_round_trip_closed(self, closed):
         tm.assert_index_equal(result, idx)
 
 
+@pytest.mark.parametrize("closed", ["left", "right", "both", "neither"])
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        (
+            "float64",
+            [0.0, 0.5, 1.0],
+            ["0.0, 0.5", "0.5, 1.0"],
+        ),
+        ("int64", [0, 5, 10], ["0, 5", "5, 10"]),
+        (
+            "datetime64[ns]",
+            [Timestamp(2015, 7, 1), Timestamp(2016, 8, 1), Timestamp(2018, 9, 1)],
+            ["2015-07-01, 2016-08-01", "2016-08-01, 2018-09-01"],
+        ),
+    ],
+)
+def test_from_strings(closed, test_case):
+    """
+    Check IntervalIndex.from_strings against IntervalIndex.from_breaks.
+
+    Each case supplies a subtype name, the expected breaks and the bare
+    "left, right" bodies; the brackets matching `closed` are added here.
+    """
+    # See https://github.com/pandas-dev/pandas/pull/41451
+    subtype, breaks, bodies = test_case
+
+    # Opening/closing bracket for each `closed` option.
+    opening, closing = {
+        "right": ("(", "]"),
+        "left": ("[", ")"),
+        "both": ("[", "]"),
+        "neither": ("(", ")"),
+    }[closed]
+    interval_strings = [f"{opening}{body}{closing}" for body in bodies]
+    expected = IntervalIndex.from_breaks(breaks, closed=closed)
+
+    # Without a dtype the subtype must be inferred from the strings.
+    inferred = IntervalIndex.from_strings(interval_strings, closed=closed)
+    tm.assert_index_equal(inferred, expected, exact=True)
+
+    # With an explicit subtype dtype the result must be the same index.
+    explicit = IntervalIndex.from_strings(
+        interval_strings, dtype=np.dtype(subtype), closed=closed
+    )
+    tm.assert_index_equal(explicit, expected, exact=True)
+
+
+@pytest.mark.parametrize(
+    "wrong_indices",
+    [
+        ("('hello', 'there']", r"Could not parse string as numeric"),
+        ("(0.1,0.1)", r"Could not find opening '\(' and closing ']'"),
+        ("(0.0,0.5]", r"Delimiter ', ' .* not found"),
+    ],
+)
+def test_from_strings_errors(wrong_indices):
+    """Check the ValueError message for each malformed interval string."""
+    # See https://github.com/pandas-dev/pandas/pull/41451
+    bad_string, message_pattern = wrong_indices
+
+    with pytest.raises(ValueError, match=message_pattern):
+        IntervalIndex.from_strings([bad_string])
+
+
 def test_dir():
     # GH#27571 dir(interval_index) should not raise
     index = IntervalIndex.from_arrays([0, 1], [1, 2])