From 6d8e9ab30bfc1e6e93f9fbd4de0b122966186e1f Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 29 Jul 2019 19:29:34 -0700 Subject: [PATCH 1/7] BUG: Fix dir(interval_index), closes #27571 --- doc/source/whatsnew/v0.25.1.rst | 2 +- pandas/core/strings.py | 7 +++++-- pandas/tests/indexes/interval/test_interval.py | 7 +++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index fa9ca98f9c8d8..e34f3c8b75701 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -78,7 +78,7 @@ Strings Interval ^^^^^^^^ - +- Bug in :class:`IntervalIndex` where `dir(obj)` would raise ``ValueError`` (:issue:`27571`) - - - diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 54882d039f135..b1cff09dbe669 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1953,8 +1953,11 @@ def _validate(data): values = getattr(data, "values", data) # Series / Index values = getattr(values, "categories", values) # categorical / normal - # missing values obfuscate type inference -> skip - inferred_dtype = lib.infer_dtype(values, skipna=True) + inferred_dtype = None + if isinstance(values, np.ndarray): + # exclude e.g. 
IntervalArray, which will cause infer_dtype to raise + # missing values obfuscate type inference -> skip + inferred_dtype = lib.infer_dtype(values, skipna=True) if inferred_dtype not in allowed_types: raise AttributeError("Can only use .str accessor with string values!") diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index c61af1ce70aed..c1a21e6a7f152 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -1095,3 +1095,10 @@ def test_is_all_dates(self): ) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates + + +def test_dir(): + # GH#27571 dir(interval_index) should not raise + index = IntervalIndex.from_arrays([0, 1], [1, 2]) + result = dir(index) + assert "str" not in result From 49e3247810ec9aa24947012f9b181c71ae4b6787 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Mon, 29 Jul 2019 20:33:45 -0700 Subject: [PATCH 2/7] fixup --- pandas/core/strings.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index b1cff09dbe669..93db73132a1c5 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -27,6 +27,7 @@ from pandas.core.algorithms import take_1d from pandas.core.base import NoNewAttributesMixin import pandas.core.common as com +from pandas.core.construction import extract_array _cpython_optimized_encoders = ( "utf-8", @@ -1950,14 +1951,14 @@ def _validate(data): # see _libs/lib.pyx for list of inferred types allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - values = getattr(data, "values", data) # Series / Index + values = extract_array(data) # unpack Series / Index values = getattr(values, "categories", values) # categorical / normal - inferred_dtype = None - if isinstance(values, np.ndarray): - # exclude e.g. 
IntervalArray, which will cause infer_dtype to raise - # missing values obfuscate type inference -> skip + try: + # GH#27571 make sure this doesn't raise too early inferred_dtype = lib.infer_dtype(values, skipna=True) + except (ValueError, TypeError): + inferred_dtype = None if inferred_dtype not in allowed_types: raise AttributeError("Can only use .str accessor with string values!") From 4a74ac054b08d68aa1b31c39625c5ffbe7fc0da4 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 09:14:05 -0700 Subject: [PATCH 3/7] infer_dtype for interval --- pandas/_libs/lib.pyx | 1 + pandas/core/strings.py | 4 ++-- pandas/tests/indexes/interval/test_construction.py | 8 ++++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index d430cb3d3913f..abaa1c639f048 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -925,6 +925,7 @@ _TYPE_MAP = { 'M': 'datetime64', 'timedelta64[ns]': 'timedelta64', 'm': 'timedelta64', + 'interval': 'interval', } # types only exist on certain platform diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 93db73132a1c5..a79a0281b8111 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1955,9 +1955,9 @@ def _validate(data): values = getattr(values, "categories", values) # categorical / normal try: - # GH#27571 make sure this doesn't raise too early inferred_dtype = lib.infer_dtype(values, skipna=True) - except (ValueError, TypeError): + except ValueError: + # GH#27571 mostly occurs with ExtensionArray inferred_dtype = None if inferred_dtype not in allowed_types: diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index e2abb4531525a..cebc42d954704 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs import lib + from pandas.core.dtypes.common 
import is_categorical_dtype from pandas.core.dtypes.dtypes import IntervalDtype @@ -450,3 +452,9 @@ def test_index_object_dtype(self): @pytest.mark.skip(reason="parent class test that is not applicable") def test_index_mixed_closed(self): pass + + +def test_infer_dtype_interval(): + idx = IntervalIndex.from_breaks(range(5), closed="both") + inferred = lib.infer_dtype(idx, skipna=False) + assert inferred == 'interval' From 126ccb8199dab4d0321e9080346571e3aa9f9d12 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 09:27:41 -0700 Subject: [PATCH 4/7] fix for PandasArray --- pandas/core/strings.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index a79a0281b8111..f52f98db9fc84 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -27,7 +27,6 @@ from pandas.core.algorithms import take_1d from pandas.core.base import NoNewAttributesMixin import pandas.core.common as com -from pandas.core.construction import extract_array _cpython_optimized_encoders = ( "utf-8", @@ -1951,7 +1950,7 @@ def _validate(data): # see _libs/lib.pyx for list of inferred types allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - values = extract_array(data) # unpack Series / Index + values = getattr(data, "values", data) # Series / Index values = getattr(values, "categories", values) # categorical / normal try: From 2ca72b68f732f9c127a522b52e1b165655efe8d9 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 09:30:20 -0700 Subject: [PATCH 5/7] move test --- pandas/tests/dtypes/test_inference.py | 11 +++++++++++ pandas/tests/indexes/interval/test_construction.py | 8 -------- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index ff48ae9b3c2e5..49c056a05fd8e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1153,6 +1153,17 @@ def 
test_categorical(self): result = lib.infer_dtype(Series(arr), skipna=True) assert result == "categorical" + def test_interval(self): + idx = pd.IntervalIndex.from_breaks(range(5), closed="both") + inferred = lib.infer_dtype(idx, skipna=False) + assert inferred == 'interval' + + inferred = lib.infer_dtype(idx._data, skipna=False) + assert inferred == 'interval' + + inferred = lib.infer_dtype(pd.Series(idx), skipna=False) + assert inferred == 'interval' + class TestNumberScalar: def test_is_number(self): diff --git a/pandas/tests/indexes/interval/test_construction.py b/pandas/tests/indexes/interval/test_construction.py index cebc42d954704..e2abb4531525a 100644 --- a/pandas/tests/indexes/interval/test_construction.py +++ b/pandas/tests/indexes/interval/test_construction.py @@ -3,8 +3,6 @@ import numpy as np import pytest -from pandas._libs import lib - from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.dtypes.dtypes import IntervalDtype @@ -452,9 +450,3 @@ def test_index_object_dtype(self): @pytest.mark.skip(reason="parent class test that is not applicable") def test_index_mixed_closed(self): pass - - -def test_infer_dtype_interval(): - idx = IntervalIndex.from_breaks(range(5), closed="both") - inferred = lib.infer_dtype(idx, skipna=False) - assert inferred == 'interval' From e438224fdbe586e33454344f8e5b194071fe4a01 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 09:59:05 -0700 Subject: [PATCH 6/7] blackify --- pandas/tests/dtypes/test_inference.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 49c056a05fd8e..2933dfca736be 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1156,13 +1156,13 @@ def test_categorical(self): def test_interval(self): idx = pd.IntervalIndex.from_breaks(range(5), closed="both") inferred = lib.infer_dtype(idx, skipna=False) - assert inferred == 
'interval' + assert inferred == "interval" inferred = lib.infer_dtype(idx._data, skipna=False) - assert inferred == 'interval' + assert inferred == "interval" inferred = lib.infer_dtype(pd.Series(idx), skipna=False) - assert inferred == 'interval' + assert inferred == "interval" class TestNumberScalar: From 01b1687b33f069dbfd1abe4881bef562dc82f3fc Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 30 Jul 2019 15:13:02 -0700 Subject: [PATCH 7/7] dummy commit to force CI