pandas-dev · mroeschke · Nov 3, 2022 · Oct 25, 2022 · Oct 28, 2022 · Oct 28, 2022
diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst
@@ -142,6 +142,7 @@ Other API changes
 - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
 - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
 - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
+- :func:`pandas.api.dtypes.is_string_dtype` now only returns ``True`` for array-likes with ``dtype=object`` when the elements are inferred to be strings (:issue:`15585`)
 - Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
 -
 
@@ -366,7 +367,7 @@ Conversion
 
 Strings
 ^^^^^^^
--
+- Bug in :func:`pandas.api.dtypes.is_string_dtype` that would not return ``True`` for :class:`StringDtype` (:issue:`15585`)
 -
 
 Interval

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -500,6 +500,9 @@ def is_string_dtype(arr_or_dtype) -> bool:
     """
     Check whether the provided array or dtype is of the string dtype.
 
+    If an array is passed with an object dtype, the elements must be
+    inferred as strings.
+
     Parameters
     ----------
     arr_or_dtype : array-like or dtype
@@ -518,21 +521,23 @@ def is_string_dtype(arr_or_dtype) -> bool:
     True
     >>> is_string_dtype(int)
     False
-    >>>
     >>> is_string_dtype(np.array(['a', 'b']))
     True
     >>> is_string_dtype(pd.Series([1, 2]))
     False
+    >>> is_string_dtype(pd.Series([1, 2], dtype=object))
+    False
     """
-    # TODO: gh-15585: consider making the checks stricter.
-    def condition(dtype) -> bool:
-        return dtype.kind in ("O", "S", "U") and not is_excluded_dtype(dtype)
+    if hasattr(arr_or_dtype, "dtype") and get_dtype(arr_or_dtype).kind == "O":
+        return is_all_strings(arr_or_dtype)
 
-    def is_excluded_dtype(dtype) -> bool:
-        """
-        These have kind = "O" but aren't string dtypes so need to be explicitly excluded
-        """
-        return isinstance(dtype, (PeriodDtype, IntervalDtype, CategoricalDtype))
+    def condition(dtype) -> bool:
+        if is_string_or_object_np_dtype(dtype):
+            return True
+        try:
+            return dtype == "string"
+        except TypeError:
+            return False
 
     return _is_dtype(arr_or_dtype, condition)
 

diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py
@@ -290,6 +290,15 @@ def test_is_string_dtype():
     assert com.is_string_dtype(pd.StringDtype())
 
 
+@pytest.mark.parametrize(
+    "data",
+    [[(0, 1), (1, 1)], pd.Categorical([1, 2, 3]), np.array([1, 2], dtype=object)],
+)
+def test_is_string_dtype_arraylike_with_object_elements_not_strings(data):
+    # GH 15585
+    assert not com.is_string_dtype(pd.Series(data))
+
+
 def test_is_string_dtype_nullable(nullable_string_dtype):
     assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype))
 

diff --git a/pandas/tests/extension/base/dtype.py b/pandas/tests/extension/base/dtype.py
@@ -45,10 +45,10 @@ def test_is_dtype_other_input(self, dtype):
         assert dtype.is_dtype([1, 2, 3]) is False
 
     def test_is_not_string_type(self, dtype):
-        return not is_string_dtype(dtype)
+        assert not is_string_dtype(dtype)
 
     def test_is_not_object_type(self, dtype):
-        return not is_object_dtype(dtype)
+        assert not is_object_dtype(dtype)
 
     def test_eq_with_str(self, dtype):
         assert dtype == dtype.name

diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
@@ -218,6 +218,13 @@ def test_check_dtype(self, data, request):
             )
         super().test_check_dtype(data)
 
+    def test_is_not_object_type(self, dtype, request):
+        if dtype.numpy_dtype == "object":
+            request.node.add_marker(
+                pytest.mark.xfail(reason="PandasDtype(object) should be object")
+            )
+        super().test_is_not_object_type(dtype)
+
 
 class TestGetitem(BaseNumPyTests, base.BaseGetitemTests):
     @skip_nested

diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
@@ -106,6 +106,10 @@ def test_eq_with_str(self, dtype):
         assert dtype == f"string[{dtype.storage}]"
         super().test_eq_with_str(dtype)
 
+    @pytest.mark.xfail(reason="StringDtype is a string dtype")
+    def test_is_not_string_type(self, dtype):
+        super().test_is_not_string_type(dtype)
+
 
 class TestInterface(base.BaseInterfaceTests):
     def test_view(self, data, request):