Skip to content

REF/API: Stricter extension checking. #22031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,8 @@ Dtype introspection
api.types.is_datetime64_ns_dtype
api.types.is_datetime64tz_dtype
api.types.is_extension_type
api.types.is_extension_array_dtype
api.types.is_extension_dtype
api.types.is_float_dtype
api.types.is_int64_dtype
api.types.is_integer_dtype
Expand Down
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ ExtensionType Changes
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
-
- Added :func:`pandas.api.types.is_extension_array_dtype` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be more logical to call this is_extension_array instead of is_extension_array_dtype? (I suppose the reason is backwards compatibility for at least one of the two use cases?)
I ask because I find the 'dtype' part of the name quite confusing.



.. _whatsnew_0240.api.incompatibilities:

Expand Down
3 changes: 2 additions & 1 deletion pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
is_integer_dtype, is_complex_dtype,
is_object_dtype,
is_extension_array_dtype,
is_extension_dtype,
is_categorical_dtype, is_sparse,
is_period_dtype,
is_numeric_dtype, is_float_dtype,
Expand Down Expand Up @@ -153,7 +154,7 @@ def _reconstruct_data(values, dtype, original):
Index for extension types, otherwise ndarray casted to dtype
"""
from pandas import Index
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
values = dtype.construct_array_type()._from_sequence(values)
elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
values = Index(original)._shallow_copy(values, name=None)
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from .common import (pandas_dtype,
is_dtype_equal,
is_extension_dtype,
is_extension_array_dtype,
is_extension_type,

# categorical
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
is_complex, is_datetimetz, is_categorical_dtype,
is_datetimelike,
is_extension_type,
is_extension_array_dtype,
is_extension_dtype,
is_object_dtype,
is_datetime64tz_dtype, is_datetime64_dtype,
is_datetime64_ns_dtype,
Expand Down Expand Up @@ -294,7 +294,7 @@ def maybe_promote(dtype, fill_value=np.nan):
elif is_datetimetz(dtype):
if isna(fill_value):
fill_value = iNaT
elif is_extension_array_dtype(dtype) and isna(fill_value):
elif is_extension_dtype(dtype) and isna(fill_value):
fill_value = dtype.na_value
elif is_float(fill_value):
if issubclass(dtype.type, np.bool_):
Expand Down Expand Up @@ -332,7 +332,7 @@ def maybe_promote(dtype, fill_value=np.nan):
dtype = np.object_

# in case we have a string that looked like a number
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
pass
elif is_datetimetz(dtype):
pass
Expand Down Expand Up @@ -650,7 +650,7 @@ def astype_nansafe(arr, dtype, copy=True):
need to be very careful as the result shape could change! """

# dispatch on extension dtype if needed
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
return dtype.construct_array_type()._from_sequence(
arr, dtype=dtype, copy=copy)

Expand Down
57 changes: 44 additions & 13 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,38 +1688,69 @@ def is_extension_type(arr):
return False


def is_extension_array_dtype(arr_or_dtype):
"""Check if an object is a pandas extension array type.
def is_extension_array_dtype(arr):
"""Check if an array object is a pandas extension array type.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify (maybe in an extended summary paragraph or in the parameters section) that this is "array-like" (also Series and Index; in practice, anything that has a dtype property)?


Parameters
----------
arr_or_dtype : object
arr : object

Returns
-------
bool

Notes
-----
This checks whether an object implements the pandas extension
This checks whether an array object implements the pandas extension
array interface. In pandas, this includes:

* Categorical
* Interval

Third-party libraries may implement arrays or types satisfying
Third-party libraries may implement arrays satisfying
this interface as well.
"""
from pandas.core.arrays import ExtensionArray

if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)):
arr_or_dtype = arr_or_dtype._values
See Also
--------
is_extension_dtype : Similar method for dtypes.
"""
from pandas.core.dtypes.base import ExtensionDtype

try:
arr_or_dtype = pandas_dtype(arr_or_dtype)
except TypeError:
pass
dtype = getattr(arr, 'dtype')
except AttributeError:
return False

return isinstance(dtype, ExtensionDtype)

return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))

def is_extension_dtype(dtype):
"""Check if a dtype object is a pandas extension dtype.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One question is whether we want to accept strings here as well?
Or keep it strictly for actual dtype objects?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point; I suspect we should accept strings that have been registered.

I'll see what I can do.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(This was meant for the comment here.)
This is what pandas_dtype does.


Parameters
----------
arr : object
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

array -> dtype


Returns
-------
bool

Notes
-----
This checks whether a dtype object implements the pandas extension
array interface. In pandas, this includes:

* CategoricalDtype
* IntervalDtype

Third-party libraries may implement dtypes satisfying
this interface as well.

See Also
--------
is_extension_array_dtype : Similar method for arrays.
"""
return isinstance(dtype, ExtensionDtype)


def is_complex_dtype(arr_or_dtype):
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
is_datetime64tz_dtype,
is_timedelta64_dtype,
is_extension_array_dtype,
is_extension_dtype,
is_hashable,
is_iterator, is_list_like,
is_scalar)
Expand Down Expand Up @@ -275,7 +276,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
closed=closed)

# extension dtype
elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
elif is_extension_array_dtype(data) or is_extension_dtype(dtype):
data = np.asarray(data)
if not (dtype is None or is_object_dtype(dtype)):

Expand Down Expand Up @@ -1191,7 +1192,7 @@ def astype(self, dtype, copy=True):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)

elif is_extension_array_dtype(dtype):
elif is_extension_dtype(dtype):
return Index(np.asarray(self), dtype=dtype, copy=copy)

try:
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
is_float_dtype,
is_extension_type,
is_extension_array_dtype,
is_extension_dtype,
is_datetimelike,
is_datetime64tz_dtype,
is_timedelta64_dtype,
Expand Down Expand Up @@ -4088,7 +4089,7 @@ def _try_cast(arr, take_fast_path):
# that Categorical is the only array type for 'category'.
subarr = Categorical(arr, dtype.categories,
ordered=dtype.ordered)
elif is_extension_array_dtype(dtype):
elif is_extension_dtype(dtype):
# create an extension array from its dtype
array_type = dtype.construct_array_type()
subarr = array_type(subarr, dtype=dtype, copy=copy)
Expand Down
2 changes: 2 additions & 0 deletions pandas/tests/api/test_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ class TestTypes(Base):
'is_dict_like', 'is_iterator', 'is_file_like',
'is_list_like', 'is_hashable', 'is_array_like',
'is_named_tuple',
'is_extension_dtype',
'is_extension_array_dtype',
'pandas_dtype', 'union_categoricals', 'infer_dtype']
deprecated = ['is_any_int_dtype', 'is_floating_dtype', 'is_sequence']
dtypes = ['CategoricalDtype', 'DatetimeTZDtype',
Expand Down
10 changes: 7 additions & 3 deletions pandas/tests/extension/base/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import pandas as pd
from pandas.compat import StringIO
from pandas.core.dtypes.common import is_extension_array_dtype
from pandas.core.dtypes.dtypes import ExtensionDtype
from pandas.api.types import (
is_extension_array_dtype, is_extension_dtype
)
from pandas.api.extensions import ExtensionDtype

from .base import BaseExtensionTests

Expand Down Expand Up @@ -58,10 +60,12 @@ def test_dtype_name_in_info(self, data):

def test_is_extension_array_dtype(self, data):
assert is_extension_array_dtype(data)
assert is_extension_array_dtype(data.dtype)
assert is_extension_array_dtype(pd.Series(data))
assert isinstance(data.dtype, ExtensionDtype)

def test_is_extension_dtype(self, data):
assert is_extension_dtype(data.dtype)

def test_no_values_attribute(self, data):
# GH-20735: EA's with .values attribute give problems with internal
# code, disallowing this for now until solved
Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/extension/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
import pandas as pd
import pandas.util.testing as tm
from pandas.core.arrays import ExtensionArray
from pandas.core.dtypes.common import is_extension_array_dtype
from pandas.core.dtypes.common import (
is_extension_array_dtype, is_extension_dtype
)
from pandas.core.dtypes import dtypes


Expand Down Expand Up @@ -38,14 +40,20 @@ class TestExtensionArrayDtype(object):

@pytest.mark.parametrize('values', [
pd.Categorical([]),
pd.Categorical([]).dtype,
pd.Series(pd.Categorical([])),
DummyDtype(),

DummyArray(np.array([1, 2])),
])
def test_is_extension_array_dtype(self, values):
assert is_extension_array_dtype(values)

@pytest.mark.parametrize('dtype', [
pd.Categorical([]).dtype,
DummyDtype(),
])
def test_is_extension_dtype(self, dtype):
assert is_extension_dtype(dtype)

@pytest.mark.parametrize('values', [
np.array([]),
pd.Series(np.array([])),
Expand Down Expand Up @@ -91,4 +99,4 @@ def test_is_not_extension_array_dtype(dtype):
])
def test_is_extension_array_dtype(dtype):
assert isinstance(dtype, dtypes.ExtensionDtype)
assert is_extension_array_dtype(dtype)
assert is_extension_dtype(dtype)