Skip to content

REF/API: Stricter extension checking. #22031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 changes: 2 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,8 @@ Dtype introspection
api.types.is_datetime64_ns_dtype
api.types.is_datetime64tz_dtype
api.types.is_extension_type
api.types.is_extension_array
api.types.is_extension_dtype
api.types.is_float_dtype
api.types.is_int64_dtype
api.types.is_integer_dtype
Expand Down
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,8 @@ ExtensionType Changes
- Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`)
- :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`)
- :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`)
-
- Added :func:`pandas.api.types.is_extension_array` for testing whether an array is an ExtensionArray and :func:`pandas.api.types.is_extension_dtype` for testing whether a dtype is an ExtensionDtype (:issue:`22021`)


.. _whatsnew_0240.api.incompatibilities:

Expand Down
13 changes: 7 additions & 6 deletions pandas/core/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
is_unsigned_integer_dtype, is_signed_integer_dtype,
is_integer_dtype, is_complex_dtype,
is_object_dtype,
is_extension_array_dtype,
is_extension_array,
is_extension_dtype,
is_categorical_dtype, is_sparse,
is_period_dtype,
is_numeric_dtype, is_float_dtype,
Expand Down Expand Up @@ -153,7 +154,7 @@ def _reconstruct_data(values, dtype, original):
Index for extension types, otherwise ndarray cast to dtype
"""
from pandas import Index
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
values = dtype.construct_array_type()._from_sequence(values)
elif is_datetime64tz_dtype(dtype) or is_period_dtype(dtype):
values = Index(original)._shallow_copy(values, name=None)
Expand Down Expand Up @@ -357,7 +358,7 @@ def unique(values):

values = _ensure_arraylike(values)

if is_extension_array_dtype(values):
if is_extension_array(values):
# Dispatch to extension dtype's unique.
return values.unique()

Expand Down Expand Up @@ -610,7 +611,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
values = _ensure_arraylike(values)
original = values

if is_extension_array_dtype(values):
if is_extension_array(values):
values = getattr(values, '_values', values)
labels, uniques = values.factorize(na_sentinel=na_sentinel)
dtype = original.dtype
Expand Down Expand Up @@ -705,7 +706,7 @@ def value_counts(values, sort=True, ascending=False, normalize=False,

else:

if is_extension_array_dtype(values) or is_sparse(values):
if is_extension_array(values) or is_sparse(values):

# handle Categorical and sparse,
result = Series(values)._values.value_counts(dropna=dropna)
Expand Down Expand Up @@ -1591,7 +1592,7 @@ def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None,

# TODO(EA): Remove these if / elifs as datetimeTZ, interval, become EAs
# dispatch to internal type takes
if is_extension_array_dtype(arr):
if is_extension_array(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
elif is_datetimetz(arr):
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
ensure_int64,
ensure_object,
ensure_platform_int,
is_extension_array_dtype,
is_extension_array,
is_dtype_equal,
is_datetimelike,
is_datetime64_dtype,
Expand Down Expand Up @@ -1244,7 +1244,7 @@ def __array__(self, dtype=None):
ret = take_1d(self.categories.values, self._codes)
if dtype and not is_dtype_equal(dtype, self.categories.dtype):
return np.asarray(ret, dtype)
if is_extension_array_dtype(ret):
if is_extension_array(ret):
# When we're a Categorical[ExtensionArray], like Interval,
# we need to ensure __array__ gets all the way to an
# ndarray.
Expand Down
6 changes: 3 additions & 3 deletions pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
is_list_like,
is_scalar,
is_extension_type,
is_extension_array_dtype)
is_extension_array)

from pandas.util._validators import validate_bool_kwarg
from pandas.errors import AbstractMethodError
Expand Down Expand Up @@ -749,7 +749,7 @@ def _ndarray_values(self):

- categorical -> codes
"""
if is_extension_array_dtype(self):
if is_extension_array(self):
return self.values._ndarray_values
return self.values

Expand Down Expand Up @@ -857,7 +857,7 @@ def tolist(self):
"""
if is_datetimelike(self._values):
return [com._maybe_box_datetimelike(x) for x in self._values]
elif is_extension_array_dtype(self._values):
elif is_extension_array(self._values):
return list(self._values)
else:
return self._values.tolist()
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/dtypes/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from .common import (pandas_dtype,
is_dtype_equal,
is_extension_dtype,
is_extension_array,
is_extension_type,

# categorical
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@


class _DtypeOpsMixin(object):
# Not all of pandas' extension dtypes are compatibile with
# Not all of pandas' extension dtypes are compatible with
# the new ExtensionArray interface. This means PandasExtensionDtype
# can't subclass ExtensionDtype yet, as is_extension_array_dtype would
# can't subclass ExtensionDtype yet, as is_extension_dtype would
# incorrectly say that these types are extension types.
#
# In the interim, we put methods that are shared between the two base
Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
is_complex, is_datetimetz, is_categorical_dtype,
is_datetimelike,
is_extension_type,
is_extension_array_dtype,
is_extension_dtype,
is_object_dtype,
is_datetime64tz_dtype, is_datetime64_dtype,
is_datetime64_ns_dtype,
Expand Down Expand Up @@ -294,7 +294,7 @@ def maybe_promote(dtype, fill_value=np.nan):
elif is_datetimetz(dtype):
if isna(fill_value):
fill_value = iNaT
elif is_extension_array_dtype(dtype) and isna(fill_value):
elif is_extension_dtype(dtype) and isna(fill_value):
fill_value = dtype.na_value
elif is_float(fill_value):
if issubclass(dtype.type, np.bool_):
Expand Down Expand Up @@ -332,7 +332,7 @@ def maybe_promote(dtype, fill_value=np.nan):
dtype = np.object_

# in case we have a string that looked like a number
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
pass
elif is_datetimetz(dtype):
pass
Expand Down Expand Up @@ -650,7 +650,7 @@ def astype_nansafe(arr, dtype, copy=True):
need to be very careful as the result shape could change! """

# dispatch on extension dtype if needed
if is_extension_array_dtype(dtype):
if is_extension_dtype(dtype):
return dtype.construct_array_type()._from_sequence(
arr, dtype=dtype, copy=copy)

Expand Down
57 changes: 44 additions & 13 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1688,38 +1688,69 @@ def is_extension_type(arr):
return False


def is_extension_array_dtype(arr_or_dtype):
"""Check if an object is a pandas extension array type.
def is_extension_array(arr):
"""Check if an array object is a pandas extension array type.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you clarify (maybe in an extended summary paragraph or in the parameters section) that this accepts anything "array-like" (also Series and Index; in practice, anything that has a dtype property)?


Parameters
----------
arr_or_dtype : object
arr : object

Returns
-------
bool

Notes
-----
This checks whether an object implements the pandas extension
This checks whether an array object implements the pandas extension
array interface. In pandas, this includes:

* Categorical
* Interval

Third-party libraries may implement arrays or types satisfying
Third-party libraries may implement arrays satisfying
this interface as well.
"""
from pandas.core.arrays import ExtensionArray

if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)):
arr_or_dtype = arr_or_dtype._values
See Also
--------
is_extension_dtype : Similar method for dtypes.
"""
from pandas.core.dtypes.base import ExtensionDtype

try:
arr_or_dtype = pandas_dtype(arr_or_dtype)
except TypeError:
pass
dtype = getattr(arr, 'dtype')
except AttributeError:
return False

return isinstance(dtype, ExtensionDtype)

return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))

def is_extension_dtype(dtype):
"""Check if a dtype object is a pandas extension dtype.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One question is whether we want to accept strings here as well?
Or keep it strictly for actual dtype objects?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point, I suspect we should accept strings that have been registered.

I'll see what I can do.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

meant for the comment here
this is what pandas_dtype does


Parameters
----------
dtype : dtype

Returns
-------
bool

Notes
-----
This checks whether a dtype object implements the pandas extension
dtype interface. In pandas, this includes:

* CategoricalDtype
* IntervalDtype

Third-party libraries may implement dtypes satisfying
this interface as well.

See Also
--------
is_extension_array : Similar method for arrays.
"""
return isinstance(dtype, ExtensionDtype)


def is_complex_dtype(arr_or_dtype):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pandas.core.dtypes.common import (
is_categorical_dtype,
is_sparse,
is_extension_array_dtype,
is_extension_array,
is_datetimetz,
is_datetime64_dtype,
is_timedelta64_dtype,
Expand Down Expand Up @@ -177,7 +177,7 @@ def is_nonempty(x):
elif 'sparse' in typs:
return _concat_sparse(to_concat, axis=axis, typs=typs)

extensions = [is_extension_array_dtype(x) for x in to_concat]
extensions = [is_extension_array(x) for x in to_concat]
if any(extensions) and axis == 1:
to_concat = [np.atleast_2d(x.astype('object')) for x in to_concat]

Expand Down
8 changes: 4 additions & 4 deletions pandas/core/dtypes/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
is_complex_dtype,
is_string_like_dtype, is_bool_dtype,
is_integer_dtype, is_dtype_equal,
is_extension_array_dtype,
is_extension_array,
needs_i8_conversion, ensure_object,
pandas_dtype,
is_scalar,
Expand Down Expand Up @@ -190,7 +190,7 @@ def _isna_ndarraylike(obj):
values = getattr(obj, 'values', obj)
dtype = values.dtype

if is_extension_array_dtype(obj):
if is_extension_array(obj):
if isinstance(obj, (ABCIndexClass, ABCSeries)):
values = obj._values
else:
Expand Down Expand Up @@ -502,7 +502,7 @@ def na_value_for_dtype(dtype, compat=True):
"""
dtype = pandas_dtype(dtype)

if is_extension_array_dtype(dtype):
if is_extension_array(dtype):
return dtype.na_value
if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or
is_timedelta64_dtype(dtype) or is_period_dtype(dtype)):
Expand All @@ -522,7 +522,7 @@ def remove_na_arraylike(arr):
"""
Return array-like containing only true/non-NaN values, possibly empty.
"""
if is_extension_array_dtype(arr):
if is_extension_array(arr):
return arr[notna(arr)]
else:
return arr[notna(lib.values_from_object(arr))]
6 changes: 3 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
is_categorical_dtype,
is_object_dtype,
is_extension_type,
is_extension_array_dtype,
is_extension_array,
is_datetimetz,
is_datetime64_any_dtype,
is_bool_dtype,
Expand Down Expand Up @@ -517,7 +517,7 @@ def _get_axes(N, K, index=index, columns=columns):
index, columns = _get_axes(len(values), 1)
return _arrays_to_mgr([values], columns, index, columns,
dtype=dtype)
elif (is_datetimetz(values) or is_extension_array_dtype(values)):
elif (is_datetimetz(values) or is_extension_array(values)):
# GH19157
if columns is None:
columns = [0]
Expand Down Expand Up @@ -3507,7 +3507,7 @@ def reindexer(value):
value = maybe_cast_to_datetime(value, value.dtype)

# return internal types directly
if is_extension_type(value) or is_extension_array_dtype(value):
if is_extension_type(value) or is_extension_array(value):
return value

# broadcast across multiple columns if necessary
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
is_datetime64_any_dtype,
is_datetime64tz_dtype,
is_timedelta64_dtype,
is_extension_array_dtype,
is_extension_array,
is_extension_dtype,
is_hashable,
is_iterator, is_list_like,
is_scalar)
Expand Down Expand Up @@ -275,7 +276,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
closed=closed)

# extension dtype
elif is_extension_array_dtype(data) or is_extension_array_dtype(dtype):
elif is_extension_array(data) or is_extension_dtype(dtype):
data = np.asarray(data)
if not (dtype is None or is_object_dtype(dtype)):

Expand Down Expand Up @@ -1191,7 +1192,7 @@ def astype(self, dtype, copy=True):
return CategoricalIndex(self.values, name=self.name, dtype=dtype,
copy=copy)

elif is_extension_array_dtype(dtype):
elif is_extension_dtype(dtype):
return Index(np.asarray(self), dtype=dtype, copy=copy)

try:
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/internals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
is_datetimelike_v_numeric,
is_float_dtype, is_numeric_dtype,
is_numeric_v_string_like, is_extension_type,
is_extension_array_dtype,
is_extension_array,
is_scalar,
_get_dtype)
from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -1071,7 +1071,7 @@ def set(self, item, value, check=False):
# TODO(EA): Remove an is_extension_ when all extension types satisfy
# the interface
value_is_extension_type = (is_extension_type(value) or
is_extension_array_dtype(value))
is_extension_array(value))

# categorical/sparse/datetimetz
if value_is_extension_type:
Expand Down
Loading