Skip to content

COMPAT: Use actual isinstance check for pyarrow.Array instead of hasattr(.. 'type') duck typing #52830

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jun 2, 2023
2 changes: 2 additions & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ from pandas._libs.interval import Interval
from pandas._libs.tslibs import Period
from pandas._typing import (
ArrayLike,
ArrowArrayTypeGuard,
DtypeObj,
TypeGuard,
npt,
Expand All @@ -42,6 +43,7 @@ def infer_dtype(value: object, skipna: bool = ...) -> str: ...
def is_iterator(obj: object) -> bool: ...
def is_scalar(val: object) -> bool: ...
def is_list_like(obj: object, allow_sets: bool = ...) -> bool: ...
def is_pyarrow_array(obj: object) -> ArrowArrayTypeGuard: ...
def is_period(val: object) -> TypeGuard[Period]: ...
def is_interval(val: object) -> TypeGuard[Interval]: ...
def is_decimal(val: object) -> TypeGuard[Decimal]: ...
Expand Down
23 changes: 23 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,16 @@ i8max = <int64_t>INT64_MAX
u8max = <uint64_t>UINT64_MAX


# Module-level flag: stays False unless the pyarrow import below succeeds.
cdef bint PYARROW_INSTALLED = False

try:
import pyarrow as pa

PYARROW_INSTALLED = True
except ImportError:
# pyarrow could not be imported; bind the name to None so that `pa` still exists.
pa = None


@cython.wraparound(False)
@cython.boundscheck(False)
def memory_usage_of_objects(arr: object[:]) -> int64_t:
Expand Down Expand Up @@ -1173,6 +1183,19 @@ cdef bint c_is_list_like(object obj, bint allow_sets) except -1:
)


def is_pyarrow_array(obj):
    """
    Check whether an object is a pyarrow ``Array`` or ``ChunkedArray``.

    Parameters
    ----------
    obj : object
        The object to test.

    Returns
    -------
    bool
        True when the module-level ``PYARROW_INSTALLED`` flag is set and
        ``obj`` is an instance of ``pa.Array`` or ``pa.ChunkedArray``;
        False otherwise.
    """
    if not PYARROW_INSTALLED:
        # No pyarrow module available, so nothing can be a pyarrow array.
        return False
    return isinstance(obj, (pa.Array, pa.ChunkedArray))


_TYPE_MAP = {
"categorical": "categorical",
"category": "categorical",
Expand Down
11 changes: 11 additions & 0 deletions pandas/_typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,21 @@
from typing import Self
else:
from typing_extensions import Self # pyright: reportUnusedImport = false

try:
from pyarrow import (
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

IIUC, we expect optional dependencies to be present for mypy checks, but I'm not sure. Who would know?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's also fine with me; that would just simplify this a bit.

Array,
ChunkedArray,
)

ArrowArrayTypeGuard = TypeGuard[Union[Array, ChunkedArray]]
except ImportError:
ArrowArrayTypeGuard = bool
else:
npt: Any = None
Self: Any = None
TypeGuard: Any = None
ArrowArrayTypeGuard = bool

HashableT = TypeVar("HashableT", bound=Hashable)

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
result[na_values] = libmissing.NA

else:
if hasattr(scalars, "type"):
if lib.is_pyarrow_array(scalars):
# pyarrow array; we cannot rely on the "to_numpy" check in
# ensure_string_array because calling scalars.to_numpy would set
# zero_copy_only to True which caused problems see GH#52076
Expand Down