Skip to content

ENH/TST: Add TestBaseDtype tests for ArrowExtensionArray #47358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Jun 17, 2022
15 changes: 8 additions & 7 deletions pandas/core/arrays/arrow/dtype.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def name(self) -> str: # type: ignore[override]
"""
A string identifying the data type.
"""
return str(self.pyarrow_dtype)
return f"{str(self.pyarrow_dtype)}[{self.storage}]"

@cache_readonly
def numpy_dtype(self) -> np.dtype:
Expand Down Expand Up @@ -92,20 +92,21 @@ def construct_from_string(cls, string: str):
f"'construct_from_string' expects a string, got {type(string)}"
)
if not string.endswith("[pyarrow]"):
raise TypeError(f"string {string} must end with '[pyarrow]'")
raise TypeError(f"'{string}' must end with '[pyarrow]'")
base_type = string.split("[pyarrow]")[0]
pa_dtype = getattr(pa, base_type, None)
if pa_dtype is None:
try:
pa_dtype = pa.type_for_alias(base_type)
except ValueError as err:
has_parameters = re.search(r"\[.*\]", base_type)
if has_parameters:
raise NotImplementedError(
"Passing pyarrow type specific parameters "
f"({has_parameters.group()}) in the string is not supported. "
"Please construct an ArrowDtype object with a pyarrow_dtype "
"instance with specific parameters."
)
raise TypeError(f"'{base_type}' is not a valid pyarrow data type.")
return cls(pa_dtype())
) from err
raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err
return cls(pa_dtype)

@property
def _is_numeric(self) -> bool:
Expand Down
92 changes: 81 additions & 11 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip


@pytest.fixture(params=tm.ALL_PYARROW_DTYPES)
@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
def dtype(request):
return ArrowDtype(pyarrow_dtype=request.param)

Expand Down Expand Up @@ -104,14 +104,23 @@ class TestBaseCasting(base.BaseCastingTests):


class TestConstructors(base.BaseConstructorsTests):
@pytest.mark.xfail(
reason=(
"str(dtype) constructs "
"e.g. in64[pyarrow] like int64 (numpy) "
"due to StorageExtensionDtype.__str__"
)
)
def test_from_dtype(self, data):
def test_from_dtype(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz:
if pa_version_under2p0:
request.node.add_marker(
pytest.mark.xfail(
reason=f"timestamp data with tz={pa_dtype.tz} "
"converted to integer when pyarrow < 2.0",
)
)
else:
request.node.add_marker(
pytest.mark.xfail(
raises=NotImplementedError,
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
)
)
super().test_from_dtype(data)


Expand Down Expand Up @@ -197,10 +206,71 @@ def test_loc_iloc_frame_single_dtype(self, request, using_array_manager, data):
super().test_loc_iloc_frame_single_dtype(data)


class TestBaseDtype(base.BaseDtypeTests):
def test_construct_from_string_own_name(self, dtype, request):
pa_dtype = dtype.pyarrow_dtype
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
request.node.add_marker(
pytest.mark.xfail(
raises=NotImplementedError,
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have an issue for follow-up on these?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, but I can post an issue on the Arrow JIRA board.

If pyarrow.type_for_alias decides to implement better parsing, we should get this for free in a future version.

)
)
super().test_construct_from_string_own_name(dtype)

def test_is_dtype_from_name(self, dtype, request):
pa_dtype = dtype.pyarrow_dtype
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
request.node.add_marker(
pytest.mark.xfail(
raises=NotImplementedError,
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
)
)
super().test_is_dtype_from_name(dtype)

def test_construct_from_string(self, dtype, request):
pa_dtype = dtype.pyarrow_dtype
if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
request.node.add_marker(
pytest.mark.xfail(
raises=NotImplementedError,
reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}",
)
)
super().test_construct_from_string(dtype)

def test_construct_from_string_another_type_raises(self, dtype):
msg = r"'another_type' must end with '\[pyarrow\]'"
with pytest.raises(TypeError, match=msg):
type(dtype).construct_from_string("another_type")

def test_get_common_dtype(self, dtype, request):
pa_dtype = dtype.pyarrow_dtype
if (
pa.types.is_date(pa_dtype)
or pa.types.is_time(pa_dtype)
or (
pa.types.is_timestamp(pa_dtype)
and (pa_dtype.unit != "ns" or pa_dtype.tz is not None)
)
or (pa.types.is_duration(pa_dtype) and pa_dtype.unit != "ns")
):
request.node.add_marker(
pytest.mark.xfail(
reason=(
f"{pa_dtype} does not have associated numpy "
f"dtype findable by find_common_type"
)
)
)
super().test_get_common_dtype(dtype)


class TestBaseIndex(base.BaseIndexTests):
pass


def test_arrowdtype_construct_from_string_type_with_parameters():
def test_arrowdtype_construct_from_string_type_with_unsupported_parameters():
with pytest.raises(NotImplementedError, match="Passing pyarrow type"):
ArrowDtype.construct_from_string("timestamp[s][pyarrow]")
ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")