diff --git a/pandas/core/arrays/arrow/dtype.py b/pandas/core/arrays/arrow/dtype.py index af5b51a39b9c3..346c4e8d19379 100644 --- a/pandas/core/arrays/arrow/dtype.py +++ b/pandas/core/arrays/arrow/dtype.py @@ -44,7 +44,7 @@ def name(self) -> str: # type: ignore[override] """ A string identifying the data type. """ - return str(self.pyarrow_dtype) + return f"{str(self.pyarrow_dtype)}[{self.storage}]" @cache_readonly def numpy_dtype(self) -> np.dtype: @@ -92,10 +92,11 @@ def construct_from_string(cls, string: str): f"'construct_from_string' expects a string, got {type(string)}" ) if not string.endswith("[pyarrow]"): - raise TypeError(f"string {string} must end with '[pyarrow]'") + raise TypeError(f"'{string}' must end with '[pyarrow]'") base_type = string.split("[pyarrow]")[0] - pa_dtype = getattr(pa, base_type, None) - if pa_dtype is None: + try: + pa_dtype = pa.type_for_alias(base_type) + except ValueError as err: has_parameters = re.search(r"\[.*\]", base_type) if has_parameters: raise NotImplementedError( @@ -103,9 +104,9 @@ def construct_from_string(cls, string: str): f"({has_parameters.group()}) in the string is not supported. " "Please construct an ArrowDtype object with a pyarrow_dtype " "instance with specific parameters." - ) - raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") - return cls(pa_dtype()) + ) from err + raise TypeError(f"'{base_type}' is not a valid pyarrow data type.") from err + return cls(pa_dtype) @property def _is_numeric(self) -> bool: diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 03616267c3f86..95cb7045ac68d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -34,7 +34,7 @@ from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip -@pytest.fixture(params=tm.ALL_PYARROW_DTYPES) +@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) def dtype(request): return ArrowDtype(pyarrow_dtype=request.param) @@ -104,14 +104,23 @@ class TestBaseCasting(base.BaseCastingTests): class TestConstructors(base.BaseConstructorsTests): - @pytest.mark.xfail( - reason=( - "str(dtype) constructs " - "e.g. in64[pyarrow] like int64 (numpy) " - "due to StorageExtensionDtype.__str__" - ) - ) - def test_from_dtype(self, data): + def test_from_dtype(self, data, request): + pa_dtype = data.dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz: + if pa_version_under2p0: + request.node.add_marker( + pytest.mark.xfail( + reason=f"timestamp data with tz={pa_dtype.tz} " + "converted to integer when pyarrow < 2.0", + ) + ) + else: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) super().test_from_dtype(data) @@ -197,10 +206,71 @@ def test_loc_iloc_frame_single_dtype(self, request, using_array_manager, data): super().test_loc_iloc_frame_single_dtype(data) +class TestBaseDtype(base.BaseDtypeTests): + def test_construct_from_string_own_name(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_construct_from_string_own_name(dtype) + + def test_is_dtype_from_name(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_is_dtype_from_name(dtype) + + def test_construct_from_string(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: + request.node.add_marker( + pytest.mark.xfail( + raises=NotImplementedError, + reason=f"pyarrow.type_for_alias cannot infer {pa_dtype}", + ) + ) + super().test_construct_from_string(dtype) + + def test_construct_from_string_another_type_raises(self, dtype): + msg = r"'another_type' must end with '\[pyarrow\]'" + with pytest.raises(TypeError, match=msg): + type(dtype).construct_from_string("another_type") + + def test_get_common_dtype(self, dtype, request): + pa_dtype = dtype.pyarrow_dtype + if ( + pa.types.is_date(pa_dtype) + or pa.types.is_time(pa_dtype) + or ( + pa.types.is_timestamp(pa_dtype) + and (pa_dtype.unit != "ns" or pa_dtype.tz is not None) + ) + or (pa.types.is_duration(pa_dtype) and pa_dtype.unit != "ns") + ): + request.node.add_marker( + pytest.mark.xfail( + reason=( + f"{pa_dtype} does not have associated numpy " + f"dtype findable by find_common_type" + ) + ) + ) + super().test_get_common_dtype(dtype) + + class TestBaseIndex(base.BaseIndexTests): pass -def test_arrowdtype_construct_from_string_type_with_parameters(): +def test_arrowdtype_construct_from_string_type_with_unsupported_parameters(): with pytest.raises(NotImplementedError, match="Passing pyarrow type"): - ArrowDtype.construct_from_string("timestamp[s][pyarrow]") + ArrowDtype.construct_from_string("timestamp[s, tz=UTC][pyarrow]")