diff --git a/.github/actions/build_pandas/action.yml b/.github/actions/build_pandas/action.yml
index a65718ba045a0..73d7723e2fb49 100644
--- a/.github/actions/build_pandas/action.yml
+++ b/.github/actions/build_pandas/action.yml
@@ -12,6 +12,7 @@ runs:
       run: |
         micromamba info
         micromamba list
+        pip list --pre
       shell: bash -el {0}
 
     - name: Uninstall existing Pandas installation
diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
index de4b91e44da19..bfcfd5c74351a 100644
--- a/pandas/compat/__init__.py
+++ b/pandas/compat/__init__.py
@@ -28,6 +28,7 @@
     pa_version_under8p0,
     pa_version_under9p0,
     pa_version_under11p0,
+    pa_version_under13p0,
 )
 
 if TYPE_CHECKING:
@@ -183,6 +184,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
     "pa_version_under8p0",
     "pa_version_under9p0",
     "pa_version_under11p0",
+    "pa_version_under13p0",
     "IS64",
     "ISMUSL",
     "PY310",
diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py
index 020ec346490ff..2f2fb6a3662f4 100644
--- a/pandas/compat/pyarrow.py
+++ b/pandas/compat/pyarrow.py
@@ -7,16 +7,17 @@
 try:
     import pyarrow as pa
 
-    _pa_version = pa.__version__
-    _palv = Version(_pa_version)
+    _palv = Version(Version(pa.__version__).base_version)
     pa_version_under7p0 = _palv < Version("7.0.0")
     pa_version_under8p0 = _palv < Version("8.0.0")
     pa_version_under9p0 = _palv < Version("9.0.0")
     pa_version_under10p0 = _palv < Version("10.0.0")
     pa_version_under11p0 = _palv < Version("11.0.0")
+    pa_version_under13p0 = _palv < Version("13.0.0")
 except ImportError:
     pa_version_under7p0 = True
     pa_version_under8p0 = True
     pa_version_under9p0 = True
     pa_version_under10p0 = True
     pa_version_under11p0 = True
+    pa_version_under13p0 = True
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 106ec28a93f80..931d45319ea71 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -24,6 +24,7 @@
     pa_version_under8p0,
     pa_version_under9p0,
     pa_version_under11p0,
+    pa_version_under13p0,
 )
 from pandas.util._decorators import doc
 from pandas.util._validators import validate_fillna_kwargs
@@ -1218,7 +1219,7 @@ def to_numpy(
             else:
                 result = result.to_numpy(dtype=dtype)
             return result
-        elif pa.types.is_time(pa_type):
+        elif pa.types.is_time(pa_type) or pa.types.is_date(pa_type):
             # convert to list of python datetime.time objects before
             # wrapping in ndarray
             result = np.array(list(self), dtype=dtype)
@@ -1416,6 +1417,8 @@ def _reduce_pyarrow(self, name: str, *, skipna: bool = True, **kwargs) -> pa.Sca
 
         data_to_reduce = self._pa_array
 
+        cast_kwargs = {} if pa_version_under13p0 else {"safe": False}
+
         if name in ["any", "all"] and (
             pa.types.is_integer(pa_type)
             or pa.types.is_floating(pa_type)
@@ -1491,9 +1494,15 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
         if name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
             result = result.cast(pa_type)
         if name in ["median", "mean"] and pa.types.is_temporal(pa_type):
+            if not pa_version_under13p0:
+                nbits = pa_type.bit_width
+                if nbits == 32:
+                    result = result.cast(pa.int32(), **cast_kwargs)
+                else:
+                    result = result.cast(pa.int64(), **cast_kwargs)
             result = result.cast(pa_type)
         if name in ["std", "sem"] and pa.types.is_temporal(pa_type):
-            result = result.cast(pa.int64())
+            result = result.cast(pa.int64(), **cast_kwargs)
             if pa.types.is_duration(pa_type):
                 result = result.cast(pa_type)
             elif pa.types.is_time(pa_type):
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index f622ef770b63f..7b2bedc531076 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -1706,7 +1706,11 @@ def test_to_numpy_with_defaults(data):
     result = data.to_numpy()
 
     pa_type = data._pa_array.type
-    if pa.types.is_duration(pa_type) or pa.types.is_timestamp(pa_type):
+    if (
+        pa.types.is_duration(pa_type)
+        or pa.types.is_timestamp(pa_type)
+        or pa.types.is_date(pa_type)
+    ):
         expected = np.array(list(data))
     else:
         expected = np.array(data._pa_array)
@@ -2969,7 +2973,7 @@ def test_date32_repr():
     # GH48238
     arrow_dt = pa.array([date.fromisoformat("2020-01-01")], type=pa.date32())
     ser = pd.Series(arrow_dt, dtype=ArrowDtype(arrow_dt.type))
-    assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]"
+    assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]"
 
 
 @pytest.mark.xfail(
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 35bf75d3928f8..0d8afbf220b0c 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -17,6 +17,7 @@
 from pandas.compat.pyarrow import (
     pa_version_under7p0,
     pa_version_under8p0,
+    pa_version_under13p0,
 )
 import pandas.util._test_decorators as td
 
@@ -1006,14 +1007,15 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full):
 
         pa_table = pyarrow.Table.from_pandas(df)
         expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
-        # pyarrow infers datetimes as us instead of ns
-        expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
-        expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
-            "timestamp[us][pyarrow]"
-        )
-        expected["datetime_tz"] = expected["datetime_tz"].astype(
-            pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
-        )
+        if pa_version_under13p0:
+            # pyarrow infers datetimes as us instead of ns
+            expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
+            expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
+                "timestamp[us][pyarrow]"
+            )
+            expected["datetime_tz"] = expected["datetime_tz"].astype(
+                pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
+            )
 
         check_round_trip(
             df,
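Note on the pandas/compat/pyarrow.py change above: a minimal sketch, not part of the patch, showing why the shim compares against Version(pa.__version__).base_version rather than the raw version string. It assumes only the documented behaviour of packaging.version.Version; the "nightly" value below is an illustrative example, not a real pyarrow build.

# Sketch: base_version strips dev/pre-release suffixes, so a pyarrow nightly
# such as "13.0.0.dev123" is treated as 13.0.0 by the compat flags instead of
# being classified as "under 13.0.0".
from packaging.version import Version

nightly = "13.0.0.dev123"  # illustrative version string
assert Version(nightly) < Version("13.0.0")  # dev releases sort before the final release
assert Version(Version(nightly).base_version) >= Version("13.0.0")  # comparison used by the shim

This matters together with the CI change at the top of the diff (pip list --pre), where pre-release wheels can end up in the environment and would otherwise flip every pa_version_under* flag the wrong way.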