Skip to content

CI: Fix pyarrow nightly build #54110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jul 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/build_pandas/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ runs:
run: |
micromamba info
micromamba list
pip list --pre
shell: bash -el {0}

- name: Uninstall existing Pandas installation
Expand Down
2 changes: 2 additions & 0 deletions pandas/compat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
pa_version_under8p0,
pa_version_under9p0,
pa_version_under11p0,
pa_version_under13p0,
)

if TYPE_CHECKING:
Expand Down Expand Up @@ -183,6 +184,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]:
"pa_version_under8p0",
"pa_version_under9p0",
"pa_version_under11p0",
"pa_version_under13p0",
"IS64",
"ISMUSL",
"PY310",
Expand Down
5 changes: 3 additions & 2 deletions pandas/compat/pyarrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
try:
import pyarrow as pa

- _pa_version = pa.__version__
- _palv = Version(_pa_version)
+ _palv = Version(Version(pa.__version__).base_version)
pa_version_under7p0 = _palv < Version("7.0.0")
pa_version_under8p0 = _palv < Version("8.0.0")
pa_version_under9p0 = _palv < Version("9.0.0")
pa_version_under10p0 = _palv < Version("10.0.0")
pa_version_under11p0 = _palv < Version("11.0.0")
pa_version_under13p0 = _palv < Version("13.0.0")
except ImportError:
pa_version_under7p0 = True
pa_version_under8p0 = True
pa_version_under9p0 = True
pa_version_under10p0 = True
pa_version_under11p0 = True
pa_version_under13p0 = True
13 changes: 11 additions & 2 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
pa_version_under8p0,
pa_version_under9p0,
pa_version_under11p0,
pa_version_under13p0,
)
from pandas.util._decorators import doc
from pandas.util._validators import validate_fillna_kwargs
Expand Down Expand Up @@ -1218,7 +1219,7 @@ def to_numpy(
else:
result = result.to_numpy(dtype=dtype)
return result
- elif pa.types.is_time(pa_type):
+ elif pa.types.is_time(pa_type) or pa.types.is_date(pa_type):
# convert to list of python datetime.time objects before
# wrapping in ndarray
result = np.array(list(self), dtype=dtype)
Expand Down Expand Up @@ -1416,6 +1417,8 @@ def _reduce_pyarrow(self, name: str, *, skipna: bool = True, **kwargs) -> pa.Sca

data_to_reduce = self._pa_array

cast_kwargs = {} if pa_version_under13p0 else {"safe": False}

if name in ["any", "all"] and (
pa.types.is_integer(pa_type)
or pa.types.is_floating(pa_type)
Expand Down Expand Up @@ -1491,9 +1494,15 @@ def pyarrow_meth(data, skip_nulls, **kwargs):
if name in ["min", "max", "sum"] and pa.types.is_duration(pa_type):
result = result.cast(pa_type)
if name in ["median", "mean"] and pa.types.is_temporal(pa_type):
if not pa_version_under13p0:
nbits = pa_type.bit_width
if nbits == 32:
result = result.cast(pa.int32(), **cast_kwargs)
else:
result = result.cast(pa.int64(), **cast_kwargs)
result = result.cast(pa_type)
if name in ["std", "sem"] and pa.types.is_temporal(pa_type):
- result = result.cast(pa.int64())
+ result = result.cast(pa.int64(), **cast_kwargs)
if pa.types.is_duration(pa_type):
result = result.cast(pa_type)
elif pa.types.is_time(pa_type):
Expand Down
8 changes: 6 additions & 2 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -1706,7 +1706,11 @@ def test_to_numpy_with_defaults(data):
result = data.to_numpy()

pa_type = data._pa_array.type
- if pa.types.is_duration(pa_type) or pa.types.is_timestamp(pa_type):
+ if (
+ pa.types.is_duration(pa_type)
+ or pa.types.is_timestamp(pa_type)
+ or pa.types.is_date(pa_type)
+ ):
expected = np.array(list(data))
else:
expected = np.array(data._pa_array)
Expand Down Expand Up @@ -2969,7 +2973,7 @@ def test_date32_repr():
# GH48238
arrow_dt = pa.array([date.fromisoformat("2020-01-01")], type=pa.date32())
ser = pd.Series(arrow_dt, dtype=ArrowDtype(arrow_dt.type))
- assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]"
+ assert repr(ser) == "0 2020-01-01\ndtype: date32[day][pyarrow]"


@pytest.mark.xfail(
Expand Down
18 changes: 10 additions & 8 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pandas.compat.pyarrow import (
pa_version_under7p0,
pa_version_under8p0,
pa_version_under13p0,
)
import pandas.util._test_decorators as td

Expand Down Expand Up @@ -1006,14 +1007,15 @@ def test_read_dtype_backend_pyarrow_config(self, pa, df_full):

pa_table = pyarrow.Table.from_pandas(df)
expected = pa_table.to_pandas(types_mapper=pd.ArrowDtype)
- # pyarrow infers datetimes as us instead of ns
- expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
- expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
- "timestamp[us][pyarrow]"
- )
- expected["datetime_tz"] = expected["datetime_tz"].astype(
- pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
- )
+ if pa_version_under13p0:
+ # pyarrow infers datetimes as us instead of ns
+ expected["datetime"] = expected["datetime"].astype("timestamp[us][pyarrow]")
+ expected["datetime_with_nat"] = expected["datetime_with_nat"].astype(
+ "timestamp[us][pyarrow]"
+ )
+ expected["datetime_tz"] = expected["datetime_tz"].astype(
+ pd.ArrowDtype(pyarrow.timestamp(unit="us", tz="Europe/Brussels"))
+ )

check_round_trip(
df,
Expand Down