Skip to content

REF: hold PeriodArray in NDArrayBackedExtensionBlock #44681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Dec 14, 2021
7 changes: 5 additions & 2 deletions pandas/core/dtypes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1408,11 +1408,12 @@ def is_1d_only_ea_obj(obj: Any) -> bool:
from pandas.core.arrays import (
DatetimeArray,
ExtensionArray,
PeriodArray,
TimedeltaArray,
)

return isinstance(obj, ExtensionArray) and not isinstance(
obj, (DatetimeArray, TimedeltaArray)
obj, (DatetimeArray, TimedeltaArray, PeriodArray)
)


Expand All @@ -1424,7 +1425,9 @@ def is_1d_only_ea_dtype(dtype: DtypeObj | None) -> bool:
# here too.
# NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
# to exclude ArrowTimestampUSDtype
return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype)
return isinstance(dtype, ExtensionDtype) and not isinstance(
dtype, (DatetimeTZDtype, PeriodDtype)
)


def is_extension_array_dtype(arr_or_dtype) -> bool:
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/internals/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from pandas.core.dtypes.common import (
is_datetime64tz_dtype,
is_period_dtype,
pandas_dtype,
)

Expand Down Expand Up @@ -62,8 +63,9 @@ def make_block(
placement = BlockPlacement(placement)

ndim = maybe_infer_ndim(values, placement, ndim)
if is_datetime64tz_dtype(values.dtype):
if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
# GH#41168 ensure we can pass 1D dt64tz values
# More generally, any EA dtype that isn't is_1d_only_ea_dtype
values = extract_array(values, extract_numpy=True)
values = ensure_block_shape(values, ndim)

Expand Down
12 changes: 12 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
CategoricalDtype,
ExtensionDtype,
PandasDtype,
PeriodDtype,
)
from pandas.core.dtypes.generic import (
ABCDataFrame,
Expand Down Expand Up @@ -1728,6 +1729,12 @@ class NDArrayBackedExtensionBlock(libinternals.NDArrayBackedBlock, EABackedBlock

values: NDArrayBackedExtensionArray

# error: Signature of "is_extension" incompatible with supertype "Block"
@cache_readonly
def is_extension(self) -> bool: # type: ignore[override]
# i.e. datetime64tz, PeriodDtype
return not isinstance(self.dtype, np.dtype)

@property
def is_view(self) -> bool:
"""return a boolean if I am possibly a view"""
Expand Down Expand Up @@ -1756,6 +1763,9 @@ def where(self, other, cond) -> list[Block]:
try:
res_values = arr.T._where(cond, other).T
except (ValueError, TypeError):
if isinstance(self.dtype, PeriodDtype):
# TODO: don't special-case
raise
blk = self.coerce_to_target_dtype(other)
nbs = blk.where(other, cond)
return self._maybe_downcast(nbs, "infer")
Expand Down Expand Up @@ -1949,6 +1959,8 @@ def get_block_type(dtype: DtypeObj):
cls = CategoricalBlock
elif vtype is Timestamp:
cls = DatetimeTZBlock
elif isinstance(dtype, PeriodDtype):
cls = NDArrayBackedExtensionBlock
elif isinstance(dtype, ExtensionDtype):
# Note: need to be sure PandasArray is unwrapped before we get here
cls = ExtensionBlock
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def ndarray_to_mgr(
return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
# i.e. Datetime64TZ
# i.e. Datetime64TZ, PeriodDtype
values = extract_array(values, extract_numpy=True)
if copy:
values = values.copy()
Expand Down
3 changes: 3 additions & 0 deletions pandas/tests/arithmetic/test_period.py
Original file line number Diff line number Diff line change
Expand Up @@ -1262,6 +1262,9 @@ def test_parr_add_timedeltalike_scalar(self, three_days, box_with_array):
)

obj = tm.box_expected(ser, box_with_array)
if box_with_array is pd.DataFrame:
assert (obj.dtypes == "Period[D]").all()

expected = tm.box_expected(expected, box_with_array)

result = obj + three_days
Expand Down
11 changes: 11 additions & 0 deletions pandas/tests/arrays/period/test_arrow_compat.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import pytest

from pandas.compat import pa_version_under2p0

from pandas.core.dtypes.dtypes import PeriodDtype

import pandas as pd
Expand Down Expand Up @@ -69,6 +71,9 @@ def test_arrow_array_missing():
assert result.storage.equals(expected)


@pytest.mark.xfail(
pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_table_roundtrip():
from pandas.core.arrays._arrow_utils import ArrowPeriodType

Expand All @@ -88,6 +93,9 @@ def test_arrow_table_roundtrip():
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(
pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_load_from_zero_chunks():
# GH-41040

Expand All @@ -106,6 +114,9 @@ def test_arrow_load_from_zero_chunks():
tm.assert_frame_equal(result, df)


@pytest.mark.xfail(
pa_version_under2p0, reason="pyarrow incorrectly uses pandas internals API"
)
def test_arrow_table_roundtrip_without_metadata():
arr = PeriodArray([1, 2, 3], freq="H")
arr[1] = pd.NaT
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1275,7 +1275,7 @@ def test_interval_can_hold_element(self, dtype, element):

def test_period_can_hold_element_emptylist(self):
pi = period_range("2016", periods=3, freq="A")
blk = new_block(pi._data, [1], ndim=2)
blk = new_block(pi._data.reshape(1, 3), [1], ndim=2)

assert blk._can_hold_element([])

Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/io/test_feather.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import numpy as np
import pytest

from pandas.compat.pyarrow import pa_version_under2p0

import pandas as pd
import pandas._testing as tm

Expand Down Expand Up @@ -85,7 +87,11 @@ def test_basic(self):
),
}
)
df["periods"] = pd.period_range("2013", freq="M", periods=3)
if not pa_version_under2p0:
# older pyarrow incorrectly uses the pandas internals API, so it
# constructs an invalid Block
df["periods"] = pd.period_range("2013", freq="M", periods=3)

df["timedeltas"] = pd.timedelta_range("1 day", periods=3)
df["intervals"] = pd.interval_range(0, 3, 3)

Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -648,7 +648,15 @@ def test_use_nullable_dtypes(self, engine, request):
"object",
"datetime64[ns, UTC]",
"float",
"period[D]",
pytest.param(
"period[D]",
# Note: the exact pyarrow version cutoff is unknown;
# on the CI it fails with pyarrow 1.0.1
marks=pytest.mark.xfail(
pa_version_under2p0,
reason="pyarrow uses pandas internal API incorrectly",
),
),
"Float64",
"string",
],
Expand Down Expand Up @@ -887,6 +895,9 @@ def test_pyarrow_backed_string_array(self, pa, string_storage):
check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))

@td.skip_if_no("pyarrow")
@pytest.mark.xfail(
pa_version_under2p0, reason="pyarrow uses pandas internal API incorrectly"
)
def test_additional_extension_types(self, pa):
# test additional ExtensionArrays that are supported through the
# __arrow_array__ protocol + by defining a custom ExtensionType
Expand Down