Skip to content

Commit 79660d7

Browse files
authored
Revert "REF: Back DatetimeTZBlock with sometimes-2D DTA (#41082)"
This reverts commit 6b6d8fd.
1 parent 4caf4c7 commit 79660d7

File tree

15 files changed

+103
-183
lines changed

15 files changed

+103
-183
lines changed

pandas/core/dtypes/common.py

-27
Original file line numberDiff line numberDiff line change
@@ -1413,33 +1413,6 @@ def is_extension_type(arr) -> bool:
14131413
return False
14141414

14151415

1416-
def is_1d_only_ea_obj(obj: Any) -> bool:
1417-
"""
1418-
ExtensionArray that does not support 2D, or more specifically that does
1419-
not use HybridBlock.
1420-
"""
1421-
from pandas.core.arrays import (
1422-
DatetimeArray,
1423-
ExtensionArray,
1424-
TimedeltaArray,
1425-
)
1426-
1427-
return isinstance(obj, ExtensionArray) and not isinstance(
1428-
obj, (DatetimeArray, TimedeltaArray)
1429-
)
1430-
1431-
1432-
def is_1d_only_ea_dtype(dtype: Optional[DtypeObj]) -> bool:
1433-
"""
1434-
Analogue to is_extension_array_dtype but excluding DatetimeTZDtype.
1435-
"""
1436-
# Note: if other EA dtypes are ever held in HybridBlock, exclude those
1437-
# here too.
1438-
# NB: need to check DatetimeTZDtype and not is_datetime64tz_dtype
1439-
# to exclude ArrowTimestampUSDtype
1440-
return isinstance(dtype, ExtensionDtype) and not isinstance(dtype, DatetimeTZDtype)
1441-
1442-
14431416
def is_extension_array_dtype(arr_or_dtype) -> bool:
14441417
"""
14451418
Check if an object is a pandas extension array type.

pandas/core/dtypes/concat.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,11 @@ def is_nonempty(x) -> bool:
113113
to_concat = non_empties
114114

115115
kinds = {obj.dtype.kind for obj in to_concat}
116-
contains_datetime = any(kind in ["m", "M"] for kind in kinds)
117116

118117
all_empty = not len(non_empties)
119118
single_dtype = len({x.dtype for x in to_concat}) == 1
120119
any_ea = any(isinstance(x.dtype, ExtensionDtype) for x in to_concat)
121120

122-
if contains_datetime:
123-
return _concat_datetime(to_concat, axis=axis)
124-
125121
if any_ea:
126122
# we ignore axis here, as internally concatting with EAs is always
127123
# for axis=0
@@ -135,6 +131,9 @@ def is_nonempty(x) -> bool:
135131
else:
136132
return np.concatenate(to_concat)
137133

134+
elif any(kind in ["m", "M"] for kind in kinds):
135+
return _concat_datetime(to_concat, axis=axis)
136+
138137
elif all_empty:
139138
# we have all empties, but may need to coerce the result dtype to
140139
# object if we have non-numeric type operands (numpy would otherwise
@@ -350,5 +349,14 @@ def _concat_datetime(to_concat, axis=0):
350349
# in Timestamp/Timedelta
351350
return _concatenate_2d([x.astype(object) for x in to_concat], axis=axis)
352351

352+
if axis == 1:
353+
# TODO(EA2D): kludge not necessary with 2D EAs
354+
to_concat = [x.reshape(1, -1) if x.ndim == 1 else x for x in to_concat]
355+
353356
result = type(to_concat[0])._concat_same_type(to_concat, axis=axis)
357+
358+
if result.ndim == 2 and isinstance(result.dtype, ExtensionDtype):
359+
# TODO(EA2D): kludge not necessary with 2D EAs
360+
assert result.shape[0] == 1
361+
result = result[0]
354362
return result

pandas/core/frame.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,6 @@
9898
from pandas.core.dtypes.common import (
9999
ensure_platform_int,
100100
infer_dtype_from_object,
101-
is_1d_only_ea_dtype,
102101
is_bool_dtype,
103102
is_dataclass,
104103
is_datetime64_any_dtype,
@@ -846,9 +845,7 @@ def _can_fast_transpose(self) -> bool:
846845
if len(blocks) != 1:
847846
return False
848847

849-
dtype = blocks[0].dtype
850-
# TODO(EA2D) special case would be unnecessary with 2D EAs
851-
return not is_1d_only_ea_dtype(dtype)
848+
return not self._mgr.any_extension_types
852849

853850
# ----------------------------------------------------------------------
854851
# Rendering Methods

pandas/core/internals/api.py

+3-9
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
2) Use only functions exposed here (or in core.internals)
77
88
"""
9-
from __future__ import annotations
9+
from typing import Optional
1010

1111
import numpy as np
1212

@@ -23,15 +23,14 @@
2323
Block,
2424
DatetimeTZBlock,
2525
check_ndim,
26-
ensure_block_shape,
2726
extract_pandas_array,
2827
get_block_type,
2928
maybe_coerce_values,
3029
)
3130

3231

3332
def make_block(
34-
values, placement, klass=None, ndim=None, dtype: Dtype | None = None
33+
values, placement, klass=None, ndim=None, dtype: Optional[Dtype] = None
3534
) -> Block:
3635
"""
3736
This is a pseudo-public analogue to blocks.new_block.
@@ -49,29 +48,24 @@ def make_block(
4948

5049
values, dtype = extract_pandas_array(values, dtype, ndim)
5150

52-
needs_reshape = False
5351
if klass is None:
5452
dtype = dtype or values.dtype
5553
klass = get_block_type(values, dtype)
5654

5755
elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
5856
# pyarrow calls get here
5957
values = DatetimeArray._simple_new(values, dtype=dtype)
60-
needs_reshape = True
6158

6259
if not isinstance(placement, BlockPlacement):
6360
placement = BlockPlacement(placement)
6461

6562
ndim = maybe_infer_ndim(values, placement, ndim)
66-
if needs_reshape:
67-
values = ensure_block_shape(values, ndim)
68-
6963
check_ndim(values, placement, ndim)
7064
values = maybe_coerce_values(values)
7165
return klass(values, ndim=ndim, placement=placement)
7266

7367

74-
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
68+
def maybe_infer_ndim(values, placement: BlockPlacement, ndim: Optional[int]) -> int:
7569
"""
7670
If `ndim` is not provided, infer it from placement and values.
7771
"""

pandas/core/internals/blocks.py

+33-25
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,6 @@
4242
soft_convert_objects,
4343
)
4444
from pandas.core.dtypes.common import (
45-
is_1d_only_ea_dtype,
46-
is_1d_only_ea_obj,
4745
is_categorical_dtype,
4846
is_dtype_equal,
4947
is_extension_array_dtype,
@@ -226,6 +224,7 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
226224
# expected "ndarray")
227225
return self.values # type: ignore[return-value]
228226

227+
@final
229228
def get_block_values_for_json(self) -> np.ndarray:
230229
"""
231230
This is used in the JSON C code.
@@ -416,11 +415,7 @@ def _split_op_result(self, result) -> list[Block]:
416415
# if we get a 2D ExtensionArray, we need to split it into 1D pieces
417416
nbs = []
418417
for i, loc in enumerate(self._mgr_locs):
419-
if not is_1d_only_ea_obj(result):
420-
vals = result[i : i + 1]
421-
else:
422-
vals = result[i]
423-
418+
vals = result[i]
424419
block = self.make_block(values=vals, placement=loc)
425420
nbs.append(block)
426421
return nbs
@@ -1675,7 +1670,7 @@ class NumericBlock(NumpyBlock):
16751670
is_numeric = True
16761671

16771672

1678-
class NDArrayBackedExtensionBlock(libinternals.Block, EABackedBlock):
1673+
class NDArrayBackedExtensionBlock(EABackedBlock):
16791674
"""
16801675
Block backed by an NDArrayBackedExtensionArray
16811676
"""
@@ -1688,6 +1683,11 @@ def is_view(self) -> bool:
16881683
# check the ndarray values of the DatetimeIndex values
16891684
return self.values._ndarray.base is not None
16901685

1686+
def iget(self, key):
1687+
# GH#31649 we need to wrap scalars in Timestamp/Timedelta
1688+
# TODO(EA2D): this can be removed if we ever have 2D EA
1689+
return self.values.reshape(self.shape)[key]
1690+
16911691
def setitem(self, indexer, value):
16921692
if not self._can_hold_element(value):
16931693
# TODO: general case needs casting logic.
@@ -1707,21 +1707,24 @@ def putmask(self, mask, new) -> list[Block]:
17071707
if not self._can_hold_element(new):
17081708
return self.astype(object).putmask(mask, new)
17091709

1710-
arr = self.values
1710+
# TODO(EA2D): reshape unnecessary with 2D EAs
1711+
arr = self.values.reshape(self.shape)
17111712
arr.T.putmask(mask, new)
17121713
return [self]
17131714

17141715
def where(self, other, cond, errors="raise") -> list[Block]:
17151716
# TODO(EA2D): reshape unnecessary with 2D EAs
1716-
arr = self.values
1717+
arr = self.values.reshape(self.shape)
17171718

17181719
cond = extract_bool_array(cond)
17191720

17201721
try:
17211722
res_values = arr.T.where(cond, other).T
17221723
except (ValueError, TypeError):
1723-
return Block.where(self, other, cond, errors=errors)
1724+
return super().where(other, cond, errors=errors)
17241725

1726+
# TODO(EA2D): reshape not needed with 2D EAs
1727+
res_values = res_values.reshape(self.values.shape)
17251728
nb = self.make_block_same_class(res_values)
17261729
return [nb]
17271730

@@ -1745,13 +1748,15 @@ def diff(self, n: int, axis: int = 0) -> list[Block]:
17451748
The arguments here are mimicking shift so they are called correctly
17461749
by apply.
17471750
"""
1748-
values = self.values
1751+
# TODO(EA2D): reshape not necessary with 2D EAs
1752+
values = self.values.reshape(self.shape)
17491753

17501754
new_values = values - values.shift(n, axis=axis)
17511755
return [self.make_block(new_values)]
17521756

17531757
def shift(self, periods: int, axis: int = 0, fill_value: Any = None) -> list[Block]:
1754-
values = self.values
1758+
# TODO(EA2D) this is unnecessary if these blocks are backed by 2D EAs
1759+
values = self.values.reshape(self.shape)
17551760
new_values = values.shift(periods, fill_value=fill_value, axis=axis)
17561761
return [self.make_block_same_class(new_values)]
17571762

@@ -1771,27 +1776,31 @@ def fillna(
17711776
return [self.make_block_same_class(values=new_values)]
17721777

17731778

1774-
class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
1779+
class DatetimeLikeBlock(libinternals.Block, NDArrayBackedExtensionBlock):
17751780
"""Block for datetime64[ns], timedelta64[ns]."""
17761781

17771782
__slots__ = ()
17781783
is_numeric = False
17791784
values: DatetimeArray | TimedeltaArray
17801785

1781-
def get_block_values_for_json(self):
1782-
# Not necessary to override, but helps perf
1783-
return self.values._ndarray
17841786

1785-
1786-
class DatetimeTZBlock(DatetimeLikeBlock):
1787+
class DatetimeTZBlock(ExtensionBlock, NDArrayBackedExtensionBlock):
17871788
""" implement a datetime64 block with a tz attribute """
17881789

17891790
values: DatetimeArray
17901791

17911792
__slots__ = ()
17921793
is_extension = True
1793-
_validate_ndim = True
1794-
_can_consolidate = False
1794+
is_numeric = False
1795+
1796+
diff = NDArrayBackedExtensionBlock.diff
1797+
where = NDArrayBackedExtensionBlock.where
1798+
putmask = NDArrayBackedExtensionBlock.putmask
1799+
fillna = NDArrayBackedExtensionBlock.fillna
1800+
1801+
get_values = NDArrayBackedExtensionBlock.get_values
1802+
1803+
is_view = NDArrayBackedExtensionBlock.is_view
17951804

17961805

17971806
class ObjectBlock(NumpyBlock):
@@ -1958,7 +1967,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
19581967
f"values.ndim > ndim [{values.ndim} > {ndim}]"
19591968
)
19601969

1961-
elif not is_1d_only_ea_dtype(values.dtype):
1970+
elif isinstance(values.dtype, np.dtype):
19621971
# TODO(EA2D): special case not needed with 2D EAs
19631972
if values.ndim != ndim:
19641973
raise ValueError(
@@ -1972,7 +1981,7 @@ def check_ndim(values, placement: BlockPlacement, ndim: int):
19721981
)
19731982
elif ndim == 2 and len(placement) != 1:
19741983
# TODO(EA2D): special case unnecessary with 2D EAs
1975-
raise ValueError("need to split")
1984+
raise AssertionError("block.size != values.size")
19761985

19771986

19781987
def extract_pandas_array(
@@ -2017,9 +2026,8 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike:
20172026
"""
20182027
Reshape if possible to have values.ndim == ndim.
20192028
"""
2020-
20212029
if values.ndim < ndim:
2022-
if not is_1d_only_ea_dtype(values.dtype):
2030+
if not is_extension_array_dtype(values.dtype):
20232031
# TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023
20242032
# block.shape is incorrect for "2D" ExtensionArrays
20252033
# We can't, and don't need to, reshape.

pandas/core/internals/concat.py

+9-18
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from typing import (
66
TYPE_CHECKING,
77
Sequence,
8-
cast,
98
)
109

1110
import numpy as np
@@ -24,8 +23,6 @@
2423
find_common_type,
2524
)
2625
from pandas.core.dtypes.common import (
27-
is_1d_only_ea_dtype,
28-
is_1d_only_ea_obj,
2926
is_datetime64tz_dtype,
3027
is_dtype_equal,
3128
is_extension_array_dtype,
@@ -213,8 +210,8 @@ def concatenate_managers(
213210
values = np.concatenate(vals, axis=blk.ndim - 1)
214211
else:
215212
# TODO(EA2D): special-casing not needed with 2D EAs
216-
values = concat_compat(vals, axis=1)
217-
values = ensure_block_shape(values, blk.ndim)
213+
values = concat_compat(vals)
214+
values = ensure_block_shape(values, ndim=2)
218215

219216
values = ensure_wrapped_if_datetimelike(values)
220217

@@ -415,16 +412,13 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
415412
fill_value = None
416413

417414
if is_datetime64tz_dtype(empty_dtype):
418-
i8values = np.full(self.shape, fill_value.value)
415+
# TODO(EA2D): special case unneeded with 2D EAs
416+
i8values = np.full(self.shape[1], fill_value.value)
419417
return DatetimeArray(i8values, dtype=empty_dtype)
420-
421418
elif is_extension_array_dtype(blk_dtype):
422419
pass
423-
424-
elif is_1d_only_ea_dtype(empty_dtype):
425-
empty_dtype = cast(ExtensionDtype, empty_dtype)
420+
elif isinstance(empty_dtype, ExtensionDtype):
426421
cls = empty_dtype.construct_array_type()
427-
428422
missing_arr = cls._from_sequence([], dtype=empty_dtype)
429423
ncols, nrows = self.shape
430424
assert ncols == 1, ncols
@@ -435,7 +429,6 @@ def get_reindexed_values(self, empty_dtype: DtypeObj, upcasted_na) -> ArrayLike:
435429
else:
436430
# NB: we should never get here with empty_dtype integer or bool;
437431
# if we did, the missing_arr.fill would cast to gibberish
438-
empty_dtype = cast(np.dtype, empty_dtype)
439432

440433
missing_arr = np.empty(self.shape, dtype=empty_dtype)
441434
missing_arr.fill(fill_value)
@@ -500,17 +493,15 @@ def _concatenate_join_units(
500493
concat_values = concat_values.copy()
501494
else:
502495
concat_values = concat_values.copy()
503-
504-
elif any(is_1d_only_ea_obj(t) for t in to_concat):
505-
# TODO(EA2D): special case not needed if all EAs used HybridBlocks
506-
# NB: we are still assuming here that Hybrid blocks have shape (1, N)
496+
elif any(isinstance(t, ExtensionArray) and t.ndim == 1 for t in to_concat):
507497
# concatting with at least one EA means we are concatting a single column
508498
# the non-EA values are 2D arrays with shape (1, n)
509-
510499
# error: Invalid index type "Tuple[int, slice]" for
511500
# "Union[ExtensionArray, ndarray]"; expected type "Union[int, slice, ndarray]"
512501
to_concat = [
513-
t if is_1d_only_ea_obj(t) else t[0, :] # type: ignore[index]
502+
t
503+
if (isinstance(t, ExtensionArray) and t.ndim == 1)
504+
else t[0, :] # type: ignore[index]
514505
for t in to_concat
515506
]
516507
concat_values = concat_compat(to_concat, axis=0, ea_compat_axis=True)

0 commit comments

Comments
 (0)