Skip to content

Commit 8519d26

Browse files
authored
PERF: column_arrays (#43278)
1 parent 2238dae commit 8519d26

File tree

2 files changed

+30
-16
lines changed

2 files changed

+30
-16
lines changed

pandas/core/internals/blocks.py

+13
Original file line number | Diff line number | Diff line change
@@ -228,6 +228,11 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
228228
# expected "ndarray")
229229
return self.values # type: ignore[return-value]
230230

231+
def values_for_json(self) -> np.ndarray:
232+
# Used by column_arrays (JSON C code). mypy reports: Incompatible return
233+
# value (got "Union[ndarray[Any, Any], ExtensionArray]", expected "ndarray[Any, Any]")
234+
return self.values # type: ignore[return-value]
235+
231236
@final
232237
@cache_readonly
233238
def fill_value(self):
@@ -1375,6 +1380,9 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
13751380
# TODO(EA2D): reshape not needed with 2D EAs
13761381
return np.asarray(values).reshape(self.shape)
13771382

1383+
def values_for_json(self) -> np.ndarray:
1384+
return np.asarray(self.values)  # coerce self.values to an ndarray for column_arrays
1385+
13781386
def interpolate(
13791387
self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
13801388
):
@@ -1805,6 +1813,11 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
18051813
is_numeric = False
18061814
values: DatetimeArray | TimedeltaArray
18071815

1816+
def values_for_json(self) -> np.ndarray:
1817+
# Return the backing _ndarray directly (subclasses include DatetimeTZBlock);
1818+
# this avoids the conversion through object dtype that datetimetz would otherwise hit
1819+
return self.values._ndarray
1820+
18081821

18091822
class DatetimeTZBlock(DatetimeLikeBlock):
18101823
"""implement a datetime64 block with a tz attribute"""

pandas/core/internals/managers.py

+17-16
Original file line number | Diff line number | Diff line change
@@ -998,24 +998,25 @@ def column_arrays(self) -> list[np.ndarray]:
998998
"""
999999
Used in the JSON C code to access column arrays.
10001000
This optimizes compared to using `iget_values` by converting each
1001-
block.values to a np.ndarray only once up front
10021001
"""
1003-
# special casing datetimetz to avoid conversion through object dtype
1004-
arrays = [
1005-
blk.values._ndarray
1006-
if isinstance(blk, DatetimeTZBlock)
1007-
else np.asarray(blk.values)
1008-
for blk in self.blocks
1009-
]
1010-
result = []
1011-
for i in range(len(self.items)):
1012-
arr = arrays[self.blknos[i]]
1013-
if arr.ndim == 2:
1014-
values = arr[self.blklocs[i]]
1002+
# This is an optimized equivalent to
1003+
# result = [self.iget_values(i) for i in range(len(self.items))]
1004+
result: list[np.ndarray | None] = [None] * len(self.items)
1005+
1006+
for blk in self.blocks:
1007+
mgr_locs = blk._mgr_locs
1008+
values = blk.values_for_json()
1009+
if values.ndim == 1:
1010+
# TODO(EA2D): special casing not needed with 2D EAs
1011+
result[mgr_locs[0]] = values
1012+
10151013
else:
1016-
values = arr
1017-
result.append(values)
1018-
return result
1014+
for i, loc in enumerate(mgr_locs):
1015+
result[loc] = values[i]
1016+
1017+
# error: Incompatible return value type (got "List[None]",
1018+
# expected "List[ndarray[Any, Any]]")
1019+
return result # type: ignore[return-value]
10191020

10201021
def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
10211022
"""

0 commit comments

Comments
 (0)