Skip to content

PERF: column_arrays #43278

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Sep 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,11 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
# expected "ndarray")
return self.values # type: ignore[return-value]

def values_for_json(self) -> np.ndarray:
    """
    Return the values of this block for the JSON serialization path.

    The object stored in ``self.values`` is handed back unchanged; no
    conversion or copy is performed here.
    """
    block_values = self.values
    # mypy reports: Incompatible return value type (got
    # "Union[ndarray[Any, Any], ExtensionArray]", expected
    # "ndarray[Any, Any]"), hence the ignore below.
    return block_values  # type: ignore[return-value]

@final
@cache_readonly
def fill_value(self):
Expand Down Expand Up @@ -1375,6 +1380,9 @@ def get_values(self, dtype: DtypeObj | None = None) -> np.ndarray:
# TODO(EA2D): reshape not needed with 2D EAs
return np.asarray(values).reshape(self.shape)

def values_for_json(self) -> np.ndarray:
    """
    Return this block's values as a numpy array for the JSON
    serialization path.

    ``self.values`` is passed through ``np.asarray``, so the caller
    always receives an ndarray.
    """
    as_ndarray = np.asarray(self.values)
    return as_ndarray

def interpolate(
self, method="pad", axis=0, inplace=False, limit=None, fill_value=None, **kwargs
):
Expand Down Expand Up @@ -1805,6 +1813,11 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
is_numeric = False
values: DatetimeArray | TimedeltaArray

def values_for_json(self) -> np.ndarray:
    """
    Return the ndarray backing ``self.values`` for the JSON
    serialization path.

    Reading ``_ndarray`` directly special-cases datetimetz data so that
    it is not converted through object dtype.
    """
    dt_values = self.values
    backing = dt_values._ndarray
    return backing


class DatetimeTZBlock(DatetimeLikeBlock):
"""implement a datetime64 block with a tz attribute"""
Expand Down
33 changes: 17 additions & 16 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -973,24 +973,25 @@ def column_arrays(self) -> list[np.ndarray]:
"""
Used in the JSON C code to access column arrays.
This optimizes compared to using `iget_values` by converting each
block.values to a np.ndarray only once up front
"""
# special casing datetimetz to avoid conversion through object dtype
arrays = [
blk.values._ndarray
if isinstance(blk, DatetimeTZBlock)
else np.asarray(blk.values)
for blk in self.blocks
]
result = []
for i in range(len(self.items)):
arr = arrays[self.blknos[i]]
if arr.ndim == 2:
values = arr[self.blklocs[i]]
# This is an optimized equivalent to
# result = [self.iget_values(i) for i in range(len(self.items))]
result: list[np.ndarray | None] = [None] * len(self.items)

for blk in self.blocks:
mgr_locs = blk._mgr_locs
values = blk.values_for_json()
if values.ndim == 1:
# TODO(EA2D): special casing not needed with 2D EAs
result[mgr_locs[0]] = values

else:
values = arr
result.append(values)
return result
for i, loc in enumerate(mgr_locs):
result[loc] = values[i]

# error: Incompatible return value type (got "List[None]",
# expected "List[ndarray[Any, Any]]")
return result # type: ignore[return-value]

def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
"""
Expand Down