Skip to content

Commit de77847

Browse files
jbrockmendel authored and yeshsurya committed
REF: separate 2D only methods from ArrayManager (pandas-dev#40863)
1 parent 8d9b4fe commit de77847

File tree

1 file changed

+121
-138
lines changed

1 file changed

+121
-138
lines changed

pandas/core/internals/array_manager.py

+121-138
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,47 @@ def __repr__(self) -> str:
201201
output += f"\n{arr.dtype}"
202202
return output
203203

204+
def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Run a grouped reduction on each stored array and collect the results
    in a new manager of the same type as ``self``.

    Parameters
    ----------
    func : callable
        Grouped reduction function; called once per array in ``self.arrays``.
    ignore_failures : bool, default False
        If True, columns for which ``func`` raises TypeError or
        NotImplementedError are left out of the result. If False, the
        exception propagates to the caller.

    Returns
    -------
    ArrayManager
    """
    kept_positions: list[int] = []
    reduced: list[np.ndarray] = []

    for pos, values in enumerate(self.arrays):
        try:
            out = func(values)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                # skip this column, keep processing the remaining ones
                continue
            raise
        reduced.append(out)
        kept_positions.append(pos)

    if reduced:
        # row labels are positional, sized from the first reduced column
        index = Index(range(reduced[0].shape[0]))
    else:
        index = Index([None])  # placeholder

    # Only subset the column labels when failures may have been dropped.
    columns = (
        self.items[np.array(kept_positions, dtype="int64")]
        if ignore_failures
        else self.items
    )

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]
244+
204245
def apply(
205246
self: T,
206247
f,
@@ -281,6 +322,25 @@ def apply(
281322
# expected "List[Union[ndarray, ExtensionArray]]"
282323
return type(self)(result_arrays, new_axes) # type: ignore[arg-type]
283324

325+
def apply_2d(self: T, f, ignore_failures: bool = False, **kwargs) -> T:
    """
    Variant of `apply` that calls ``f`` once on the full data as a single
    2D array (obtained from ``self.as_array()``) instead of column by column.

    If ``f`` raises TypeError or NotImplementedError, the exception is
    re-raised unless ``ignore_failures`` is True, in which case a manager
    holding no arrays is returned.
    """
    full_values = self.as_array()
    try:
        out = f(full_values, **kwargs)
    except (TypeError, NotImplementedError):
        if ignore_failures:
            split_arrays = []
            # NOTE(review): this mixes ``self._axes`` and ``self.axes``;
            # confirm both use the same axis order.
            axes_out = [self._axes[0], self.axes[1].take([])]
        else:
            raise
    else:
        # slice the 2D result back into one array per column
        n_columns = len(self._axes[1])
        split_arrays = [out[:, col] for col in range(n_columns)]
        axes_out = self._axes

    return type(self)(split_arrays, axes_out)
343+
284344
def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T:
285345
# switch axis to follow BlockManager logic
286346
if swap_axis and "axis" in kwargs and self.ndim == 2:
@@ -546,6 +606,67 @@ def copy_func(ax):
546606
new_arrays = self.arrays
547607
return type(self)(new_arrays, new_axes)
548608

609+
def as_array(
    self,
    transpose: bool = False,
    dtype=None,
    copy: bool = False,
    na_value=lib.no_default,
) -> np.ndarray:
    """
    Convert the manager's arrays into a single numpy array.

    Parameters
    ----------
    transpose : bool, default False
        If True, transpose the returned array. In this implementation the
        flag is only consulted when the manager holds no arrays.
    dtype : object, default None
        Data type of the returned array. When falsy, it is derived from
        the dtypes of the stored arrays via ``interleaved_dtype``.
    copy : bool, default False
        Forwarded to each array's ``astype`` call. Forced on whenever
        ``na_value`` is supplied.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
    """
    if len(self.arrays) == 0:
        empty = np.empty(self.shape, dtype=float)
        if transpose:
            return empty.transpose()
        return empty

    # Copy when a fill value is supplied so the original object is not
    # mutated.
    fill_requested = na_value is not lib.no_default
    copy = copy or fill_requested

    if not dtype:
        dtype = interleaved_dtype([values.dtype for values in self.arrays])

    # Reduce the target dtype to something a numpy array can hold.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype) or is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)

    # Each stored array fills one column of the output.
    for col, values in enumerate(self.arrays):
        result[:, col] = values.astype(dtype, copy=copy)

    if fill_requested:
        result[isna(result)] = na_value

    # NOTE(review): ``transpose`` is not applied on this path; the result
    # always has shape ``self.shape_proper``.
    return result
669+
549670
def reindex_indexer(
550671
self: T,
551672
new_axis,
@@ -798,13 +919,6 @@ def iget_values(self, i: int) -> ArrayLike:
798919
"""
799920
return self.arrays[i]
800921

801-
@property
def column_arrays(self) -> list[ArrayLike]:
    """Return ``self.arrays``; used in the JSON C code to access column arrays."""
    return self.arrays
807-
808922
def iset(self, loc: int | slice | np.ndarray, value: ArrayLike):
809923
"""
810924
Set new column(s).
@@ -914,55 +1028,6 @@ def idelete(self, indexer):
9141028
# --------------------------------------------------------------------
9151029
# Array-wise Operation
9161030

917-
def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Run a grouped reduction on each stored array and collect the results
    in a new manager of the same type as ``self``.

    Parameters
    ----------
    func : callable
        Grouped reduction function; called once per array in
        ``self.arrays``, after that array has been passed through
        ``ensure_block_shape(..., ndim=2)``.
    ignore_failures : bool, default False
        If True, columns for which ``func`` raises TypeError or
        NotImplementedError are left out of the result. If False, the
        exception propagates to the caller.

    Returns
    -------
    ArrayManager
    """
    kept_positions: list[int] = []
    reduced: list[np.ndarray] = []

    for pos, values in enumerate(self.arrays):
        # grouped_reduce functions all expect 2D arrays
        values_2d = ensure_block_shape(values, ndim=2)
        try:
            out = func(values_2d)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                # skip this column, keep processing the remaining ones
                continue
            raise

        if out.ndim == 2:
            # reverse of ensure_block_shape: exactly one row is expected
            assert out.shape[0] == 1
            out = out[0]

        reduced.append(out)
        kept_positions.append(pos)

    if reduced:
        # row labels are positional, sized from the first reduced column
        index = Index(range(reduced[0].shape[0]))
    else:
        index = Index([None])  # placeholder

    # Only subset the column labels when failures may have been dropped.
    columns = (
        self.items[np.array(kept_positions, dtype="int64")]
        if ignore_failures
        else self.items
    )

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]
965-
9661031
def reduce(
9671032
self: T, func: Callable, ignore_failures: bool = False
9681033
) -> tuple[T, np.ndarray]:
@@ -1050,27 +1115,6 @@ def quantile(
10501115
axes = [qs, self._axes[1]]
10511116
return type(self)(new_arrs, axes)
10521117

1053-
def apply_2d(
    self: ArrayManager, f, ignore_failures: bool = False, **kwargs
) -> ArrayManager:
    """
    Variant of `apply` that calls ``f`` once on the full data as a single
    2D array (obtained from ``self.as_array()``) instead of column by column.

    If ``f`` raises TypeError or NotImplementedError, the exception is
    re-raised unless ``ignore_failures`` is True, in which case a manager
    holding no arrays is returned.
    """
    full_values = self.as_array()
    try:
        out = f(full_values, **kwargs)
    except (TypeError, NotImplementedError):
        if ignore_failures:
            split_arrays = []
            # NOTE(review): this mixes ``self._axes`` and ``self.axes``;
            # confirm both use the same axis order.
            axes_out = [self._axes[0], self.axes[1].take([])]
        else:
            raise
    else:
        # slice the 2D result back into one array per column
        n_columns = len(self._axes[1])
        split_arrays = [out[:, col] for col in range(n_columns)]
        axes_out = self._axes

    return type(self)(split_arrays, axes_out)
1073-
10741118
# ----------------------------------------------------------------
10751119

10761120
def unstack(self, unstacker, fill_value) -> ArrayManager:
@@ -1115,67 +1159,6 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
11151159

11161160
return type(self)(new_arrays, new_axes, verify_integrity=False)
11171161

1118-
def as_array(
    self,
    transpose: bool = False,
    dtype=None,
    copy: bool = False,
    na_value=lib.no_default,
) -> np.ndarray:
    """
    Convert the manager's arrays into a single numpy array.

    Parameters
    ----------
    transpose : bool, default False
        If True, transpose the returned array. In this implementation the
        flag is only consulted when the manager holds no arrays.
    dtype : object, default None
        Data type of the returned array. When falsy, it is derived from
        the dtypes of the stored arrays via ``interleaved_dtype``.
    copy : bool, default False
        Forwarded to each array's ``astype`` call. Forced on whenever
        ``na_value`` is supplied.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
    """
    if len(self.arrays) == 0:
        empty = np.empty(self.shape, dtype=float)
        if transpose:
            return empty.transpose()
        return empty

    # Copy when a fill value is supplied so the original object is not
    # mutated.
    fill_requested = na_value is not lib.no_default
    copy = copy or fill_requested

    if not dtype:
        dtype = interleaved_dtype([values.dtype for values in self.arrays])

    # Reduce the target dtype to something a numpy array can hold.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype) or is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)

    # Each stored array fills one column of the output.
    for col, values in enumerate(self.arrays):
        result[:, col] = values.astype(dtype, copy=copy)

    if fill_requested:
        result[isna(result)] = na_value

    # NOTE(review): ``transpose`` is not applied on this path; the result
    # always has shape ``self.shape_proper``.
    return result
1178-
11791162

11801163
class SingleArrayManager(BaseArrayManager, SingleDataManager):
11811164

0 commit comments

Comments
 (0)