@@ -201,6 +201,47 @@ def __repr__(self) -> str:
201
201
output += f"\n { arr .dtype } "
202
202
return output
203
203
204
def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Run a grouped reduction on each column and collect the results in a
    new manager of the same type.

    Parameters
    ----------
    func : grouped reduction function
        Called once per stored array, in column order.
    ignore_failures : bool, default False
        If True, columns for which ``func`` raises TypeError or
        NotImplementedError are left out of the result. If False, the
        exception propagates.

    Returns
    -------
    ArrayManager
    """
    reduced: list[np.ndarray] = []
    kept_positions: list[int] = []

    for position, column in enumerate(self.arrays):
        try:
            outcome = func(column)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                continue
            raise
        else:
            reduced.append(outcome)
            kept_positions.append(position)

    # The row axis is a positional range sized after the first reduced
    # column; a single-None placeholder is used when nothing was reduced.
    index = Index(range(reduced[0].shape[0])) if reduced else Index([None])

    columns = self.items
    if ignore_failures:
        # keep only the labels of the columns that were actually reduced
        columns = columns[np.array(kept_positions, dtype="int64")]

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]
244
+
204
245
def apply (
205
246
self : T ,
206
247
f ,
@@ -281,6 +322,25 @@ def apply(
281
322
# expected "List[Union[ndarray, ExtensionArray]]"
282
323
return type (self )(result_arrays , new_axes ) # type: ignore[arg-type]
283
324
325
def apply_2d(self: T, f, ignore_failures: bool = False, **kwargs) -> T:
    """
    Apply ``f`` once to the full data as a single 2D array, instead of
    column by column as `apply` does.

    Parameters
    ----------
    f : callable
        Called as ``f(values, **kwargs)`` where ``values`` comes from
        ``self.as_array()``. Its result is sliced as ``result[:, i]``.
    ignore_failures : bool, default False
        If True and ``f`` raises TypeError or NotImplementedError, a
        manager holding no arrays is returned instead of raising.
    **kwargs
        Forwarded to ``f``.
    """
    block = self.as_array()
    try:
        transformed = f(block, **kwargs)
    except (TypeError, NotImplementedError):
        if ignore_failures:
            # NOTE(review): this reads ``self.axes`` while the rest of the
            # method reads ``self._axes`` -- confirm both use the same order.
            empty_axes = [self._axes[0], self.axes[1].take([])]
            return type(self)([], empty_axes)
        raise

    # split the 2D result back into one array per column
    n_columns = len(self._axes[1])
    column_arrays = [transformed[:, pos] for pos in range(n_columns)]
    return type(self)(column_arrays, self._axes)
343
+
284
344
def apply_with_block (self : T , f , align_keys = None , swap_axis = True , ** kwargs ) -> T :
285
345
# switch axis to follow BlockManager logic
286
346
if swap_axis and "axis" in kwargs and self .ndim == 2 :
@@ -546,6 +606,67 @@ def copy_func(ax):
546
606
new_arrays = self .arrays
547
607
return type (self )(new_arrays , new_axes )
548
608
609
def as_array(
    self,
    transpose: bool = False,
    dtype=None,
    copy: bool = False,
    na_value=lib.no_default,
) -> np.ndarray:
    """
    Convert the manager's column arrays into a single 2D numpy array.

    Parameters
    ----------
    transpose : bool, default False
        Only consulted when the manager holds no arrays, in which case
        the empty result is transposed on request. Otherwise the result
        is returned as allocated, with shape ``self.shape_proper``.
    dtype : object, default None
        Data type of the return array. If None, it is derived from the
        column dtypes with ``interleaved_dtype``.
    copy : bool, default False
        Passed to each column's ``astype``. Forced to True when
        ``na_value`` is provided.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
    """
    if len(self.arrays) == 0:
        arr = np.empty(self.shape, dtype=float)
        return arr.transpose() if transpose else arr

    # We want to copy when na_value is provided to avoid
    # mutating the original object
    copy = copy or na_value is not lib.no_default

    # Test for None explicitly rather than truthiness: a numpy dtype without
    # fields has len() == 0 and some NumPy releases evaluate it as falsy, so
    # ``not dtype`` could discard a dtype the caller passed in.
    if dtype is None:
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

    # Reduce the requested dtype to one that np.empty can allocate.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype):
        dtype = "object"
    elif is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)

    # error: Incompatible types in assignment (expression has type "Union[ndarray,
    # ExtensionArray]", variable has type "ndarray")
    for i, arr in enumerate(self.arrays):  # type: ignore[assignment]
        arr = arr.astype(dtype, copy=copy)
        result[:, i] = arr

    if na_value is not lib.no_default:
        result[isna(result)] = na_value

    return result
669
+
549
670
def reindex_indexer (
550
671
self : T ,
551
672
new_axis ,
@@ -798,13 +919,6 @@ def iget_values(self, i: int) -> ArrayLike:
798
919
"""
799
920
return self .arrays [i ]
800
921
801
@property
def column_arrays(self) -> list[ArrayLike]:
    """
    Expose the per-column arrays; used in the JSON C code to access them.
    """
    columns = self.arrays
    return columns
807
-
808
922
def iset (self , loc : int | slice | np .ndarray , value : ArrayLike ):
809
923
"""
810
924
Set new column(s).
@@ -914,55 +1028,6 @@ def idelete(self, indexer):
914
1028
# --------------------------------------------------------------------
915
1029
# Array-wise Operation
916
1030
917
def grouped_reduce(self: T, func: Callable, ignore_failures: bool = False) -> T:
    """
    Run a grouped reduction on each column and collect the results in a
    new manager of the same type.

    Parameters
    ----------
    func : grouped reduction function
        Called once per stored array, after the array has been passed
        through ``ensure_block_shape(arr, ndim=2)``.
    ignore_failures : bool, default False
        If True, columns for which ``func`` raises TypeError or
        NotImplementedError are left out of the result. If False, the
        exception propagates.

    Returns
    -------
    ArrayManager
    """
    reduced: list[np.ndarray] = []
    kept_positions: list[int] = []

    for position, column in enumerate(self.arrays):
        # grouped reduction functions all expect 2D input
        column_2d = ensure_block_shape(column, ndim=2)
        try:
            outcome = func(column_2d)
        except (TypeError, NotImplementedError):
            if ignore_failures:
                continue
            raise

        if outcome.ndim == 2:
            # reverse of ensure_block_shape: a single row is expected
            assert outcome.shape[0] == 1
            outcome = outcome[0]

        reduced.append(outcome)
        kept_positions.append(position)

    # The row axis is a positional range sized after the first reduced
    # column; a single-None placeholder is used when nothing was reduced.
    index = Index(range(reduced[0].shape[0])) if reduced else Index([None])

    columns = self.items
    if ignore_failures:
        # keep only the labels of the columns that were actually reduced
        columns = columns[np.array(kept_positions, dtype="int64")]

    # error: Argument 1 to "ArrayManager" has incompatible type "List[ndarray]";
    # expected "List[Union[ndarray, ExtensionArray]]"
    return type(self)(reduced, [index, columns])  # type: ignore[arg-type]
965
-
966
1031
def reduce (
967
1032
self : T , func : Callable , ignore_failures : bool = False
968
1033
) -> tuple [T , np .ndarray ]:
@@ -1050,27 +1115,6 @@ def quantile(
1050
1115
axes = [qs , self ._axes [1 ]]
1051
1116
return type (self )(new_arrs , axes )
1052
1117
1053
def apply_2d(
    self: ArrayManager, f, ignore_failures: bool = False, **kwargs
) -> ArrayManager:
    """
    Apply ``f`` once to the full data as a single 2D array, instead of
    column by column as `apply` does.

    Parameters
    ----------
    f : callable
        Called as ``f(values, **kwargs)`` where ``values`` comes from
        ``self.as_array()``. Its result is sliced as ``result[:, i]``.
    ignore_failures : bool, default False
        If True and ``f`` raises TypeError or NotImplementedError, a
        manager holding no arrays is returned instead of raising.
    **kwargs
        Forwarded to ``f``.
    """
    data_2d = self.as_array()
    try:
        outcome = f(data_2d, **kwargs)
    except (TypeError, NotImplementedError):
        if not ignore_failures:
            raise
        # NOTE(review): this reads ``self.axes`` while the rest of the
        # method reads ``self._axes`` -- confirm both use the same order.
        return type(self)([], [self._axes[0], self.axes[1].take([])])

    # split the 2D result back into one array per column
    per_column = []
    for pos in range(len(self._axes[1])):
        per_column.append(outcome[:, pos])
    return type(self)(per_column, self._axes)
1073
-
1074
1118
# ----------------------------------------------------------------
1075
1119
1076
1120
def unstack (self , unstacker , fill_value ) -> ArrayManager :
@@ -1115,67 +1159,6 @@ def unstack(self, unstacker, fill_value) -> ArrayManager:
1115
1159
1116
1160
return type (self )(new_arrays , new_axes , verify_integrity = False )
1117
1161
1118
def as_array(
    self,
    transpose: bool = False,
    dtype=None,
    copy: bool = False,
    na_value=lib.no_default,
) -> np.ndarray:
    """
    Convert the manager's column arrays into a single 2D numpy array.

    Parameters
    ----------
    transpose : bool, default False
        Only consulted when the manager holds no arrays, in which case
        the empty result is transposed on request. Otherwise the result
        is returned as allocated, with shape ``self.shape_proper``.
    dtype : object, default None
        Data type of the return array. If None, it is derived from the
        column dtypes with ``interleaved_dtype``.
    copy : bool, default False
        Passed to each column's ``astype``. Forced to True when
        ``na_value`` is provided.
    na_value : object, default lib.no_default
        Value to be used as the missing value sentinel.

    Returns
    -------
    arr : ndarray
    """
    if len(self.arrays) == 0:
        arr = np.empty(self.shape, dtype=float)
        return arr.transpose() if transpose else arr

    # We want to copy when na_value is provided to avoid
    # mutating the original object
    copy = copy or na_value is not lib.no_default

    # Test for None explicitly rather than truthiness: a numpy dtype without
    # fields has len() == 0 and some NumPy releases evaluate it as falsy, so
    # ``not dtype`` could discard a dtype the caller passed in.
    if dtype is None:
        dtype = interleaved_dtype([arr.dtype for arr in self.arrays])

    # Reduce the requested dtype to one that np.empty can allocate.
    if isinstance(dtype, SparseDtype):
        dtype = dtype.subtype
    elif isinstance(dtype, PandasDtype):
        dtype = dtype.numpy_dtype
    elif is_extension_array_dtype(dtype):
        dtype = "object"
    elif is_dtype_equal(dtype, str):
        dtype = "object"

    result = np.empty(self.shape_proper, dtype=dtype)

    # error: Incompatible types in assignment (expression has type "Union[ndarray,
    # ExtensionArray]", variable has type "ndarray")
    for i, arr in enumerate(self.arrays):  # type: ignore[assignment]
        arr = arr.astype(dtype, copy=copy)
        result[:, i] = arr

    if na_value is not lib.no_default:
        result[isna(result)] = na_value

    return result
1178
-
1179
1162
1180
1163
class SingleArrayManager (BaseArrayManager , SingleDataManager ):
1181
1164
0 commit comments