@@ -4,16 +4,13 @@
 
 import numpy as np
 
+from pandas._config import option_context
+
 from pandas._libs import reduction as libreduction
 from pandas._typing import Axis
 from pandas.util._decorators import cache_readonly
 
-from pandas.core.dtypes.common import (
-    is_dict_like,
-    is_extension_array_dtype,
-    is_list_like,
-    is_sequence,
-)
+from pandas.core.dtypes.common import is_dict_like, is_list_like, is_sequence
 from pandas.core.dtypes.generic import ABCSeries
 
 from pandas.core.construction import create_series_with_explicit_dtype
@@ -260,53 +257,6 @@ def apply_standard(self):
         # partial result that may be returned from reduction
         partial_result = None
 
-        # try to reduce first (by default)
-        # this only matters if the reduction in values is of different dtype
-        # e.g. if we want to apply to a SparseFrame, then can't directly reduce
-
-        # we cannot reduce using non-numpy dtypes,
-        # as demonstrated in gh-12244
-        if (
-            self.result_type in ["reduce", None]
-            and not self.dtypes.apply(is_extension_array_dtype).any()
-            # Disallow dtypes where setting _index_data will break
-            # ExtensionArray values, see GH#31182
-            and not self.dtypes.apply(lambda x: x.kind in ["m", "M"]).any()
-            # Disallow complex_internals since libreduction shortcut raises a TypeError
-            and not self.agg_axis._has_complex_internals
-        ):
-
-            values = self.values
-            index = self.obj._get_axis(self.axis)
-            labels = self.agg_axis
-            empty_arr = np.empty(len(index), dtype=values.dtype)
-
-            # Preserve subclass for e.g. test_subclassed_apply
-            dummy = self.obj._constructor_sliced(
-                empty_arr, index=index, dtype=values.dtype
-            )
-
-            try:
-                result, reduction_success = libreduction.compute_reduction(
-                    values, self.f, axis=self.axis, dummy=dummy, labels=labels
-                )
-            except TypeError:
-                # e.g. test_apply_ignore_failures we just ignore
-                if not self.ignore_failures:
-                    raise
-            except ZeroDivisionError:
-                # reached via numexpr; fall back to python implementation
-                pass
-            else:
-                if reduction_success:
-                    return self.obj._constructor_sliced(result, index=labels)
-
-                # no exceptions - however reduction was unsuccessful,
-                # use the computed function result for first element
-                partial_result = result[0]
-                if isinstance(partial_result, ABCSeries):
-                    partial_result = partial_result.infer_objects()
-
         # compute the result using the series generator,
         # use the result computed while trying to reduce if available.
         results, res_index = self.apply_series_generator(partial_result)
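
Illustration (not part of the patch): with the compute_reduction fast path removed above, a row-wise reduction like the one below always runs through apply_series_generator; the user-visible result is unchanged. The frame and the lambda are invented for the example.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4.0, 5.0, 6.0]})

    # Row-wise reduction; previously this could be short-circuited by
    # libreduction.compute_reduction, now it always uses the series generator.
    row_sums = df.apply(lambda row: row.sum(), axis=1)
    # Expected: a Series aligned with df.index containing 5.0, 7.0, 9.0
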
@@ -344,7 +294,14 @@ def apply_series_generator(self, partial_result=None) -> Tuple[ResType, "Index"]
         else:
             for i, v in series_gen_enumeration:
 
-                results[i] = self.f(v)
+                with option_context("mode.chained_assignment", None):
+                    # ignore SettingWithCopy here in case the user mutates
+                    results[i] = self.f(v)
+
+                if isinstance(results[i], ABCSeries):
+                    # If we have a view on v, we need to make a copy because
+                    #  series_generator will swap out the underlying data
+                    results[i] = results[i].copy(deep=False)
 
         return results, res_index
 
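
Illustration (not part of the patch): a sketch of the case the copy(deep=False) above guards against. Because series_generator now reuses a single Series and swaps its underlying data (see the series_generator hunk further down), a function that returns the row itself would otherwise leave every result aliasing the last row's values. The frame below is invented for the example.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    # The identity function hands back the very Series object the generator
    # reuses; the shallow copy above keeps each result pointing at its own data.
    result = df.apply(lambda row: row, axis=1)
    # Expected: a DataFrame equal to df, not two copies of the last row.
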
@@ -355,7 +312,6 @@ def wrap_results(
 
         # see if we can infer the results
         if len(results) > 0 and 0 in results and is_sequence(results[0]):
-
             return self.wrap_results_for_axis(results, res_index)
 
         # dict of scalars
@@ -395,9 +351,30 @@ def result_columns(self) -> "Index":
 
     def wrap_results_for_axis(
         self, results: ResType, res_index: "Index"
-    ) -> "DataFrame":
+    ) -> Union["Series", "DataFrame"]:
         """ return the results for the rows """
-        result = self.obj._constructor(data=results)
+
+        if self.result_type == "reduce":
+            # e.g. test_apply_dict GH#8735
+            return self.obj._constructor_sliced(results)
+        elif self.result_type is None and all(
+            isinstance(x, dict) for x in results.values()
+        ):
+            # Our operation was a to_dict op e.g.
+            # test_apply_dict GH#8735, test_apply_reduce_rows_to_dict GH#25196
+            return self.obj._constructor_sliced(results)
+
+        try:
+            result = self.obj._constructor(data=results)
+        except ValueError as err:
+            if "arrays must all be same length" in str(err):
+                # e.g. result = [[2, 3], [1.5], ['foo', 'bar']]
+                # see test_agg_listlike_result GH#29587
+                res = self.obj._constructor_sliced(results)
+                res.index = res_index
+                return res
+            else:
+                raise
 
         if not isinstance(results[0], ABCSeries):
             if len(result.index) == len(self.res_columns):
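
Illustration (not part of the patch): the new branches above send dict results and result_type="reduce" through _constructor_sliced, so they come back as a Series (GH#8735, GH#25196 per the comments in the hunk). A rough usage sketch with an invented frame:

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

    # Every result is a dict, so the `elif` branch above returns a Series of dicts.
    dicts = df.apply(lambda col: col.to_dict())

    # result_type="reduce" takes the first branch and also yields a Series.
    reduced = df.apply(lambda col: col.to_dict(), result_type="reduce")
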
@@ -418,11 +395,19 @@ def apply_broadcast(self, target: "DataFrame") -> "DataFrame":
 
     @property
     def series_generator(self):
-        constructor = self.obj._constructor_sliced
-        return (
-            constructor(arr, index=self.columns, name=name)
-            for i, (arr, name) in enumerate(zip(self.values, self.index))
-        )
+        values = self.values
+        assert len(values) > 0
+
+        # We create one Series object, and will swap out the data inside
+        # of it.  Kids: don't do this at home.
+        ser = self.obj._ixs(0, axis=0)
+        mgr = ser._mgr
+        blk = mgr.blocks[0]
+
+        for (arr, name) in zip(values, self.index):
+            blk.values = arr
+            ser.name = name
+            yield ser
 
     @property
     def result_index(self) -> "Index":
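
Illustration (not part of the patch): a simplified sketch of the reuse pattern using only public API. The real generator above goes further and swaps the Block's values in place rather than assigning through the Series; the frame and names below are invented for the example.

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    # Allocate one row Series up front and overwrite its data on every
    # iteration, instead of constructing a new Series per row.
    ser = df.iloc[0].copy()
    for arr, name in zip(df.values, df.index):
        ser[:] = arr
        ser.name = name
        # ... the applied function would be called with `ser` here ...
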
@@ -444,9 +429,7 @@ def wrap_results_for_axis(
 
         # we have a non-series and don't want inference
         elif not isinstance(results[0], ABCSeries):
-            from pandas import Series
-
-            result = Series(results)
+            result = self.obj._constructor_sliced(results)
             result.index = res_index
 
         # we may want to infer results
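
Illustration (not part of the patch): switching from a bare Series to self.obj._constructor_sliced should preserve DataFrame subclasses on the non-Series, list-like result path above. A hedged sketch with invented subclass names:

    import pandas as pd

    class SubSeries(pd.Series):
        @property
        def _constructor(self):
            return SubSeries

    class SubFrame(pd.DataFrame):
        @property
        def _constructor(self):
            return SubFrame

        @property
        def _constructor_sliced(self):
            return SubSeries

    sdf = SubFrame({"a": [1, 2], "b": [3, 4]})

    # List-like (non-Series) row results hit the elif branch above; with
    # _constructor_sliced the output should now be a SubSeries of lists
    # rather than a plain pandas Series.
    out = sdf.apply(lambda row: [row["a"], row["b"]], axis=1)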