@@ -350,11 +350,10 @@ def test_apply_attach_name(self):
350
350
351
351
result = self .frame .apply (lambda x : np .repeat (x .name , len (x )),
352
352
axis = 1 )
353
- expected = DataFrame (np .tile (self .frame .index ,
354
- (len (self .frame .columns ), 1 )).T ,
355
- index = self .frame .index ,
356
- columns = self .frame .columns )
357
- assert_frame_equal (result , expected )
353
+ expected = Series (np .repeat (t [0 ], len (self .frame .columns ))
354
+ for t in self .frame .itertuples ())
355
+ expected .index = self .frame .index
356
+ assert_series_equal (result , expected )
358
357
359
358
def test_apply_multi_index (self ):
360
359
s = DataFrame ([[1 , 2 ], [3 , 4 ], [5 , 6 ]])
@@ -367,10 +366,10 @@ def test_apply_dict(self):
367
366
368
367
# GH 8735
369
368
A = DataFrame ([['foo' , 'bar' ], ['spam' , 'eggs' ]])
370
- A_dicts = pd . Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
371
- dict ([(0 , 'bar' ), (1 , 'eggs' )])])
369
+ A_dicts = Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
370
+ dict ([(0 , 'bar' ), (1 , 'eggs' )])])
372
371
B = DataFrame ([[0 , 1 ], [2 , 3 ]])
373
- B_dicts = pd . Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
372
+ B_dicts = Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
374
373
fn = lambda x : x .to_dict ()
375
374
376
375
for df , dicts in [(A , A_dicts ), (B , B_dicts )]:
@@ -472,6 +471,130 @@ def test_apply_non_numpy_dtype(self):
472
471
assert_frame_equal (result , df )
473
472
474
473
474
class TestInferOutputShape(object):
    """Checks of the output shape that ``apply`` infers for opaque UDFs.

    The user has supplied an opaque UDF that transforms its input, so the
    shape of the result has to be inferred from what the UDF returns.
    """

    def test_infer_row_shape(self):
        """The number of result rows follows the length the UDF returns."""
        # gh-17437
        # if row shape is changing, infer it
        df = DataFrame(np.random.rand(10, 2))

        result = df.apply(np.fft.fft, axis=0)
        assert result.shape == (10, 2)

        # rfft of 10 real samples gives 10 // 2 + 1 == 6 values per column
        result = df.apply(np.fft.rfft, axis=0)
        assert result.shape == (6, 2)

    def test_with_dictlike_columns(self):
        """A UDF returning a dict per row/element yields a Series of dicts."""
        # gh-17602

        def row_sum(row):
            return {'s': row['a'] + row['b']}

        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.apply(row_sum, axis=1)
        expected = Series([{'s': 3} for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # An extra datetime column must not change the outcome.  The apply
        # has to be re-run here: asserting on the previous ``result`` would
        # never exercise the widened frame.
        df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
                    pd.Timestamp('2017-05-02 00:00:00')]
        result = df.apply(row_sum, axis=1)
        assert_series_equal(result, expected)

        # compose a series
        result = (df['a'] + df['b']).apply(lambda x: {'s': x})
        expected = Series([{'s': 3}, {'s': 3}])
        assert_series_equal(result, expected)

        # gh-18775
        df = DataFrame()
        df["author"] = ["X", "Y", "Z"]
        df["publisher"] = ["BBC", "NBC", "N24"]
        # the strings are day-first; say so instead of relying on inference
        df["date"] = pd.to_datetime(['17-10-2010 07:15:30',
                                     '13-05-2011 08:20:35',
                                     '15-01-2013 09:09:09'],
                                    dayfirst=True)
        result = df.apply(lambda x: {}, axis=1)
        expected = Series([{}, {}, {}])
        assert_series_equal(result, expected)

    def test_with_listlike_columns(self):
        """Applying ``tuple`` row-wise yields a Series of row tuples."""
        # gh-17348
        # The hourly frequency is given as an offset object so the test does
        # not depend on the spelling of the string alias ('H' vs 'h'), which
        # differs between pandas releases.
        df = DataFrame({'a': Series(np.random.randn(4)),
                        'b': ['a', 'list', 'of', 'words'],
                        'ts': date_range('2016-10-01', periods=4,
                                         freq=pd.offsets.Hour())})

        for columns in (['a', 'b'], ['a', 'ts']):
            subset = df[columns]
            result = subset.apply(tuple, axis=1)
            # t[0] is the index label, the remainder are the row values
            expected = Series([t[1:] for t in subset.itertuples()])
            assert_series_equal(result, expected)

    def test_infer_output_shape_columns(self):
        """A UDF returning a tuple per row yields a Series of tuples."""
        # gh-18573

        df = DataFrame({'number': [1., 2.],
                        'string': ['foo', 'bar'],
                        'datetime': [pd.Timestamp('2017-11-29 03:30:00'),
                                     pd.Timestamp('2017-11-29 03:45:00')]})
        result = df.apply(lambda row: (row.number, row.string), axis=1)
        # Pick the fields by name: positional slicing of the row tuples only
        # matches when the columns of a dict-built frame happen to be sorted
        # alphabetically, which is not guaranteed.
        expected = Series([(t.number, t.string) for t in df.itertuples()])
        assert_series_equal(result, expected)

    def test_infer_output_shape_listlike_columns(self):
        """List-like UDF results are kept as one object per row."""
        # gh-16353

        df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # gh-17970
        df = DataFrame({"a": [1, 2, 3]}, index=list('abc'))

        result = df.apply(lambda row: np.ones(1), axis=1)
        expected = Series([np.ones(1) for _ in df.itertuples()],
                          index=df.index)
        assert_series_equal(result, expected)

        result = df.apply(lambda row: np.ones(2), axis=1)
        expected = Series([np.ones(2) for _ in df.itertuples()],
                          index=df.index)
        assert_series_equal(result, expected)

        # gh-17892
        df = DataFrame({'a': [pd.Timestamp('2010-02-01'),
                              pd.Timestamp('2010-02-04'),
                              pd.Timestamp('2010-02-05'),
                              pd.Timestamp('2010-02-06')],
                        'b': [9, 5, 4, 3],
                        'c': [5, 3, 4, 2],
                        'd': [1, 2, 3, 4]})

        def fun(x):
            return (1, 2)

        result = df.apply(fun, axis=1)
        expected = Series([(1, 2) for _ in df.itertuples()])
        assert_series_equal(result, expected)

    def test_consistent_coerce_for_shapes(self):
        """Result type is the same whether or not the list length matches."""
        # we want column names to NOT be propagated
        # just because the shape matches the input shape
        df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])

        # same length as the number of columns: still a Series of lists
        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # different length: also a Series of lists
        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)
596
+
597
+
475
598
def zip_frames (* frames ):
476
599
"""
477
600
take a list of frames, zip the columns together for each
@@ -649,13 +772,13 @@ def test_non_callable_aggregates(self):
649
772
650
773
# Function aggregate
651
774
result = df .agg ({'A' : 'count' })
652
- expected = pd . Series ({'A' : 2 })
775
+ expected = Series ({'A' : 2 })
653
776
654
777
assert_series_equal (result , expected )
655
778
656
779
# Non-function aggregate
657
780
result = df .agg ({'A' : 'size' })
658
- expected = pd . Series ({'A' : 3 })
781
+ expected = Series ({'A' : 3 })
659
782
660
783
assert_series_equal (result , expected )
661
784
0 commit comments