@@ -350,11 +350,10 @@ def test_apply_attach_name(self):
350
350
351
351
result = self .frame .apply (lambda x : np .repeat (x .name , len (x )),
352
352
axis = 1 )
353
- expected = DataFrame (np .tile (self .frame .index ,
354
- (len (self .frame .columns ), 1 )).T ,
355
- index = self .frame .index ,
356
- columns = self .frame .columns )
357
- assert_frame_equal (result , expected )
353
+ expected = Series (np .repeat (t [0 ], len (self .frame .columns ))
354
+ for t in self .frame .itertuples ())
355
+ expected .index = self .frame .index
356
+ assert_series_equal (result , expected )
358
357
359
358
def test_apply_multi_index (self ):
360
359
s = DataFrame ([[1 , 2 ], [3 , 4 ], [5 , 6 ]])
@@ -367,10 +366,10 @@ def test_apply_dict(self):
367
366
368
367
# GH 8735
369
368
A = DataFrame ([['foo' , 'bar' ], ['spam' , 'eggs' ]])
370
- A_dicts = pd . Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
371
- dict ([(0 , 'bar' ), (1 , 'eggs' )])])
369
+ A_dicts = Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
370
+ dict ([(0 , 'bar' ), (1 , 'eggs' )])])
372
371
B = DataFrame ([[0 , 1 ], [2 , 3 ]])
373
- B_dicts = pd . Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
372
+ B_dicts = Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
374
373
fn = lambda x : x .to_dict ()
375
374
376
375
for df , dicts in [(A , A_dicts ), (B , B_dicts )]:
@@ -472,6 +471,141 @@ def test_apply_non_numpy_dtype(self):
472
471
assert_frame_equal (result , df )
473
472
474
473
474
class TestInferOutputShape(object):
    """
    Output-shape inference for ``apply`` with opaque user functions.

    The user has supplied a UDF that transforms its input, so the shape
    of the result has to be inferred from what the function returns.
    """

    def test_infer_row_shape(self):
        """Row count of the result is inferred from the function output."""
        # gh-17437
        # if row shape is changing, infer it
        df = DataFrame(np.random.rand(10, 2))

        result = df.apply(np.fft.fft, axis=0)
        assert result.shape == (10, 2)

        # rfft of 10 real samples returns 10 // 2 + 1 == 6 values
        result = df.apply(np.fft.rfft, axis=0)
        assert result.shape == (6, 2)

    def test_with_dictlike_columns(self):
        """Dicts returned by the function stay as elements of a Series."""
        # gh 17602
        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1)
        expected = Series([{'s': 3} for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # Adding a datetime column makes the frame mixed-dtype.  Apply
        # again so the assertion exercises the new frame rather than
        # re-checking the result computed before the column was added.
        df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
                    pd.Timestamp('2017-05-02 00:00:00')]
        result = df.apply(lambda x: {'s': x['a'] + x['b']}, axis=1)
        assert_series_equal(result, expected)

        # compose a series
        result = (df['a'] + df['b']).apply(lambda x: {'s': x})
        expected = Series([{'s': 3}, {'s': 3}])
        assert_series_equal(result, expected)

        # gh-18775
        df = DataFrame()
        df["author"] = ["X", "Y", "Z"]
        df["publisher"] = ["BBC", "NBC", "N24"]
        # the strings are written day-first; say so explicitly
        df["date"] = pd.to_datetime(['17-10-2010 07:15:30',
                                     '13-05-2011 08:20:35',
                                     '15-01-2013 09:09:09'],
                                    dayfirst=True)
        result = df.apply(lambda x: {}, axis=1)
        expected = Series([{}, {}, {}])
        assert_series_equal(result, expected)

    def test_with_listlike_columns(self):
        """Tuples and lists returned per row are collected into a Series."""
        # gh-17348
        df = DataFrame({'a': Series(np.random.randn(4)),
                        'b': ['a', 'list', 'of', 'words'],
                        'ts': date_range('2016-10-01', periods=4, freq='H')})

        # t[1:] drops the leading index entry of each itertuples() row
        result = df[['a', 'b']].apply(tuple, axis=1)
        expected = Series([t[1:] for t in df[['a', 'b']].itertuples()])
        assert_series_equal(result, expected)

        result = df[['a', 'ts']].apply(tuple, axis=1)
        expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()])
        assert_series_equal(result, expected)

        # gh-18919
        df = DataFrame({'x': Series([['a', 'b'], ['q']]),
                        'y': Series([['z'], ['q', 't']])})
        df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')])

        result = df.apply(
            lambda row: [el for el in row['x'] if el in row['y']],
            axis=1)
        expected = Series([[], ['q']], index=df.index)
        assert_series_equal(result, expected)

    def test_infer_output_shape_columns(self):
        """A tuple built from two of three columns yields a Series."""
        # gh-18573
        df = DataFrame({'number': [1., 2.],
                        'string': ['foo', 'bar'],
                        'datetime': [pd.Timestamp('2017-11-29 03:30:00'),
                                     pd.Timestamp('2017-11-29 03:45:00')]})
        result = df.apply(lambda row: (row.number, row.string), axis=1)

        # Pick the fields by name: a positional slice of the row tuple
        # depends on the column order of a dict-constructed frame.
        expected = Series([(t.number, t.string) for t in df.itertuples()])
        assert_series_equal(result, expected)

    def test_infer_output_shape_listlike_columns(self):
        """List-likes of any length per row are not expanded to columns."""
        # gh-16353
        df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # gh-17970
        df = DataFrame({"a": [1, 2, 3]}, index=list('abc'))

        result = df.apply(lambda row: np.ones(1), axis=1)
        expected = Series([np.ones(1) for _ in df.itertuples()],
                          index=df.index)
        assert_series_equal(result, expected)

        result = df.apply(lambda row: np.ones(2), axis=1)
        expected = Series([np.ones(2) for _ in df.itertuples()],
                          index=df.index)
        assert_series_equal(result, expected)

        # gh-17892
        df = DataFrame({'a': [pd.Timestamp('2010-02-01'),
                              pd.Timestamp('2010-02-04'),
                              pd.Timestamp('2010-02-05'),
                              pd.Timestamp('2010-02-06')],
                        'b': [9, 5, 4, 3],
                        'c': [5, 3, 4, 2],
                        'd': [1, 2, 3, 4]})

        def fun(x):
            return (1, 2)

        result = df.apply(fun, axis=1)
        expected = Series([(1, 2) for _ in df.itertuples()])
        assert_series_equal(result, expected)

    def test_consistent_coerce_for_shapes(self):
        """Matching the input width does not turn lists into columns."""
        # we want column names to NOT be propagated
        # just because the shape matches the input shape
        df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)
607
+
608
+
475
609
def zip_frames (* frames ):
476
610
"""
477
611
take a list of frames, zip the columns together for each
@@ -649,13 +783,13 @@ def test_non_callable_aggregates(self):
649
783
650
784
# Function aggregate
651
785
result = df .agg ({'A' : 'count' })
652
- expected = pd . Series ({'A' : 2 })
786
+ expected = Series ({'A' : 2 })
653
787
654
788
assert_series_equal (result , expected )
655
789
656
790
# Non-function aggregate
657
791
result = df .agg ({'A' : 'size' })
658
- expected = pd . Series ({'A' : 3 })
792
+ expected = Series ({'A' : 3 })
659
793
660
794
assert_series_equal (result , expected )
661
795
0 commit comments