@@ -350,11 +350,10 @@ def test_apply_attach_name(self):
350
350
351
351
result = self .frame .apply (lambda x : np .repeat (x .name , len (x )),
352
352
axis = 1 )
353
- expected = DataFrame (np .tile (self .frame .index ,
354
- (len (self .frame .columns ), 1 )).T ,
355
- index = self .frame .index ,
356
- columns = self .frame .columns )
357
- assert_frame_equal (result , expected )
353
+ expected = Series (np .repeat (t [0 ], len (self .frame .columns ))
354
+ for t in self .frame .itertuples ())
355
+ expected .index = self .frame .index
356
+ assert_series_equal (result , expected )
358
357
359
358
def test_apply_multi_index (self ):
360
359
s = DataFrame ([[1 , 2 ], [3 , 4 ], [5 , 6 ]])
@@ -367,10 +366,10 @@ def test_apply_dict(self):
367
366
368
367
# GH 8735
369
368
A = DataFrame ([['foo' , 'bar' ], ['spam' , 'eggs' ]])
370
- A_dicts = pd . Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
371
- dict ([(0 , 'bar' ), (1 , 'eggs' )])])
369
+ A_dicts = Series ([dict ([(0 , 'foo' ), (1 , 'spam' )]),
370
+ dict ([(0 , 'bar' ), (1 , 'eggs' )])])
372
371
B = DataFrame ([[0 , 1 ], [2 , 3 ]])
373
- B_dicts = pd . Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
372
+ B_dicts = Series ([dict ([(0 , 0 ), (1 , 2 )]), dict ([(0 , 1 ), (1 , 3 )])])
374
373
fn = lambda x : x .to_dict ()
375
374
376
375
for df , dicts in [(A , A_dicts ), (B , B_dicts )]:
@@ -482,6 +481,141 @@ def test_apply_non_numpy_dtype(self):
482
481
assert_frame_equal (result , df )
483
482
484
483
484
class TestInferOutputShape(object):
    """Output-shape inference for ``apply`` with opaque UDFs.

    The user has supplied an opaque UDF that transforms its input, which
    requires us to infer the shape of the output from what the UDF returns.
    """

    def test_infer_row_shape(self):
        """The number of result rows follows what the UDF returns."""
        # gh-17437
        # if row shape is changing, infer it
        df = DataFrame(np.random.rand(10, 2))

        result = df.apply(np.fft.fft, axis=0)
        assert result.shape == (10, 2)

        # rfft keeps n // 2 + 1 terms of a real transform: 10 rows -> 6
        result = df.apply(np.fft.rfft, axis=0)
        assert result.shape == (6, 2)

    def test_with_dictlike_columns(self):
        """A UDF returning a dict per row yields a Series of dicts."""
        # gh 17602

        def summed(row):
            return {'s': row['a'] + row['b']}

        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
        result = df.apply(summed, axis=1)
        expected = Series([{'s': 3} for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # Add a datetime column and apply again: the result has to be
        # recomputed here, otherwise the frame with the extra column is
        # never actually passed through apply.
        df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
                    pd.Timestamp('2017-05-02 00:00:00')]
        result = df.apply(summed, axis=1)
        assert_series_equal(result, expected)

        # compose a series
        result = (df['a'] + df['b']).apply(lambda x: {'s': x})
        expected = Series([{'s': 3}, {'s': 3}])
        assert_series_equal(result, expected)

        # gh-18775
        df = DataFrame()
        df["author"] = ["X", "Y", "Z"]
        df["publisher"] = ["BBC", "NBC", "N24"]
        # the strings are day-first (13-05, 15-01), so say so explicitly
        # instead of relying on format guessing
        df["date"] = pd.to_datetime(['17-10-2010 07:15:30',
                                     '13-05-2011 08:20:35',
                                     '15-01-2013 09:09:09'],
                                    dayfirst=True)
        result = df.apply(lambda x: {}, axis=1)
        expected = Series([{}, {}, {}])
        assert_series_equal(result, expected)

    def test_with_listlike_columns(self):
        """A UDF returning a tuple/list per row yields a Series of them."""
        # gh-17348
        # An offset object is used for the hourly frequency rather than a
        # string alias, whose spelling differs between pandas versions.
        df = DataFrame({'a': Series(np.random.randn(4)),
                        'b': ['a', 'list', 'of', 'words'],
                        'ts': date_range('2016-10-01', periods=4,
                                         freq=pd.offsets.Hour())})

        for cols in (['a', 'b'], ['a', 'ts']):
            subset = df[cols]
            result = subset.apply(tuple, axis=1)
            # itertuples() puts the index first; drop it to keep values only
            expected = Series([t[1:] for t in subset.itertuples()])
            assert_series_equal(result, expected)

        # gh-18919
        df = DataFrame({'x': Series([['a', 'b'], ['q']]),
                        'y': Series([['z'], ['q', 't']])})
        df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')])

        result = df.apply(
            lambda row: [el for el in row['x'] if el in row['y']],
            axis=1)
        expected = Series([[], ['q']], index=df.index)
        assert_series_equal(result, expected)

    def test_infer_output_shape_columns(self):
        """A tuple built from a subset of the columns stays a tuple."""
        # gh-18573

        df = DataFrame({'number': [1., 2.],
                        'string': ['foo', 'bar'],
                        'datetime': [pd.Timestamp('2017-11-29 03:30:00'),
                                     pd.Timestamp('2017-11-29 03:45:00')]})
        result = df.apply(lambda row: (row.number, row.string), axis=1)
        # Select the fields by name: a positional slice of the row tuple
        # depends on the order in which the dict-built columns end up.
        expected = Series([(t.number, t.string) for t in df.itertuples()])
        assert_series_equal(result, expected)

    def test_infer_output_shape_listlike_columns(self):
        """List-like return values are kept as the elements of a Series."""
        # gh-16353

        df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        # gh-17970
        df = DataFrame({"a": [1, 2, 3]}, index=list('abc'))

        for size in (1, 2):
            result = df.apply(lambda row: np.ones(size), axis=1)
            expected = Series([np.ones(size) for _ in df.itertuples()],
                              index=df.index)
            assert_series_equal(result, expected)

        # gh-17892
        df = DataFrame({'a': [pd.Timestamp('2010-02-01'),
                              pd.Timestamp('2010-02-04'),
                              pd.Timestamp('2010-02-05'),
                              pd.Timestamp('2010-02-06')],
                        'b': [9, 5, 4, 3],
                        'c': [5, 3, 4, 2],
                        'd': [1, 2, 3, 4]})

        def fun(x):
            return (1, 2)

        result = df.apply(fun, axis=1)
        expected = Series([(1, 2) for _ in df.itertuples()])
        assert_series_equal(result, expected)

    def test_consistent_coerce_for_shapes(self):
        """Results are coerced the same way whatever the returned length."""
        # we want column names to NOT be propagated
        # just because the shape matches the input shape
        df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])

        result = df.apply(lambda x: [1, 2, 3], axis=1)
        expected = Series([[1, 2, 3] for _ in df.itertuples()])
        assert_series_equal(result, expected)

        result = df.apply(lambda x: [1, 2], axis=1)
        expected = Series([[1, 2] for _ in df.itertuples()])
        assert_series_equal(result, expected)
617
+
618
+
485
619
def zip_frames (* frames ):
486
620
"""
487
621
take a list of frames, zip the columns together for each
@@ -659,13 +793,13 @@ def test_non_callable_aggregates(self):
659
793
660
794
# Function aggregate
661
795
result = df .agg ({'A' : 'count' })
662
- expected = pd . Series ({'A' : 2 })
796
+ expected = Series ({'A' : 2 })
663
797
664
798
assert_series_equal (result , expected )
665
799
666
800
# Non-function aggregate
667
801
result = df .agg ({'A' : 'size' })
668
- expected = pd . Series ({'A' : 3 })
802
+ expected = Series ({'A' : 3 })
669
803
670
804
assert_series_equal (result , expected )
671
805
0 commit comments