4
4
from warnings import catch_warnings
5
5
6
6
import numpy as np
7
- from numpy .random import randn
8
7
import pytest
9
8
10
9
import pandas as pd
11
- from pandas import (
12
- DataFrame ,
13
- DatetimeIndex ,
14
- Index ,
15
- MultiIndex ,
16
- Series ,
17
- concat ,
18
- date_range ,
19
- read_csv ,
20
- )
10
+ from pandas import DataFrame , Index , MultiIndex , Series , concat , date_range , read_csv
21
11
import pandas ._testing as tm
22
12
from pandas .core .arrays import SparseArray
23
13
from pandas .core .construction import create_series_with_explicit_dtype
@@ -123,29 +113,6 @@ def test_concat_dataframe_keys_bug(self, sort):
123
113
result = concat ([t1 , t2 ], axis = 1 , keys = ["t1" , "t2" ], sort = sort )
124
114
assert list (result .columns ) == [("t1" , "value" ), ("t2" , "value" )]
125
115
126
- def test_concat_series_partial_columns_names (self ):
127
- # GH10698
128
- foo = Series ([1 , 2 ], name = "foo" )
129
- bar = Series ([1 , 2 ])
130
- baz = Series ([4 , 5 ])
131
-
132
- result = concat ([foo , bar , baz ], axis = 1 )
133
- expected = DataFrame (
134
- {"foo" : [1 , 2 ], 0 : [1 , 2 ], 1 : [4 , 5 ]}, columns = ["foo" , 0 , 1 ]
135
- )
136
- tm .assert_frame_equal (result , expected )
137
-
138
- result = concat ([foo , bar , baz ], axis = 1 , keys = ["red" , "blue" , "yellow" ])
139
- expected = DataFrame (
140
- {"red" : [1 , 2 ], "blue" : [1 , 2 ], "yellow" : [4 , 5 ]},
141
- columns = ["red" , "blue" , "yellow" ],
142
- )
143
- tm .assert_frame_equal (result , expected )
144
-
145
- result = concat ([foo , bar , baz ], axis = 1 , ignore_index = True )
146
- expected = DataFrame ({0 : [1 , 2 ], 1 : [1 , 2 ], 2 : [4 , 5 ]})
147
- tm .assert_frame_equal (result , expected )
148
-
149
116
@pytest .mark .parametrize ("mapping" , ["mapping" , "dict" ])
150
117
def test_concat_mapping (self , mapping , non_dict_mapping_subclass ):
151
118
constructor = dict if mapping == "dict" else non_dict_mapping_subclass
@@ -438,38 +405,6 @@ def test_with_mixed_tuples(self, sort):
438
405
# it works
439
406
concat ([df1 , df2 ], sort = sort )
440
407
441
- def test_handle_empty_objects (self , sort ):
442
- df = DataFrame (np .random .randn (10 , 4 ), columns = list ("abcd" ))
443
-
444
- baz = df [:5 ].copy ()
445
- baz ["foo" ] = "bar"
446
- empty = df [5 :5 ]
447
-
448
- frames = [baz , empty , empty , df [5 :]]
449
- concatted = concat (frames , axis = 0 , sort = sort )
450
-
451
- expected = df .reindex (columns = ["a" , "b" , "c" , "d" , "foo" ])
452
- expected ["foo" ] = expected ["foo" ].astype ("O" )
453
- expected .loc [0 :4 , "foo" ] = "bar"
454
-
455
- tm .assert_frame_equal (concatted , expected )
456
-
457
- # empty as first element with time series
458
- # GH3259
459
- df = DataFrame (
460
- dict (A = range (10000 )), index = date_range ("20130101" , periods = 10000 , freq = "s" )
461
- )
462
- empty = DataFrame ()
463
- result = concat ([df , empty ], axis = 1 )
464
- tm .assert_frame_equal (result , df )
465
- result = concat ([empty , df ], axis = 1 )
466
- tm .assert_frame_equal (result , df )
467
-
468
- result = concat ([df , empty ])
469
- tm .assert_frame_equal (result , df )
470
- result = concat ([empty , df ])
471
- tm .assert_frame_equal (result , df )
472
-
473
408
def test_concat_mixed_objs (self ):
474
409
475
410
# concat mixed series/frames
@@ -539,20 +474,6 @@ def test_concat_mixed_objs(self):
539
474
result = concat ([s1 , df , s2 ], ignore_index = True )
540
475
tm .assert_frame_equal (result , expected )
541
476
542
- def test_empty_dtype_coerce (self ):
543
-
544
- # xref to #12411
545
- # xref to #12045
546
- # xref to #11594
547
- # see below
548
-
549
- # 10571
550
- df1 = DataFrame (data = [[1 , None ], [2 , None ]], columns = ["a" , "b" ])
551
- df2 = DataFrame (data = [[3 , None ], [4 , None ]], columns = ["a" , "b" ])
552
- result = concat ([df1 , df2 ])
553
- expected = df1 .dtypes
554
- tm .assert_series_equal (result .dtypes , expected )
555
-
556
477
def test_dtype_coerceion (self ):
557
478
558
479
# 12411
@@ -575,76 +496,6 @@ def test_dtype_coerceion(self):
575
496
result = concat ([df .iloc [[0 ]], df .iloc [[1 ]]])
576
497
tm .assert_series_equal (result .dtypes , df .dtypes )
577
498
578
- def test_concat_series (self ):
579
-
580
- ts = tm .makeTimeSeries ()
581
- ts .name = "foo"
582
-
583
- pieces = [ts [:5 ], ts [5 :15 ], ts [15 :]]
584
-
585
- result = concat (pieces )
586
- tm .assert_series_equal (result , ts )
587
- assert result .name == ts .name
588
-
589
- result = concat (pieces , keys = [0 , 1 , 2 ])
590
- expected = ts .copy ()
591
-
592
- ts .index = DatetimeIndex (np .array (ts .index .values , dtype = "M8[ns]" ))
593
-
594
- exp_codes = [np .repeat ([0 , 1 , 2 ], [len (x ) for x in pieces ]), np .arange (len (ts ))]
595
- exp_index = MultiIndex (levels = [[0 , 1 , 2 ], ts .index ], codes = exp_codes )
596
- expected .index = exp_index
597
- tm .assert_series_equal (result , expected )
598
-
599
- def test_concat_series_axis1 (self , sort = sort ):
600
- ts = tm .makeTimeSeries ()
601
-
602
- pieces = [ts [:- 2 ], ts [2 :], ts [2 :- 2 ]]
603
-
604
- result = concat (pieces , axis = 1 )
605
- expected = DataFrame (pieces ).T
606
- tm .assert_frame_equal (result , expected )
607
-
608
- result = concat (pieces , keys = ["A" , "B" , "C" ], axis = 1 )
609
- expected = DataFrame (pieces , index = ["A" , "B" , "C" ]).T
610
- tm .assert_frame_equal (result , expected )
611
-
612
- # preserve series names, #2489
613
- s = Series (randn (5 ), name = "A" )
614
- s2 = Series (randn (5 ), name = "B" )
615
-
616
- result = concat ([s , s2 ], axis = 1 )
617
- expected = DataFrame ({"A" : s , "B" : s2 })
618
- tm .assert_frame_equal (result , expected )
619
-
620
- s2 .name = None
621
- result = concat ([s , s2 ], axis = 1 )
622
- tm .assert_index_equal (result .columns , Index (["A" , 0 ], dtype = "object" ))
623
-
624
- # must reindex, #2603
625
- s = Series (randn (3 ), index = ["c" , "a" , "b" ], name = "A" )
626
- s2 = Series (randn (4 ), index = ["d" , "a" , "b" , "c" ], name = "B" )
627
- result = concat ([s , s2 ], axis = 1 , sort = sort )
628
- expected = DataFrame ({"A" : s , "B" : s2 })
629
- tm .assert_frame_equal (result , expected )
630
-
631
- def test_concat_series_axis1_names_applied (self ):
632
- # ensure names argument is not ignored on axis=1, #23490
633
- s = Series ([1 , 2 , 3 ])
634
- s2 = Series ([4 , 5 , 6 ])
635
- result = concat ([s , s2 ], axis = 1 , keys = ["a" , "b" ], names = ["A" ])
636
- expected = DataFrame (
637
- [[1 , 4 ], [2 , 5 ], [3 , 6 ]], columns = Index (["a" , "b" ], name = "A" )
638
- )
639
- tm .assert_frame_equal (result , expected )
640
-
641
- result = concat ([s , s2 ], axis = 1 , keys = [("a" , 1 ), ("b" , 2 )], names = ["A" , "B" ])
642
- expected = DataFrame (
643
- [[1 , 4 ], [2 , 5 ], [3 , 6 ]],
644
- columns = MultiIndex .from_tuples ([("a" , 1 ), ("b" , 2 )], names = ["A" , "B" ]),
645
- )
646
- tm .assert_frame_equal (result , expected )
647
-
648
499
def test_concat_single_with_key (self ):
649
500
df = DataFrame (np .random .randn (10 , 4 ))
650
501
@@ -733,26 +584,6 @@ def test_concat_bug_3602(self):
733
584
result = concat ([df1 , df2 ], axis = 1 )
734
585
tm .assert_frame_equal (result , expected )
735
586
736
- def test_concat_inner_join_empty (self ):
737
- # GH 15328
738
- df_empty = DataFrame ()
739
- df_a = DataFrame ({"a" : [1 , 2 ]}, index = [0 , 1 ], dtype = "int64" )
740
- df_expected = DataFrame ({"a" : []}, index = [], dtype = "int64" )
741
-
742
- for how , expected in [("inner" , df_expected ), ("outer" , df_a )]:
743
- result = pd .concat ([df_a , df_empty ], axis = 1 , join = how )
744
- tm .assert_frame_equal (result , expected )
745
-
746
- def test_concat_series_axis1_same_names_ignore_index (self ):
747
- dates = date_range ("01-Jan-2013" , "01-Jan-2014" , freq = "MS" )[0 :- 1 ]
748
- s1 = Series (randn (len (dates )), index = dates , name = "value" )
749
- s2 = Series (randn (len (dates )), index = dates , name = "value" )
750
-
751
- result = concat ([s1 , s2 ], axis = 1 , ignore_index = True )
752
- expected = Index ([0 , 1 ])
753
-
754
- tm .assert_index_equal (result .columns , expected )
755
-
756
587
def test_concat_iterables (self ):
757
588
# GH8645 check concat works with tuples, list, generators, and weird
758
589
# stuff like deque and custom iterables
@@ -827,53 +658,6 @@ def test_concat_invalid_first_argument(self):
827
658
expected = read_csv (StringIO (data ))
828
659
tm .assert_frame_equal (result , expected )
829
660
830
- def test_concat_empty_series (self ):
831
- # GH 11082
832
- s1 = Series ([1 , 2 , 3 ], name = "x" )
833
- s2 = Series (name = "y" , dtype = "float64" )
834
- res = pd .concat ([s1 , s2 ], axis = 1 )
835
- exp = DataFrame (
836
- {"x" : [1 , 2 , 3 ], "y" : [np .nan , np .nan , np .nan ]},
837
- index = Index ([0 , 1 , 2 ], dtype = "O" ),
838
- )
839
- tm .assert_frame_equal (res , exp )
840
-
841
- s1 = Series ([1 , 2 , 3 ], name = "x" )
842
- s2 = Series (name = "y" , dtype = "float64" )
843
- res = pd .concat ([s1 , s2 ], axis = 0 )
844
- # name will be reset
845
- exp = Series ([1 , 2 , 3 ])
846
- tm .assert_series_equal (res , exp )
847
-
848
- # empty Series with no name
849
- s1 = Series ([1 , 2 , 3 ], name = "x" )
850
- s2 = Series (name = None , dtype = "float64" )
851
- res = pd .concat ([s1 , s2 ], axis = 1 )
852
- exp = DataFrame (
853
- {"x" : [1 , 2 , 3 ], 0 : [np .nan , np .nan , np .nan ]},
854
- columns = ["x" , 0 ],
855
- index = Index ([0 , 1 , 2 ], dtype = "O" ),
856
- )
857
- tm .assert_frame_equal (res , exp )
858
-
859
- @pytest .mark .parametrize ("tz" , [None , "UTC" ])
860
- @pytest .mark .parametrize ("values" , [[], [1 , 2 , 3 ]])
861
- def test_concat_empty_series_timelike (self , tz , values ):
862
- # GH 18447
863
-
864
- first = Series ([], dtype = "M8[ns]" ).dt .tz_localize (tz )
865
- dtype = None if values else np .float64
866
- second = Series (values , dtype = dtype )
867
-
868
- expected = DataFrame (
869
- {
870
- 0 : Series ([pd .NaT ] * len (values ), dtype = "M8[ns]" ).dt .tz_localize (tz ),
871
- 1 : values ,
872
- }
873
- )
874
- result = concat ([first , second ], axis = 1 )
875
- tm .assert_frame_equal (result , expected )
876
-
877
661
def test_default_index (self ):
878
662
# is_series and ignore_index
879
663
s1 = Series ([1 , 2 , 3 ], name = "x" )
@@ -1025,16 +809,6 @@ def test_concat_empty_and_non_empty_frame_regression():
1025
809
tm .assert_frame_equal (result , expected )
1026
810
1027
811
1028
- def test_concat_empty_and_non_empty_series_regression ():
1029
- # GH 18187 regression test
1030
- s1 = Series ([1 ])
1031
- s2 = Series ([], dtype = object )
1032
-
1033
- expected = s1
1034
- result = pd .concat ([s1 , s2 ])
1035
- tm .assert_series_equal (result , expected )
1036
-
1037
-
1038
812
def test_concat_sorts_columns (sort ):
1039
813
# GH-4588
1040
814
df1 = DataFrame ({"a" : [1 , 2 ], "b" : [1 , 2 ]}, columns = ["b" , "a" ])
@@ -1112,25 +886,6 @@ def test_concat_aligned_sort_does_not_raise():
1112
886
tm .assert_frame_equal (result , expected )
1113
887
1114
888
1115
- @pytest .mark .parametrize ("s1name,s2name" , [(np .int64 (190 ), (43 , 0 )), (190 , (43 , 0 ))])
1116
- def test_concat_series_name_npscalar_tuple (s1name , s2name ):
1117
- # GH21015
1118
- s1 = Series ({"a" : 1 , "b" : 2 }, name = s1name )
1119
- s2 = Series ({"c" : 5 , "d" : 6 }, name = s2name )
1120
- result = pd .concat ([s1 , s2 ])
1121
- expected = Series ({"a" : 1 , "b" : 2 , "c" : 5 , "d" : 6 })
1122
- tm .assert_series_equal (result , expected )
1123
-
1124
-
1125
- def test_concat_empty_df_object_dtype ():
1126
- # GH 9149
1127
- df_1 = DataFrame ({"Row" : [0 , 1 , 1 ], "EmptyCol" : np .nan , "NumberCol" : [1 , 2 , 3 ]})
1128
- df_2 = DataFrame (columns = df_1 .columns )
1129
- result = pd .concat ([df_1 , df_2 ], axis = 0 )
1130
- expected = df_1 .astype (object )
1131
- tm .assert_frame_equal (result , expected )
1132
-
1133
-
1134
889
def test_concat_sparse ():
1135
890
# GH 23557
1136
891
a = Series (SparseArray ([0 , 1 , 2 ]))
0 commit comments