@@ -2925,3 +2925,349 @@ def test_concat_preserves_extension_int64_dtype():
2925
2925
result = pd .concat ([df_a , df_b ], ignore_index = True )
2926
2926
expected = pd .DataFrame ({"a" : [- 1 , None ], "b" : [None , 1 ]}, dtype = "Int64" )
2927
2927
tm .assert_frame_equal (result , expected )
2928
+
2929
+
2930
+ class TestSeriesConcat :
2931
+ @pytest .mark .parametrize (
2932
+ "dtype" , ["float64" , "int8" , "uint8" , "bool" , "m8[ns]" , "M8[ns]" ]
2933
+ )
2934
+ def test_concat_empty_series_dtypes_match_roundtrips (self , dtype ):
2935
+ dtype = np .dtype (dtype )
2936
+
2937
+ result = pd .concat ([Series (dtype = dtype )])
2938
+ assert result .dtype == dtype
2939
+
2940
+ result = pd .concat ([Series (dtype = dtype ), Series (dtype = dtype )])
2941
+ assert result .dtype == dtype
2942
+
2943
+ def test_concat_empty_series_dtypes_roundtrips (self ):
2944
+
2945
+ # round-tripping with self & like self
2946
+ dtypes = map (np .dtype , ["float64" , "int8" , "uint8" , "bool" , "m8[ns]" , "M8[ns]" ])
2947
+
2948
+ def int_result_type (dtype , dtype2 ):
2949
+ typs = {dtype .kind , dtype2 .kind }
2950
+ if not len (typs - {"i" , "u" , "b" }) and (
2951
+ dtype .kind == "i" or dtype2 .kind == "i"
2952
+ ):
2953
+ return "i"
2954
+ elif not len (typs - {"u" , "b" }) and (
2955
+ dtype .kind == "u" or dtype2 .kind == "u"
2956
+ ):
2957
+ return "u"
2958
+ return None
2959
+
2960
+ def float_result_type (dtype , dtype2 ):
2961
+ typs = {dtype .kind , dtype2 .kind }
2962
+ if not len (typs - {"f" , "i" , "u" }) and (
2963
+ dtype .kind == "f" or dtype2 .kind == "f"
2964
+ ):
2965
+ return "f"
2966
+ return None
2967
+
2968
+ def get_result_type (dtype , dtype2 ):
2969
+ result = float_result_type (dtype , dtype2 )
2970
+ if result is not None :
2971
+ return result
2972
+ result = int_result_type (dtype , dtype2 )
2973
+ if result is not None :
2974
+ return result
2975
+ return "O"
2976
+
2977
+ for dtype in dtypes :
2978
+ for dtype2 in dtypes :
2979
+ if dtype == dtype2 :
2980
+ continue
2981
+
2982
+ expected = get_result_type (dtype , dtype2 )
2983
+ result = pd .concat ([Series (dtype = dtype ), Series (dtype = dtype2 )]).dtype
2984
+ assert result .kind == expected
2985
+
2986
+ @pytest .mark .parametrize (
2987
+ "left,right,expected" ,
2988
+ [
2989
+ # booleans
2990
+ (np .bool_ , np .int32 , np .int32 ),
2991
+ (np .bool_ , np .float32 , np .object_ ),
2992
+ # datetime-like
2993
+ ("m8[ns]" , np .bool_ , np .object_ ),
2994
+ ("m8[ns]" , np .int64 , np .object_ ),
2995
+ ("M8[ns]" , np .bool_ , np .object_ ),
2996
+ ("M8[ns]" , np .int64 , np .object_ ),
2997
+ # categorical
2998
+ ("category" , "category" , "category" ),
2999
+ ("category" , "object" , "object" ),
3000
+ ],
3001
+ )
3002
+ def test_concat_empty_series_dtypes (self , left , right , expected ):
3003
+ result = pd .concat ([Series (dtype = left ), Series (dtype = right )])
3004
+ assert result .dtype == expected
3005
+
3006
+ def test_concat_empty_series_dtypes_triple (self ):
3007
+
3008
+ assert (
3009
+ pd .concat (
3010
+ [Series (dtype = "M8[ns]" ), Series (dtype = np .bool_ ), Series (dtype = np .int64 )]
3011
+ ).dtype
3012
+ == np .object_
3013
+ )
3014
+
3015
+ def test_concat_empty_series_dtype_category_with_array (self ):
3016
+ # GH#18515
3017
+ assert (
3018
+ pd .concat (
3019
+ [Series (np .array ([]), dtype = "category" ), Series (dtype = "float64" )]
3020
+ ).dtype
3021
+ == "float64"
3022
+ )
3023
+
3024
+ def test_concat_empty_series_dtypes_sparse (self ):
3025
+ result = pd .concat (
3026
+ [
3027
+ Series (dtype = "float64" ).astype ("Sparse" ),
3028
+ Series (dtype = "float64" ).astype ("Sparse" ),
3029
+ ]
3030
+ )
3031
+ assert result .dtype == "Sparse[float64]"
3032
+
3033
+ result = pd .concat (
3034
+ [Series (dtype = "float64" ).astype ("Sparse" ), Series (dtype = "float64" )]
3035
+ )
3036
+ # TODO: release-note: concat sparse dtype
3037
+ expected = pd .SparseDtype (np .float64 )
3038
+ assert result .dtype == expected
3039
+
3040
+ result = pd .concat (
3041
+ [Series (dtype = "float64" ).astype ("Sparse" ), Series (dtype = "object" )]
3042
+ )
3043
+ # TODO: release-note: concat sparse dtype
3044
+ expected = pd .SparseDtype ("object" )
3045
+ assert result .dtype == expected
3046
+
3047
+
3048
class TestDataFrameConcat:
    """Tests of ``pd.concat`` on DataFrames: dtypes, time zones, keys and axes."""

    def test_concat_multiple_frames_dtypes(self):
        """Column-wise concat of float64 and float32 frames keeps both dtypes."""
        # GH#2759
        shape = (10, 2)
        wide = DataFrame(data=np.ones(shape), columns=["foo", "bar"], dtype=np.float64)
        narrow = DataFrame(data=np.ones(shape), dtype=np.float32)

        combined = pd.concat((wide, narrow), axis=1)

        f64 = np.dtype("float64")
        f32 = np.dtype("float32")
        expected = Series([f64, f64, f32, f32], index=["foo", "bar", 0, 1])
        tm.assert_series_equal(combined.dtypes, expected)

    def test_concat_multiple_tzs(self):
        """Concat frames holding naive, UTC and EST timestamps in a "time" column."""
        # GH#12467
        # combining datetime tz-aware and naive DataFrames
        naive = Timestamp("2015-01-01", tz=None)
        utc = Timestamp("2015-01-01", tz="UTC")
        est = Timestamp("2015-01-01", tz="EST")

        def frame(*stamps, **kwargs):
            # single-column frame holding the given timestamps
            return DataFrame({"time": list(stamps)}, **kwargs)

        # naive combined with tz-aware: expected frame is built with object dtype
        for aware in (utc, est):
            merged = pd.concat([frame(naive), frame(aware)]).reset_index(drop=True)
            tm.assert_frame_equal(merged, frame(naive, aware, dtype=object))

        # two different time zones: expected frame is built without explicit dtype
        merged = pd.concat([frame(utc), frame(est)]).reset_index(drop=True)
        tm.assert_frame_equal(merged, frame(utc, est))

    @pytest.mark.parametrize(
        "t1",
        [
            "2015-01-01",
            pytest.param(
                pd.NaT,
                marks=pytest.mark.xfail(
                    reason="GH23037 incorrect dtype when concatenating"
                ),
            ),
        ],
    )
    def test_concat_tz_NaT(self, t1):
        """Concat a two-column and a one-column UTC frame; the gap becomes NaT."""
        # GH#22796
        # Concating tz-aware multicolumn DataFrames
        first = Timestamp(t1, tz="UTC")
        second = Timestamp("2015-01-01", tz="UTC")
        third = Timestamp("2015-01-01", tz="UTC")

        two_cols = DataFrame([[first, second]])
        one_col = DataFrame([[third]])

        merged = pd.concat([two_cols, one_col])

        # both inputs carry row label 0, so the result index repeats it
        expected = DataFrame([[first, second], [third, pd.NaT]], index=[0, 0])
        tm.assert_frame_equal(merged, expected)

    def test_concat_tz_not_aligned(self):
        """Concat tz-aware frames whose column sets differ (missing "B" -> NaT)."""
        # GH#22796
        ts = pd.to_datetime([1, 2]).tz_localize("UTC")
        only_a = DataFrame({"A": ts})
        a_and_b = DataFrame({"A": ts, "B": ts})

        merged = pd.concat([only_a, a_and_b], sort=True, ignore_index=True)

        stamps = list(ts)
        expected = DataFrame({"A": stamps * 2, "B": [pd.NaT] * 2 + stamps})
        tm.assert_frame_equal(merged, expected)

    def test_concat_tuple_keys(self):
        """Tuple ``keys`` are prepended to the row labels of each input frame."""
        # GH#14438
        ones = DataFrame(np.ones((2, 2)), columns=list("AB"))
        twos = DataFrame(np.full((3, 2), 2.0), columns=list("AB"))

        merged = pd.concat((ones, twos), keys=[("bee", "bah"), ("bee", "boo")])

        # per-key values in row order; the row position becomes the third level
        per_key = {
            ("bee", "bah"): [1.0, 1.0],
            ("bee", "boo"): [2.0, 2.0, 2.0],
        }
        column = {
            key + (pos,): value
            for key, values in per_key.items()
            for pos, value in enumerate(values)
        }
        expected = DataFrame({name: dict(column) for name in ("A", "B")})
        tm.assert_frame_equal(merged, expected)

    def test_concat_named_keys(self):
        """The outer level name comes from ``keys.name`` or from ``names``."""
        # GH#14252
        df = DataFrame({"foo": [1, 2], "bar": [0.1, 0.2]})

        def expected_with(level_names):
            # expected result of concat([df, df]) with the given level names
            return DataFrame(
                {"foo": [1, 2, 1, 2], "bar": [0.1, 0.2, 0.1, 0.2]},
                index=pd.MultiIndex.from_product(
                    (["a", "b"], [0, 1]), names=level_names
                ),
            )

        expected_named = expected_with(["baz", None])

        # name taken from the keys Index itself
        named_keys = Index(["a", "b"], name="baz")
        tm.assert_frame_equal(pd.concat([df, df], keys=named_keys), expected_named)

        # name supplied through the ``names`` argument
        plain_keys = Index(["a", "b"], name=None)
        tm.assert_frame_equal(
            pd.concat([df, df], keys=plain_keys, names=["baz"]), expected_named
        )

        # no name anywhere
        tm.assert_frame_equal(
            pd.concat([df, df], keys=plain_keys), expected_with([None, None])
        )

    def test_concat_axis_parameter(self):
        """``axis`` accepts integer and string aliases for frames and Series."""
        # GH#14369
        df1 = DataFrame({"A": [0.1, 0.2]}, index=range(2))
        df2 = DataFrame({"A": [0.3, 0.4]}, index=range(2))

        # Index/row/0 DataFrame
        stacked_frame = DataFrame({"A": [0.1, 0.2, 0.3, 0.4]}, index=[0, 1, 0, 1])
        for axis in ("index", "rows", 0):
            tm.assert_frame_equal(pd.concat([df1, df2], axis=axis), stacked_frame)

        # Columns/1 DataFrame
        side_by_side_frame = DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=["A", "A"]
        )
        for axis in ("columns", 1):
            tm.assert_frame_equal(pd.concat([df1, df2], axis=axis), side_by_side_frame)

        series1 = Series([0.1, 0.2])
        series2 = Series([0.3, 0.4])

        # Index/row/0 Series
        stacked_series = Series([0.1, 0.2, 0.3, 0.4], index=[0, 1, 0, 1])
        for axis in ("index", "rows", 0):
            tm.assert_series_equal(
                pd.concat([series1, series2], axis=axis), stacked_series
            )

        # Columns/1 Series
        side_by_side_series = DataFrame(
            [[0.1, 0.3], [0.2, 0.4]], index=[0, 1], columns=[0, 1]
        )
        for axis in ("columns", 1):
            tm.assert_frame_equal(
                pd.concat([series1, series2], axis=axis), side_by_side_series
            )

        # Testing ValueError
        with pytest.raises(ValueError, match="No axis named"):
            pd.concat([series1, series2], axis="something")

    def test_concat_numerical_names(self):
        """Integer MultiIndex level names are kept when concatenating slices."""
        # GH#15262, GH#12223
        level_names = [1, 2]
        full_index = pd.MultiIndex.from_product(
            [["A0", "A1", "A2"], ["B0", "B1", "B2"]], names=level_names
        )
        df = DataFrame({"col": range(9)}, dtype="int32", index=full_index)

        # first two rows followed by last two rows
        merged = pd.concat((df.iloc[:2], df.iloc[-2:]))

        picked_index = pd.MultiIndex.from_tuples(
            [("A0", "B0"), ("A0", "B1"), ("A2", "B1"), ("A2", "B2")],
            names=level_names,
        )
        expected = DataFrame({"col": [0, 1, 7, 8]}, dtype="int32", index=picked_index)
        tm.assert_frame_equal(merged, expected)

    def test_concat_astype_dup_col(self):
        """``astype("category")`` works on a concat result with duplicate columns."""
        # GH#23049
        single = DataFrame([{"a": "b"}])
        doubled = pd.concat([single, single], axis=1)

        converted = doubled.astype("category")

        raw = DataFrame(np.array([["b", "b"]]), columns=["a", "a"])
        tm.assert_frame_equal(converted, raw.astype("category"))

    def test_concat_datetime_datetime64_frame(self):
        """Smoke test: concat a datetime64 frame with an object-typed frame."""
        # GH#2624
        records = [
            [datetime(2010, 1, 1), 1],
            [datetime(2010, 1, 2), "hi"],
        ]
        from_records = DataFrame.from_records(records, columns=["date", "test"])

        days = date_range(start="2000/1/1", freq="D", periods=10)
        from_range = DataFrame({"date": days, "test": range(10)})

        # it works!
        pd.concat([from_range, from_records])
0 commit comments