@@ -103,6 +103,7 @@ def test_combine_first_mixed_bug(self):
103
103
combined = frame1 .combine_first (frame2 )
104
104
assert len (combined .columns ) == 5
105
105
106
+ def test_combine_first_same_as_in_update (self ):
106
107
# gh 3016 (same as in update)
107
108
df = DataFrame (
108
109
[[1.0 , 2.0 , False , True ], [4.0 , 5.0 , True , False ]],
@@ -118,6 +119,7 @@ def test_combine_first_mixed_bug(self):
118
119
df .loc [0 , "A" ] = 45
119
120
tm .assert_frame_equal (result , df )
120
121
122
+ def test_combine_first_doc_example (self ):
121
123
# doc example
122
124
df1 = DataFrame (
123
125
{"A" : [1.0 , np .nan , 3.0 , 5.0 , np .nan ], "B" : [np .nan , 2.0 , 3.0 , np .nan , 6.0 ]}
@@ -134,38 +136,56 @@ def test_combine_first_mixed_bug(self):
134
136
expected = DataFrame ({"A" : [1 , 2 , 3 , 5 , 3 , 7.0 ], "B" : [np .nan , 2 , 3 , 4 , 6 , 8 ]})
135
137
tm .assert_frame_equal (result , expected )
136
138
137
- # GH3552, return object dtype with bools
139
def test_combine_first_return_obj_type_with_bools(self):
    """Check column ``2`` of ``combine_first`` for frames that carry bools.

    ``df1`` has three rows on the default index and ``df2`` has two rows on
    index ``[1, 2]``; column ``2`` of both holds only booleans.  The combined
    column is compared against the same object-dtype Series for both call
    orders (``df1`` onto ``df2`` and ``df2`` onto ``df1``).
    """
    # GH3552

    df1 = DataFrame(
        [[np.nan, 3.0, True], [-4.6, np.nan, True], [np.nan, 7.0, False]]
    )
    df2 = DataFrame([[-42.6, np.nan, True], [-5.0, 1.6, False]], index=[1, 2])

    # One expectation is shared by both directions below.
    expected = Series([True, True, False], name=2, dtype=object)

    result_12 = df1.combine_first(df2)[2]
    tm.assert_series_equal(result_12, expected)

    result_21 = df2.combine_first(df1)[2]
    tm.assert_series_equal(result_21, expected)
154
+
155
@pytest.mark.parametrize(
    "data1, data2, data_expected",
    (
        # All datetimes combined with all None.
        (
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
            [None, None, None],
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
        ),
        # All None combined with all datetimes.
        (
            [None, None, None],
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
        ),
        # Partially filled caller: its first value wins, the rest is filled in.
        (
            [datetime(2000, 1, 2), None, None],
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
            [datetime(2000, 1, 2), datetime(2000, 1, 2), datetime(2000, 1, 3)],
        ),
        # Fully filled caller: nothing is taken from the other frame.
        (
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
            [datetime(2000, 1, 2), None, None],
            [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)],
        ),
    ),
)
def test_combine_first_convert_datatime_correctly(
    self, data1, data2, data_expected
):
    """Check ``combine_first`` on single-column frames of datetimes and None.

    Parameters
    ----------
    data1 : list
        Values for column ``"a"`` of the calling frame.
    data2 : list
        Values for column ``"a"`` of the frame passed to ``combine_first``.
    data_expected : list
        Values for column ``"a"`` of the expected result frame.
    """
    # GH 3593
    # NOTE(review): "datatime" in the test name looks like a typo for
    # "datetime"; the name is kept so existing test ids do not change.

    df1 = DataFrame({"a": data1})
    df2 = DataFrame({"a": data2})
    result = df1.combine_first(df2)
    expected = DataFrame({"a": data_expected})
    tm.assert_frame_equal(result, expected)
169
189
170
190
def test_combine_first_align_nan (self ):
171
191
# GH 7509 (not fixed)
@@ -339,9 +359,14 @@ def test_combine_first_int(self):
339
359
df1 = DataFrame ({"a" : [0 , 1 , 3 , 5 ]}, dtype = "int64" )
340
360
df2 = DataFrame ({"a" : [1 , 4 ]}, dtype = "int64" )
341
361
342
- res = df1 .combine_first (df2 )
343
- tm .assert_frame_equal (res , df1 )
344
- assert res ["a" ].dtype == "int64"
362
+ result_12 = df1 .combine_first (df2 )
363
+ expected_12 = DataFrame ({"a" : [0 , 1 , 3 , 5 ]}, dtype = "float64" )
364
+ tm .assert_frame_equal (result_12 , expected_12 )
365
+
366
+ result_21 = df2 .combine_first (df1 )
367
+ expected_21 = DataFrame ({"a" : [1 , 4 , 3 , 5 ]}, dtype = "float64" )
368
+
369
+ tm .assert_frame_equal (result_21 , expected_21 )
345
370
346
371
@pytest .mark .parametrize ("val" , [1 , 1.0 ])
347
372
def test_combine_first_with_asymmetric_other (self , val ):
@@ -367,6 +392,26 @@ def test_combine_first_string_dtype_only_na(self):
367
392
tm .assert_frame_equal (result , expected )
368
393
369
394
395
@pytest.mark.parametrize(
    "scalar1, scalar2",
    [
        (datetime(2020, 1, 1), datetime(2020, 1, 2)),
        (pd.Period("2020-01-01", "D"), pd.Period("2020-01-02", "D")),
        (pd.Timedelta("89 days"), pd.Timedelta("60 min")),
        (pd.Interval(left=0, right=1), pd.Interval(left=2, right=3, closed="left")),
    ],
)
def test_combine_first_timestamp_bug(scalar1, scalar2, nulls_fixture):
    """Combine an all-null one-row frame with a frame of non-null scalars.

    ``frame`` has columns ``a`` and ``b`` filled with the null value, and
    ``other`` has columns ``b`` and ``c`` holding ``scalar1`` and ``scalar2``.
    The result is expected to keep the null in ``a`` and to take ``b`` and
    ``c`` from ``other``.

    Parameters
    ----------
    scalar1, scalar2 : scalar
        Paired values from the parametrization above (datetime, Period,
        Timedelta or Interval).
    nulls_fixture : object
        Null value used to fill ``frame``.  It is not defined in this view;
        presumably a pytest fixture -- TODO confirm.
    """
    # GH28481
    na_value = nulls_fixture
    frame = DataFrame([[na_value, na_value]], columns=["a", "b"])
    other = DataFrame([[scalar1, scalar2]], columns=["b", "c"])

    result = frame.combine_first(other)
    expected = DataFrame([[na_value, scalar1, scalar2]], columns=["a", "b", "c"])
    tm.assert_frame_equal(result, expected)
413
+
414
+
370
415
def test_combine_first_with_nan_multiindex ():
371
416
# gh-36562
372
417
0 commit comments