Skip to content

Commit 2046cb5

Browse files
committed
BUG/DEPR: combine dtype fixes
1 parent 4a80521 commit 2046cb5

File tree

7 files changed

+410
-227
lines changed

7 files changed

+410
-227
lines changed

doc/source/whatsnew/v0.19.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ Deprecations
788788
- ``pandas.tseries.frequencies.get_standard_freq`` is deprecated. Use ``pandas.tseries.frequencies.to_offset(freq).rule_code`` instead. (:issue:`13874`)
789789
- ``pandas.tseries.frequencies.to_offset``'s ``freqstr`` keyword is deprecated in favor of ``freq``. (:issue:`13874`)
790790

791+
791792
.. _whatsnew_0190.prior_deprecations:
792793

793794
Removal of prior version deprecations/changes
@@ -939,6 +940,7 @@ Bug Fixes
939940

940941

941942
- Bug in ``Series`` comparison operators when dealing with zero dim NumPy arrays (:issue:`13006`)
943+
- Bug in ``.combine_first`` where an incorrect ``dtype`` may be returned (:issue:`7630`, :issue:`10567`)
942944
- Bug in ``groupby`` where ``apply`` returns different result depending on whether first result is ``None`` or not (:issue:`12824`)
943945
- Bug in ``groupby(..).nth()`` where the group key is included inconsistently if called after ``.head()/.tail()`` (:issue:`12839`)
944946
- Bug in ``.to_html``, ``.to_latex`` and ``.to_string`` where a custom datetime formatter passed through the ``formatters`` keyword is silently ignored (:issue:`10690`)

pandas/core/frame.py

+18-8
Original file line numberDiff line numberDiff line change
@@ -31,20 +31,22 @@
3131
_possibly_downcast_to_dtype,
3232
_invalidate_string_dtypes,
3333
_coerce_to_dtypes,
34-
_maybe_upcast_putmask)
34+
_maybe_upcast_putmask,
35+
_find_common_type)
3536
from pandas.types.common import (is_categorical_dtype,
3637
is_object_dtype,
3738
is_extension_type,
3839
is_datetimetz,
3940
is_datetime64_dtype,
41+
is_datetime64tz_dtype,
4042
is_bool_dtype,
4143
is_integer_dtype,
4244
is_float_dtype,
4345
is_integer,
4446
is_scalar,
47+
is_dtype_equal,
4548
needs_i8_conversion,
4649
_get_dtype_from_object,
47-
_lcd_dtypes,
4850
_ensure_float,
4951
_ensure_float64,
5052
_ensure_int64,
@@ -3700,17 +3702,20 @@ def combine(self, other, func, fill_value=None, overwrite=True):
37003702
otherSeries[other_mask] = fill_value
37013703

37023704
# if we have different dtypes, possibly promote
3703-
new_dtype = this_dtype
3704-
if this_dtype != other_dtype:
3705-
new_dtype = _lcd_dtypes(this_dtype, other_dtype)
3706-
series = series.astype(new_dtype)
3705+
if notnull(series).all():
3706+
new_dtype = this_dtype
37073707
otherSeries = otherSeries.astype(new_dtype)
3708+
else:
3709+
new_dtype = _find_common_type([this_dtype, other_dtype])
3710+
if not is_dtype_equal(this_dtype, new_dtype):
3711+
series = series.astype(new_dtype)
3712+
if not is_dtype_equal(other_dtype, new_dtype):
3713+
otherSeries = otherSeries.astype(new_dtype)
37083714

37093715
# see if we need to be represented as i8 (datetimelike)
37103716
# try to keep us at this dtype
37113717
needs_i8_conversion_i = needs_i8_conversion(new_dtype)
37123718
if needs_i8_conversion_i:
3713-
this_dtype = new_dtype
37143719
arr = func(series, otherSeries, True)
37153720
else:
37163721
arr = func(series, otherSeries)
@@ -3721,7 +3726,12 @@ def combine(self, other, func, fill_value=None, overwrite=True):
37213726

37223727
# try to downcast back to the original dtype
37233728
if needs_i8_conversion_i:
3724-
arr = _possibly_cast_to_datetime(arr, this_dtype)
3729+
# TODO: This conversion should be handled in
3730+
# _possibly_cast_to_datetime but the change affects a lot...
3731+
if is_datetime64tz_dtype(new_dtype):
3732+
arr = DatetimeIndex._simple_new(arr, tz=new_dtype.tz)
3733+
else:
3734+
arr = _possibly_cast_to_datetime(arr, new_dtype)
37253735
else:
37263736
arr = _possibly_downcast_to_dtype(arr, this_dtype)
37273737

0 commit comments

Comments
 (0)