From abd0dc9c22a088b05ae5c34416e51821e2ec38cd Mon Sep 17 00:00:00 2001 From: David Krych Date: Thu, 14 Jun 2018 15:12:37 -0400 Subject: [PATCH 1/4] TENTATIVE: Fix for combine_first; tests for combine_first and where. No solution yet for where, and combine_first might require cleanup. --- pandas/core/series.py | 6 +++++- pandas/tests/series/indexing/test_boolean.py | 13 +++++++++++++ pandas/tests/series/test_combine_concat.py | 13 +++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 0450f28087f66..01f2a908c9f7f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2305,7 +2305,11 @@ def combine_first(self, other): # TODO: do we need name? name = ops.get_op_result_name(self, other) # noqa rs_vals = com._where_compat(isna(this), other._values, this._values) - return self._constructor(rs_vals, index=new_index).__finalize__(self) + result = self._constructor(rs_vals, index=new_index).__finalize__(self) + # TODO DK can we simplify this using internals rather than public astype + if is_datetime64tz_dtype(self.dtype): + result = result.astype(self.dtype) + return result def update(self, other): """ diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 2aef0df5349cb..0c83bf292c321 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -551,6 +551,19 @@ def test_where_datetime_conversion(): assert_series_equal(rs, expected) +def test_where_dt_tz_values(self): + dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') + df1 = pd.DataFrame({'date':dts1}) + dts2 = pd.date_range('20150103', '20150107', tz='America/New_York') + df2 = pd.DataFrame({'date':dts2}) + result = df1.date.where(df1.date < df1.date[3], df2.date) + exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', + '20150106', '20150107'], + tz='America/New_York') + exp = Series(exp_vals) + assert_series_equal(exp, result) + + def test_mask(): # compare with tested results in test_where s = Series(np.random.randn(5)) diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index f35cce6ac9d71..0f0d12825327e 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -170,6 +170,19 @@ def get_result_type(dtype, dtype2): ]).dtype assert result.kind == expected + def test_combine_first_dt_tz_values(self): + dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') + df1 = pd.DataFrame({'date':dts1}) + dts2 = pd.date_range('20160514', '20160518', tz='America/New_York') + df2 = pd.DataFrame({'date':dts2}, index=range(3, 8)) + result = df1.date.combine_first(df2.date) + exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', + '20150104', '20150105', '20160516', + '20160517', '20160518'], + tz='America/New_York') + exp = pd.Series(exp_vals, name='date') + assert_series_equal(exp, result) + def test_concat_empty_series_dtypes(self): # booleans From 952513145eaf8a5d60c5b85e0173928208dabd4a Mon Sep 17 00:00:00 2001 From: David Krych Date: Thu, 14 Jun 2018 15:13:20 -0400 Subject: [PATCH 2/4] PEP8 fixes --- pandas/core/series.py | 3 ++- pandas/tests/series/indexing/test_boolean.py | 4 ++-- pandas/tests/series/test_combine_concat.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 01f2a908c9f7f..6907a00b3518f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -2306,7 +2306,8 @@ def combine_first(self, other): name = ops.get_op_result_name(self, other) # noqa rs_vals = com._where_compat(isna(this), other._values, this._values) result = self._constructor(rs_vals, index=new_index).__finalize__(self) - # TODO DK can we simplify this using internals rather than public astype + # TODO DK can we simplify this using internals rather than public + # astype if is_datetime64tz_dtype(self.dtype): result = result.astype(self.dtype) return result diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 0c83bf292c321..1b31b9164aaf2 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -553,9 +553,9 @@ def test_where_datetime_conversion(): def test_where_dt_tz_values(self): dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') - df1 = pd.DataFrame({'date':dts1}) + df1 = pd.DataFrame({'date': dts1}) dts2 = pd.date_range('20150103', '20150107', tz='America/New_York') - df2 = pd.DataFrame({'date':dts2}) + df2 = pd.DataFrame({'date': dts2}) result = df1.date.where(df1.date < df1.date[3], df2.date) exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', '20150106', '20150107'], diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 0f0d12825327e..d3e4720a756f1 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -172,9 +172,9 @@ def get_result_type(dtype, dtype2): def test_combine_first_dt_tz_values(self): dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') - df1 = pd.DataFrame({'date':dts1}) + df1 = pd.DataFrame({'date': dts1}) dts2 = pd.date_range('20160514', '20160518', tz='America/New_York') - df2 = pd.DataFrame({'date':dts2}, index=range(3, 8)) + df2 = pd.DataFrame({'date': dts2}, index=range(3, 8)) result = df1.date.combine_first(df2.date) exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', '20150104', '20150105', '20160516', From 26338638952ac81ec7cf174e270d14ad841450cc Mon Sep 17 00:00:00 2001 From: David Krych Date: Fri, 15 Jun 2018 13:23:53 -0400 Subject: [PATCH 3/4] Pattern Series.combine_first on DataFrame.combine_first DataFrame.combine_first was fixed in #13970. Fix Series using same logic. Remove common._where_compat since it was only referred to by Series.combine_first, name was confusing, and not necessary to the new solution. --- pandas/core/common.py | 13 ------------- pandas/core/series.py | 31 +++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 1de8269c9a0c6..ec516d9d80023 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -410,19 +410,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs): return maybe_callable -def _where_compat(mask, arr1, arr2): - if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE: - new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8')) - return new_vals.view(_NS_DTYPE) - - if arr1.dtype == _NS_DTYPE: - arr1 = tslib.ints_to_pydatetime(arr1.view('i8')) - if arr2.dtype == _NS_DTYPE: - arr2 = tslib.ints_to_pydatetime(arr2.view('i8')) - - return np.where(mask, arr1, arr2) - - def _dict_compat(d): """ Helper function to convert datetimelike-keyed dicts to Timestamp-keyed dict diff --git a/pandas/core/series.py b/pandas/core/series.py index 6907a00b3518f..801a8ddeb67f7 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -29,13 +29,16 @@ is_hashable, is_iterator, is_dict_like, + is_dtype_equal, is_scalar, _is_unorderable_exception, _ensure_platform_int, - pandas_dtype) + pandas_dtype, + needs_i8_conversion) from pandas.core.dtypes.generic import ( ABCSparseArray, ABCDataFrame, ABCIndexClass) from pandas.core.dtypes.cast import ( + find_common_type, maybe_downcast_to_dtype, maybe_upcast, infer_dtype_from_scalar, maybe_convert_platform, maybe_cast_to_datetime, maybe_castable, @@ -2304,13 +2307,25 @@ def combine_first(self, other): other = other.reindex(new_index, copy=False) # TODO: do we need name? name = ops.get_op_result_name(self, other) # noqa - rs_vals = com._where_compat(isna(this), other._values, this._values) - result = self._constructor(rs_vals, index=new_index).__finalize__(self) - # TODO DK can we simplify this using internals rather than public - # astype - if is_datetime64tz_dtype(self.dtype): - result = result.astype(self.dtype) - return result + if not is_dtype_equal(this.dtype, other.dtype): + new_dtype = find_common_type([this.dtype, other.dtype]) + if not is_dtype_equal(this.dtype, new_dtype): + this = this.astype(new_dtype) + if not is_dtype_equal(other.dtype, new_dtype): + other = other.astype(new_dtype) + + if needs_i8_conversion(this.dtype): + mask = isna(this) + this_values = this.values.view('i8') + other_values = other.values.view('i8') + else: + this_values = this.values + other_values = other.values + mask = isna(this_values) + + rs_vals = np.where(mask, other_values, this_values) + rs_vals = maybe_downcast_to_dtype(rs_vals, this.dtype) + return self._constructor(rs_vals, index=new_index).__finalize__(self) def update(self, other): """ From 2f4d393d6e0b7cfeed0691c3230ea6ece418bb20 Mon Sep 17 00:00:00 2001 From: David Krych Date: Tue, 19 Jun 2018 11:18:04 -0400 Subject: [PATCH 4/4] Remove where unit test-- separate issue. Where is broken for DT TZ on both DataFrame and Series and uses a separate code path. Should open a new issue for this. --- pandas/tests/series/indexing/test_boolean.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 1b31b9164aaf2..2aef0df5349cb 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -551,19 +551,6 @@ def test_where_datetime_conversion(): assert_series_equal(rs, expected) -def test_where_dt_tz_values(self): - dts1 = pd.date_range('20150101', '20150105', tz='America/New_York') - df1 = pd.DataFrame({'date': dts1}) - dts2 = pd.date_range('20150103', '20150107', tz='America/New_York') - df2 = pd.DataFrame({'date': dts2}) - result = df1.date.where(df1.date < df1.date[3], df2.date) - exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103', - '20150106', '20150107'], - tz='America/New_York') - exp = Series(exp_vals) - assert_series_equal(exp, result) - - def test_mask(): # compare with tested results in test_where s = Series(np.random.randn(5))