Skip to content

Commit 8b2070a

Browse files
Liam3851 authored and jreback committed
BUG: Issues with DatetimeTZ values in where and combine_first (pandas-dev#21469 + pandas-dev#21546) (pandas-dev#21660)
1 parent 823478c commit 8b2070a

File tree

8 files changed

+64
-28
lines changed

8 files changed

+64
-28
lines changed

doc/source/whatsnew/v0.24.0.txt

+8-2
Original file line numberDiff line numberDiff line change
@@ -255,7 +255,6 @@ Timezones
255255
- Bug in :class:`Series` constructor which would coerce tz-aware and tz-naive :class:`Timestamp` objects to tz-aware (:issue:`13051`)
256256
- Bug in :class:`Index` with ``datetime64[ns, tz]`` dtype that did not localize integer data correctly (:issue:`20964`)
257257
- Bug in :class:`DatetimeIndex` where constructing with an integer and tz would not localize correctly (:issue:`12619`)
258-
- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would raise when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
259258

260259
Offsets
261260
^^^^^^^
@@ -288,13 +287,18 @@ Indexing
288287
^^^^^^^^
289288

290289
- The traceback from a ``KeyError`` when asking ``.loc`` for a single missing label is now shorter and more clear (:issue:`21557`)
291-
- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError`` - consistently with the case of a flat :class:`Int64Index` - rather than falling back to positional indexing (:issue:`21593`)
290+
- When ``.ix`` is asked for a missing integer label in a :class:`MultiIndex` with a first level of integer type, it now raises a ``KeyError``, consistently with the case of a flat :class:`Int64Index`, rather than falling back to positional indexing (:issue:`21593`)
292291
- Bug in :meth:`DatetimeIndex.reindex` when reindexing a tz-naive and tz-aware :class:`DatetimeIndex` (:issue:`8306`)
293292
- Bug in :class:`DataFrame` when setting values with ``.loc`` and a timezone aware :class:`DatetimeIndex` (:issue:`11365`)
294293
- Bug when indexing :class:`DatetimeIndex` with nanosecond resolution dates and timezones (:issue:`11679`)
295294

296295
-
297296

297+
Missing
298+
^^^^^^^
299+
300+
- Bug in :func:`DataFrame.fillna` where a ``ValueError`` would be raised when one column contained a ``datetime64[ns, tz]`` dtype (:issue:`15522`)
301+
298302
MultiIndex
299303
^^^^^^^^^^
300304

@@ -335,6 +339,8 @@ Reshaping
335339
^^^^^^^^^
336340

337341
- Bug in :func:`pandas.concat` when joining resampled DataFrames with timezone aware index (:issue:`13783`)
342+
- Bug in :meth:`Series.combine_first` with ``datetime64[ns, tz]`` dtype which would return a tz-naive result (:issue:`21469`)
343+
- Bug in :meth:`Series.where` and :meth:`DataFrame.where` with ``datetime64[ns, tz]`` dtype (:issue:`21546`)
338344
-
339345
-
340346

pandas/core/common.py

+1-14
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.compat import long, zip, iteritems, PY36, OrderedDict
1515
from pandas.core.config import get_option
1616
from pandas.core.dtypes.generic import ABCSeries, ABCIndex
17-
from pandas.core.dtypes.common import _NS_DTYPE, is_integer
17+
from pandas.core.dtypes.common import is_integer
1818
from pandas.core.dtypes.inference import _iterable_not_string
1919
from pandas.core.dtypes.missing import isna, isnull, notnull # noqa
2020
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
@@ -410,19 +410,6 @@ def _apply_if_callable(maybe_callable, obj, **kwargs):
410410
return maybe_callable
411411

412412

413-
def _where_compat(mask, arr1, arr2):
414-
if arr1.dtype == _NS_DTYPE and arr2.dtype == _NS_DTYPE:
415-
new_vals = np.where(mask, arr1.view('i8'), arr2.view('i8'))
416-
return new_vals.view(_NS_DTYPE)
417-
418-
if arr1.dtype == _NS_DTYPE:
419-
arr1 = tslib.ints_to_pydatetime(arr1.view('i8'))
420-
if arr2.dtype == _NS_DTYPE:
421-
arr2 = tslib.ints_to_pydatetime(arr2.view('i8'))
422-
423-
return np.where(mask, arr1, arr2)
424-
425-
426413
def _dict_compat(d):
427414
"""
428415
Helper function to convert datetimelike-keyed dicts to Timestamp-keyed dict

pandas/core/internals.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1476,14 +1476,16 @@ def where(self, other, cond, align=True, errors='raise',
14761476
if transpose:
14771477
values = values.T
14781478

1479-
other = getattr(other, 'values', other)
1479+
other = getattr(other, '_values', getattr(other, 'values', other))
14801480
cond = getattr(cond, 'values', cond)
14811481

14821482
# If the default broadcasting would go in the wrong direction, then
14831483
# explicitly reshape other instead
14841484
if getattr(other, 'ndim', 0) >= 1:
14851485
if values.ndim - 1 == other.ndim and axis == 1:
14861486
other = other.reshape(tuple(other.shape + (1, )))
1487+
elif transpose and values.ndim == self.ndim - 1:
1488+
cond = cond.T
14871489

14881490
if not hasattr(cond, 'shape'):
14891491
raise ValueError("where must have a condition that is ndarray "
@@ -2888,8 +2890,8 @@ def _try_coerce_args(self, values, other):
28882890
elif isinstance(other, self._holder):
28892891
if other.tz != self.values.tz:
28902892
raise ValueError("incompatible or non tz-aware value")
2891-
other = other.asi8
2892-
other_mask = isna(other)
2893+
other_mask = _block_shape(isna(other), ndim=self.ndim)
2894+
other = _block_shape(other.asi8, ndim=self.ndim)
28932895
elif isinstance(other, (np.datetime64, datetime, date)):
28942896
other = tslib.Timestamp(other)
28952897
tz = getattr(other, 'tz', None)

pandas/core/series.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
is_float_dtype,
2323
is_extension_type,
2424
is_extension_array_dtype,
25+
is_datetimelike,
2526
is_datetime64tz_dtype,
2627
is_timedelta64_dtype,
2728
is_object_dtype,
@@ -78,6 +79,7 @@
7879
from pandas._libs import index as libindex, tslib as libts, lib, iNaT
7980
from pandas.core.config import get_option
8081
from pandas.core.strings import StringMethods
82+
from pandas.core.tools.datetimes import to_datetime
8183

8284
import pandas.plotting._core as gfx
8385

@@ -2303,10 +2305,10 @@ def combine_first(self, other):
23032305
new_index = self.index.union(other.index)
23042306
this = self.reindex(new_index, copy=False)
23052307
other = other.reindex(new_index, copy=False)
2306-
# TODO: do we need name?
2307-
name = ops.get_op_result_name(self, other) # noqa
2308-
rs_vals = com._where_compat(isna(this), other._values, this._values)
2309-
return self._constructor(rs_vals, index=new_index).__finalize__(self)
2308+
if is_datetimelike(this) and not is_datetimelike(other):
2309+
other = to_datetime(other)
2310+
2311+
return this.where(notna(this), other)
23102312

23112313
def update(self, other):
23122314
"""

pandas/tests/frame/test_indexing.py

+14
Original file line numberDiff line numberDiff line change
@@ -2936,6 +2936,20 @@ def test_where_callable(self):
29362936
tm.assert_frame_equal(result,
29372937
(df + 2).where((df + 2) > 8, (df + 2) + 10))
29382938

2939+
def test_where_tz_values(self, tz_naive_fixture):
2940+
df1 = DataFrame(DatetimeIndex(['20150101', '20150102', '20150103'],
2941+
tz=tz_naive_fixture),
2942+
columns=['date'])
2943+
df2 = DataFrame(DatetimeIndex(['20150103', '20150104', '20150105'],
2944+
tz=tz_naive_fixture),
2945+
columns=['date'])
2946+
mask = DataFrame([True, True, False], columns=['date'])
2947+
exp = DataFrame(DatetimeIndex(['20150101', '20150102', '20150105'],
2948+
tz=tz_naive_fixture),
2949+
columns=['date'])
2950+
result = df1.where(mask, df2)
2951+
assert_frame_equal(exp, result)
2952+
29392953
def test_mask(self):
29402954
df = DataFrame(np.random.randn(5, 3))
29412955
cond = df > 0

pandas/tests/indexing/test_coercion.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -580,12 +580,11 @@ def test_where_series_datetime64(self, fill_val, exp_dtype):
580580
values = pd.Series(pd.date_range(fill_val, periods=4))
581581
if fill_val.tz:
582582
exp = pd.Series([pd.Timestamp('2011-01-01'),
583-
pd.Timestamp('2012-01-02 05:00'),
583+
pd.Timestamp('2012-01-02 00:00', tz='US/Eastern'),
584584
pd.Timestamp('2011-01-03'),
585-
pd.Timestamp('2012-01-04 05:00')])
586-
self._assert_where_conversion(obj, cond, values, exp,
587-
'datetime64[ns]')
588-
pytest.xfail("ToDo: do not coerce to UTC, must be object")
585+
pd.Timestamp('2012-01-04 00:00',
586+
tz='US/Eastern')])
587+
self._assert_where_conversion(obj, cond, values, exp, exp_dtype)
589588

590589
exp = pd.Series([pd.Timestamp('2011-01-01'), values[1],
591590
pd.Timestamp('2011-01-03'), values[3]])

pandas/tests/series/indexing/test_boolean.py

+12
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,18 @@ def test_where_datetime_conversion():
551551
assert_series_equal(rs, expected)
552552

553553

554+
def test_where_dt_tz_values(tz_naive_fixture):
555+
ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
556+
tz=tz_naive_fixture))
557+
ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
558+
tz=tz_naive_fixture))
559+
mask = pd.Series([True, True, False])
560+
result = ser1.where(mask, ser2)
561+
exp = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20160516'],
562+
tz=tz_naive_fixture))
563+
assert_series_equal(exp, result)
564+
565+
554566
def test_mask():
555567
# compare with tested results in test_where
556568
s = Series(np.random.randn(5))

pandas/tests/series/test_combine_concat.py

+14
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,20 @@ def get_result_type(dtype, dtype2):
170170
]).dtype
171171
assert result.kind == expected
172172

173+
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
174+
ser1 = pd.Series(pd.DatetimeIndex(['20150101', '20150102', '20150103'],
175+
tz=tz_naive_fixture),
176+
name='ser1')
177+
ser2 = pd.Series(pd.DatetimeIndex(['20160514', '20160515', '20160516'],
178+
tz=tz_naive_fixture),
179+
index=[2, 3, 4], name='ser2')
180+
result = ser1.combine_first(ser2)
181+
exp_vals = pd.DatetimeIndex(['20150101', '20150102', '20150103',
182+
'20160515', '20160516'],
183+
tz=tz_naive_fixture)
184+
exp = pd.Series(exp_vals, name='ser1')
185+
assert_series_equal(exp, result)
186+
173187
def test_concat_empty_series_dtypes(self):
174188

175189
# booleans

0 commit comments

Comments
 (0)