Skip to content

Commit db1be4d

Browse files
authored
BUG: Series.where casting dt64 to int64 (#38073)
* ENH: support 2D in DatetimeArray._from_sequence
* BUG: Series.where casting dt64 to int64
* whatsnew
* move whatsnew
* use fixture, remove unnecessary check
1 parent d201fcc commit db1be4d

File tree

5 files changed

+75
-14
lines changed

5 files changed

+75
-14
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line number | Diff line number | Diff line change
@@ -191,6 +191,8 @@ Datetimelike
191191
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
192192
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`)
193193
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
194+
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
195+
-
194196

195197
Timedelta
196198
^^^^^^^^^

pandas/core/arrays/numpy_.py

+2-1
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,8 @@ def __init__(self, values: Union[np.ndarray, "PandasArray"], copy: bool = False)
161161
f"'values' must be a NumPy array, not {type(values).__name__}"
162162
)
163163

164-
if values.ndim != 1:
164+
if values.ndim == 0:
165+
# Technically we support 2, but do not advertise that fact.
165166
raise ValueError("PandasArray must be 1-dimensional.")
166167

167168
if copy:

pandas/core/internals/blocks.py

+38-12
Original file line number | Diff line number | Diff line change
@@ -1332,6 +1332,22 @@ def shift(self, periods: int, axis: int = 0, fill_value=None):
13321332

13331333
return [self.make_block(new_values)]
13341334

1335+
def _maybe_reshape_where_args(self, values, other, cond, axis):
1336+
transpose = self.ndim == 2
1337+
1338+
cond = _extract_bool_array(cond)
1339+
1340+
# If the default broadcasting would go in the wrong direction, then
1341+
# explicitly reshape other instead
1342+
if getattr(other, "ndim", 0) >= 1:
1343+
if values.ndim - 1 == other.ndim and axis == 1:
1344+
other = other.reshape(tuple(other.shape + (1,)))
1345+
elif transpose and values.ndim == self.ndim - 1:
1346+
# TODO(EA2D): not necessary with 2D EAs
1347+
cond = cond.T
1348+
1349+
return other, cond
1350+
13351351
def where(
13361352
self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0
13371353
) -> List["Block"]:
@@ -1354,7 +1370,6 @@ def where(
13541370
"""
13551371
import pandas.core.computation.expressions as expressions
13561372

1357-
cond = _extract_bool_array(cond)
13581373
assert not isinstance(other, (ABCIndex, ABCSeries, ABCDataFrame))
13591374

13601375
assert errors in ["raise", "ignore"]
@@ -1365,17 +1380,7 @@ def where(
13651380
if transpose:
13661381
values = values.T
13671382

1368-
# If the default broadcasting would go in the wrong direction, then
1369-
# explicitly reshape other instead
1370-
if getattr(other, "ndim", 0) >= 1:
1371-
if values.ndim - 1 == other.ndim and axis == 1:
1372-
other = other.reshape(tuple(other.shape + (1,)))
1373-
elif transpose and values.ndim == self.ndim - 1:
1374-
# TODO(EA2D): not necessary with 2D EAs
1375-
cond = cond.T
1376-
1377-
if not hasattr(cond, "shape"):
1378-
raise ValueError("where must have a condition that is ndarray like")
1383+
other, cond = self._maybe_reshape_where_args(values, other, cond, axis)
13791384

13801385
if cond.ravel("K").all():
13811386
result = values
@@ -2128,6 +2133,26 @@ def to_native_types(self, na_rep="NaT", **kwargs):
21282133
result = arr._format_native_types(na_rep=na_rep, **kwargs)
21292134
return self.make_block(result)
21302135

2136+
def where(
2137+
self, other, cond, errors="raise", try_cast: bool = False, axis: int = 0
2138+
) -> List["Block"]:
2139+
# TODO(EA2D): reshape unnecessary with 2D EAs
2140+
arr = self.array_values().reshape(self.shape)
2141+
2142+
other, cond = self._maybe_reshape_where_args(arr, other, cond, axis)
2143+
2144+
try:
2145+
res_values = arr.T.where(cond, other).T
2146+
except (ValueError, TypeError):
2147+
return super().where(
2148+
other, cond, errors=errors, try_cast=try_cast, axis=axis
2149+
)
2150+
2151+
# TODO(EA2D): reshape not needed with 2D EAs
2152+
res_values = res_values.reshape(self.values.shape)
2153+
nb = self.make_block_same_class(res_values)
2154+
return [nb]
2155+
21312156
def _can_hold_element(self, element: Any) -> bool:
21322157
arr = self.array_values()
21332158

@@ -2196,6 +2221,7 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeBlock):
21962221
fillna = DatetimeBlock.fillna # i.e. Block.fillna
21972222
fill_value = DatetimeBlock.fill_value
21982223
_can_hold_na = DatetimeBlock._can_hold_na
2224+
where = DatetimeBlock.where
21992225

22002226
array_values = ExtensionBlock.array_values
22012227

pandas/tests/arrays/test_array.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -278,7 +278,7 @@ def test_array_inference_fails(data):
278278
tm.assert_extension_array_equal(result, expected)
279279

280280

281-
@pytest.mark.parametrize("data", [np.array([[1, 2], [3, 4]]), [[1, 2], [3, 4]]])
281+
@pytest.mark.parametrize("data", [np.array(0)])
282282
def test_nd_raises(data):
283283
with pytest.raises(ValueError, match="PandasArray must be 1-dimensional"):
284284
pd.array(data, dtype="int64")

pandas/tests/series/indexing/test_where.py

+32
Original file line number | Diff line number | Diff line change
@@ -464,3 +464,35 @@ def test_where_categorical(klass):
464464
df = klass(["A", "A", "B", "B", "C"], dtype="category")
465465
res = df.where(df != "C")
466466
tm.assert_equal(exp, res)
467+
468+
469+
def test_where_datetimelike_categorical(tz_naive_fixture):
470+
# GH#37682
471+
tz = tz_naive_fixture
472+
473+
dr = pd.date_range("2001-01-01", periods=3, tz=tz)._with_freq(None)
474+
lvals = pd.DatetimeIndex([dr[0], dr[1], pd.NaT])
475+
rvals = pd.Categorical([dr[0], pd.NaT, dr[2]])
476+
477+
mask = np.array([True, True, False])
478+
479+
# DatetimeIndex.where
480+
res = lvals.where(mask, rvals)
481+
tm.assert_index_equal(res, dr)
482+
483+
# DatetimeArray.where
484+
res = lvals._data.where(mask, rvals)
485+
tm.assert_datetime_array_equal(res, dr._data)
486+
487+
# Series.where
488+
res = Series(lvals).where(mask, rvals)
489+
tm.assert_series_equal(res, Series(dr))
490+
491+
# DataFrame.where
492+
if tz is None:
493+
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
494+
else:
495+
with pytest.xfail(reason="frame._values loses tz"):
496+
res = pd.DataFrame(lvals).where(mask[:, None], pd.DataFrame(rvals))
497+
498+
tm.assert_frame_equal(res, pd.DataFrame(dr))

0 commit comments

Comments
 (0)