Skip to content

BUG: DatetimeArray._from_sequence accepting bool dtype #32668

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Mar 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ Datetimelike
- Bug in :class:`Timestamp` where constructing :class:`Timestamp` with dateutil timezone less than 128 nanoseconds before daylight saving time switch from winter to summer would result in nonexistent time (:issue:`31043`)
- Bug in :meth:`Period.to_timestamp`, :meth:`Period.start_time` with microsecond frequency returning a timestamp one nanosecond earlier than the correct time (:issue:`31475`)
- :class:`Timestamp` raising confusing error message when year, month or day is missing (:issue:`31200`)
- Bug in :class:`DatetimeIndex` constructor incorrectly accepting ``bool``-dtyped inputs (:issue:`32668`)

Timedelta
^^^^^^^^^
Expand Down
15 changes: 10 additions & 5 deletions pandas/core/arrays/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from pandas.core.dtypes.common import (
_INT64_DTYPE,
_NS_DTYPE,
is_bool_dtype,
is_categorical_dtype,
is_datetime64_any_dtype,
is_datetime64_dtype,
Expand Down Expand Up @@ -1903,32 +1904,36 @@ def maybe_convert_dtype(data, copy):
------
TypeError : PeriodDType data is passed
"""
if is_float_dtype(data):
if not hasattr(data, "dtype"):
# e.g. collections.deque
return data, copy

if is_float_dtype(data.dtype):
# Note: we must cast to datetime64[ns] here in order to treat these
# as wall-times instead of UTC timestamps.
data = data.astype(_NS_DTYPE)
copy = False
# TODO: deprecate this behavior to instead treat symmetrically
# with integer dtypes. See discussion in GH#23675

elif is_timedelta64_dtype(data):
elif is_timedelta64_dtype(data.dtype) or is_bool_dtype(data.dtype):
# GH#29794 enforcing deprecation introduced in GH#23539
raise TypeError(f"dtype {data.dtype} cannot be converted to datetime64[ns]")
elif is_period_dtype(data):
elif is_period_dtype(data.dtype):
# Note: without explicitly raising here, PeriodIndex
# test_setops.test_join_does_not_recur fails
raise TypeError(
"Passing PeriodDtype data is invalid. Use `data.to_timestamp()` instead"
)

elif is_categorical_dtype(data):
elif is_categorical_dtype(data.dtype):
# GH#18664 preserve tz in going DTI->Categorical->DTI
# TODO: cases where we need to do another pass through this func,
# e.g. the categories are timedelta64s
data = data.categories.take(data.codes, fill_value=NaT)._values
copy = False

elif is_extension_array_dtype(data) and not is_datetime64tz_dtype(data):
elif is_extension_array_dtype(data.dtype) and not is_datetime64tz_dtype(data.dtype):
# Includes categorical
# TODO: We have no tests for these
data = np.array(data, dtype=np.object_)
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2666,9 +2666,9 @@ def combine(self, other, func, fill_value=None) -> "Series":
new_values = [func(lv, other) for lv in self._values]
new_name = self.name

if is_categorical_dtype(self.values):
if is_categorical_dtype(self.dtype):
pass
elif is_extension_array_dtype(self.values):
elif is_extension_array_dtype(self.dtype):
# The function can return something of any type, so check
# if the type is compatible with the calling EA.
new_values = try_cast_to_ea(self._values, new_values)
Expand Down
13 changes: 12 additions & 1 deletion pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,18 @@ def _convert_listlike_datetimes(
# warn if passing timedelta64, raise for PeriodDtype
# NB: this must come after unit transformation
orig_arg = arg
arg, _ = maybe_convert_dtype(arg, copy=False)
try:
arg, _ = maybe_convert_dtype(arg, copy=False)
except TypeError:
if errors == "coerce":
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

actually do we have coverage for all of these paths? i seem to recall we using similar code elsewhere?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes, we have tests in tests.tools.test_to_datetime that hit all of these cases

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kk

also maybe able to de-duplicate some of this code

result = np.array(["NaT"], dtype="datetime64[ns]").repeat(len(arg))
return DatetimeIndex(result, name=name)
elif errors == "ignore":
from pandas import Index

result = Index(arg, name=name)
return result
raise

arg = ensure_object(arg)
require_iso8601 = False
Expand Down
19 changes: 17 additions & 2 deletions pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,26 @@ def test_non_array_raises(self):
with pytest.raises(ValueError, match="list"):
DatetimeArray([1, 2, 3])

def test_other_type_raises(self):
def test_bool_dtype_raises(self):
arr = np.array([1, 2, 3], dtype="bool")

with pytest.raises(
ValueError, match="The dtype of 'values' is incorrect.*bool"
):
DatetimeArray(np.array([1, 2, 3], dtype="bool"))
DatetimeArray(arr)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why is this a ValueError when DatetimeArray._from_sequence(arr) is a TypeError? Is is also worth considering making error messages consistent?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

originally i made it type error, then changed for consistenty with... something, but i dont remember what. will try to figure that out and report bakc


msg = r"dtype bool cannot be converted to datetime64\[ns\]"
with pytest.raises(TypeError, match=msg):
DatetimeArray._from_sequence(arr)

with pytest.raises(TypeError, match=msg):
sequence_to_dt64ns(arr)

with pytest.raises(TypeError, match=msg):
pd.DatetimeIndex(arr)

with pytest.raises(TypeError, match=msg):
pd.to_datetime(arr)

def test_incorrect_dtype_raises(self):
with pytest.raises(ValueError, match="Unexpected value for 'dtype'."):
Expand Down