Skip to content

API: Series([pydate, pydatetime]) #49446

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ Other API changes
- The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`)
- When creating a :class:`Series` with an object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`)
- :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`)
- Passing a sequence containing ``datetime`` objects and ``date`` objects to the :class:`Series` constructor will return ``object`` dtype instead of ``datetime64[ns]`` dtype, consistent with :class:`Index` behavior (:issue:`49341`)
- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` will raise instead of silently ignoring the keyword and returning ``object`` dtype (:issue:`24435`)
-

Expand Down
18 changes: 13 additions & 5 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1640,8 +1640,11 @@ def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]:
return "interval"
return "mixed"

if seen_date and not (seen_datetime or seen_timedelta):
return "date"
if seen_date:
if not seen_datetime and not seen_timedelta:
return "date"
return "mixed"

elif seen_datetime and not seen_timedelta:
return "datetime"
elif seen_timedelta and not seen_datetime:
Expand Down Expand Up @@ -2570,10 +2573,15 @@ def maybe_convert_objects(ndarray[object] objects,
if seen.datetimetz_:
if is_datetime_with_singletz_array(objects):
from pandas import DatetimeIndex
dti = DatetimeIndex(objects)

# unbox to DatetimeArray
return dti._data
try:
dti = DatetimeIndex(objects)
except OutOfBoundsDatetime:
# e.g. test_to_datetime_cache_coerce_50_lines_outofbounds
pass
else:
# unbox to DatetimeArray
return dti._data
seen.object_ = True

elif seen.datetime_:
Expand Down
60 changes: 13 additions & 47 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1224,71 +1224,37 @@ def maybe_infer_to_datetimelike(

v = np.array(value, copy=False)

shape = v.shape
if v.ndim != 1:
v = v.ravel()

if not len(v):
return value

def try_datetime(v: np.ndarray) -> np.ndarray | DatetimeArray:
# Coerce to datetime64, datetime64tz, or in corner cases
# object[datetimes]
from pandas.core.arrays.datetimes import sequence_to_datetimes

try:
dta = sequence_to_datetimes(v)
except (ValueError, OutOfBoundsDatetime):
# ValueError for e.g. mixed tzs
# GH#19761 we may have mixed timezones, in which case 'dta' is
# an ndarray[object]. Only 1 test
# relies on this behavior, see GH#40111
return v.reshape(shape)
else:
return dta.reshape(shape)

def try_timedelta(v: np.ndarray) -> np.ndarray:
# safe coerce to timedelta64

# will try first with a string & object conversion
try:
# bc we know v.dtype == object, this is equivalent to
# `np.asarray(to_timedelta(v))`, but using a lower-level API that
# does not require a circular import.
td_values = array_to_timedelta64(v).view("m8[ns]")
except OutOfBoundsTimedelta:
return v.reshape(shape)
else:
return td_values.reshape(shape)

# TODO: this is _almost_ equivalent to lib.maybe_convert_objects,
# the main differences are described in GH#49340 and GH#49341
# and maybe_convert_objects doesn't catch OutOfBoundsDatetime
inferred_type = lib.infer_datetimelike_array(ensure_object(v))

if inferred_type in ["period", "interval"]:
if inferred_type in ["period", "interval", "timedelta", "datetime"]:
# Incompatible return value type (got "Union[ExtensionArray, ndarray]",
# expected "Union[ndarray, DatetimeArray, TimedeltaArray, PeriodArray,
# IntervalArray]")
return lib.maybe_convert_objects( # type: ignore[return-value]
v, convert_period=True, convert_interval=True
v,
convert_period=True,
convert_interval=True,
convert_timedelta=True,
convert_datetime=True,
dtype_if_all_nat=np.dtype("M8[ns]"),
)

if inferred_type == "datetime":
# Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], DatetimeArray]", variable has type
# "ndarray[Any, Any]")
value = try_datetime(v) # type: ignore[assignment]
elif inferred_type == "timedelta":
value = try_timedelta(v)
elif inferred_type == "nat":
# if all NaT, return as datetime
# only reached if we have at least 1 NaT and the rest (NaT or None or np.nan)
# This is slightly different from what we'd get with maybe_convert_objects,
# which only converts if all-NaT
from pandas.core.arrays.datetimes import sequence_to_datetimes

# Incompatible types in assignment (expression has type
# "Union[ndarray[Any, Any], DatetimeArray]", variable has type
# "ndarray[Any, Any]")
value = try_datetime(v) # type: ignore[assignment]
# Incompatible types in assignment (expression has type "DatetimeArray",
# variable has type "ndarray[Any, Any]")
value = sequence_to_datetimes(v) # type: ignore[assignment]
assert value.dtype == "M8[ns]"

return value
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/dtypes/test_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -1340,19 +1340,26 @@ def test_infer_dtype_period_with_na(self, na_value):
Timestamp("20170612", tz="US/Eastern"),
Timestamp("20170311", tz="US/Eastern"),
],
[date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")],
[np.datetime64("2017-06-12"), np.datetime64("2017-03-11")],
[np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)],
],
)
def test_infer_datetimelike_array_datetime(self, data):
assert lib.infer_datetimelike_array(data) == "datetime"

def test_infer_datetimelike_array_date_mixed(self):
    """Check that ``date`` mixed with datetime or timedelta infers as "mixed"."""
    # GH49341 pre-2.0 these were inferred as "datetime" and "timedelta",
    # respectively
    data = [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")]
    assert lib.infer_datetimelike_array(data) == "mixed"

    # Pass the flat list itself: wrapping it in a 1-tuple would hand the
    # function a single list element, so the timedelta/date combination
    # would never actually be inspected.
    data = [timedelta(2017, 6, 12), date(2017, 3, 11)]
    assert lib.infer_datetimelike_array(data) == "mixed"

@pytest.mark.parametrize(
"data",
[
[timedelta(2017, 6, 12), timedelta(2017, 3, 11)],
[timedelta(2017, 6, 12), date(2017, 3, 11)],
[np.timedelta64(2017, "D"), np.timedelta64(6, "s")],
[np.timedelta64(2017, "D"), timedelta(2017, 3, 11)],
],
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,20 @@


class TestSeriesConstructors:
def test_infer_with_date_and_datetime(self):
# GH#49341 pre-2.0 we inferred datetime-and-date to datetime64, which
# was inconsistent with Index behavior
ts = Timestamp(2016, 1, 1)
vals = [ts.to_pydatetime(), ts.date()]

ser = Series(vals)
expected = Series(vals, dtype=object)
tm.assert_series_equal(ser, expected)

idx = Index(vals)
expected = Index(vals, dtype=object)
tm.assert_index_equal(idx, expected)

def test_unparseable_strings_with_dt64_dtype(self):
# pre-2.0 these would be silently ignored and come back with object dtype
vals = ["aa"]
Expand Down