Skip to content

BUG: DataFrame(dt64data, dtype=td64) corner cases #38792

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Dec 31, 2020
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,9 +615,12 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo
except OutOfBoundsDatetime:
# in case of out of bound datetime64 -> always raise
raise
except (ValueError, TypeError):
except (ValueError, TypeError) as err:
if dtype is not None and raise_cast_failure:
raise
elif "Cannot cast" in str(err):
# via _disallow_mismatched_datetimelike
raise
else:
subarr = np.array(arr, dtype=object, copy=copy)
return subarr
Expand Down
10 changes: 8 additions & 2 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar:
return value


def _disallow_mismatched_datetimelike(value: DtypeObj, dtype: DtypeObj):
def _disallow_mismatched_datetimelike(value, dtype: DtypeObj):
"""
numpy allows np.array(dt64values, dtype="timedelta64[ns]") and
vice-versa, but we do not want to allow this, so we need to
Expand Down Expand Up @@ -725,7 +725,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj,
dtype = np.dtype(object)

elif isinstance(val, (np.datetime64, datetime)):
val = Timestamp(val)
try:
val = Timestamp(val)
except OutOfBoundsDatetime:
return np.dtype(object), val

if val is NaT or val.tz is None:
dtype = np.dtype("M8[ns]")
else:
Expand Down Expand Up @@ -1472,6 +1476,8 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]):

# we have an array of datetime or timedeltas & nulls
elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype):
_disallow_mismatched_datetimelike(value, dtype)

try:
if is_datetime64:
value = to_datetime(value, errors="raise")
Expand Down
4 changes: 3 additions & 1 deletion pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,9 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
nan_dtype = np.dtype(object)
else:
nan_dtype = dtype
val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype)

na_val = arrays[missing].iat[0]
val = construct_1d_arraylike_from_scalar(na_val, len(index), nan_dtype)
arrays.loc[missing] = [val] * missing.sum()

else:
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/frame/methods/test_to_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,3 +379,12 @@ def test_to_records_datetimeindex_with_tz(self, tz):

# both converted to UTC, so they are equal
tm.assert_numpy_array_equal(result, expected)

def test_to_records_timeseries(self):
index = date_range("1/1/2000", periods=10)
df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"])

result = df.to_records()
result["index"].dtype == "M8[ns]"

result = df.to_records(index=False)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you assert the expected result (or something about it)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just moving the existing test to the correct location. I can revert the move as out of scope.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's fine to move, but if you can add an expected value, that would be good.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverted this move

24 changes: 7 additions & 17 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2823,15 +2823,6 @@ def test_frame_datetime64_mixed_index_ctor_1681(self):
d = DataFrame({"A": "foo", "B": ts}, index=dr)
assert d["B"].isna().all()

def test_frame_timeseries_to_records(self):
index = date_range("1/1/2000", periods=10)
df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"])

result = df.to_records()
result["index"].dtype == "M8[ns]"

result = df.to_records(index=False)

def test_frame_timeseries_column(self):
# GH19157
dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern")
Expand Down Expand Up @@ -2984,14 +2975,7 @@ def test_from_timedelta64_scalar_object(self, constructor, request):
assert isinstance(get1(obj), np.timedelta64)

@pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64])
def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
node = request.node
params = node.callspec.params
if params["frame_or_series"] is DataFrame and params["constructor"] is not None:
mark = pytest.mark.xfail(
reason="DataFrame incorrectly allows mismatched datetimelike"
)
node.add_marker(mark)
def test_from_scalar_datetimelike_mismatched(self, constructor, cls):

scalar = cls("NaT", "ns")
dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls]
Expand All @@ -3002,3 +2986,9 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request):
scalar = cls(4, "ns")
with pytest.raises(TypeError, match="Cannot cast"):
constructor(scalar, dtype=dtype)

def test_from_out_of_bounds_datetime(self, constructor):
scalar = datetime(9999, 1, 1)
result = constructor(scalar)

assert type(get1(result)) is datetime