Skip to content

Commit 2dfa309

Browse files
Merge branch 'main' into regular-expression-50465
2 parents 77a8f02 + 83c2a5f commit 2dfa309

File tree

3 files changed

+77
-2
lines changed

3 files changed

+77
-2
lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -496,6 +496,7 @@ Other API changes
496496
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
497497
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
498498
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
499+
- :func:`to_datetime` with ``unit`` of either ``"Y"`` or ``"M"`` will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
499500
-
500501

501502
.. ---------------------------------------------------------------------------

pandas/_libs/tslib.pyx

+44-2
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,13 @@ def array_with_unit_to_datetime(
263263
ndarray[int64_t] iresult
264264
ndarray[object] oresult
265265
object tz = None
266+
bint is_ym
267+
float fval
266268

267269
assert is_ignore or is_coerce or is_raise
268270

271+
is_ym = unit in "YM"
272+
269273
if unit == "ns":
270274
result, tz = array_to_datetime(
271275
values.astype(object, copy=False),
@@ -290,6 +294,18 @@ def array_with_unit_to_datetime(
290294
if val != val or val == NPY_NAT:
291295
iresult[i] = NPY_NAT
292296
else:
297+
if is_ym and is_float_object(val) and not val.is_integer():
298+
# Analogous to GH#47266 for Timestamp
299+
if is_raise:
300+
raise ValueError(
301+
f"Conversion of non-round float with unit={unit} "
302+
"is ambiguous"
303+
)
304+
elif is_ignore:
305+
raise AssertionError
306+
iresult[i] = NPY_NAT
307+
continue
308+
293309
try:
294310
iresult[i] = cast_from_unit(val, unit)
295311
except OverflowError:
@@ -306,8 +322,33 @@ def array_with_unit_to_datetime(
306322
iresult[i] = NPY_NAT
307323

308324
else:
325+
326+
try:
327+
fval = float(val)
328+
except ValueError:
329+
if is_raise:
330+
raise ValueError(
331+
f"non convertible value {val} with the unit '{unit}'"
332+
)
333+
elif is_ignore:
334+
raise AssertionError
335+
iresult[i] = NPY_NAT
336+
continue
337+
338+
if is_ym and not fval.is_integer():
339+
# Analogous to GH#47266 for Timestamp
340+
if is_raise:
341+
raise ValueError(
342+
f"Conversion of non-round float with unit={unit} "
343+
"is ambiguous"
344+
)
345+
elif is_ignore:
346+
raise AssertionError
347+
iresult[i] = NPY_NAT
348+
continue
349+
309350
try:
310-
iresult[i] = cast_from_unit(float(val), unit)
351+
iresult[i] = cast_from_unit(fval, unit)
311352
except ValueError:
312353
if is_raise:
313354
raise ValueError(
@@ -345,6 +386,7 @@ def array_with_unit_to_datetime(
345386
# and are in ignore mode
346387
# redo as object
347388

389+
# TODO: fix subtle differences between this and no-unit code
348390
oresult = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
349391
for i in range(n):
350392
val = values[i]
@@ -357,7 +399,7 @@ def array_with_unit_to_datetime(
357399
oresult[i] = <object>NaT
358400
else:
359401
try:
360-
oresult[i] = Timestamp(cast_from_unit(val, unit))
402+
oresult[i] = Timestamp(val, unit=unit)
361403
except OverflowError:
362404
oresult[i] = val
363405

pandas/tests/tools/test_to_datetime.py

+32
Original file line numberDiff line numberDiff line change
@@ -1498,6 +1498,38 @@ def test_to_datetime_fixed_offset(self):
14981498

14991499

15001500
class TestToDatetimeUnit:
1501+
@pytest.mark.parametrize("unit", ["Y", "M"])
def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
    # GH#50301
    # A non-round float is ambiguous for the "Y" and "M" units, so
    # to_datetime should refuse it just like Timestamp does.
    msg = f"Conversion of non-round float with unit={unit} is ambiguous"

    # errors="raise": the float and its string spelling are both rejected
    for values in ([1.5], ["1.5"]):
        with pytest.raises(ValueError, match=msg):
            to_datetime(values, unit=unit, errors="raise")

    # errors="ignore": the float case still raises, this time from within
    # the Timestamp constructor; this may not be ideal
    with pytest.raises(ValueError, match=msg):
        to_datetime([1.5], unit=unit, errors="ignore")
    # TODO: the float-like string case is NOT consistent with the
    #  Timestamp behavior, so the check below is left disabled
    # with pytest.raises(ValueError, match=msg):
    #     to_datetime(["1.5"], unit=unit, errors="ignore")

    # errors="coerce": both spellings become NaT
    expected = Index([NaT], dtype="M8[ns]")
    for values in ([1.5], ["1.5"]):
        res = to_datetime(values, unit=unit, errors="coerce")
        tm.assert_index_equal(res, expected)

    # round floats are OK and agree with the equivalent integer
    res = to_datetime([1.0], unit=unit)
    expected = to_datetime([1], unit=unit)
    tm.assert_index_equal(res, expected)
1532+
15011533
def test_unit(self, cache):
15021534
# GH 11758
15031535
# test proper behavior with errors

0 commit comments

Comments
 (0)