Skip to content

Commit 8448d39

Browse files
authored
DEPR: casting strings to float in to_datetime with unit (#50909)
1 parent fd9a9ea commit 8448d39

File tree

5 files changed

+26
-9
lines changed

5 files changed

+26
-9
lines changed

doc/source/whatsnew/v2.0.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -630,13 +630,15 @@ Other API changes
630630
Deprecations
631631
~~~~~~~~~~~~
632632
- Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`)
633+
- Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings. In a future version, these will be parsed as datetimes (matching unit-less behavior) instead of being cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`)
633634
- Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`)
634635
- :meth:`Index.is_boolean` has been deprecated. Use :func:`pandas.api.types.is_bool_dtype` instead (:issue:`50042`)
635636
- :meth:`Index.is_integer` has been deprecated. Use :func:`pandas.api.types.is_integer_dtype` instead (:issue:`50042`)
636637
- :meth:`Index.is_floating` has been deprecated. Use :func:`pandas.api.types.is_float_dtype` instead (:issue:`50042`)
637638
- :meth:`Index.holds_integer` has been deprecated. Use :func:`pandas.api.types.infer_dtype` instead (:issue:`50243`)
638639
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
639640
- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
641+
-
640642

641643
.. ---------------------------------------------------------------------------
642644
.. _whatsnew_200.prior_deprecations:

pandas/_libs/tslib.pyx

+14
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import warnings
2+
3+
from pandas.util._exceptions import find_stack_level
4+
15
cimport cython
26

37
from datetime import timezone
@@ -303,6 +307,16 @@ def array_with_unit_to_datetime(
303307
raise ValueError(
304308
f"non convertible value {val} with the unit '{unit}'"
305309
)
310+
warnings.warn(
311+
"The behavior of 'to_datetime' with 'unit' when parsing "
312+
"strings is deprecated. In a future version, strings will "
313+
"be parsed as datetime strings, matching the behavior "
314+
"without a 'unit'. To retain the old behavior, explicitly "
315+
"cast ints or floats to numeric type before calling "
316+
"to_datetime.",
317+
FutureWarning,
318+
stacklevel=find_stack_level(),
319+
)
306320

307321
iresult[i] = cast_from_unit(fval, unit)
308322

pandas/io/json/_json.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1222,7 +1222,9 @@ def _try_convert_to_date(self, data):
12221222
if new_data.dtype == "object":
12231223
try:
12241224
new_data = data.astype("int64")
1225-
except (TypeError, ValueError, OverflowError):
1225+
except OverflowError:
1226+
return data, False
1227+
except (TypeError, ValueError):
12261228
pass
12271229

12281230
# ignore numbers that are out of range

pandas/tests/groupby/test_value_counts.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,7 @@ def test_series_groupby_value_counts_with_grouper(utc):
135135
}
136136
).drop([3])
137137

138-
df["Datetime"] = to_datetime(
139-
df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s"
140-
)
138+
df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s")
141139
dfg = df.groupby(Grouper(freq="1D", key="Datetime"))
142140

143141
# have to sort on index because of unstable sort on values xref GH9212
@@ -1010,9 +1008,7 @@ def test_value_counts_time_grouper(utc):
10101008
}
10111009
).drop([3])
10121010

1013-
df["Datetime"] = to_datetime(
1014-
df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s"
1015-
)
1011+
df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s")
10161012
gb = df.groupby(Grouper(freq="1D", key="Datetime"))
10171013
result = gb.value_counts()
10181014
dates = to_datetime(

pandas/tests/tools/test_to_datetime.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -1723,11 +1723,13 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
17231723
# GH#50301
17241724
# Match Timestamp behavior in disallowing non-round floats with
17251725
# Y or M unit
1726+
warn_msg = "strings will be parsed as datetime strings"
17261727
msg = f"Conversion of non-round float with unit={unit} is ambiguous"
17271728
with pytest.raises(ValueError, match=msg):
17281729
to_datetime([1.5], unit=unit, errors="raise")
17291730
with pytest.raises(ValueError, match=msg):
1730-
to_datetime(["1.5"], unit=unit, errors="raise")
1731+
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
1732+
to_datetime(["1.5"], unit=unit, errors="raise")
17311733

17321734
# with errors="ignore" we also end up raising within the Timestamp
17331735
# constructor; this may not be ideal
@@ -1742,7 +1744,8 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
17421744
expected = Index([NaT], dtype="M8[ns]")
17431745
tm.assert_index_equal(res, expected)
17441746

1745-
res = to_datetime(["1.5"], unit=unit, errors="coerce")
1747+
with tm.assert_produces_warning(FutureWarning, match=warn_msg):
1748+
res = to_datetime(["1.5"], unit=unit, errors="coerce")
17461749
tm.assert_index_equal(res, expected)
17471750

17481751
# round floats are OK

0 commit comments

Comments
 (0)