diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index a30d68319cafe..b9ee5d5590861 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -630,6 +630,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) +- Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings. In a future version, these will be parsed as datetimes (matching unit-less behavior) instead of being cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`) - Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`) - :meth:`Index.is_boolean` has been deprecated. Use :func:`pandas.api.types.is_bool_dtype` instead (:issue:`50042`) - :meth:`Index.is_integer` has been deprecated. Use :func:`pandas.api.types.is_integer_dtype` instead (:issue:`50042`) @@ -637,6 +638,7 @@ Deprecations - :meth:`Index.holds_integer` has been deprecated. Use :func:`pandas.api.types.infer_dtype` instead (:issue:`50243`) - :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`) - :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_intterval_dtype` instead (:issue:`50042`) +- .. --------------------------------------------------------------------------- .. 
_whatsnew_200.prior_deprecations: diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2d31eeaa53a61..9d9b93f274c60 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -1,3 +1,7 @@ +import warnings + +from pandas.util._exceptions import find_stack_level + cimport cython from datetime import timezone @@ -303,6 +307,16 @@ def array_with_unit_to_datetime( raise ValueError( f"non convertible value {val} with the unit '{unit}'" ) + warnings.warn( + "The behavior of 'to_datetime' with 'unit' when parsing " + "strings is deprecated. In a future version, strings will " + "be parsed as datetime strings, matching the behavior " + "without a 'unit'. To retain the old behavior, explicitly " + "cast strings to a numeric type before calling " + "to_datetime.", + FutureWarning, + stacklevel=find_stack_level(), + ) iresult[i] = cast_from_unit(fval, unit) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index afb0be0729344..fb64e089d53a8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1222,7 +1222,9 @@ def _try_convert_to_date(self, data): if new_data.dtype == "object": try: new_data = data.astype("int64") - except (TypeError, ValueError, OverflowError): + except OverflowError: + return data, False + except (TypeError, ValueError): pass # ignore numbers that are out of range diff --git a/pandas/tests/groupby/test_value_counts.py b/pandas/tests/groupby/test_value_counts.py index 3d1228d65ac7c..ae4b74fc814da 100644 --- a/pandas/tests/groupby/test_value_counts.py +++ b/pandas/tests/groupby/test_value_counts.py @@ -135,9 +135,7 @@ def test_series_groupby_value_counts_with_grouper(utc): } ).drop([3]) - df["Datetime"] = to_datetime( - df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s" - ) + df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s") dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 @@ -1010,9 +1008,7 @@ def 
test_value_counts_time_grouper(utc): } ).drop([3]) - df["Datetime"] = to_datetime( - df["Timestamp"].apply(lambda t: str(t)), utc=utc, unit="s" - ) + df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s") gb = df.groupby(Grouper(freq="1D", key="Datetime")) result = gb.value_counts() dates = to_datetime( diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index dfbe78e53de40..a1217b268613a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1723,11 +1723,13 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit): # GH#50301 # Match Timestamp behavior in disallowing non-round floats with # Y or M unit + warn_msg = "strings will be parsed as datetime strings" msg = f"Conversion of non-round float with unit={unit} is ambiguous" with pytest.raises(ValueError, match=msg): to_datetime([1.5], unit=unit, errors="raise") with pytest.raises(ValueError, match=msg): - to_datetime(["1.5"], unit=unit, errors="raise") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + to_datetime(["1.5"], unit=unit, errors="raise") # with errors="ignore" we also end up raising within the Timestamp # constructor; this may not be ideal @@ -1742,7 +1744,8 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit): expected = Index([NaT], dtype="M8[ns]") tm.assert_index_equal(res, expected) - res = to_datetime(["1.5"], unit=unit, errors="coerce") + with tm.assert_produces_warning(FutureWarning, match=warn_msg): + res = to_datetime(["1.5"], unit=unit, errors="coerce") tm.assert_index_equal(res, expected) # round floats are OK