diff --git a/pandas/_libs/lib.pyi b/pandas/_libs/lib.pyi index c51492c92f44c..188494c7c60db 100644 --- a/pandas/_libs/lib.pyi +++ b/pandas/_libs/lib.pyi @@ -169,9 +169,6 @@ def ensure_string_array( copy: bool = ..., skipna: bool = ..., ) -> npt.NDArray[np.object_]: ... -def infer_datetimelike_array( - arr: npt.NDArray[np.object_], -) -> str: ... def convert_nans_to_NA( arr: npt.NDArray[np.object_], ) -> npt.NDArray[np.object_]: ... diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7880709418adc..3769bbf087fee 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1565,99 +1565,6 @@ def infer_dtype(value: object, skipna: bool = True) -> str: return "mixed" -def infer_datetimelike_array(arr: ndarray[object]) -> tuple[str, bool]: - """ - Infer if we have a datetime or timedelta array. - - date: we have *only* date and maybe strings, nulls - - datetime: we have *only* datetimes and maybe strings, nulls - - timedelta: we have *only* timedeltas and maybe strings, nulls - - nat: we do not have *any* date, datetimes or timedeltas, but do have - at least a NaT - - mixed: other objects (strings, a mix of tz-aware and tz-naive, or - actual objects) - - Parameters - ---------- - arr : ndarray[object] - - Returns - ------- - str: {datetime, timedelta, date, nat, mixed} - """ - cdef: - Py_ssize_t i, n = len(arr) - bint seen_timedelta = False, seen_date = False, seen_datetime = False - bint seen_tz_aware = False, seen_tz_naive = False - bint seen_nat = False - bint seen_period = False, seen_interval = False - object v - - for i in range(n): - v = arr[i] - if isinstance(v, str): - return "mixed" - - elif v is None or util.is_nan(v): - # nan or None - pass - elif v is NaT: - seen_nat = True - elif PyDateTime_Check(v): - # datetime - seen_datetime = True - - # disambiguate between tz-naive and tz-aware - if v.tzinfo is None: - seen_tz_naive = True - else: - seen_tz_aware = True - - if seen_tz_naive and seen_tz_aware: - return "mixed" - elif util.is_datetime64_object(v): - # np.datetime64 - seen_datetime = True - elif PyDate_Check(v): - seen_date = True - elif is_timedelta(v): - # timedelta, or timedelta64 - seen_timedelta = True - elif is_period_object(v): - seen_period = True - break - elif is_interval(v): - seen_interval = True - break - else: - return "mixed" - - if seen_period: - if is_period_array(arr): - return "period" - return "mixed" - - if seen_interval: - if is_interval_array(arr): - return "interval" - return "mixed" - - if seen_date: - if not seen_datetime and not seen_timedelta: - return "date" - return "mixed" - - elif seen_datetime and not seen_timedelta: - return "datetime" - elif seen_timedelta and not seen_datetime: - return "timedelta" - elif seen_datetime and seen_timedelta: - return "mixed" - elif seen_nat: - return "nat" - - return "mixed" - - cdef inline bint is_timedelta(object o): return PyDelta_Check(o) or util.is_timedelta64_object(o) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index dd4e801af5894..a5d762a280566 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -771,7 +771,7 @@ def _infer_types( result = BooleanArray(result, bool_mask) elif result.dtype == np.object_ and use_nullable_dtypes: # read_excel sends array of datetime objects - inferred_type = lib.infer_datetimelike_array(result) + inferred_type = lib.infer_dtype(result) if inferred_type != "datetime": result = StringDtype().construct_array_type()._from_sequence(values) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 50fe8379ffa06..df2afad51abf8 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1354,79 +1354,6 @@ def test_infer_dtype_period_with_na(self, na_value): arr = np.array([na_value, Period("2011-01", freq="D"), na_value]) assert lib.infer_dtype(arr, skipna=True) == "period" - @pytest.mark.parametrize( - "data", - [ - [datetime(2017, 6, 12, 19, 30), datetime(2017, 3, 11, 1, 15)], - [Timestamp("20170612"), Timestamp("20170311")], - [ - Timestamp("20170612", tz="US/Eastern"), - Timestamp("20170311", tz="US/Eastern"), - ], - [np.datetime64("2017-06-12"), np.datetime64("2017-03-11")], - [np.datetime64("2017-06-12"), datetime(2017, 3, 11, 1, 15)], - ], - ) - def test_infer_datetimelike_array_datetime(self, data): - assert lib.infer_datetimelike_array(data) == "datetime" - - def test_infer_datetimelike_array_date_mixed(self): - # GH49341 pre-2.0 we these were inferred as "datetime" and "timedelta", - # respectively - data = [date(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")] - assert lib.infer_datetimelike_array(data) == "mixed" - - data = ([timedelta(2017, 6, 12), date(2017, 3, 11)],) - assert lib.infer_datetimelike_array(data) == "mixed" - - @pytest.mark.parametrize( - "data", - [ - [timedelta(2017, 6, 12), timedelta(2017, 3, 11)], - [np.timedelta64(2017, "D"), np.timedelta64(6, "s")], - [np.timedelta64(2017, "D"), timedelta(2017, 3, 11)], - ], - ) - def test_infer_datetimelike_array_timedelta(self, data): - assert lib.infer_datetimelike_array(data) == "timedelta" - - def test_infer_datetimelike_array_date(self): - arr = [date(2017, 6, 12), date(2017, 3, 11)] - assert lib.infer_datetimelike_array(arr) == "date" - - @pytest.mark.parametrize( - "data", - [ - ["2017-06-12", "2017-03-11"], - [20170612, 20170311], - [20170612.5, 20170311.8], - [Dummy(), Dummy()], - [Timestamp("20170612"), Timestamp("20170311", tz="US/Eastern")], - [Timestamp("20170612"), 20170311], - [timedelta(2017, 6, 12), Timestamp("20170311", tz="US/Eastern")], - ], - ) - def test_infer_datetimelike_array_mixed(self, data): - assert lib.infer_datetimelike_array(data) == "mixed" - - @pytest.mark.parametrize( - "first, expected", - [ - [[None], "mixed"], - [[np.nan], "mixed"], - [[pd.NaT], "nat"], - [[datetime(2017, 6, 12, 19, 30), pd.NaT], "datetime"], - [[np.datetime64("2017-06-12"), pd.NaT], "datetime"], - [[date(2017, 6, 12), pd.NaT], "date"], - [[timedelta(2017, 6, 12), pd.NaT], "timedelta"], - [[np.timedelta64(2017, "D"), pd.NaT], "timedelta"], - ], - ) - @pytest.mark.parametrize("second", [None, np.nan]) - def test_infer_datetimelike_array_nan_nat_like(self, first, second, expected): - first.append(second) - assert lib.infer_datetimelike_array(first) == expected - def test_infer_dtype_all_nan_nat_like(self): arr = np.array([np.nan, np.nan]) assert lib.infer_dtype(arr, skipna=True) == "floating"