Skip to content

Commit 8e4f050

Browse files
phofl and cbpygit
authored and committed
DEPR: Deprecate dtype inference on pandas objects (pandas-dev#56244)
1 parent 819bcce commit 8e4f050

File tree

16 files changed

+141
-26
lines changed

16 files changed

+141
-26
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ Other Deprecations
555555
- Deprecated behavior of :meth:`Index.insert` with an object-dtype index silently performing type inference on the result, explicitly call ``result.infer_objects(copy=False)`` for the old behavior instead (:issue:`51363`)
556556
- Deprecated casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
557557
- Deprecated downcasting behavior in :meth:`Series.where`, :meth:`DataFrame.where`, :meth:`Series.mask`, :meth:`DataFrame.mask`, :meth:`Series.clip`, :meth:`DataFrame.clip`; in a future version these will not infer object-dtype columns to non-object dtype, or all-round floats to integer dtype. Call ``result.infer_objects(copy=False)`` on the result for object inference, or explicitly cast floats to ints. To opt in to the future version, use ``pd.set_option("future.no_silent_downcasting", True)`` (:issue:`53656`)
558+
- Deprecated dtype inference in the :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object as input; call ``.infer_objects`` on the input to keep the current behavior (:issue:`56012`)
558559
- Deprecated dtype inference when setting a :class:`Index` into a :class:`DataFrame`, cast explicitly instead (:issue:`56102`)
559560
- Deprecated including the groups in computations when using :meth:`.DataFrameGroupBy.apply` and :meth:`.DataFrameGroupBy.resample`; pass ``include_groups=False`` to exclude the groups (:issue:`7155`)
560561
- Deprecated indexing an :class:`Index` with a boolean indexer of length zero (:issue:`55820`)

pandas/_testing/__init__.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
ContextManager,
1111
cast,
1212
)
13+
import warnings
1314

1415
import numpy as np
1516

@@ -285,11 +286,17 @@ def box_expected(expected, box_cls, transpose: bool = True):
285286
else:
286287
expected = pd.array(expected, copy=False)
287288
elif box_cls is Index:
288-
expected = Index(expected)
289+
with warnings.catch_warnings():
290+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
291+
expected = Index(expected)
289292
elif box_cls is Series:
290-
expected = Series(expected)
293+
with warnings.catch_warnings():
294+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
295+
expected = Series(expected)
291296
elif box_cls is DataFrame:
292-
expected = Series(expected).to_frame()
297+
with warnings.catch_warnings():
298+
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
299+
expected = Series(expected).to_frame()
293300
if transpose:
294301
# for vector operations, we need a DataFrame to be a single-row,
295302
# not a single-column, in order to operate against non-DataFrame

pandas/core/frame.py

+16
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,10 @@ def __init__(
722722

723723
manager = _get_option("mode.data_manager", silent=True)
724724

725+
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
726+
data_dtype = getattr(data, "dtype", None)
727+
original_dtype = dtype
728+
725729
# GH47215
726730
if isinstance(index, set):
727731
raise ValueError("index cannot be a set")
@@ -908,6 +912,18 @@ def __init__(
908912

909913
NDFrame.__init__(self, mgr)
910914

915+
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
916+
if self.dtypes.iloc[0] != data_dtype:
917+
warnings.warn(
918+
"Dtype inference on a pandas object "
919+
"(Series, Index, ExtensionArray) is deprecated. The DataFrame "
920+
"constructor will keep the original dtype in the future. "
921+
"Call `infer_objects` on the result to get the old "
922+
"behavior.",
923+
FutureWarning,
924+
stacklevel=2,
925+
)
926+
911927
# ----------------------------------------------------------------------
912928

913929
def __dataframe__(

pandas/core/indexes/base.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,8 @@ def __new__(
493493
if not copy and isinstance(data, (ABCSeries, Index)):
494494
refs = data._references
495495

496+
is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))
497+
496498
# range
497499
if isinstance(data, (range, RangeIndex)):
498500
result = RangeIndex(start=data, copy=copy, name=name)
@@ -572,7 +574,19 @@ def __new__(
572574
klass = cls._dtype_to_subclass(arr.dtype)
573575

574576
arr = klass._ensure_array(arr, arr.dtype, copy=False)
575-
return klass._simple_new(arr, name, refs=refs)
577+
result = klass._simple_new(arr, name, refs=refs)
578+
if dtype is None and is_pandas_object and data_dtype == np.object_:
579+
if result.dtype != data_dtype:
580+
warnings.warn(
581+
"Dtype inference on a pandas object "
582+
"(Series, Index, ExtensionArray) is deprecated. The Index "
583+
"constructor will keep the original dtype in the future. "
584+
"Call `infer_objects` on the result to get the old "
585+
"behavior.",
586+
FutureWarning,
587+
stacklevel=2,
588+
)
589+
return result # type: ignore[return-value]
576590

577591
@classmethod
578592
def _ensure_array(cls, data, dtype, copy: bool):

pandas/core/series.py

+15
Original file line numberDiff line numberDiff line change
@@ -424,6 +424,10 @@ def __init__(
424424
self.name = name
425425
return
426426

427+
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
428+
data_dtype = getattr(data, "dtype", None)
429+
original_dtype = dtype
430+
427431
if isinstance(data, (ExtensionArray, np.ndarray)):
428432
if copy is not False and using_copy_on_write():
429433
if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
@@ -581,6 +585,17 @@ def __init__(
581585
self.name = name
582586
self._set_axis(0, index)
583587

588+
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
589+
if self.dtype != data_dtype:
590+
warnings.warn(
591+
"Dtype inference on a pandas object "
592+
"(Series, Index, ExtensionArray) is deprecated. The Series "
593+
"constructor will keep the original dtype in the future. "
594+
"Call `infer_objects` on the result to get the old behavior.",
595+
FutureWarning,
596+
stacklevel=find_stack_level(),
597+
)
598+
584599
def _init_dict(
585600
self, data, index: Index | None = None, dtype: DtypeObj | None = None
586601
):

pandas/core/strings/accessor.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -689,19 +689,18 @@ def cat(
689689
result = cat_safe(all_cols, sep)
690690

691691
out: Index | Series
692+
if isinstance(self._orig.dtype, CategoricalDtype):
693+
# We need to infer the new categories.
694+
dtype = self._orig.dtype.categories.dtype
695+
else:
696+
dtype = self._orig.dtype
692697
if isinstance(self._orig, ABCIndex):
693698
# add dtype for case that result is all-NA
694-
dtype = None
695699
if isna(result).all():
696-
dtype = object
700+
dtype = object # type: ignore[assignment]
697701

698702
out = Index(result, dtype=dtype, name=self._orig.name)
699703
else: # Series
700-
if isinstance(self._orig.dtype, CategoricalDtype):
701-
# We need to infer the new categories.
702-
dtype = self._orig.dtype.categories.dtype # type: ignore[assignment]
703-
else:
704-
dtype = self._orig.dtype
705704
res_ser = Series(
706705
result, dtype=dtype, index=data.index, name=self._orig.name, copy=False
707706
)

pandas/tests/copy_view/test_constructors.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -314,7 +314,8 @@ def test_dataframe_from_series_or_index_different_dtype(using_copy_on_write, con
314314

315315
def test_dataframe_from_series_infer_datetime(using_copy_on_write):
316316
ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object)
317-
df = DataFrame(ser)
317+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
318+
df = DataFrame(ser)
318319
assert not np.shares_memory(get_array(ser), get_array(df, 0))
319320
if using_copy_on_write:
320321
assert df._mgr._has_no_reference(0)

pandas/tests/frame/test_constructors.py

+17
Original file line numberDiff line numberDiff line change
@@ -2768,6 +2768,23 @@ def test_frame_string_inference_block_dim(self):
27682768
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
27692769
assert df._mgr.blocks[0].ndim == 2
27702770

2771+
def test_inference_on_pandas_objects(self):
2772+
# GH#56012
2773+
idx = Index([Timestamp("2019-12-31")], dtype=object)
2774+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2775+
result = DataFrame(idx, columns=["a"])
2776+
assert result.dtypes.iloc[0] != np.object_
2777+
result = DataFrame({"a": idx})
2778+
assert result.dtypes.iloc[0] == np.object_
2779+
2780+
ser = Series([Timestamp("2019-12-31")], dtype=object)
2781+
2782+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2783+
result = DataFrame(ser, columns=["a"])
2784+
assert result.dtypes.iloc[0] != np.object_
2785+
result = DataFrame({"a": ser})
2786+
assert result.dtypes.iloc[0] == np.object_
2787+
27712788

27722789
class TestDataFrameConstructorIndexInference:
27732790
def test_frame_from_dict_of_series_overlapping_monthly_period_indexes(self):

pandas/tests/indexes/base_class/test_constructors.py

+14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas import (
66
Index,
77
MultiIndex,
8+
Series,
89
)
910
import pandas._testing as tm
1011

@@ -57,3 +58,16 @@ def test_index_string_inference(self):
5758
with pd.option_context("future.infer_string", True):
5859
ser = Index(["a", 1])
5960
tm.assert_index_equal(ser, expected)
61+
62+
def test_inference_on_pandas_objects(self):
63+
# GH#56012
64+
idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
65+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
66+
result = Index(idx)
67+
assert result.dtype != np.object_
68+
69+
ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
70+
71+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
72+
result = Index(ser)
73+
assert result.dtype != np.object_

pandas/tests/indexes/test_base.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ def test_constructor_copy(self, index, using_infer_string):
104104
)
105105
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
106106
if cast_as_obj:
107-
result = Index(index.astype(object))
107+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
108+
result = Index(index.astype(object))
108109
else:
109110
result = Index(index)
110111

pandas/tests/series/accessors/test_dt_accessor.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -259,9 +259,9 @@ def test_dt_accessor_limited_display_api(self):
259259
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
260260

261261
# Period
262-
ser = Series(
263-
period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
264-
)
262+
idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
263+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
264+
ser = Series(idx)
265265
results = get_dir(ser)
266266
tm.assert_almost_equal(
267267
results, sorted(set(ok_for_period + ok_for_period_methods))

pandas/tests/series/methods/test_between.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def test_between(self):
2020
tm.assert_series_equal(result, expected)
2121

2222
def test_between_datetime_object_dtype(self):
23-
ser = Series(bdate_range("1/1/2000", periods=20).astype(object))
23+
ser = Series(bdate_range("1/1/2000", periods=20), dtype=object)
2424
ser[::2] = np.nan
2525

2626
result = ser[ser.between(ser[3], ser[17])]

pandas/tests/series/methods/test_equals.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,15 @@ def test_equals_matching_nas():
8282
left = Series([np.datetime64("NaT")], dtype=object)
8383
right = Series([np.datetime64("NaT")], dtype=object)
8484
assert left.equals(right)
85-
assert Index(left).equals(Index(right))
85+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
86+
assert Index(left).equals(Index(right))
8687
assert left.array.equals(right.array)
8788

8889
left = Series([np.timedelta64("NaT")], dtype=object)
8990
right = Series([np.timedelta64("NaT")], dtype=object)
9091
assert left.equals(right)
91-
assert Index(left).equals(Index(right))
92+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
93+
assert Index(left).equals(Index(right))
9294
assert left.array.equals(right.array)
9395

9496
left = Series([np.float64("NaN")], dtype=object)

pandas/tests/series/test_constructors.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -1316,7 +1316,8 @@ def test_constructor_periodindex(self):
13161316
pi = period_range("20130101", periods=5, freq="D")
13171317
s = Series(pi)
13181318
assert s.dtype == "Period[D]"
1319-
expected = Series(pi.astype(object))
1319+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
1320+
expected = Series(pi.astype(object))
13201321
tm.assert_series_equal(s, expected)
13211322

13221323
def test_constructor_dict(self):
@@ -2137,6 +2138,20 @@ def test_series_string_inference_na_first(self):
21372138
result = Series([pd.NA, "b"])
21382139
tm.assert_series_equal(result, expected)
21392140

2141+
def test_inference_on_pandas_objects(self):
2142+
# GH#56012
2143+
ser = Series([Timestamp("2019-12-31")], dtype=object)
2144+
with tm.assert_produces_warning(None):
2145+
# This doesn't do inference
2146+
result = Series(ser)
2147+
assert result.dtype == np.object_
2148+
2149+
idx = Index([Timestamp("2019-12-31")], dtype=object)
2150+
2151+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2152+
result = Series(idx)
2153+
assert result.dtype != np.object_
2154+
21402155

21412156
class TestSeriesConstructorIndexCoercion:
21422157
def test_series_constructor_datetimelike_index_coercion(self):

pandas/tests/strings/test_cat.py

+16-5
Original file line numberDiff line numberDiff line change
@@ -98,14 +98,18 @@ def test_str_cat_categorical(
9898

9999
with option_context("future.infer_string", infer_string):
100100
s = Index(["a", "a", "b", "a"], dtype=dtype_caller)
101-
s = s if box == Index else Series(s, index=s)
101+
s = s if box == Index else Series(s, index=s, dtype=s.dtype)
102102
t = Index(["b", "a", "b", "c"], dtype=dtype_target)
103103

104-
expected = Index(["ab", "aa", "bb", "ac"])
104+
expected = Index(
105+
["ab", "aa", "bb", "ac"], dtype=object if dtype_caller == "object" else None
106+
)
105107
expected = (
106108
expected
107109
if box == Index
108-
else Series(expected, index=Index(s, dtype=dtype_caller))
110+
else Series(
111+
expected, index=Index(s, dtype=dtype_caller), dtype=expected.dtype
112+
)
109113
)
110114

111115
# Series/Index with unaligned Index -> t.values
@@ -123,12 +127,19 @@ def test_str_cat_categorical(
123127

124128
# Series/Index with Series having different Index
125129
t = Series(t.values, index=t.values)
126-
expected = Index(["aa", "aa", "bb", "bb", "aa"])
130+
expected = Index(
131+
["aa", "aa", "bb", "bb", "aa"],
132+
dtype=object if dtype_caller == "object" else None,
133+
)
127134
dtype = object if dtype_caller == "object" else s.dtype.categories.dtype
128135
expected = (
129136
expected
130137
if box == Index
131-
else Series(expected, index=Index(expected.str[:1], dtype=dtype))
138+
else Series(
139+
expected,
140+
index=Index(expected.str[:1], dtype=dtype),
141+
dtype=expected.dtype,
142+
)
132143
)
133144

134145
result = s.str.cat(t, sep=sep)

pandas/tests/tseries/frequencies/test_inference.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
date_range,
2424
period_range,
2525
)
26+
import pandas._testing as tm
2627
from pandas.core.arrays import (
2728
DatetimeArray,
2829
TimedeltaArray,
@@ -206,7 +207,8 @@ def test_infer_freq_custom(base_delta_code_pair, constructor):
206207
)
207208
def test_infer_freq_index(freq, expected):
208209
rng = period_range("1959Q2", "2009Q3", freq=freq)
209-
rng = Index(rng.to_timestamp("D", how="e").astype(object))
210+
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
211+
rng = Index(rng.to_timestamp("D", how="e").astype(object))
210212

211213
assert rng.inferred_freq == expected
212214

0 commit comments

Comments
 (0)