Skip to content

Commit 9f71476

Browse files
authored
CLN: Stopped object inference in constructors for pandas objects (#58758)
* CLN: Stopped object inference in constructors for pandas objects * Adjust tests
1 parent a5492ee commit 9f71476

File tree

15 files changed

+48
-136
lines changed

15 files changed

+48
-136
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ Removal of prior version deprecations/changes
311311
- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`, use ``sep=r"\s+"`` instead (:issue:`55569`)
312312
- Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
313313
- Stopped automatically casting non-datetimelike values (mainly strings) in :meth:`Series.isin` and :meth:`Index.isin` with ``datetime64``, ``timedelta64``, and :class:`PeriodDtype` dtypes (:issue:`53111`)
314+
- Stopped performing dtype inference in :class:`Index`, :class:`Series` and :class:`DataFrame` constructors when given a pandas object (:class:`Series`, :class:`Index`, :class:`ExtensionArray`); call ``.infer_objects`` on the input to keep the previous behavior (:issue:`56012`)
314315
- Stopped performing dtype inference when setting a :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
315316
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
316317
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)

pandas/_testing/__init__.py

+3-10
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
ContextManager,
1111
cast,
1212
)
13-
import warnings
1413

1514
import numpy as np
1615

@@ -290,17 +289,11 @@ def box_expected(expected, box_cls, transpose: bool = True):
290289
else:
291290
expected = pd.array(expected, copy=False)
292291
elif box_cls is Index:
293-
with warnings.catch_warnings():
294-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
295-
expected = Index(expected)
292+
expected = Index(expected)
296293
elif box_cls is Series:
297-
with warnings.catch_warnings():
298-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
299-
expected = Series(expected)
294+
expected = Series(expected)
300295
elif box_cls is DataFrame:
301-
with warnings.catch_warnings():
302-
warnings.filterwarnings("ignore", "Dtype inference", category=FutureWarning)
303-
expected = Series(expected).to_frame()
296+
expected = Series(expected).to_frame()
304297
if transpose:
305298
# for vector operations, we need a DataFrame to be a single-row,
306299
# not a single-column, in order to operate against non-DataFrame

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -554,7 +554,7 @@ def sanitize_array(
554554
# Avoid ending up with a NumpyExtensionArray
555555
dtype = dtype.numpy_dtype
556556

557-
data_was_index = isinstance(data, ABCIndex)
557+
infer_object = not isinstance(data, (ABCIndex, ABCSeries))
558558

559559
# extract ndarray or ExtensionArray, ensure we have no NumpyExtensionArray
560560
data = extract_array(data, extract_numpy=True, extract_range=True)
@@ -607,7 +607,7 @@ def sanitize_array(
607607

608608
if dtype is None:
609609
subarr = data
610-
if data.dtype == object and not data_was_index:
610+
if data.dtype == object and infer_object:
611611
subarr = maybe_infer_to_datetimelike(data)
612612
elif data.dtype.kind == "U" and using_pyarrow_string_dtype():
613613
from pandas.core.arrays.string_ import StringDtype

pandas/core/frame.py

-16
Original file line numberDiff line numberDiff line change
@@ -728,10 +728,6 @@ def __init__(
728728
NDFrame.__init__(self, data)
729729
return
730730

731-
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
732-
data_dtype = getattr(data, "dtype", None)
733-
original_dtype = dtype
734-
735731
# GH47215
736732
if isinstance(index, set):
737733
raise ValueError("index cannot be a set")
@@ -896,18 +892,6 @@ def __init__(
896892

897893
NDFrame.__init__(self, mgr)
898894

899-
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
900-
if self.dtypes.iloc[0] != data_dtype:
901-
warnings.warn(
902-
"Dtype inference on a pandas object "
903-
"(Series, Index, ExtensionArray) is deprecated. The DataFrame "
904-
"constructor will keep the original dtype in the future. "
905-
"Call `infer_objects` on the result to get the old "
906-
"behavior.",
907-
FutureWarning,
908-
stacklevel=2,
909-
)
910-
911895
# ----------------------------------------------------------------------
912896

913897
def __dataframe__(

pandas/core/indexes/base.py

+5-17
Original file line numberDiff line numberDiff line change
@@ -490,8 +490,6 @@ def __new__(
490490
if not copy and isinstance(data, (ABCSeries, Index)):
491491
refs = data._references
492492

493-
is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))
494-
495493
# range
496494
if isinstance(data, (range, RangeIndex)):
497495
result = RangeIndex(start=data, copy=copy, name=name)
@@ -508,7 +506,7 @@ def __new__(
508506
elif is_ea_or_datetimelike_dtype(data_dtype):
509507
pass
510508

511-
elif isinstance(data, (np.ndarray, Index, ABCSeries)):
509+
elif isinstance(data, (np.ndarray, ABCMultiIndex)):
512510
if isinstance(data, ABCMultiIndex):
513511
data = data._values
514512

@@ -518,7 +516,9 @@ def __new__(
518516
# they are actually ints, e.g. '0' and 0.0
519517
# should not be coerced
520518
data = com.asarray_tuplesafe(data, dtype=_dtype_obj)
521-
519+
elif isinstance(data, (ABCSeries, Index)):
520+
# GH 56244: Avoid potential inference on object types
521+
pass
522522
elif is_scalar(data):
523523
raise cls._raise_scalar_data_error(data)
524524
elif hasattr(data, "__array__"):
@@ -571,19 +571,7 @@ def __new__(
571571
klass = cls._dtype_to_subclass(arr.dtype)
572572

573573
arr = klass._ensure_array(arr, arr.dtype, copy=False)
574-
result = klass._simple_new(arr, name, refs=refs)
575-
if dtype is None and is_pandas_object and data_dtype == np.object_:
576-
if result.dtype != data_dtype:
577-
warnings.warn(
578-
"Dtype inference on a pandas object "
579-
"(Series, Index, ExtensionArray) is deprecated. The Index "
580-
"constructor will keep the original dtype in the future. "
581-
"Call `infer_objects` on the result to get the old "
582-
"behavior.",
583-
FutureWarning,
584-
stacklevel=2,
585-
)
586-
return result # type: ignore[return-value]
574+
return klass._simple_new(arr, name, refs=refs)
587575

588576
@classmethod
589577
def _ensure_array(cls, data, dtype, copy: bool):

pandas/core/internals/construction.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def ndarray_to_mgr(
192192
) -> Manager:
193193
# used in DataFrame.__init__
194194
# input must be a ndarray, list, Series, Index, ExtensionArray
195+
infer_object = not isinstance(values, (ABCSeries, Index, ExtensionArray))
195196

196197
if isinstance(values, ABCSeries):
197198
if columns is None:
@@ -287,15 +288,14 @@ def ndarray_to_mgr(
287288
# if we don't have a dtype specified, then try to convert objects
288289
# on the entire block; this is to convert if we have datetimelike's
289290
# embedded in an object type
290-
if dtype is None and is_object_dtype(values.dtype):
291+
if dtype is None and infer_object and is_object_dtype(values.dtype):
291292
obj_columns = list(values)
292293
maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
293294
# don't convert (and copy) the objects if no type inference occurs
294295
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
295-
dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime]
296296
block_values = [
297-
new_block_2d(dvals_list[n], placement=BlockPlacement(n))
298-
for n in range(len(dvals_list))
297+
new_block_2d(ensure_block_shape(dval, 2), placement=BlockPlacement(n))
298+
for n, dval in enumerate(maybe_datetime)
299299
]
300300
else:
301301
bp = BlockPlacement(slice(len(columns)))

pandas/core/series.py

-16
Original file line numberDiff line numberDiff line change
@@ -389,10 +389,6 @@ def __init__(
389389
self.name = name
390390
return
391391

392-
is_pandas_object = isinstance(data, (Series, Index, ExtensionArray))
393-
data_dtype = getattr(data, "dtype", None)
394-
original_dtype = dtype
395-
396392
if isinstance(data, (ExtensionArray, np.ndarray)):
397393
if copy is not False:
398394
if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
@@ -438,7 +434,6 @@ def __init__(
438434
data = data.astype(dtype)
439435

440436
refs = data._references
441-
data = data._values
442437
copy = False
443438

444439
elif isinstance(data, np.ndarray):
@@ -512,17 +507,6 @@ def __init__(
512507
self.name = name
513508
self._set_axis(0, index)
514509

515-
if original_dtype is None and is_pandas_object and data_dtype == np.object_:
516-
if self.dtype != data_dtype:
517-
warnings.warn(
518-
"Dtype inference on a pandas object "
519-
"(Series, Index, ExtensionArray) is deprecated. The Series "
520-
"constructor will keep the original dtype in the future. "
521-
"Call `infer_objects` on the result to get the old behavior.",
522-
FutureWarning,
523-
stacklevel=find_stack_level(),
524-
)
525-
526510
def _init_dict(
527511
self, data: Mapping, index: Index | None = None, dtype: DtypeObj | None = None
528512
):

pandas/tests/copy_view/test_constructors.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -228,12 +228,12 @@ def test_dataframe_from_series_or_index_different_dtype(index_or_series):
228228
assert df._mgr._has_no_reference(0)
229229

230230

231-
def test_dataframe_from_series_infer_datetime():
231+
def test_dataframe_from_series_dont_infer_datetime():
232232
ser = Series([Timestamp("2019-12-31"), Timestamp("2020-12-31")], dtype=object)
233-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
234-
df = DataFrame(ser)
235-
assert not np.shares_memory(get_array(ser), get_array(df, 0))
236-
assert df._mgr._has_no_reference(0)
233+
df = DataFrame(ser)
234+
assert df.dtypes.iloc[0] == np.dtype(object)
235+
assert np.shares_memory(get_array(ser), get_array(df, 0))
236+
assert not df._mgr._has_no_reference(0)
237237

238238

239239
@pytest.mark.parametrize("index", [None, [0, 1, 2]])

pandas/tests/frame/test_constructors.py

+5-12
Original file line numberDiff line numberDiff line change
@@ -2702,21 +2702,14 @@ def test_frame_string_inference_block_dim(self):
27022702
df = DataFrame(np.array([["hello", "goodbye"], ["hello", "Hello"]]))
27032703
assert df._mgr.blocks[0].ndim == 2
27042704

2705-
def test_inference_on_pandas_objects(self):
2705+
@pytest.mark.parametrize("klass", [Series, Index])
2706+
def test_inference_on_pandas_objects(self, klass):
27062707
# GH#56012
2707-
idx = Index([Timestamp("2019-12-31")], dtype=object)
2708-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2709-
result = DataFrame(idx, columns=["a"])
2710-
assert result.dtypes.iloc[0] != np.object_
2711-
result = DataFrame({"a": idx})
2708+
obj = klass([Timestamp("2019-12-31")], dtype=object)
2709+
result = DataFrame(obj, columns=["a"])
27122710
assert result.dtypes.iloc[0] == np.object_
27132711

2714-
ser = Series([Timestamp("2019-12-31")], dtype=object)
2715-
2716-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2717-
result = DataFrame(ser, columns=["a"])
2718-
assert result.dtypes.iloc[0] != np.object_
2719-
result = DataFrame({"a": ser})
2712+
result = DataFrame({"a": obj})
27202713
assert result.dtypes.iloc[0] == np.object_
27212714

27222715
def test_dict_keys_returns_rangeindex(self):

pandas/tests/indexes/base_class/test_constructors.py

+5-11
Original file line numberDiff line numberDiff line change
@@ -59,18 +59,12 @@ def test_index_string_inference(self):
5959
ser = Index(["a", 1])
6060
tm.assert_index_equal(ser, expected)
6161

62-
def test_inference_on_pandas_objects(self):
62+
@pytest.mark.parametrize("klass", [Series, Index])
63+
def test_inference_on_pandas_objects(self, klass):
6364
# GH#56012
64-
idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
65-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
66-
result = Index(idx)
67-
assert result.dtype != np.object_
68-
69-
ser = Series([pd.Timestamp("2019-12-31")], dtype=object)
70-
71-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
72-
result = Index(ser)
73-
assert result.dtype != np.object_
65+
obj = klass([pd.Timestamp("2019-12-31")], dtype=object)
66+
result = Index(obj)
67+
assert result.dtype == np.object_
7468

7569
def test_constructor_not_read_only(self):
7670
# GH#57130

pandas/tests/indexes/test_base.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -104,23 +104,20 @@ def test_constructor_copy(self, using_infer_string):
104104
)
105105
def test_constructor_from_index_dtlike(self, cast_as_obj, index):
106106
if cast_as_obj:
107-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
108-
result = Index(index.astype(object))
109-
else:
110-
result = Index(index)
111-
112-
tm.assert_index_equal(result, index)
113-
114-
if isinstance(index, DatetimeIndex):
115-
assert result.tz == index.tz
116-
if cast_as_obj:
107+
result = Index(index.astype(object))
108+
assert result.dtype == np.dtype(object)
109+
if isinstance(index, DatetimeIndex):
117110
# GH#23524 check that Index(dti, dtype=object) does not
118111
# incorrectly raise ValueError, and that nanoseconds are not
119112
# dropped
120113
index += pd.Timedelta(nanoseconds=50)
121114
result = Index(index, dtype=object)
122115
assert result.dtype == np.object_
123116
assert list(result) == list(index)
117+
else:
118+
result = Index(index)
119+
120+
tm.assert_index_equal(result, index)
124121

125122
@pytest.mark.parametrize(
126123
"index,has_tz",

pandas/tests/series/accessors/test_dt_accessor.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -256,9 +256,8 @@ def test_dt_accessor_limited_display_api(self):
256256
tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods)))
257257

258258
# Period
259-
idx = period_range("20130101", periods=5, freq="D", name="xxx").astype(object)
260-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
261-
ser = Series(idx)
259+
idx = period_range("20130101", periods=5, freq="D", name="xxx")
260+
ser = Series(idx)
262261
results = get_dir(ser)
263262
tm.assert_almost_equal(
264263
results, sorted(set(ok_for_period + ok_for_period_methods))

pandas/tests/series/methods/test_equals.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -82,15 +82,13 @@ def test_equals_matching_nas():
8282
left = Series([np.datetime64("NaT")], dtype=object)
8383
right = Series([np.datetime64("NaT")], dtype=object)
8484
assert left.equals(right)
85-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
86-
assert Index(left).equals(Index(right))
85+
assert Index(left).equals(Index(right))
8786
assert left.array.equals(right.array)
8887

8988
left = Series([np.timedelta64("NaT")], dtype=object)
9089
right = Series([np.timedelta64("NaT")], dtype=object)
9190
assert left.equals(right)
92-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
93-
assert Index(left).equals(Index(right))
91+
assert Index(left).equals(Index(right))
9492
assert left.array.equals(right.array)
9593

9694
left = Series([np.float64("NaN")], dtype=object)

pandas/tests/series/test_constructors.py

+7-14
Original file line numberDiff line numberDiff line change
@@ -1318,9 +1318,8 @@ def test_constructor_periodindex(self):
13181318
pi = period_range("20130101", periods=5, freq="D")
13191319
s = Series(pi)
13201320
assert s.dtype == "Period[D]"
1321-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
1322-
expected = Series(pi.astype(object))
1323-
tm.assert_series_equal(s, expected)
1321+
expected = Series(pi.astype(object))
1322+
assert expected.dtype == object
13241323

13251324
def test_constructor_dict(self):
13261325
d = {"a": 0.0, "b": 1.0, "c": 2.0}
@@ -2137,20 +2136,14 @@ def test_series_string_inference_na_first(self):
21372136
result = Series([pd.NA, "b"])
21382137
tm.assert_series_equal(result, expected)
21392138

2140-
def test_inference_on_pandas_objects(self):
2139+
@pytest.mark.parametrize("klass", [Series, Index])
2140+
def test_inference_on_pandas_objects(self, klass):
21412141
# GH#56012
2142-
ser = Series([Timestamp("2019-12-31")], dtype=object)
2143-
with tm.assert_produces_warning(None):
2144-
# This doesn't do inference
2145-
result = Series(ser)
2142+
obj = klass([Timestamp("2019-12-31")], dtype=object)
2143+
# This doesn't do inference
2144+
result = Series(obj)
21462145
assert result.dtype == np.object_
21472146

2148-
idx = Index([Timestamp("2019-12-31")], dtype=object)
2149-
2150-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
2151-
result = Series(idx)
2152-
assert result.dtype != np.object_
2153-
21542147

21552148
class TestSeriesConstructorIndexCoercion:
21562149
def test_series_constructor_datetimelike_index_coercion(self):

pandas/tests/tseries/frequencies/test_inference.py

-12
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
date_range,
2424
period_range,
2525
)
26-
import pandas._testing as tm
2726
from pandas.core.arrays import (
2827
DatetimeArray,
2928
TimedeltaArray,
@@ -202,17 +201,6 @@ def test_infer_freq_custom(base_delta_code_pair, constructor):
202201
assert frequencies.infer_freq(index) is None
203202

204203

205-
@pytest.mark.parametrize(
206-
"freq,expected", [("Q", "QE-DEC"), ("Q-NOV", "QE-NOV"), ("Q-OCT", "QE-OCT")]
207-
)
208-
def test_infer_freq_index(freq, expected):
209-
rng = period_range("1959Q2", "2009Q3", freq=freq)
210-
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
211-
rng = Index(rng.to_timestamp("D", how="e").astype(object))
212-
213-
assert rng.inferred_freq == expected
214-
215-
216204
@pytest.mark.parametrize(
217205
"expected,dates",
218206
list(

0 commit comments

Comments
 (0)