Skip to content

DEPR: Index.insert dtype inference #58059

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ Removal of prior version deprecations/changes
- Removed "freq" keyword from :class:`PeriodArray` constructor, use "dtype" instead (:issue:`52462`)
- Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
- Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
- Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)
- Removed the "closed" and "unit" keywords in :meth:`TimedeltaIndex.__new__` (:issue:`52628`, :issue:`55499`)
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
Expand Down
24 changes: 3 additions & 21 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,7 @@

import numpy as np

from pandas._config import (
get_option,
using_pyarrow_string_dtype,
)
from pandas._config import get_option

from pandas._libs import (
NaT,
Expand Down Expand Up @@ -6614,23 +6611,8 @@ def insert(self, loc: int, item) -> Index:
loc = loc if loc >= 0 else loc - 1
new_values[loc] = item

out = Index._with_infer(new_values, name=self.name)
if (
using_pyarrow_string_dtype()
and is_string_dtype(out.dtype)
and new_values.dtype == object
):
out = out.astype(new_values.dtype)
if self.dtype == object and out.dtype != object:
# GH#51363
warnings.warn(
"The behavior of Index.insert with object-dtype is deprecated, "
"in a future version this will return an object-dtype Index "
"instead of inferring a non-object dtype. To retain the old "
"behavior, do `idx.insert(loc, item).infer_objects(copy=False)`",
FutureWarning,
stacklevel=find_stack_level(),
)
# GH#51363 stopped doing dtype inference here
out = Index(new_values, dtype=new_values.dtype, name=self.name)
return out

def drop(
Expand Down
19 changes: 2 additions & 17 deletions pandas/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1896,15 +1896,7 @@ def _setitem_with_indexer(self, indexer, value, name: str = "iloc") -> None:
# just replacing the block manager here
# so the object is the same
index = self.obj._get_axis(i)
with warnings.catch_warnings():
# TODO: re-issue this with setitem-specific message?
warnings.filterwarnings(
"ignore",
"The behavior of Index.insert with object-dtype "
"is deprecated",
category=FutureWarning,
)
labels = index.insert(len(index), key)
labels = index.insert(len(index), key)

# We are expanding the Series/DataFrame values to match
# the length of the new index `labels`. GH#40096 ensure
Expand Down Expand Up @@ -2222,14 +2214,7 @@ def _setitem_with_indexer_missing(self, indexer, value):
# and set inplace
if self.ndim == 1:
index = self.obj.index
with warnings.catch_warnings():
# TODO: re-issue this with setitem-specific message?
warnings.filterwarnings(
"ignore",
"The behavior of Index.insert with object-dtype is deprecated",
category=FutureWarning,
)
new_index = index.insert(len(index), indexer)
new_index = index.insert(len(index), indexer)

# we have a coerced indexer, e.g. a float
# that matches in an int64 Index, so
Expand Down
9 changes: 1 addition & 8 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,14 +1480,7 @@ def insert(self, loc: int, item: Hashable, value: ArrayLike, refs=None) -> None:
value : np.ndarray or ExtensionArray
refs : The reference tracking object of the value to set.
"""
with warnings.catch_warnings():
# TODO: re-issue this with setitem-specific message?
warnings.filterwarnings(
"ignore",
"The behavior of Index.insert with object-dtype is deprecated",
category=FutureWarning,
)
new_axis = self.items.insert(loc, item)
new_axis = self.items.insert(loc, item)

if value.ndim == 2:
value = value.T
Expand Down
10 changes: 2 additions & 8 deletions pandas/tests/indexes/test_old_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -409,19 +409,13 @@ def test_where(self, listlike_box, simple_index):
tm.assert_index_equal(result, expected)

def test_insert_base(self, index):
# GH#51363
trimmed = index[1:4]

if not len(index):
pytest.skip("Not applicable for empty index")

# test 0th element
warn = None
if index.dtype == object and index.inferred_type == "boolean":
# GH#51363
warn = FutureWarning
msg = "The behavior of Index.insert with object-dtype is deprecated"
with tm.assert_produces_warning(warn, match=msg):
result = trimmed.insert(0, index[0])
result = trimmed.insert(0, index[0])
assert index[0:4].equals(result)

@pytest.mark.skipif(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexing/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2025,7 +2025,7 @@ def test_loc_setitem_incremental_with_dst(self):
ids=["self", "to_datetime64", "to_pydatetime", "np.datetime64"],
)
def test_loc_setitem_datetime_keys_cast(self, conv):
# GH#9516
# GH#9516, GH#51363 changed in 3.0 to not cast on Index.insert
dt1 = Timestamp("20130101 09:00:00")
dt2 = Timestamp("20130101 10:00:00")
df = DataFrame()
Expand All @@ -2034,7 +2034,7 @@ def test_loc_setitem_datetime_keys_cast(self, conv):

expected = DataFrame(
{"one": [100.0, 200.0]},
index=[dt1, dt2],
index=Index([conv(dt1), conv(dt2)], dtype=object),
columns=Index(["one"], dtype=object),
)
tm.assert_frame_equal(df, expected)
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/series/indexing/test_set_value.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@
import numpy as np

from pandas import (
DatetimeIndex,
Index,
Series,
)
import pandas._testing as tm


def test_series_set_value():
# GH#1561
# GH#1561, GH#51363 as of 3.0 we do not do inference in Index.insert

dates = [datetime(2001, 1, 1), datetime(2001, 1, 2)]
index = DatetimeIndex(dates)
index = Index(dates, dtype=object)

s = Series(dtype=object)
s._set_value(dates[0], 1.0)
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,11 @@ def test_setitem_callable_other(self):

class TestSetitemWithExpansion:
def test_setitem_empty_series(self):
# GH#10193
# GH#10193, GH#51363 changed in 3.0 to not do inference in Index.insert
key = Timestamp("2012-01-01")
series = Series(dtype=object)
series[key] = 47
expected = Series(47, [key])
expected = Series(47, Index([key], dtype=object))
tm.assert_series_equal(series, expected)

def test_setitem_empty_series_datetimeindex_preserves_freq(self):
Expand Down