Skip to content

Commit 412554b

Browse files
authored
BUG/API: DTI/TDI/PI.insert cast to object on failure (#39068)
1 parent 612cd05 commit 412554b

File tree

9 files changed

+202
-81
lines changed

9 files changed

+202
-81
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,7 @@ Indexing
290290
- Bug in :meth:`DataFrame.loc` dropping levels of :class:`MultiIndex` when :class:`DataFrame` used as input has only one row (:issue:`10521`)
291291
- Bug in setting ``timedelta64`` values into numeric :class:`Series` failing to cast to object dtype (:issue:`39086`)
292292
- Bug in setting :class:`Interval` values into a :class:`Series` or :class:`DataFrame` with mismatched :class:`IntervalDtype` incorrectly casting the new values to the existing dtype (:issue:`39120`)
293+
- Bug in :meth:`Index.insert`, in setting a new column that cannot be held in the existing ``frame.columns``, and in :meth:`Series.reset_index` and :meth:`DataFrame.reset_index` incorrectly raising instead of casting to a compatible dtype (:issue:`39068`)
293294

294295
Missing
295296
^^^^^^^

pandas/core/indexes/base.py

+1
Original file line numberDiff line numberDiff line change
@@ -5717,6 +5717,7 @@ def insert(self, loc: int, item):
57175717
"""
57185718
# Note: this method is overridden by all ExtensionIndex subclasses,
57195719
# so self is never backed by an EA.
5720+
item = lib.item_from_zerodim(item)
57205721

57215722
try:
57225723
item = self._validate_fill_value(item)

pandas/core/indexes/datetimelike.py

+5-13
Original file line numberDiff line numberDiff line change
@@ -600,7 +600,11 @@ def delete(self, loc):
600600

601601
@doc(NDArrayBackedExtensionIndex.insert)
602602
def insert(self, loc: int, item):
603-
result = super().insert(loc, item)
603+
try:
604+
result = super().insert(loc, item)
605+
except (ValueError, TypeError):
606+
# i.e. self._data._validate_scalar raised
607+
return self.astype(object).insert(loc, item)
604608

605609
result._data._freq = self._get_insert_freq(loc, item)
606610
return result
@@ -865,15 +869,3 @@ def join(
865869
def _maybe_utc_convert(self: _T, other: Index) -> Tuple[_T, Index]:
866870
# Overridden by DatetimeIndex
867871
return self, other
868-
869-
# --------------------------------------------------------------------
870-
# List-Like Methods
871-
872-
@Appender(DatetimeIndexOpsMixin.insert.__doc__)
873-
def insert(self, loc, item):
874-
if isinstance(item, str):
875-
# TODO: Why are strings special?
876-
# TODO: Should we attempt _scalar_from_string?
877-
return self.astype(object).insert(loc, item)
878-
879-
return DatetimeIndexOpsMixin.insert(self, loc, item)

pandas/core/indexes/period.py

-6
Original file line numberDiff line numberDiff line change
@@ -422,12 +422,6 @@ def inferred_type(self) -> str:
422422
# indexing
423423
return "period"
424424

425-
def insert(self, loc: int, item):
426-
if not isinstance(item, Period) or self.freq != item.freq:
427-
return self.astype(object).insert(loc, item)
428-
429-
return DatetimeIndexOpsMixin.insert(self, loc, item)
430-
431425
# ------------------------------------------------------------------------
432426
# Indexing Methods
433427

pandas/tests/frame/methods/test_reset_index.py

+30-3
Original file line numberDiff line numberDiff line change
@@ -314,18 +314,45 @@ def test_reset_index_multiindex_nan(self):
314314
rs = df.set_index(["A", "B"]).reset_index()
315315
tm.assert_frame_equal(rs, df)
316316

317-
def test_reset_index_with_datetimeindex_cols(self):
317+
@pytest.mark.parametrize(
318+
"name",
319+
[
320+
None,
321+
"foo",
322+
2,
323+
3.0,
324+
pd.Timedelta(6),
325+
Timestamp("2012-12-30", tz="UTC"),
326+
"2012-12-31",
327+
],
328+
)
329+
def test_reset_index_with_datetimeindex_cols(self, name):
318330
# GH#5818
331+
warn = None
332+
if isinstance(name, Timestamp) and name.tz is not None:
333+
# _deprecate_mismatched_indexing
334+
warn = FutureWarning
335+
319336
df = DataFrame(
320337
[[1, 2], [3, 4]],
321338
columns=date_range("1/1/2013", "1/2/2013"),
322339
index=["A", "B"],
323340
)
341+
df.index.name = name
342+
343+
with tm.assert_produces_warning(warn, check_stacklevel=False):
344+
result = df.reset_index()
345+
346+
item = name if name is not None else "index"
347+
columns = Index([item, datetime(2013, 1, 1), datetime(2013, 1, 2)])
348+
if isinstance(item, str) and item == "2012-12-31":
349+
columns = columns.astype("datetime64[ns]")
350+
else:
351+
assert columns.dtype == object
324352

325-
result = df.reset_index()
326353
expected = DataFrame(
327354
[["A", 1, 2], ["B", 3, 4]],
328-
columns=["index", datetime(2013, 1, 1), datetime(2013, 1, 2)],
355+
columns=columns,
329356
)
330357
tm.assert_frame_equal(result, expected)
331358

pandas/tests/indexes/datetimes/test_insert.py

+72-20
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ def test_insert_nat(self, tz, null):
2121
@pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
2222
def test_insert_invalid_na(self, tz):
2323
idx = DatetimeIndex(["2017-01-01"], tz=tz)
24-
msg = "value should be a 'Timestamp' or 'NaT'. Got 'timedelta64' instead."
25-
with pytest.raises(TypeError, match=msg):
26-
idx.insert(0, np.timedelta64("NaT"))
24+
25+
item = np.timedelta64("NaT")
26+
result = idx.insert(0, item)
27+
expected = Index([item] + list(idx), dtype=object)
28+
tm.assert_index_equal(result, expected)
2729

2830
def test_insert_empty_preserves_freq(self, tz_naive_fixture):
2931
# GH#33573
@@ -114,17 +116,6 @@ def test_insert(self):
114116
assert result.name == expected.name
115117
assert result.freq is None
116118

117-
# see gh-7299
118-
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
119-
with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
120-
idx.insert(3, Timestamp("2000-01-04"))
121-
with pytest.raises(TypeError, match="Cannot compare tz-naive and tz-aware"):
122-
idx.insert(3, datetime(2000, 1, 4))
123-
with pytest.raises(ValueError, match="Timezones don't match"):
124-
idx.insert(3, Timestamp("2000-01-04", tz="US/Eastern"))
125-
with pytest.raises(ValueError, match="Timezones don't match"):
126-
idx.insert(3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern")))
127-
128119
for tz in ["US/Pacific", "Asia/Singapore"]:
129120
idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx")
130121
# preserve freq
@@ -167,6 +158,48 @@ def test_insert(self):
167158
assert result.tz == expected.tz
168159
assert result.freq is None
169160

161+
# TODO: also changes DataFrame.__setitem__ with expansion
162+
def test_insert_mismatched_tzawareness(self):
163+
# see GH#7299
164+
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
165+
166+
# mismatched tz-awareness
167+
item = Timestamp("2000-01-04")
168+
result = idx.insert(3, item)
169+
expected = Index(
170+
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
171+
)
172+
tm.assert_index_equal(result, expected)
173+
174+
# mismatched tz-awareness
175+
item = datetime(2000, 1, 4)
176+
result = idx.insert(3, item)
177+
expected = Index(
178+
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
179+
)
180+
tm.assert_index_equal(result, expected)
181+
182+
# TODO: also changes DataFrame.__setitem__ with expansion
183+
def test_insert_mismatched_tz(self):
184+
# see GH#7299
185+
idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")
186+
187+
# mismatched tz -> cast to object (could reasonably cast to same tz or UTC)
188+
item = Timestamp("2000-01-04", tz="US/Eastern")
189+
result = idx.insert(3, item)
190+
expected = Index(
191+
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
192+
)
193+
tm.assert_index_equal(result, expected)
194+
195+
# mismatched tz -> cast to object (could reasonably cast to same tz)
196+
item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
197+
result = idx.insert(3, item)
198+
expected = Index(
199+
list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
200+
)
201+
tm.assert_index_equal(result, expected)
202+
170203
@pytest.mark.parametrize(
171204
"item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)]
172205
)
@@ -175,17 +208,36 @@ def test_insert_mismatched_types_raises(self, tz_aware_fixture, item):
175208
tz = tz_aware_fixture
176209
dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz)
177210

178-
msg = "value should be a 'Timestamp' or 'NaT'. Got '.*' instead"
179-
with pytest.raises(TypeError, match=msg):
180-
dti.insert(1, item)
211+
result = dti.insert(1, item)
212+
213+
if isinstance(item, np.ndarray):
214+
# FIXME: without doing .item() here this segfaults
215+
assert item.item() == 0
216+
expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9)
217+
else:
218+
expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9)
219+
220+
tm.assert_index_equal(result, expected)
181221

182-
def test_insert_object_casting(self, tz_aware_fixture):
222+
def test_insert_castable_str(self, tz_aware_fixture):
183223
# GH#33703
184224
tz = tz_aware_fixture
185225
dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
186226

187-
# ATM we treat this as a string, but we could plausibly wrap it in Timestamp
188227
value = "2019-11-05"
189228
result = dti.insert(0, value)
190-
expected = Index(["2019-11-05"] + list(dti), dtype=object, name=9)
229+
230+
ts = Timestamp(value).tz_localize(tz)
231+
expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9)
232+
tm.assert_index_equal(result, expected)
233+
234+
def test_insert_non_castable_str(self, tz_aware_fixture):
235+
# GH#33703
236+
tz = tz_aware_fixture
237+
dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)
238+
239+
value = "foo"
240+
result = dti.insert(0, value)
241+
242+
expected = Index(["foo"] + list(dti), dtype=object, name=9)
191243
tm.assert_index_equal(result, expected)

pandas/tests/indexes/timedeltas/test_insert.py

+30-11
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import numpy as np
44
import pytest
55

6+
from pandas._libs import lib
7+
68
import pandas as pd
79
from pandas import Index, Timedelta, TimedeltaIndex, timedelta_range
810
import pandas._testing as tm
@@ -79,9 +81,14 @@ def test_insert_nat(self, null):
7981

8082
def test_insert_invalid_na(self):
8183
idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
82-
msg = r"value should be a 'Timedelta' or 'NaT'\. Got 'datetime64' instead\."
83-
with pytest.raises(TypeError, match=msg):
84-
idx.insert(0, np.datetime64("NaT"))
84+
85+
# FIXME: assert_index_equal fails if we pass a different
86+
# instance of np.datetime64("NaT")
87+
item = np.datetime64("NaT")
88+
result = idx.insert(0, item)
89+
90+
expected = Index([item] + list(idx), dtype=object, name="idx")
91+
tm.assert_index_equal(result, expected)
8592

8693
@pytest.mark.parametrize(
8794
"item", [0, np.int64(0), np.float64(0), np.array(0), np.datetime64(456, "us")]
@@ -90,18 +97,30 @@ def test_insert_mismatched_types_raises(self, item):
9097
# GH#33703 dont cast these to td64
9198
tdi = TimedeltaIndex(["4day", "1day", "2day"], name="idx")
9299

93-
msg = r"value should be a 'Timedelta' or 'NaT'\. Got '.*' instead\."
94-
with pytest.raises(TypeError, match=msg):
95-
tdi.insert(1, item)
100+
result = tdi.insert(1, item)
101+
102+
expected = Index(
103+
[tdi[0], lib.item_from_zerodim(item)] + list(tdi[1:]),
104+
dtype=object,
105+
name="idx",
106+
)
107+
tm.assert_index_equal(result, expected)
96108

97-
def test_insert_dont_cast_strings(self):
98-
# To match DatetimeIndex and PeriodIndex behavior, dont try to
99-
# parse strings to Timedelta
109+
def test_insert_castable_str(self):
100110
idx = timedelta_range("1day", "3day")
101111

102112
result = idx.insert(0, "1 Day")
103-
assert result.dtype == object
104-
assert result[0] == "1 Day"
113+
114+
expected = TimedeltaIndex([idx[0]] + list(idx))
115+
tm.assert_index_equal(result, expected)
116+
117+
def test_insert_non_castable_str(self):
118+
idx = timedelta_range("1day", "3day")
119+
120+
result = idx.insert(0, "foo")
121+
122+
expected = Index(["foo"] + list(idx), dtype=object)
123+
tm.assert_index_equal(result, expected)
105124

106125
def test_insert_empty(self):
107126
# Corner case inserting with length zero doesn't raise IndexError

pandas/tests/indexing/test_coercion.py

+51-18
Original file line numberDiff line numberDiff line change
@@ -441,14 +441,6 @@ def test_insert_index_float64(self, insert, coerced_val, coerced_dtype):
441441
[pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz="Asia/Tokyo"), 1],
442442
)
443443
def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value):
444-
if not hasattr(insert_value, "tz"):
445-
request.node.add_marker(
446-
pytest.mark.xfail(reason="ToDo: must coerce to object")
447-
)
448-
elif fill_val.tz != insert_value.tz:
449-
request.node.add_marker(
450-
pytest.mark.xfail(reason="GH 37605 - require tz equality?")
451-
)
452444

453445
obj = pd.DatetimeIndex(
454446
["2011-01-01", "2011-01-02", "2011-01-03", "2011-01-04"], tz=fill_val.tz
@@ -461,7 +453,36 @@ def test_insert_index_datetimes(self, request, fill_val, exp_dtype, insert_value
461453
)
462454
self._assert_insert_conversion(obj, fill_val, exp, exp_dtype)
463455

464-
obj.insert(1, insert_value)
456+
if fill_val.tz:
457+
458+
# mismatched tzawareness
459+
ts = pd.Timestamp("2012-01-01")
460+
result = obj.insert(1, ts)
461+
expected = obj.astype(object).insert(1, ts)
462+
assert expected.dtype == object
463+
tm.assert_index_equal(result, expected)
464+
465+
# mismatched tz --> cast to object (could reasonably cast to common tz)
466+
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
467+
result = obj.insert(1, ts)
468+
expected = obj.astype(object).insert(1, ts)
469+
assert expected.dtype == object
470+
tm.assert_index_equal(result, expected)
471+
472+
else:
473+
# mismatched tzawareness
474+
ts = pd.Timestamp("2012-01-01", tz="Asia/Tokyo")
475+
result = obj.insert(1, ts)
476+
expected = obj.astype(object).insert(1, ts)
477+
assert expected.dtype == object
478+
tm.assert_index_equal(result, expected)
479+
480+
item = 1
481+
result = obj.insert(1, item)
482+
expected = obj.astype(object).insert(1, item)
483+
assert expected[1] == item
484+
assert expected.dtype == object
485+
tm.assert_index_equal(result, expected)
465486

466487
def test_insert_index_timedelta64(self):
467488
obj = pd.TimedeltaIndex(["1 day", "2 day", "3 day", "4 day"])
@@ -473,15 +494,11 @@ def test_insert_index_timedelta64(self):
473494
obj, pd.Timedelta("10 day"), exp, "timedelta64[ns]"
474495
)
475496

476-
# ToDo: must coerce to object
477-
msg = "value should be a 'Timedelta' or 'NaT'. Got 'Timestamp' instead."
478-
with pytest.raises(TypeError, match=msg):
479-
obj.insert(1, pd.Timestamp("2012-01-01"))
480-
481-
# ToDo: must coerce to object
482-
msg = "value should be a 'Timedelta' or 'NaT'. Got 'int' instead."
483-
with pytest.raises(TypeError, match=msg):
484-
obj.insert(1, 1)
497+
for item in [pd.Timestamp("2012-01-01"), 1]:
498+
result = obj.insert(1, item)
499+
expected = obj.astype(object).insert(1, item)
500+
assert expected.dtype == object
501+
tm.assert_index_equal(result, expected)
485502

486503
@pytest.mark.parametrize(
487504
"insert, coerced_val, coerced_dtype",
@@ -506,7 +523,23 @@ def test_insert_index_period(self, insert, coerced_val, coerced_dtype):
506523
if isinstance(insert, pd.Period):
507524
exp = pd.PeriodIndex(data, freq="M")
508525
self._assert_insert_conversion(obj, insert, exp, coerced_dtype)
526+
527+
# string that can be parsed to appropriate PeriodDtype
528+
self._assert_insert_conversion(obj, str(insert), exp, coerced_dtype)
529+
509530
else:
531+
result = obj.insert(0, insert)
532+
expected = obj.astype(object).insert(0, insert)
533+
tm.assert_index_equal(result, expected)
534+
535+
# TODO: ATM inserting '2012-01-01 00:00:00' when we have obj.freq=="M"
536+
# casts that string to Period[M], not clear that is desirable
537+
if not isinstance(insert, pd.Timestamp):
538+
# non-castable string
539+
result = obj.insert(0, str(insert))
540+
expected = obj.astype(object).insert(0, str(insert))
541+
tm.assert_index_equal(result, expected)
542+
510543
msg = r"Unexpected keyword arguments {'freq'}"
511544
with pytest.raises(TypeError, match=msg):
512545
with tm.assert_produces_warning(FutureWarning):

0 commit comments

Comments
 (0)