Skip to content

Commit 9c9a09f

Browse files
jschendel authored and jreback committed
BUG: Fix IntervalIndex.insert to allow inserting NaN (#18300)
1 parent b71ecbd commit 9c9a09f

File tree

14 files changed

+119
-20
lines changed

14 files changed

+119
-20
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ Other API Changes
7878
- :func:`Index.map` can now accept ``Series`` and dictionary input objects (:issue:`12756`).
7979
- :func:`DataFrame.unstack` will now default to filling with ``np.nan`` for ``object`` columns. (:issue:`12815`)
8080
- :class:`IntervalIndex` constructor will raise if the ``closed`` parameter conflicts with how the input data is inferred to be closed (:issue:`18421`)
81+
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
8182

8283

8384
.. _whatsnew_0220.deprecations:

pandas/core/indexes/base.py

+4
Original file line numberDiff line numberDiff line change
@@ -3767,6 +3767,10 @@ def insert(self, loc, item):
37673767
-------
37683768
new_index : Index
37693769
"""
3770+
if is_scalar(item) and isna(item):
3771+
# GH 18295
3772+
item = self._na_value
3773+
37703774
_self = np.asarray(self)
37713775
item = self._coerce_scalar_to_index(item)._values
37723776
idx = np.concatenate((_self[:loc], item, _self[loc:]))

pandas/core/indexes/category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
is_scalar)
1313
from pandas.core.common import (_asarray_tuplesafe,
1414
_values_from_object)
15-
from pandas.core.dtypes.missing import array_equivalent
15+
from pandas.core.dtypes.missing import array_equivalent, isna
1616
from pandas.core.algorithms import take_1d
1717

1818

@@ -690,7 +690,7 @@ def insert(self, loc, item):
690690
691691
"""
692692
code = self.categories.get_indexer([item])
693-
if (code == -1):
693+
if (code == -1) and not (is_scalar(item) and isna(item)):
694694
raise TypeError("cannot insert an item into a CategoricalIndex "
695695
"that is not already an existing category")
696696

pandas/core/indexes/datetimes.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1768,6 +1768,9 @@ def insert(self, loc, item):
17681768
-------
17691769
new_index : Index
17701770
"""
1771+
if is_scalar(item) and isna(item):
1772+
# GH 18295
1773+
item = self._na_value
17711774

17721775
freq = None
17731776

@@ -1784,14 +1787,14 @@ def insert(self, loc, item):
17841787
elif (loc == len(self)) and item - self.freq == self[-1]:
17851788
freq = self.freq
17861789
item = _to_m8(item, tz=self.tz)
1790+
17871791
try:
17881792
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
17891793
self[loc:].asi8))
17901794
if self.tz is not None:
17911795
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
17921796
return DatetimeIndex(new_dates, name=self.name, freq=freq,
17931797
tz=self.tz)
1794-
17951798
except (AttributeError, TypeError):
17961799

17971800
# fall back to object index

pandas/core/indexes/interval.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -1001,14 +1001,21 @@ def delete(self, loc):
10011001
return self._shallow_copy(new_left, new_right)
10021002

10031003
def insert(self, loc, item):
1004-
if not isinstance(item, Interval):
1005-
raise ValueError('can only insert Interval objects into an '
1006-
'IntervalIndex')
1007-
if not item.closed == self.closed:
1008-
raise ValueError('inserted item must be closed on the same side '
1009-
'as the index')
1010-
new_left = self.left.insert(loc, item.left)
1011-
new_right = self.right.insert(loc, item.right)
1004+
if isinstance(item, Interval):
1005+
if item.closed != self.closed:
1006+
raise ValueError('inserted item must be closed on the same '
1007+
'side as the index')
1008+
left_insert = item.left
1009+
right_insert = item.right
1010+
elif is_scalar(item) and isna(item):
1011+
# GH 18295
1012+
left_insert = right_insert = item
1013+
else:
1014+
raise ValueError('can only insert Interval objects and NA into '
1015+
'an IntervalIndex')
1016+
1017+
new_left = self.left.insert(loc, left_insert)
1018+
new_right = self.right.insert(loc, right_insert)
10121019
return self._shallow_copy(new_left, new_right)
10131020

10141021
def _as_like_interval_index(self, other, error_msg):

pandas/core/indexes/timedeltas.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -852,16 +852,18 @@ def insert(self, loc, item):
852852
-------
853853
new_index : Index
854854
"""
855-
856855
# try to convert if possible
857856
if _is_convertible_to_td(item):
858857
try:
859858
item = Timedelta(item)
860859
except Exception:
861860
pass
861+
elif is_scalar(item) and isna(item):
862+
# GH 18295
863+
item = self._na_value
862864

863865
freq = None
864-
if isinstance(item, Timedelta) or item is NaT:
866+
if isinstance(item, Timedelta) or (is_scalar(item) and isna(item)):
865867

866868
# check freq can be preserved on edge cases
867869
if self.freq is not None:

pandas/tests/indexes/datetimes/test_indexing.py

+7
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ def test_insert(self):
145145
assert result.tz == expected.tz
146146
assert result.freq is None
147147

148+
# GH 18295 (test missing)
149+
expected = DatetimeIndex(
150+
['20170101', pd.NaT, '20170102', '20170103', '20170104'])
151+
for na in (np.nan, pd.NaT, None):
152+
result = date_range('20170101', periods=4).insert(1, na)
153+
tm.assert_index_equal(result, expected)
154+
148155
def test_delete(self):
149156
idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
150157

pandas/tests/indexes/period/test_period.py

+8
Original file line numberDiff line numberDiff line change
@@ -695,3 +695,11 @@ def test_join_self(self, how):
695695
index = period_range('1/1/2000', periods=10)
696696
joined = index.join(index, how=how)
697697
assert index is joined
698+
699+
def test_insert(self):
700+
# GH 18295 (test missing)
701+
expected = PeriodIndex(
702+
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
703+
for na in (np.nan, pd.NaT, None):
704+
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
705+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

+6
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,12 @@ def test_insert(self):
459459
null_index = Index([])
460460
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
461461

462+
# GH 18295 (test missing)
463+
expected = Index(['a', np.nan, 'b', 'c'])
464+
for na in (np.nan, pd.NaT, None):
465+
result = Index(list('abc')).insert(1, na)
466+
tm.assert_index_equal(result, expected)
467+
462468
def test_delete(self):
463469
idx = Index(['a', 'b', 'c', 'd'], name='idx')
464470

pandas/tests/indexes/test_category.py

+6
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,12 @@ def test_insert(self):
362362
# invalid
363363
pytest.raises(TypeError, lambda: ci.insert(0, 'd'))
364364

365+
# GH 18295 (test missing)
366+
expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b'])
367+
for na in (np.nan, pd.NaT, None):
368+
result = CategoricalIndex(list('aabcb')).insert(1, na)
369+
tm.assert_index_equal(result, expected)
370+
365371
def test_delete(self):
366372

367373
ci = self.create_index()

pandas/tests/indexes/test_interval.py

+43-7
Original file line numberDiff line numberDiff line change
@@ -366,14 +366,50 @@ def test_delete(self, closed):
366366
result = self.create_index(closed=closed).delete(0)
367367
tm.assert_index_equal(result, expected)
368368

369-
def test_insert(self):
370-
expected = IntervalIndex.from_breaks(range(4))
371-
actual = self.index.insert(2, Interval(2, 3))
372-
assert expected.equals(actual)
369+
@pytest.mark.parametrize('data', [
370+
interval_range(0, periods=10, closed='neither'),
371+
interval_range(1.7, periods=8, freq=2.5, closed='both'),
372+
interval_range(Timestamp('20170101'), periods=12, closed='left'),
373+
interval_range(Timedelta('1 day'), periods=6, closed='right'),
374+
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
375+
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
376+
def test_insert(self, data):
377+
item = data[0]
378+
idx_item = IntervalIndex([item])
379+
380+
# start
381+
expected = idx_item.append(data)
382+
result = data.insert(0, item)
383+
tm.assert_index_equal(result, expected)
384+
385+
# end
386+
expected = data.append(idx_item)
387+
result = data.insert(len(data), item)
388+
tm.assert_index_equal(result, expected)
389+
390+
# mid
391+
expected = data[:3].append(idx_item).append(data[3:])
392+
result = data.insert(3, item)
393+
tm.assert_index_equal(result, expected)
394+
395+
# invalid type
396+
msg = 'can only insert Interval objects and NA into an IntervalIndex'
397+
with tm.assert_raises_regex(ValueError, msg):
398+
data.insert(1, 'foo')
373399

374-
pytest.raises(ValueError, self.index.insert, 0, 1)
375-
pytest.raises(ValueError, self.index.insert, 0,
376-
Interval(2, 3, closed='left'))
400+
# invalid closed
401+
msg = 'inserted item must be closed on the same side as the index'
402+
for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
403+
with tm.assert_raises_regex(ValueError, msg):
404+
bad_item = Interval(item.left, item.right, closed=closed)
405+
data.insert(1, bad_item)
406+
407+
# GH 18295 (test missing)
408+
na_idx = IntervalIndex([np.nan], closed=data.closed)
409+
for na in (np.nan, pd.NaT, None):
410+
expected = data[:1].append(na_idx).append(data[1:])
411+
result = data.insert(1, na)
412+
tm.assert_index_equal(result, expected)
377413

378414
def test_take(self, closed):
379415
index = self.create_index(closed=closed)

pandas/tests/indexes/test_numeric.py

+7
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,13 @@ def test_where(self, klass):
187187
result = i.where(klass(cond))
188188
tm.assert_index_equal(result, expected)
189189

190+
def test_insert(self):
191+
# GH 18295 (test missing)
192+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
193+
for na in (np.nan, pd.NaT, None):
194+
result = self.create_index().insert(1, na)
195+
tm.assert_index_equal(result, expected)
196+
190197

191198
class TestFloat64Index(Numeric):
192199
_holder = Float64Index

pandas/tests/indexes/test_range.py

+6
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,12 @@ def test_insert(self):
295295
# test 0th element
296296
tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))
297297

298+
# GH 18295 (test missing)
299+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
300+
for na in (np.nan, pd.NaT, None):
301+
result = RangeIndex(5).insert(1, na)
302+
tm.assert_index_equal(result, expected)
303+
298304
def test_delete(self):
299305

300306
idx = RangeIndex(5, name='Foo')

pandas/tests/indexes/timedeltas/test_indexing.py

+6
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ def test_insert(self):
5757
assert result.name == expected.name
5858
assert result.freq == expected.freq
5959

60+
# GH 18295 (test missing)
61+
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
62+
for na in (np.nan, pd.NaT, None):
63+
result = timedelta_range('1day', '3day').insert(1, na)
64+
tm.assert_index_equal(result, expected)
65+
6066
def test_delete(self):
6167
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
6268

0 commit comments

Comments
 (0)