Skip to content

Commit 8986439

Browse files
committed
Generalize NA Compat
1 parent 4644c53 commit 8986439

File tree

15 files changed

+109
-40
lines changed

15 files changed

+109
-40
lines changed

doc/source/whatsnew/v0.21.1.txt

-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ Bug Fixes
6262
- Bug in ``pd.Series.rolling.skew()`` and ``rolling.kurt()`` with all equal values has a floating-point issue (:issue:`18044`)
6363
- Bug in ``pd.DataFrameGroupBy.count()`` when counting over a datetimelike column (:issue:`13393`)
6464
- Bug in ``pd.concat`` when empty and non-empty DataFrames or Series are concatenated (:issue:`18178` :issue:`18187`)
65-
- Bug in ``IntervalIndex.insert`` when attempting to insert ``NaN`` (:issue:`18295`)
6665

6766
Conversion
6867
^^^^^^^^^^

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ Other API Changes
4747
- :class:`CacheableOffset` and :class:`WeekDay` are no longer available in the ``pandas.tseries.offsets`` module (:issue:`17830`)
4848
- ``tseries.frequencies.get_freq_group()`` and ``tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
4949
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
50-
50+
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
5151

5252
.. _whatsnew_0220.deprecations:
5353

pandas/core/indexes/base.py

+4
Original file line numberDiff line numberDiff line change
@@ -3728,6 +3728,10 @@ def insert(self, loc, item):
37283728
-------
37293729
new_index : Index
37303730
"""
3731+
if lib.checknull(item):
3732+
# GH 18295
3733+
item = self._na_value
3734+
37313735
_self = np.asarray(self)
37323736
item = self._coerce_scalar_to_index(item)._values
37333737
idx = np.concatenate((_self[:loc], item, _self[loc:]))

pandas/core/indexes/category.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import numpy as np
2-
from pandas._libs import index as libindex
2+
from pandas._libs import index as libindex, lib
33

44
from pandas import compat
55
from pandas.compat.numpy import function as nv
@@ -688,7 +688,7 @@ def insert(self, loc, item):
688688
689689
"""
690690
code = self.categories.get_indexer([item])
691-
if (code == -1):
691+
if (code == -1) and not lib.checknull(item):
692692
raise TypeError("cannot insert an item into a CategoricalIndex "
693693
"that is not already an existing category")
694694

pandas/core/indexes/datetimes.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1751,6 +1751,9 @@ def insert(self, loc, item):
17511751
-------
17521752
new_index : Index
17531753
"""
1754+
if lib.checknull(item):
1755+
# GH 18295
1756+
item = self._na_value
17541757

17551758
freq = None
17561759

@@ -1767,14 +1770,14 @@ def insert(self, loc, item):
17671770
elif (loc == len(self)) and item - self.freq == self[-1]:
17681771
freq = self.freq
17691772
item = _to_m8(item, tz=self.tz)
1773+
17701774
try:
17711775
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
17721776
self[loc:].asi8))
17731777
if self.tz is not None:
17741778
new_dates = conversion.tz_convert(new_dates, 'UTC', self.tz)
17751779
return DatetimeIndex(new_dates, name=self.name, freq=freq,
17761780
tz=self.tz)
1777-
17781781
except (AttributeError, TypeError):
17791782

17801783
# fall back to object index

pandas/core/indexes/interval.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
Index, _ensure_index,
2323
default_pprint, _index_shared_docs)
2424

25-
from pandas._libs import Timestamp, Timedelta
25+
from pandas._libs import lib, Timestamp, Timedelta
2626
from pandas._libs.interval import (
2727
Interval, IntervalMixin, IntervalTree,
2828
intervals_to_interval_bounds)
@@ -985,12 +985,8 @@ def insert(self, loc, item):
985985
'side as the index')
986986
left_insert = item.left
987987
right_insert = item.right
988-
elif is_scalar(item) and isna(item):
988+
elif lib.checknull(item):
989989
# GH 18295
990-
if item is not self.left._na_value:
991-
raise TypeError('cannot insert with incompatible NA value: '
992-
'got {item}, expected {na}'
993-
.format(item=item, na=self.left._na_value))
994990
left_insert = right_insert = item
995991
else:
996992
raise ValueError('can only insert Interval objects and NA into '

pandas/core/indexes/timedeltas.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -849,16 +849,18 @@ def insert(self, loc, item):
849849
-------
850850
new_index : Index
851851
"""
852-
853852
# try to convert if possible
854853
if _is_convertible_to_td(item):
855854
try:
856855
item = Timedelta(item)
857856
except Exception:
858857
pass
858+
elif lib.checknull(item):
859+
# GH 18295
860+
item = self._na_value
859861

860862
freq = None
861-
if isinstance(item, Timedelta) or item is NaT:
863+
if isinstance(item, Timedelta) or (item is self._na_value):
862864

863865
# check freq can be preserved on edge cases
864866
if self.freq is not None:

pandas/tests/indexes/datetimes/test_indexing.py

+7
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,13 @@ def test_insert(self):
145145
assert result.tz == expected.tz
146146
assert result.freq is None
147147

148+
# GH 18295 (test missing)
149+
expected = DatetimeIndex(
150+
['20170101', pd.NaT, '20170102', '20170103', '20170104'])
151+
for na in (np.nan, pd.NaT, None):
152+
result = date_range('20170101', periods=4).insert(1, na)
153+
tm.assert_index_equal(result, expected)
154+
148155
def test_delete(self):
149156
idx = date_range(start='2000-01-01', periods=5, freq='M', name='idx')
150157

pandas/tests/indexes/period/test_period.py

+8
Original file line numberDiff line numberDiff line change
@@ -706,3 +706,11 @@ def test_join_self(self, how):
706706
index = period_range('1/1/2000', periods=10)
707707
joined = index.join(index, how=how)
708708
assert index is joined
709+
710+
def test_insert(self):
711+
# GH 18295 (test missing)
712+
expected = PeriodIndex(
713+
['2017Q1', pd.NaT, '2017Q2', '2017Q3', '2017Q4'], freq='Q')
714+
for na in (np.nan, pd.NaT, None):
715+
result = period_range('2017Q1', periods=4, freq='Q').insert(1, na)
716+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_base.py

+6
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,12 @@ def test_insert(self):
442442
null_index = Index([])
443443
tm.assert_index_equal(Index(['a']), null_index.insert(0, 'a'))
444444

445+
# GH 18295 (test missing)
446+
expected = Index(['a', np.nan, 'b', 'c'])
447+
for na in (np.nan, pd.NaT, None):
448+
result = Index(list('abc')).insert(1, na)
449+
tm.assert_index_equal(result, expected)
450+
445451
def test_delete(self):
446452
idx = Index(['a', 'b', 'c', 'd'], name='idx')
447453

pandas/tests/indexes/test_category.py

+6
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,12 @@ def test_insert(self):
353353
# invalid
354354
pytest.raises(TypeError, lambda: ci.insert(0, 'd'))
355355

356+
# GH 18295 (test missing)
357+
expected = CategoricalIndex(['a', np.nan, 'a', 'b', 'c', 'b'])
358+
for na in (np.nan, pd.NaT, None):
359+
result = CategoricalIndex(list('aabcb')).insert(1, na)
360+
tm.assert_index_equal(result, expected)
361+
356362
def test_delete(self):
357363

358364
ci = self.create_index()

pandas/tests/indexes/test_interval.py

+39-27
Original file line numberDiff line numberDiff line change
@@ -246,38 +246,50 @@ def test_delete(self):
246246
actual = self.index.delete(0)
247247
assert expected.equals(actual)
248248

249-
def test_insert(self):
250-
expected = IntervalIndex.from_breaks(range(4))
251-
actual = self.index.insert(2, Interval(2, 3))
252-
assert expected.equals(actual)
253-
254-
pytest.raises(ValueError, self.index.insert, 0, 1)
255-
pytest.raises(ValueError, self.index.insert, 0,
256-
Interval(2, 3, closed='left'))
257-
258249
@pytest.mark.parametrize('data', [
259-
interval_range(0, periods=10),
260-
interval_range(1.7, periods=8, freq=2.5),
261-
interval_range(Timestamp('20170101'), periods=12),
262-
interval_range(Timedelta('1 day'), periods=6),
250+
interval_range(0, periods=10, closed='neither'),
251+
interval_range(1.7, periods=8, freq=2.5, closed='both'),
252+
interval_range(Timestamp('20170101'), periods=12, closed='left'),
253+
interval_range(Timedelta('1 day'), periods=6, closed='right'),
263254
IntervalIndex.from_tuples([('a', 'd'), ('e', 'j'), ('w', 'z')]),
264255
IntervalIndex.from_tuples([(1, 2), ('a', 'z'), (3.14, 6.28)])])
265-
def test_insert_na(self, data):
266-
# GH 18295
267-
valid_na, invalid_na = np.nan, pd.NaT
268-
if data.left._na_value is pd.NaT:
269-
valid_na, invalid_na = invalid_na, valid_na
270-
271-
# valid insertion
272-
expected = IntervalIndex([data[0], np.nan]).append(data[1:])
273-
result = data.insert(1, valid_na)
256+
def test_insert(self, data):
257+
item = data[0]
258+
idx_item = IntervalIndex([item], closed=data.closed)
259+
260+
# start
261+
expected = idx_item.append(data)
262+
result = data.insert(0, item)
274263
tm.assert_index_equal(result, expected)
275264

276-
# invalid insertion
277-
msg = ('cannot insert with incompatible NA value: got {invalid}, '
278-
'expected {valid}').format(invalid=invalid_na, valid=valid_na)
279-
with tm.assert_raises_regex(TypeError, msg):
280-
data.insert(1, invalid_na)
265+
# end
266+
expected = data.append(idx_item)
267+
result = data.insert(len(data), item)
268+
tm.assert_index_equal(result, expected)
269+
270+
# mid
271+
expected = data[:3].append(idx_item).append(data[3:])
272+
result = data.insert(3, item)
273+
tm.assert_index_equal(result, expected)
274+
275+
# invalid type
276+
msg = 'can only insert Interval objects and NA into an IntervalIndex'
277+
with tm.assert_raises_regex(ValueError, msg):
278+
data.insert(1, 'foo')
279+
280+
# invalid closed
281+
msg = 'inserted item must be closed on the same side as the index'
282+
for closed in {'left', 'right', 'both', 'neither'} - {item.closed}:
283+
with tm.assert_raises_regex(ValueError, msg):
284+
bad_item = Interval(item.left, item.right, closed=closed)
285+
data.insert(1, bad_item)
286+
287+
# GH 18295 (test missing)
288+
na_idx = IntervalIndex([np.nan], closed=data.closed)
289+
for na in (np.nan, pd.NaT, None):
290+
expected = data[:1].append(na_idx).append(data[1:])
291+
result = data.insert(1, na)
292+
tm.assert_index_equal(result, expected)
281293

282294
def test_take(self):
283295
actual = self.index.take([0, 1])

pandas/tests/indexes/test_numeric.py

+14
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ def test_modulo(self):
175175
expected = Index(index.values % 2)
176176
tm.assert_index_equal(index % 2, expected)
177177

178+
def test_insert(self):
179+
# GH 18295 (test missing)
180+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
181+
for na in (np.nan, pd.NaT, None):
182+
result = self.create_index().insert(1, na)
183+
tm.assert_index_equal(result, expected)
184+
178185

179186
class TestFloat64Index(Numeric):
180187
_holder = Float64Index
@@ -1206,3 +1213,10 @@ def test_join_outer(self):
12061213
tm.assert_index_equal(res, eres)
12071214
tm.assert_numpy_array_equal(lidx, elidx)
12081215
tm.assert_numpy_array_equal(ridx, eridx)
1216+
1217+
def test_insert(self):
1218+
# GH 18295 (test missing)
1219+
expected = UInt64Index([0, 0, 1, 2, 3, 4])
1220+
for na in (np.nan, pd.NaT, None):
1221+
result = self.create_index().insert(1, na)
1222+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/test_range.py

+6
Original file line numberDiff line numberDiff line change
@@ -295,6 +295,12 @@ def test_insert(self):
295295
# test 0th element
296296
tm.assert_index_equal(idx[0:4], result.insert(0, idx[0]))
297297

298+
# GH 18295 (test missing)
299+
expected = Float64Index([0, np.nan, 1, 2, 3, 4])
300+
for na in (np.nan, pd.NaT, None):
301+
result = RangeIndex(5).insert(1, na)
302+
tm.assert_index_equal(result, expected)
303+
298304
def test_delete(self):
299305

300306
idx = RangeIndex(5, name='Foo')

pandas/tests/indexes/timedeltas/test_indexing.py

+6
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ def test_insert(self):
5757
assert result.name == expected.name
5858
assert result.freq == expected.freq
5959

60+
# GH 18295 (test missing)
61+
expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day'])
62+
for na in (np.nan, pd.NaT, None):
63+
result = timedelta_range('1day', '3day').insert(1, na)
64+
tm.assert_index_equal(result, expected)
65+
6066
def test_delete(self):
6167
idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx')
6268

0 commit comments

Comments
 (0)