Skip to content

Commit 15e8e9a

Browse files
gfyoung authored and jreback committed
BUG: Error when specifying int index containing NaN
xref pandas-dev#15187. Author: gfyoung <[email protected]> Closes pandas-dev#15616 from gfyoung/nan-int-index and squashes the following commits: 195b830 [gfyoung] BUG: Error when specifying int index containing NaN
1 parent 1be66ac commit 15e8e9a

File tree

5 files changed: +94 -6 lines changed

doc/source/whatsnew/v0.20.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -792,6 +792,7 @@ Bug Fixes
792792
- Bug in ``DataFrame.isin`` comparing datetimelike to empty frame (:issue:`15473`)
793793

794794
- Bug in ``Series.where()`` and ``DataFrame.where()`` where array-like conditionals were being rejected (:issue:`15414`)
795+
- Bug in ``Index`` construction with ``NaN`` elements and integer dtype specified (:issue:`15187`)
795796
- Bug in ``Series`` construction with a datetimetz (:issue:`14928`)
796797
- Bug in output formatting of a ``MultiIndex`` when names are integers (:issue:`12223`, :issue:`15262`)
797798

pandas/indexes/base.py

+23 -4
Original file line number | Diff line number | Diff line change
@@ -203,6 +203,9 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
203203
if inferred == 'integer':
204204
data = np.array(data, copy=copy, dtype=dtype)
205205
elif inferred in ['floating', 'mixed-integer-float']:
206+
if isnull(data).any():
207+
raise ValueError('cannot convert float '
208+
'NaN to integer')
206209

207210
# If we are actually all equal to integers,
208211
# then coerce to integer.
@@ -230,8 +233,10 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
230233
else:
231234
data = np.array(data, dtype=dtype, copy=copy)
232235

233-
except (TypeError, ValueError):
234-
pass
236+
except (TypeError, ValueError) as e:
237+
msg = str(e)
238+
if 'cannot convert float' in msg:
239+
raise
235240

236241
# maybe coerce to a sub-class
237242
from pandas.tseries.period import (PeriodIndex,
@@ -585,7 +590,14 @@ def where(self, cond, other=None):
585590
if other is None:
586591
other = self._na_value
587592
values = np.where(cond, self.values, other)
588-
return self._shallow_copy_with_infer(values, dtype=self.dtype)
593+
594+
dtype = self.dtype
595+
if self._is_numeric_dtype and np.any(isnull(values)):
596+
# We can't coerce to the numeric dtype of "self" (unless
597+
# it's float) if there are NaN values in our output.
598+
dtype = None
599+
600+
return self._shallow_copy_with_infer(values, dtype=dtype)
589601

590602
def ravel(self, order='C'):
591603
"""
@@ -689,7 +701,14 @@ def _coerce_scalar_to_index(self, item):
689701
----------
690702
item : scalar item to coerce
691703
"""
692-
return Index([item], dtype=self.dtype, **self._get_attributes_dict())
704+
dtype = self.dtype
705+
706+
if self._is_numeric_dtype and isnull(item):
707+
# We can't coerce to the numeric dtype of "self" (unless
708+
# it's float) if there are NaN values in our output.
709+
dtype = None
710+
711+
return Index([item], dtype=dtype, **self._get_attributes_dict())
693712

694713
_index_shared_docs['copy'] = """
695714
Make a copy of this object. Name and dtype sets those attributes on

pandas/tests/indexes/test_base.py

+17
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,23 @@ def __array__(self, dtype=None):
199199
result = pd.Index(ArrayLike(array))
200200
self.assert_index_equal(result, expected)
201201

202+
def test_constructor_int_dtype_nan(self):
203+
# see gh-15187
204+
data = [np.nan]
205+
msg = "cannot convert"
206+
207+
with tm.assertRaisesRegexp(ValueError, msg):
208+
Index(data, dtype='int64')
209+
210+
with tm.assertRaisesRegexp(ValueError, msg):
211+
Index(data, dtype='uint64')
212+
213+
# This, however, should not break
214+
# because NaN is float.
215+
expected = Float64Index(data)
216+
result = Index(data, dtype='float')
217+
tm.assert_index_equal(result, expected)
218+
202219
def test_index_ctor_infer_nan_nat(self):
203220
# GH 13467
204221
exp = pd.Float64Index([np.nan, np.nan])

pandas/tests/indexes/test_numeric.py

+26 -1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77

8-
from pandas import (date_range, Series, Index, Float64Index,
8+
from pandas import (date_range, notnull, Series, Index, Float64Index,
99
Int64Index, UInt64Index, RangeIndex)
1010

1111
import pandas.util.testing as tm
@@ -686,6 +686,31 @@ def test_coerce_list(self):
686686
arr = Index([1, 2, 3, 4], dtype=object)
687687
tm.assertIsInstance(arr, Index)
688688

689+
def test_where(self):
690+
i = self.create_index()
691+
result = i.where(notnull(i))
692+
expected = i
693+
tm.assert_index_equal(result, expected)
694+
695+
_nan = i._na_value
696+
cond = [False] + [True] * len(i[1:])
697+
expected = pd.Index([_nan] + i[1:].tolist())
698+
699+
result = i.where(cond)
700+
tm.assert_index_equal(result, expected)
701+
702+
def test_where_array_like(self):
703+
i = self.create_index()
704+
705+
_nan = i._na_value
706+
cond = [False] + [True] * (len(i) - 1)
707+
klasses = [list, tuple, np.array, pd.Series]
708+
expected = pd.Index([_nan] + i[1:].tolist())
709+
710+
for klass in klasses:
711+
result = i.where(klass(cond))
712+
tm.assert_index_equal(result, expected)
713+
689714
def test_get_indexer(self):
690715
target = Int64Index(np.arange(10))
691716
indexer = self.index.get_indexer(target)

pandas/tests/indexes/test_range.py

+27 -1
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,8 @@
88

99
import numpy as np
1010

11-
from pandas import (Series, Index, Float64Index, Int64Index, RangeIndex)
11+
from pandas import (notnull, Series, Index, Float64Index,
12+
Int64Index, RangeIndex)
1213
from pandas.util.testing import assertRaisesRegexp
1314

1415
import pandas.util.testing as tm
@@ -915,3 +916,28 @@ def test_len_specialised(self):
915916

916917
i = RangeIndex(0, 5, step)
917918
self.assertEqual(len(i), 0)
919+
920+
def test_where(self):
921+
i = self.create_index()
922+
result = i.where(notnull(i))
923+
expected = i
924+
tm.assert_index_equal(result, expected)
925+
926+
_nan = i._na_value
927+
cond = [False] + [True] * len(i[1:])
928+
expected = pd.Index([_nan] + i[1:].tolist())
929+
930+
result = i.where(cond)
931+
tm.assert_index_equal(result, expected)
932+
933+
def test_where_array_like(self):
934+
i = self.create_index()
935+
936+
_nan = i._na_value
937+
cond = [False] + [True] * (len(i) - 1)
938+
klasses = [list, tuple, np.array, pd.Series]
939+
expected = pd.Index([_nan] + i[1:].tolist())
940+
941+
for klass in klasses:
942+
result = i.where(klass(cond))
943+
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)