Skip to content

Commit aaee541

Browse files
jschendel authored and jreback committed
Change UInt64Index._na_value from 0 to np.nan (#18401)
1 parent e728f94 commit aaee541

File tree

10 files changed

+79
-130
lines changed

10 files changed

+79
-130
lines changed

doc/source/whatsnew/v0.22.0.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ Backwards incompatible API changes
5252

5353
- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
5454
- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`)
55+
- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`)
5556
-
5657

5758

@@ -129,7 +130,7 @@ Bug Fixes
129130
Conversion
130131
^^^^^^^^^^
131132

132-
-
133+
- Bug in :class:`Index` constructor with ``dtype='uint64'`` where int-like floats were not coerced to :class:`UInt64Index` (:issue:`18400`)
133134
-
134135
-
135136

pandas/core/indexes/base.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
251251
# then coerce to integer.
252252
try:
253253
return cls._try_convert_to_int_index(
254-
data, copy, name)
254+
data, copy, name, dtype)
255255
except ValueError:
256256
pass
257257

@@ -307,7 +307,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None,
307307
if inferred == 'integer':
308308
try:
309309
return cls._try_convert_to_int_index(
310-
subarr, copy, name)
310+
subarr, copy, name, dtype)
311311
except ValueError:
312312
pass
313313

@@ -664,7 +664,7 @@ def ravel(self, order='C'):
664664

665665
# construction helpers
666666
@classmethod
667-
def _try_convert_to_int_index(cls, data, copy, name):
667+
def _try_convert_to_int_index(cls, data, copy, name, dtype):
668668
"""
669669
Attempt to convert an array of data into an integer index.
670670
@@ -685,15 +685,18 @@ def _try_convert_to_int_index(cls, data, copy, name):
685685
"""
686686

687687
from .numeric import Int64Index, UInt64Index
688-
try:
689-
res = data.astype('i8', copy=False)
690-
if (res == data).all():
691-
return Int64Index(res, copy=copy, name=name)
692-
except (OverflowError, TypeError, ValueError):
693-
pass
688+
if not is_unsigned_integer_dtype(dtype):
689+
# skip int64 conversion attempt if uint-like dtype is passed, as
690+
# this could return Int64Index when UInt64Index is what's desired
691+
try:
692+
res = data.astype('i8', copy=False)
693+
if (res == data).all():
694+
return Int64Index(res, copy=copy, name=name)
695+
except (OverflowError, TypeError, ValueError):
696+
pass
694697

695-
# Conversion to int64 failed (possibly due to
696-
# overflow), so let's try now with uint64.
698+
# Conversion to int64 failed (possibly due to overflow) or was skipped,
699+
# so let's try now with uint64.
697700
try:
698701
res = data.astype('u8', copy=False)
699702
if (res == data).all():

pandas/core/indexes/numeric.py

-1
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ class UInt64Index(NumericIndex):
216216
_inner_indexer = libjoin.inner_join_indexer_uint64
217217
_outer_indexer = libjoin.outer_join_indexer_uint64
218218
_can_hold_na = False
219-
_na_value = 0
220219
_engine_type = libindex.UInt64Engine
221220
_default_dtype = np.uint64
222221

pandas/tests/indexes/common.py

+8-20
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99

1010
from pandas import (Series, Index, Float64Index, Int64Index, UInt64Index,
1111
RangeIndex, MultiIndex, CategoricalIndex, DatetimeIndex,
12-
TimedeltaIndex, PeriodIndex, IntervalIndex,
13-
notna, isna)
12+
TimedeltaIndex, PeriodIndex, IntervalIndex, isna)
1413
from pandas.core.indexes.base import InvalidIndexError
1514
from pandas.core.indexes.datetimelike import DatetimeIndexOpsMixin
1615
from pandas.core.dtypes.common import needs_i8_conversion
@@ -529,31 +528,20 @@ def test_numpy_repeat(self):
529528
tm.assert_raises_regex(ValueError, msg, np.repeat,
530529
i, rep, axis=0)
531530

532-
def test_where(self):
531+
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
532+
def test_where(self, klass):
533533
i = self.create_index()
534-
result = i.where(notna(i))
534+
535+
cond = [True] * len(i)
536+
result = i.where(klass(cond))
535537
expected = i
536538
tm.assert_index_equal(result, expected)
537539

538-
_nan = i._na_value
539540
cond = [False] + [True] * len(i[1:])
540-
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)
541-
542-
result = i.where(cond)
541+
expected = pd.Index([i._na_value] + i[1:].tolist(), dtype=i.dtype)
542+
result = i.where(klass(cond))
543543
tm.assert_index_equal(result, expected)
544544

545-
def test_where_array_like(self):
546-
i = self.create_index()
547-
548-
_nan = i._na_value
549-
cond = [False] + [True] * (len(i) - 1)
550-
klasses = [list, tuple, np.array, pd.Series]
551-
expected = pd.Index([_nan] + i[1:].tolist(), dtype=i.dtype)
552-
553-
for klass in klasses:
554-
result = i.where(klass(cond))
555-
tm.assert_index_equal(result, expected)
556-
557545
def test_setops_errorcases(self):
558546
for name, idx in compat.iteritems(self.indices):
559547
# # non-iterable input

pandas/tests/indexes/period/test_period.py

+7-16
Original file line numberDiff line numberDiff line change
@@ -61,27 +61,18 @@ def test_pickle_round_trip(self):
6161
result = tm.round_trip_pickle(idx)
6262
tm.assert_index_equal(result, idx)
6363

64-
def test_where(self):
64+
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
65+
def test_where(self, klass):
6566
i = self.create_index()
66-
result = i.where(notna(i))
67+
cond = [True] * len(i)
6768
expected = i
69+
result = i.where(klass(cond))
6870
tm.assert_index_equal(result, expected)
6971

70-
i2 = pd.PeriodIndex([pd.NaT, pd.NaT] + i[2:].tolist(),
71-
freq='D')
72-
result = i.where(notna(i2))
73-
expected = i2
74-
tm.assert_index_equal(result, expected)
75-
76-
def test_where_array_like(self):
77-
i = self.create_index()
7872
cond = [False] + [True] * (len(i) - 1)
79-
klasses = [list, tuple, np.array, Series]
80-
expected = pd.PeriodIndex([pd.NaT] + i[1:].tolist(), freq='D')
81-
82-
for klass in klasses:
83-
result = i.where(klass(cond))
84-
tm.assert_index_equal(result, expected)
73+
expected = PeriodIndex([NaT] + i[1:].tolist(), freq='D')
74+
result = i.where(klass(cond))
75+
tm.assert_index_equal(result, expected)
8576

8677
def test_where_other(self):
8778

pandas/tests/indexes/test_base.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from datetime import datetime, timedelta
66

77
import pandas.util.testing as tm
8+
from pandas.core.dtypes.common import is_unsigned_integer_dtype
89
from pandas.core.indexes.api import Index, MultiIndex
910
from pandas.tests.indexes.common import Base
1011

@@ -14,7 +15,7 @@
1415
import numpy as np
1516

1617
from pandas import (period_range, date_range, Series,
17-
DataFrame, Float64Index, Int64Index,
18+
DataFrame, Float64Index, Int64Index, UInt64Index,
1819
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
1920
PeriodIndex, isna)
2021
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
@@ -201,6 +202,20 @@ def __array__(self, dtype=None):
201202
result = pd.Index(ArrayLike(array))
202203
tm.assert_index_equal(result, expected)
203204

205+
@pytest.mark.parametrize('dtype', [
206+
int, 'int64', 'int32', 'int16', 'int8', 'uint64', 'uint32',
207+
'uint16', 'uint8'])
208+
def test_constructor_int_dtype_float(self, dtype):
209+
# GH 18400
210+
if is_unsigned_integer_dtype(dtype):
211+
index_type = UInt64Index
212+
else:
213+
index_type = Int64Index
214+
215+
expected = index_type([0, 1, 2, 3])
216+
result = Index([0., 1., 2., 3.], dtype=dtype)
217+
tm.assert_index_equal(result, expected)
218+
204219
def test_constructor_int_dtype_nan(self):
205220
# see gh-15187
206221
data = [np.nan]

pandas/tests/indexes/test_category.py

+9-18
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
import numpy as np
1313

14-
from pandas import Categorical, IntervalIndex, compat, notna
14+
from pandas import Categorical, IntervalIndex, compat
1515
from pandas.util.testing import assert_almost_equal
1616
import pandas.core.config as cf
1717
import pandas as pd
@@ -269,28 +269,19 @@ def f(x):
269269
ordered=False)
270270
tm.assert_index_equal(result, exp)
271271

272-
def test_where(self):
272+
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
273+
def test_where(self, klass):
273274
i = self.create_index()
274-
result = i.where(notna(i))
275+
cond = [True] * len(i)
275276
expected = i
277+
result = i.where(klass(cond))
276278
tm.assert_index_equal(result, expected)
277279

278-
i2 = pd.CategoricalIndex([np.nan, np.nan] + i[2:].tolist(),
279-
categories=i.categories)
280-
result = i.where(notna(i2))
281-
expected = i2
282-
tm.assert_index_equal(result, expected)
283-
284-
def test_where_array_like(self):
285-
i = self.create_index()
286280
cond = [False] + [True] * (len(i) - 1)
287-
klasses = [list, tuple, np.array, pd.Series]
288-
expected = pd.CategoricalIndex([np.nan] + i[1:].tolist(),
289-
categories=i.categories)
290-
291-
for klass in klasses:
292-
result = i.where(klass(cond))
293-
tm.assert_index_equal(result, expected)
281+
expected = CategoricalIndex([np.nan] + i[1:].tolist(),
282+
categories=i.categories)
283+
result = i.where(klass(cond))
284+
tm.assert_index_equal(result, expected)
294285

295286
def test_append(self):
296287

pandas/tests/indexes/test_interval.py

+9-10
Original file line numberDiff line numberDiff line change
@@ -348,20 +348,19 @@ def test_astype(self, closed):
348348
expected = pd.Categorical(idx, ordered=True)
349349
tm.assert_categorical_equal(result, expected)
350350

351-
def test_where(self, closed):
352-
expected = self.create_index(closed=closed)
353-
result = expected.where(expected.notna())
351+
@pytest.mark.parametrize('klass', [list, tuple, np.array, pd.Series])
352+
def test_where(self, closed, klass):
353+
idx = self.create_index(closed=closed)
354+
cond = [True] * len(idx)
355+
expected = idx
356+
result = expected.where(klass(cond))
354357
tm.assert_index_equal(result, expected)
355358

356-
idx = IntervalIndex.from_breaks([1, 2], closed=closed)
357-
result = idx.where([True, False])
358-
expected = IntervalIndex.from_intervals(
359-
[Interval(1.0, 2.0, closed=closed), np.nan])
359+
cond = [False] + [True] * len(idx[1:])
360+
expected = IntervalIndex([np.nan] + idx[1:].tolist())
361+
result = idx.where(klass(cond))
360362
tm.assert_index_equal(result, expected)
361363

362-
def test_where_array_like(self):
363-
pass
364-
365364
def test_delete(self, closed):
366365
expected = IntervalIndex.from_breaks([1, 2], closed=closed)
367366
result = self.create_index(closed=closed).delete(0)

pandas/tests/indexes/test_numeric.py

+13-26
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import numpy as np
99

10-
from pandas import (date_range, notna, Series, Index, Float64Index,
10+
from pandas import (date_range, Series, Index, Float64Index,
1111
Int64Index, UInt64Index, RangeIndex)
1212

1313
import pandas.util.testing as tm
@@ -175,6 +175,18 @@ def test_modulo(self):
175175
expected = Index(index.values % 2)
176176
tm.assert_index_equal(index % 2, expected)
177177

178+
@pytest.mark.parametrize('klass', [list, tuple, np.array, Series])
179+
def test_where(self, klass):
180+
i = self.create_index()
181+
cond = [True] * len(i)
182+
expected = i
183+
result = i.where(klass(cond))
184+
185+
cond = [False] + [True] * (len(i) - 1)
186+
expected = Float64Index([i._na_value] + i[1:].tolist())
187+
result = i.where(klass(cond))
188+
tm.assert_index_equal(result, expected)
189+
178190

179191
class TestFloat64Index(Numeric):
180192
_holder = Float64Index
@@ -726,31 +738,6 @@ def test_coerce_list(self):
726738
arr = Index([1, 2, 3, 4], dtype=object)
727739
assert isinstance(arr, Index)
728740

729-
def test_where(self):
730-
i = self.create_index()
731-
result = i.where(notna(i))
732-
expected = i
733-
tm.assert_index_equal(result, expected)
734-
735-
_nan = i._na_value
736-
cond = [False] + [True] * len(i[1:])
737-
expected = pd.Index([_nan] + i[1:].tolist())
738-
739-
result = i.where(cond)
740-
tm.assert_index_equal(result, expected)
741-
742-
def test_where_array_like(self):
743-
i = self.create_index()
744-
745-
_nan = i._na_value
746-
cond = [False] + [True] * (len(i) - 1)
747-
klasses = [list, tuple, np.array, pd.Series]
748-
expected = pd.Index([_nan] + i[1:].tolist())
749-
750-
for klass in klasses:
751-
result = i.where(klass(cond))
752-
tm.assert_index_equal(result, expected)
753-
754741
def test_get_indexer(self):
755742
target = Int64Index(np.arange(10))
756743
indexer = self.index.get_indexer(target)

pandas/tests/indexes/test_range.py

+1-26
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
import numpy as np
1212

13-
from pandas import (isna, notna, Series, Index, Float64Index,
13+
from pandas import (isna, Series, Index, Float64Index,
1414
Int64Index, RangeIndex)
1515

1616
import pandas.util.testing as tm
@@ -934,31 +934,6 @@ def test_len_specialised(self):
934934
i = RangeIndex(0, 5, step)
935935
assert len(i) == 0
936936

937-
def test_where(self):
938-
i = self.create_index()
939-
result = i.where(notna(i))
940-
expected = i
941-
tm.assert_index_equal(result, expected)
942-
943-
_nan = i._na_value
944-
cond = [False] + [True] * len(i[1:])
945-
expected = pd.Index([_nan] + i[1:].tolist())
946-
947-
result = i.where(cond)
948-
tm.assert_index_equal(result, expected)
949-
950-
def test_where_array_like(self):
951-
i = self.create_index()
952-
953-
_nan = i._na_value
954-
cond = [False] + [True] * (len(i) - 1)
955-
klasses = [list, tuple, np.array, pd.Series]
956-
expected = pd.Index([_nan] + i[1:].tolist())
957-
958-
for klass in klasses:
959-
result = i.where(klass(cond))
960-
tm.assert_index_equal(result, expected)
961-
962937
def test_append(self):
963938
# GH16212
964939
RI = RangeIndex

0 commit comments

Comments
 (0)