Skip to content

Commit 04aada8

Browse files
committed
BUG: fix initialization of Series with dict containing NaN as key
closes pandas-dev#18480
1 parent 2a0e54b commit 04aada8

File tree

5 files changed

+49
-17
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -208,4 +208,5 @@ Other
208208

209209
- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
210210
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
211+
- Fixed construction of :class:`Series` from ``dict`` containing ``NaN`` as key (:issue:`18480`)
211212
-

pandas/core/base.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -859,7 +859,6 @@ def _map_values(self, mapper, na_action=None):
859859
a MultiIndex will be returned.
860860
861861
"""
862-
863862
# we can fastpath dict/Series to an efficient map
864863
# as we know that we are not going to have to yield
865864
# python types
@@ -875,7 +874,7 @@ def _map_values(self, mapper, na_action=None):
875874
# we specify the keys here to handle the
876875
# possibility that they are tuples
877876
from pandas import Series, Index
878-
index = Index(mapper, tupleize_cols=False)
877+
index = Index(mapper, tupleize_cols=True)
879878
mapper = Series(mapper, index=index)
880879

881880
if isinstance(mapper, ABCSeries):

pandas/core/series.py

+20-12
Original file line numberDiff line numberDiff line change
@@ -198,18 +198,9 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
198198
data = data.reindex(index, copy=copy)
199199
data = data._data
200200
elif isinstance(data, dict):
201-
if index is None:
202-
if isinstance(data, OrderedDict):
203-
index = Index(data)
204-
else:
205-
index = Index(_try_sort(data))
206-
207-
try:
208-
data = index._get_values_from_dict(data)
209-
except TypeError:
210-
data = ([data.get(i, np.nan) for i in index]
211-
if data else np.nan)
212-
201+
data, index = self._init_from_dict(data, index, dtype)
202+
dtype = None
203+
copy = False
213204
elif isinstance(data, SingleBlockManager):
214205
if index is None:
215206
index = data.index
@@ -303,6 +294,23 @@ def _can_hold_na(self):
303294

304295
_index = None
305296

297+
def _init_from_dict(self, data, index, dtype):
298+
# Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
299+
# raises KeyError), so we iterate the entire dict, and align
300+
if data:
301+
keys, values = zip(*compat.iteritems(data))
302+
else:
303+
keys, values = [], []
304+
s = Series(values, index=keys, dtype=dtype)
305+
if index is not None and not index.identical(keys):
306+
s = s.reindex(index)
307+
elif not isinstance(data, OrderedDict):
308+
try:
309+
s = s.sort_index()
310+
except TypeError:
311+
pass
312+
return s._data, s.index
313+
306314
def _set_axis(self, axis, labels, fastpath=False):
307315
""" override generic, we want to set the _typ here """
308316

pandas/tests/series/test_combine_concat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ def test_concat_empty_series_dtypes(self):
181181
# categorical
182182
assert pd.concat([Series(dtype='category'),
183183
Series(dtype='category')]).dtype == 'category'
184-
assert pd.concat([Series(dtype='category'),
184+
assert pd.concat([Series(np.array([]), dtype='category'),
185185
Series(dtype='float64')]).dtype == 'float64'
186186
assert pd.concat([Series(dtype='category'),
187187
Series(dtype='object')]).dtype == 'object'

pandas/tests/series/test_constructors.py

+26-2
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,32 @@ def test_constructor_dict(self):
625625
expected.iloc[1] = 1
626626
assert_series_equal(result, expected)
627627

628+
def test_constructor_dict_nan_key(self):
629+
# GH 18480
630+
d = {1: 'a', 2: 'b', np.nan: 'c'}
631+
result = Series(d).sort_index()
632+
expected = Series(['a', 'b', 'c'], index=[1, 2, np.nan])
633+
assert_series_equal(result, expected)
634+
635+
# Different NaNs:
636+
d = {1: 'a', 2: 'b', float('nan'): 'c', float('nan'): 'd'}
637+
result = Series(d).sort_values()
638+
expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, np.nan])
639+
assert_series_equal(result, expected)
640+
641+
# NaN vs. None:
642+
d = {1: 'a', 2: 'b', np.nan: 'c', None: 'd'}
643+
result = Series(d).sort_values()
644+
expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, None])
645+
assert_series_equal(result, expected)
646+
647+
# MultiIndex:
648+
d = {(1, 1): 'a', (2, 2): 'b', (3, np.nan): 'c'}
649+
result = Series(d).sort_values()
650+
expected = Series(['a', 'b', 'c'],
651+
index=Index([(1, 1), (2, 2), (3, np.nan)]))
652+
assert_series_equal(result, expected)
653+
628654
def test_constructor_dict_datetime64_index(self):
629655
# GH 9456
630656

@@ -658,8 +684,6 @@ def test_constructor_tuple_of_tuples(self):
658684
s = Series(data)
659685
assert tuple(s) == data
660686

661-
@pytest.mark.xfail(reason='GH 18480 (Series initialization from dict with '
662-
'NaN keys')
663687
def test_constructor_dict_of_tuples(self):
664688
data = {(1, 2): 3,
665689
(None, 5): 6}

0 commit comments

Comments
 (0)