Skip to content

Commit 04fb7a2

Browse files
committed
BUG: fix initialization of Series with dict containing NaN key
closes pandas-dev#18480
1 parent b45325e commit 04fb7a2

File tree

3 files changed

+44
-25
lines changed

3 files changed

+44
-25
lines changed

doc/source/whatsnew/v0.22.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -196,4 +196,5 @@ Other
196196

197197
- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
198198
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values would result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
199+
- Fixed initialization of a Series from a dict containing NaN as a key (:issue:`18480`)
199200
-

pandas/core/series.py

+31-25
Original file line numberDiff line numberDiff line change
@@ -198,32 +198,39 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
198198
data = data.reindex(index, copy=copy)
199199
data = data._data
200200
elif isinstance(data, dict):
201-
if index is None:
202-
if isinstance(data, OrderedDict):
203-
index = Index(data)
204-
else:
205-
index = Index(_try_sort(data))
206-
try:
207-
if isinstance(index, DatetimeIndex):
208-
if len(data):
209-
# coerce back to datetime objects for lookup
210-
data = _dict_compat(data)
211-
data = lib.fast_multiget(data,
212-
index.asobject.values,
213-
default=np.nan)
201+
if data:
202+
keys, values = zip(*compat.iteritems(data))
203+
keys = Index(list(keys), tupleize_cols=True)
204+
values = np.asarray(values, dtype='object')
205+
try:
206+
values = lib.maybe_convert_objects(values)
207+
except:
208+
pass
209+
if index is None:
210+
if isinstance(data, OrderedDict):
211+
order = np.arange(len(keys))
214212
else:
215-
data = np.nan
216-
# GH #12169
217-
elif isinstance(index, (PeriodIndex, TimedeltaIndex)):
218-
data = ([data.get(i, np.nan) for i in index]
219-
if data else np.nan)
213+
try:
214+
order = keys.argsort()
215+
except TypeError:
216+
order = np.arange(len(keys))
217+
index = keys[order]
220218
else:
221-
data = lib.fast_multiget(data, index.values,
222-
default=np.nan)
223-
except TypeError:
224-
data = ([data.get(i, np.nan) for i in index]
225-
if data else np.nan)
226-
219+
locs = index.get_indexer(keys)
220+
order = - np.ones(len(index), dtype=int)
221+
order[locs] = np.arange(len(keys))
222+
data = values[order]
223+
nan_idxs = np.where(order == -1)[0]
224+
if len(nan_idxs):
225+
if is_integer_dtype(data):
226+
data = data.astype(float)
227+
data[nan_idxs] = np.nan
228+
else:
229+
if index is None:
230+
index = Index([])
231+
data = np.array([np.nan] * len(index))
232+
if any([is_list_like(item) for item in data]):
233+
data = list(data)
227234
elif isinstance(data, SingleBlockManager):
228235
if index is None:
229236
index = data.index
@@ -263,7 +270,6 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
263270
else:
264271
data = _sanitize_array(data, index, dtype, copy,
265272
raise_cast_failure=True)
266-
267273
data = SingleBlockManager(data, index, fastpath=True)
268274

269275
generic.NDFrame.__init__(self, data, fastpath=True)

pandas/tests/series/test_constructors.py

+12
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,18 @@ def test_constructor_dict(self):
625625
expected.iloc[1] = 1
626626
assert_series_equal(result, expected)
627627

628+
# GH 18480 - NaN key
629+
d = {1: 'a', 2: 'b', np.nan: 'c'}
630+
result = Series(d).sort_index()
631+
expected = Series(['a', 'b', 'c'], index=[1, 2, np.nan])
632+
assert_series_equal(result, expected)
633+
634+
# Different NaNs:
635+
d = {1: 'a', 2: 'b', float('nan'): 'c', float('nan'): 'd'}
636+
result = Series(d).sort_values()
637+
expected = Series(['a', 'b', 'c', 'd'], index=[1, 2, np.nan, np.nan])
638+
assert_series_equal(result, expected)
639+
628640
def test_constructor_dict_datetime64_index(self):
629641
# GH 9456
630642

0 commit comments

Comments
 (0)