Skip to content

Commit 4952a9b

Browse files
committed
BUG: fix construction of Series from dict with nested lists
closes #18626
1 parent 6e56195 commit 4952a9b

File tree

3 files changed

+31
-28
lines changed

3 files changed

+31
-28
lines changed

doc/source/whatsnew/v0.22.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ Other API Changes
116116
- :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`)
117117
- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`)
118118
- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`)
119+
- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError``.
119120

120121
.. _whatsnew_0220.deprecations:
121122

@@ -246,6 +247,7 @@ Other
246247

247248
- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
248249
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values resulted in a Series whose dtype was tz-aware instead of object (:issue:`16406`)
250+
- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`)
249251
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
250252
- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
251253
-

pandas/core/series.py

+18-28
Original file line numberDiff line numberDiff line change
@@ -3177,7 +3177,8 @@ def _try_cast(arr, take_fast_path):
31773177

31783178
# GH #846
31793179
if isinstance(data, (np.ndarray, Index, Series)):
3180-
3180+
if data.ndim > 1:
3181+
raise ValueError('Data must be 1-dimensional')
31813182
if dtype is not None:
31823183
subarr = np.array(data, copy=False)
31833184

@@ -3208,7 +3209,11 @@ def _try_cast(arr, take_fast_path):
32083209
return subarr
32093210

32103211
elif isinstance(data, (list, tuple)) and len(data) > 0:
3211-
if dtype is not None:
3212+
if all(is_list_like(item) for item in data):
3213+
# Ensure nested lists are not interpreted as further dimensions:
3214+
subarr = np.empty(len(data), dtype='object')
3215+
subarr[:] = data
3216+
elif dtype is not None:
32123217
try:
32133218
subarr = _try_cast(data, False)
32143219
except Exception:
@@ -3230,40 +3235,25 @@ def _try_cast(arr, take_fast_path):
32303235
else:
32313236
subarr = _try_cast(data, False)
32323237

3233-
# scalar like, GH
3234-
if getattr(subarr, 'ndim', 0) == 0:
3235-
if isinstance(data, list): # pragma: no cover
3236-
subarr = np.array(data, dtype=object)
3237-
elif index is not None:
3238-
value = data
3238+
if subarr.ndim == 0 or is_scalar(data):
3239+
if index is None:
3240+
return subarr.item()
32393241

3242+
if subarr.ndim == 1:
3243+
# a scalar upcasted to 1-dimensional by maybe_cast_to_datetime()
3244+
value = subarr[0]
3245+
dtype = subarr.dtype
3246+
else:
3247+
value = data
32403248
# figure out the dtype from the value (upcast if necessary)
32413249
if dtype is None:
32423250
dtype, value = infer_dtype_from_scalar(value)
32433251
else:
32443252
# need to possibly convert the value here
32453253
value = maybe_cast_to_datetime(value, dtype)
32463254

3247-
subarr = construct_1d_arraylike_from_scalar(
3248-
value, len(index), dtype)
3249-
3250-
else:
3251-
return subarr.item()
3252-
3253-
# the result that we want
3254-
elif subarr.ndim == 1:
3255-
if index is not None:
3256-
3257-
# a 1-element ndarray
3258-
if len(subarr) != len(index) and len(subarr) == 1:
3259-
subarr = construct_1d_arraylike_from_scalar(
3260-
subarr[0], len(index), subarr.dtype)
3261-
3262-
elif subarr.ndim > 1:
3263-
if isinstance(data, np.ndarray):
3264-
raise Exception('Data must be 1-dimensional')
3265-
else:
3266-
subarr = _asarray_tuplesafe(data, dtype=dtype)
3255+
subarr = construct_1d_arraylike_from_scalar(
3256+
value, len(index), dtype)
32673257

32683258
# This is to prevent mixed-type Series getting all casted to
32693259
# NumPy string type, e.g. NaN --> '-1#IND'.

pandas/tests/series/test_constructors.py

+11
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,17 @@ def test_constructor_dict(self):
651651
expected.iloc[1] = 1
652652
assert_series_equal(result, expected)
653653

654+
@pytest.mark.parametrize('input_class', [list, tuple])
655+
@pytest.mark.parametrize('dtype', ['object', None])
656+
def test_constructor_dict_nested_lists(self, input_class, dtype):
657+
# GH 18625
658+
d = {'a': input_class([input_class([1, 2, 3]),
659+
input_class([4, 5, 6])]),
660+
'b': input_class([input_class([7, 8, 9])])}
661+
result = Series(d, index=['a', 'b'], dtype=dtype)
662+
expected = Series([d['a'], d['b']], index=['a', 'b'])
663+
assert_series_equal(result, expected)
664+
654665
@pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
655666
def test_constructor_dict_nan_key(self, value):
656667
# GH 18480

0 commit comments

Comments
 (0)