Skip to content

Commit 40d26e4

Browse files
committed
BUG: fix construction of Series from dict with nested lists
closes pandas-dev#18626
1 parent 52838e6 commit 40d26e4

File tree

3 files changed

+31
-28
lines changed

3 files changed

+31
-28
lines changed

doc/source/whatsnew/v0.22.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ Other API Changes
121121
- Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
122122
- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
123123
- :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`)
124+
- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError``.
124125
- When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`)
125126
- Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`)
126127
- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
@@ -259,6 +260,7 @@ Other
259260

260261
- Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
261262
- Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
263+
- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`)
262264
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
263265
- Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
264266
-

pandas/core/series.py

+18-28
Original file line numberDiff line numberDiff line change
@@ -3181,7 +3181,8 @@ def _try_cast(arr, take_fast_path):
31813181

31823182
# GH #846
31833183
if isinstance(data, (np.ndarray, Index, Series)):
3184-
3184+
if data.ndim > 1:
3185+
raise ValueError('Data must be 1-dimensional')
31853186
if dtype is not None:
31863187
subarr = np.array(data, copy=False)
31873188

@@ -3212,7 +3213,11 @@ def _try_cast(arr, take_fast_path):
32123213
return subarr
32133214

32143215
elif isinstance(data, (list, tuple)) and len(data) > 0:
3215-
if dtype is not None:
3216+
if all(is_list_like(item) for item in data):
3217+
# Ensure nested lists are not interpreted as further dimensions:
3218+
subarr = np.empty(len(data), dtype='object')
3219+
subarr[:] = data
3220+
elif dtype is not None:
32163221
try:
32173222
subarr = _try_cast(data, False)
32183223
except Exception:
@@ -3234,40 +3239,25 @@ def _try_cast(arr, take_fast_path):
32343239
else:
32353240
subarr = _try_cast(data, False)
32363241

3237-
# scalar like, GH
3238-
if getattr(subarr, 'ndim', 0) == 0:
3239-
if isinstance(data, list): # pragma: no cover
3240-
subarr = np.array(data, dtype=object)
3241-
elif index is not None:
3242-
value = data
3242+
if subarr.ndim == 0 or is_scalar(data):
3243+
if index is None:
3244+
return subarr.item()
32433245

3246+
if subarr.ndim == 1:
3247+
# a scalar upcasted to 1-dimensional by maybe_cast_to_datetime()
3248+
value = subarr[0]
3249+
dtype = subarr.dtype
3250+
else:
3251+
value = data
32443252
# figure out the dtype from the value (upcast if necessary)
32453253
if dtype is None:
32463254
dtype, value = infer_dtype_from_scalar(value)
32473255
else:
32483256
# need to possibly convert the value here
32493257
value = maybe_cast_to_datetime(value, dtype)
32503258

3251-
subarr = construct_1d_arraylike_from_scalar(
3252-
value, len(index), dtype)
3253-
3254-
else:
3255-
return subarr.item()
3256-
3257-
# the result that we want
3258-
elif subarr.ndim == 1:
3259-
if index is not None:
3260-
3261-
# a 1-element ndarray
3262-
if len(subarr) != len(index) and len(subarr) == 1:
3263-
subarr = construct_1d_arraylike_from_scalar(
3264-
subarr[0], len(index), subarr.dtype)
3265-
3266-
elif subarr.ndim > 1:
3267-
if isinstance(data, np.ndarray):
3268-
raise Exception('Data must be 1-dimensional')
3269-
else:
3270-
subarr = _asarray_tuplesafe(data, dtype=dtype)
3259+
subarr = construct_1d_arraylike_from_scalar(
3260+
value, len(index), dtype)
32713261

32723262
# This is to prevent a mixed-type Series from being cast entirely to
32733263
# NumPy string type, e.g. NaN --> '-1#IND'.

pandas/tests/series/test_constructors.py

+11
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,17 @@ def test_constructor_dict(self):
651651
expected.iloc[1] = 1
652652
assert_series_equal(result, expected)
653653

654+
@pytest.mark.parametrize('input_class', [list, tuple, iter])
655+
@pytest.mark.parametrize('dtype', ['object', None])
656+
def test_constructor_dict_nested_lists(self, input_class, dtype):
657+
# GH 18625
658+
d = {'a': input_class([input_class([1, 2, 3]),
659+
input_class([4, 5, 6])]),
660+
'b': input_class([input_class([7, 8, 9])])}
661+
result = Series(d, index=['a', 'b'], dtype=dtype)
662+
expected = Series([d['a'], d['b']], index=['a', 'b'])
663+
assert_series_equal(result, expected)
664+
654665
@pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
655666
def test_constructor_dict_nan_key(self, value):
656667
# GH 18480

0 commit comments

Comments
 (0)