Skip to content

Commit 9ef97f7

Browse files
committed
BUG: fix construction of Series from dict with nested lists
1 parent 4efb39f commit 9ef97f7

File tree

3 files changed

+28
-27
lines changed

3 files changed

+28
-27
lines changed

doc/source/whatsnew/v0.23.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -761,6 +761,7 @@ Datetimelike API Changes
761761
- ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
762762
- :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
763763
- Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
764+
- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError`` (:issue:`18626`).
764765
- Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
765766
- Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raise ``TypeError`` (:issue:`18850`)
766767
- Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
@@ -1135,6 +1136,7 @@ Reshaping
11351136
^^^^^^^^^
11361137

11371138
- Bug in :func:`DataFrame.stack` which failed when trying to sort mixed-type levels under Python 3 (:issue:`18310`)
1139+
- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`)
11381140
- Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`)
11391141
- Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`)
11401142
- Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)

pandas/core/series.py

+15-27
Original file line numberDiff line numberDiff line change
@@ -4048,7 +4048,8 @@ def _try_cast(arr, take_fast_path):
40484048

40494049
# GH #846
40504050
if isinstance(data, (np.ndarray, Index, Series)):
4051-
4051+
if data.ndim > 1:
4052+
raise ValueError('Data must be 1-dimensional')
40524053
if dtype is not None:
40534054
subarr = np.array(data, copy=False)
40544055

@@ -4085,7 +4086,9 @@ def _try_cast(arr, take_fast_path):
40854086
return subarr
40864087

40874088
elif isinstance(data, (list, tuple)) and len(data) > 0:
4088-
if dtype is not None:
4089+
if all(is_list_like(item) for item in data):
4090+
subarr = construct_1d_object_array_from_listlike(data)
4091+
elif dtype is not None:
40894092
try:
40904093
subarr = _try_cast(data, False)
40914094
except Exception:
@@ -4107,11 +4110,15 @@ def _try_cast(arr, take_fast_path):
41074110
else:
41084111
subarr = _try_cast(data, False)
41094112

4110-
# scalar like, GH
4111-
if getattr(subarr, 'ndim', 0) == 0:
4112-
if isinstance(data, list): # pragma: no cover
4113-
subarr = np.array(data, dtype=object)
4114-
elif index is not None:
4113+
if subarr.ndim == 0 or is_scalar(data):
4114+
if index is None:
4115+
return subarr.item()
4116+
4117+
if subarr.ndim == 1:
4118+
# a scalar upcasted to 1-dimensional by maybe_cast_to_datetime()
4119+
value = subarr[0]
4120+
dtype = subarr.dtype
4121+
else:
41154122
value = data
41164123

41174124
# figure out the dtype from the value (upcast if necessary)
@@ -4121,26 +4128,7 @@ def _try_cast(arr, take_fast_path):
41214128
# need to possibly convert the value here
41224129
value = maybe_cast_to_datetime(value, dtype)
41234130

4124-
subarr = construct_1d_arraylike_from_scalar(
4125-
value, len(index), dtype)
4126-
4127-
else:
4128-
return subarr.item()
4129-
4130-
# the result that we want
4131-
elif subarr.ndim == 1:
4132-
if index is not None:
4133-
4134-
# a 1-element ndarray
4135-
if len(subarr) != len(index) and len(subarr) == 1:
4136-
subarr = construct_1d_arraylike_from_scalar(
4137-
subarr[0], len(index), subarr.dtype)
4138-
4139-
elif subarr.ndim > 1:
4140-
if isinstance(data, np.ndarray):
4141-
raise Exception('Data must be 1-dimensional')
4142-
else:
4143-
subarr = com._asarray_tuplesafe(data, dtype=dtype)
4131+
subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
41444132

41454133
# This is to prevent mixed-type Series getting all casted to
41464134
# NumPy string type, e.g. NaN --> '-1#IND'.

pandas/tests/series/test_constructors.py

+11
Original file line numberDiff line numberDiff line change
@@ -828,6 +828,17 @@ def test_constructor_dict_order(self):
828828
expected = Series([0, 1, 2], index=list('abc'))
829829
tm.assert_series_equal(result, expected)
830830

831+
@pytest.mark.parametrize('input_class', [list, tuple, iter])
832+
@pytest.mark.parametrize('dtype', ['object', None])
833+
def test_constructor_dict_nested_lists(self, input_class, dtype):
834+
# GH 18625
835+
d = {'a': input_class([input_class([1, 2, 3]),
836+
input_class([4, 5, 6])]),
837+
'b': input_class([input_class([7, 8, 9])])}
838+
result = Series(d, index=['a', 'b'], dtype=dtype)
839+
expected = Series([d['a'], d['b']], index=['a', 'b'])
840+
assert_series_equal(result, expected)
841+
831842
@pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
832843
def test_constructor_dict_nan_key(self, value):
833844
# GH 18480

0 commit comments

Comments
 (0)