BUG: fix construction of Series from dict with nested lists

toobaz · toobaz · commit 9ef97f713bc7 · 2018-04-02T11:26:36.000+02:00
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -761,6 +761,7 @@ Datetimelike API Changes
 - ``pandas.tseries.frequencies.get_freq_group()`` and ``pandas.tseries.frequencies.DAYS`` are removed from the public API (:issue:`18034`)
 - :func:`Series.truncate` and :func:`DataFrame.truncate` will raise a ``ValueError`` if the index is not sorted instead of an unhelpful ``KeyError`` (:issue:`17935`)
 - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
+- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError`` (:issue:`18626`).
 - Subtracting ``NaT`` from a :class:`Series` with ``dtype='datetime64[ns]'`` returns a ``Series`` with ``dtype='timedelta64[ns]'`` instead of ``dtype='datetime64[ns]'`` (:issue:`18808`)
 - Operations between a :class:`Series` with dtype ``dtype='datetime64[ns]'`` and a :class:`PeriodIndex` will correctly raises ``TypeError`` (:issue:`18850`)
 - Subtraction of :class:`Series` with timezone-aware ``dtype='datetime64[ns]'`` with mis-matched timezones will raise ``TypeError`` instead of ``ValueError`` (:issue:`18817`)
@@ -1135,6 +1136,7 @@ Reshaping
 ^^^^^^^^^
 
 - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`)
+- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`)
 - Bug in :func:`DataFrame.unstack` which casts int to float if ``columns`` is a ``MultiIndex`` with unused levels (:issue:`17845`)
 - Bug in :func:`DataFrame.unstack` which raises an error if ``index`` is a ``MultiIndex`` with unused labels on the unstacked level (:issue:`18562`)
 - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4048,7 +4048,8 @@ def _try_cast(arr, take_fast_path):
 
     # GH #846
     if isinstance(data, (np.ndarray, Index, Series)):
-
+        if data.ndim > 1:
+            raise ValueError('Data must be 1-dimensional')
         if dtype is not None:
             subarr = np.array(data, copy=False)
 
@@ -4085,7 +4086,9 @@ def _try_cast(arr, take_fast_path):
         return subarr
 
     elif isinstance(data, (list, tuple)) and len(data) > 0:
-        if dtype is not None:
+        if all(is_list_like(item) for item in data):
+            subarr = construct_1d_object_array_from_listlike(data)
+        elif dtype is not None:
             try:
                 subarr = _try_cast(data, False)
             except Exception:
@@ -4107,11 +4110,15 @@ def _try_cast(arr, take_fast_path):
     else:
         subarr = _try_cast(data, False)
 
-    # scalar like, GH
-    if getattr(subarr, 'ndim', 0) == 0:
-        if isinstance(data, list):  # pragma: no cover
-            subarr = np.array(data, dtype=object)
-        elif index is not None:
+    if subarr.ndim == 0 or is_scalar(data):
+        if index is None:
+            return subarr.item()
+
+        if subarr.ndim == 1:
+            # a scalar upcasted to 1-dimensional by maybe_cast_to_datetime()
+            value = subarr[0]
+            dtype = subarr.dtype
+        else:
             value = data
 
             # figure out the dtype from the value (upcast if necessary)
@@ -4121,26 +4128,7 @@ def _try_cast(arr, take_fast_path):
                 # need to possibly convert the value here
                 value = maybe_cast_to_datetime(value, dtype)
 
-            subarr = construct_1d_arraylike_from_scalar(
-                value, len(index), dtype)
-
-        else:
-            return subarr.item()
-
-    # the result that we want
-    elif subarr.ndim == 1:
-        if index is not None:
-
-            # a 1-element ndarray
-            if len(subarr) != len(index) and len(subarr) == 1:
-                subarr = construct_1d_arraylike_from_scalar(
-                    subarr[0], len(index), subarr.dtype)
-
-    elif subarr.ndim > 1:
-        if isinstance(data, np.ndarray):
-            raise Exception('Data must be 1-dimensional')
-        else:
-            subarr = com._asarray_tuplesafe(data, dtype=dtype)
+        subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
 
     # This is to prevent mixed-type Series getting all casted to
     # NumPy string type, e.g. NaN --> '-1#IND'.
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -828,6 +828,17 @@ def test_constructor_dict_order(self):
             expected = Series([0, 1, 2], index=list('abc'))
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('input_class', [list, tuple, iter])
+    @pytest.mark.parametrize('dtype', ['object', None])
+    def test_constructor_dict_nested_lists(self, input_class, dtype):
+        # GH 18625
+        d = {'a': input_class([input_class([1, 2, 3]),
+                               input_class([4, 5, 6])]),
+             'b': input_class([input_class([7, 8, 9])])}
+        result = Series(d, index=['a', 'b'], dtype=dtype)
+        expected = Series([d['a'], d['b']], index=['a', 'b'])
+        assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
     def test_constructor_dict_nan_key(self, value):
         # GH 18480