BUG: fix construction of Series from dict with nested lists

toobaz · toobaz · commit 40d26e4e1eb2 · 2017-12-06T10:28:12.000+01:00
closes pandas-dev#18626
diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt
@@ -121,6 +121,7 @@ Other API Changes
 - Inserting missing values into indexes will work for all types of indexes and automatically insert the correct type of missing value (``NaN``, ``NaT``, etc.) regardless of the type passed in (:issue:`18295`)
 - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`).
 - :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`)
+- Construction of a :class:`Series` from a list of length 1 and an index of length > 1, which used to interpret the list as a scalar, now raises a ``ValueError``.
 - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`)
 - Building from source now explicitly requires ``setuptools`` in ``setup.py`` (:issue:`18113`)
 - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`)
@@ -259,6 +260,7 @@ Other
 
 - Improved error message when attempting to use a Python keyword as an identifier in a numexpr query (:issue:`18221`)
 - Fixed a bug where creating a Series from an array that contains both tz-naive and tz-aware values will result in a Series whose dtype is tz-aware instead of object (:issue:`16406`)
+- Fixed construction of a :class:`Series` from a ``dict`` containing nested lists as values (:issue:`18625`)
 - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`)
 - Adding a ``Period`` object to a ``datetime`` or ``Timestamp`` object will now correctly raise a ``TypeError`` (:issue:`17983`)
 -
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -3181,7 +3181,8 @@ def _try_cast(arr, take_fast_path):
 
     # GH #846
     if isinstance(data, (np.ndarray, Index, Series)):
-
+        if data.ndim > 1:
+            raise ValueError('Data must be 1-dimensional')
         if dtype is not None:
             subarr = np.array(data, copy=False)
 
@@ -3212,7 +3213,11 @@ def _try_cast(arr, take_fast_path):
         return subarr
 
     elif isinstance(data, (list, tuple)) and len(data) > 0:
-        if dtype is not None:
+        if all(is_list_like(item) for item in data):
+            # Ensure nested lists are not interpreted as further dimensions:
+            subarr = np.empty(len(data), dtype='object')
+            subarr[:] = data
+        elif dtype is not None:
             try:
                 subarr = _try_cast(data, False)
             except Exception:
@@ -3234,40 +3239,25 @@ def _try_cast(arr, take_fast_path):
     else:
         subarr = _try_cast(data, False)
 
-    # scalar like, GH
-    if getattr(subarr, 'ndim', 0) == 0:
-        if isinstance(data, list):  # pragma: no cover
-            subarr = np.array(data, dtype=object)
-        elif index is not None:
-            value = data
+    if subarr.ndim == 0 or is_scalar(data):
+        if index is None:
+            return subarr.item()
 
+        if subarr.ndim == 1:
+            # a scalar upcasted to 1-dimensional by maybe_cast_to_datetime()
+            value = subarr[0]
+            dtype = subarr.dtype
+        else:
+            value = data
             # figure out the dtype from the value (upcast if necessary)
             if dtype is None:
                 dtype, value = infer_dtype_from_scalar(value)
             else:
                 # need to possibly convert the value here
                 value = maybe_cast_to_datetime(value, dtype)
 
-            subarr = construct_1d_arraylike_from_scalar(
-                value, len(index), dtype)
-
-        else:
-            return subarr.item()
-
-    # the result that we want
-    elif subarr.ndim == 1:
-        if index is not None:
-
-            # a 1-element ndarray
-            if len(subarr) != len(index) and len(subarr) == 1:
-                subarr = construct_1d_arraylike_from_scalar(
-                    subarr[0], len(index), subarr.dtype)
-
-    elif subarr.ndim > 1:
-        if isinstance(data, np.ndarray):
-            raise Exception('Data must be 1-dimensional')
-        else:
-            subarr = _asarray_tuplesafe(data, dtype=dtype)
+        subarr = construct_1d_arraylike_from_scalar(
+            value, len(index), dtype)
 
     # This is to prevent mixed-type Series getting all casted to
     # NumPy string type, e.g. NaN --> '-1#IND'.
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -651,6 +651,17 @@ def test_constructor_dict(self):
         expected.iloc[1] = 1
         assert_series_equal(result, expected)
 
+    @pytest.mark.parametrize('input_class', [list, tuple, iter])
+    @pytest.mark.parametrize('dtype', ['object', None])
+    def test_constructor_dict_nested_lists(self, input_class, dtype):
+        # GH 18625
+        d = {'a': input_class([input_class([1, 2, 3]),
+                               input_class([4, 5, 6])]),
+             'b': input_class([input_class([7, 8, 9])])}
+        result = Series(d, index=['a', 'b'], dtype=dtype)
+        expected = Series([d['a'], d['b']], index=['a', 'b'])
+        assert_series_equal(result, expected)
+
     @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')])
     def test_constructor_dict_nan_key(self, value):
         # GH 18480