MAINT: refactor from_items() using from_dict(). Fixes pandas-dev#21850

jzwinck · jzwinck · commit d4219df688ae · 2018-07-28T13:45:30.000+08:00
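This removes the deprecation warnings introduced in pandas-dev#18262 by reimplementing DataFrame.from_items() in the recommended way, using DataFrame.from_dict() and collections.OrderedDict. This eliminates the maintenance burden of separate code for from_items() while allowing existing uses to keep working. One visible behavior change: scalar values now go through from_dict(), so with orient='columns' the error message becomes "If using all scalar values, you must pass an index", and with orient='index' scalar values no longer raise. A small cleanup can be done once pandas-dev#8425 is fixed.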
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1445,17 +1445,13 @@ def to_records(self, index=True, convert_datetime64=None):
     def from_items(cls, items, columns=None, orient='columns'):
         """Construct a dataframe from a list of tuples
 
-        .. deprecated:: 0.23.0
-          `from_items` is deprecated and will be removed in a future version.
-          Use :meth:`DataFrame.from_dict(dict(items)) <DataFrame.from_dict>`
-          instead.
-          :meth:`DataFrame.from_dict(OrderedDict(items)) <DataFrame.from_dict>`
-          may be used to preserve the key order.
-
         Convert (key, value) pairs to DataFrame. The keys will be the axis
         index (usually the columns, but depends on the specified
         orientation). The values should be arrays or Series.
 
+        `from_items(items)` is equivalent to
+        :meth:`DataFrame.from_dict(OrderedDict(items)) <DataFrame.from_dict>`.
+
         Parameters
         ----------
         items : sequence of (key, value) pairs
@@ -1473,57 +1469,23 @@ def from_items(cls, items, columns=None, orient='columns'):
         frame : DataFrame
         """
 
-        warnings.warn("from_items is deprecated. Please use "
-                      "DataFrame.from_dict(dict(items), ...) instead. "
-                      "DataFrame.from_dict(OrderedDict(items)) may be used to "
-                      "preserve the key order.",
-                      FutureWarning, stacklevel=2)
-
-        keys, values = lzip(*items)
+        odict = collections.OrderedDict(items)
 
         if orient == 'columns':
             if columns is not None:
-                columns = ensure_index(columns)
-
-                idict = dict(items)
-                if len(idict) < len(items):
-                    if not columns.equals(ensure_index(keys)):
-                        raise ValueError('With non-unique item names, passed '
-                                         'columns must be identical')
-                    arrays = values
-                else:
-                    arrays = [idict[k] for k in columns if k in idict]
+                return cls.from_dict(odict).reindex(columns=columns)
             else:
-                columns = ensure_index(keys)
-                arrays = values
-
-            # GH 17312
-            # Provide more informative error msg when scalar values passed
-            try:
-                return cls._from_arrays(arrays, columns, None)
-
-            except ValueError:
-                if not is_nested_list_like(values):
-                    raise ValueError('The value in each (key, value) pair '
-                                     'must be an array, Series, or dict')
+                return cls.from_dict(odict, orient)
 
         elif orient == 'index':
             if columns is None:
+                # we can produce a DataFrame even in this case,
+                # but raise for consistency with previous versions
                 raise TypeError("Must pass columns with orient='index'")
 
-            keys = ensure_index(keys)
-
-            # GH 17312
-            # Provide more informative error msg when scalar values passed
-            try:
-                arr = np.array(values, dtype=object).T
-                data = [lib.maybe_convert_objects(v) for v in arr]
-                return cls._from_arrays(data, columns, keys)
-
-            except TypeError:
-                if not is_nested_list_like(values):
-                    raise ValueError('The value in each (key, value) pair '
-                                     'must be an array, Series, or dict')
+            # reindex will not be needed once GH 8425 is fixed
+            idx = odict.keys()
+            return cls.from_dict(odict, orient, columns=columns).reindex(idx)
 
         else:  # pragma: no cover
             raise ValueError("'orient' must be either 'columns' or 'index'")
diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py
@@ -1283,87 +1283,56 @@ def test_constructor_manager_resize(self):
 
     def test_constructor_from_items(self):
         items = [(c, self.frame[c]) for c in self.frame.columns]
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            recons = DataFrame.from_items(items)
+        recons = DataFrame.from_items(items)
         tm.assert_frame_equal(recons, self.frame)
 
         # pass some columns
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            recons = DataFrame.from_items(items, columns=['C', 'B', 'A'])
+        recons = DataFrame.from_items(items, columns=['C', 'B', 'A'])
         tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']])
 
         # orient='index'
 
         row_items = [(idx, self.mixed_frame.xs(idx))
                      for idx in self.mixed_frame.index]
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            recons = DataFrame.from_items(row_items,
-                                          columns=self.mixed_frame.columns,
-                                          orient='index')
+        recons = DataFrame.from_items(row_items,
+                                      columns=self.mixed_frame.columns,
+                                      orient='index')
         tm.assert_frame_equal(recons, self.mixed_frame)
         assert recons['A'].dtype == np.float64
 
         with tm.assert_raises_regex(TypeError,
                                     "Must pass columns with "
                                     "orient='index'"):
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                DataFrame.from_items(row_items, orient='index')
+            DataFrame.from_items(row_items, orient='index')
 
         # orient='index', but thar be tuples
         arr = construct_1d_object_array_from_listlike(
             [('bar', 'baz')] * len(self.mixed_frame))
         self.mixed_frame['foo'] = arr
         row_items = [(idx, list(self.mixed_frame.xs(idx)))
                      for idx in self.mixed_frame.index]
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            recons = DataFrame.from_items(row_items,
-                                          columns=self.mixed_frame.columns,
-                                          orient='index')
+        recons = DataFrame.from_items(row_items,
+                                      columns=self.mixed_frame.columns,
+                                      orient='index')
         tm.assert_frame_equal(recons, self.mixed_frame)
         assert isinstance(recons['foo'][0], tuple)
 
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
-                                      orient='index',
-                                      columns=['one', 'two', 'three'])
+        rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
+                                  orient='index',
+                                  columns=['one', 'two', 'three'])
         xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'],
                        columns=['one', 'two', 'three'])
         tm.assert_frame_equal(rs, xp)
 
     def test_constructor_from_items_scalars(self):
         # GH 17312
         with tm.assert_raises_regex(ValueError,
-                                    r'The value in each \(key, value\) '
-                                    'pair must be an array, Series, or dict'):
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                DataFrame.from_items([('A', 1), ('B', 4)])
-
-        with tm.assert_raises_regex(ValueError,
-                                    r'The value in each \(key, value\) '
-                                    'pair must be an array, Series, or dict'):
-            with tm.assert_produces_warning(FutureWarning,
-                                            check_stacklevel=False):
-                DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'],
-                                     orient='index')
-
-    def test_from_items_deprecation(self):
-        # GH 17320
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])
+                                    r'If using all scalar values, '
+                                    'you must pass an index'):
+            DataFrame.from_items([('A', 1), ('B', 4)])
 
-        with tm.assert_produces_warning(FutureWarning,
-                                        check_stacklevel=False):
-            DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
-                                 columns=['col1', 'col2', 'col3'],
-                                 orient='index')
+        DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'],
+                             orient='index')
 
     def test_constructor_mix_series_nonseries(self):
         df = DataFrame({'A': self.frame['A'],