diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 1890636bc8e1a..4e59f2d0f844a 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -329,7 +329,7 @@ Deprecations - :func:`read_excel` has deprecated the ``skip_footer`` parameter. Use ``skipfooter`` instead (:issue:`18836`) - The ``is_copy`` attribute is deprecated and will be removed in a future version (:issue:`18801`). - ``IntervalIndex.from_intervals`` is deprecated in favor of the :class:`IntervalIndex` constructor (:issue:`19263`) - +- :func:`DataFrame.from_items` is deprecated. Use :func:`DataFrame.from_dict` instead, or ``DataFrame.from_dict(OrderedDict())`` if you wish to preserve the key order (:issue:`17320`) .. _whatsnew_0230.prior_deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 788b236b0ec59..96d28581cfdd9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -313,7 +313,7 @@ def _constructor(self): _constructor_sliced = Series _deprecations = NDFrame._deprecations | frozenset( - ['sortlevel', 'get_value', 'set_value', 'from_csv']) + ['sortlevel', 'get_value', 'set_value', 'from_csv', 'from_items']) @property def _constructor_expanddim(self): @@ -1246,6 +1246,12 @@ def to_records(self, index=True, convert_datetime64=True): @classmethod def from_items(cls, items, columns=None, orient='columns'): """ + .. deprecated:: 0.23.0 + from_items is deprecated and will be removed in a + future version. Use :meth:`DataFrame.from_dict(dict())` + instead. :meth:`DataFrame.from_dict(OrderedDict(...))` may be used + to preserve the key order. + Convert (key, value) pairs to DataFrame. The keys will be the axis index (usually the columns, but depends on the specified orientation). The values should be arrays or Series. @@ -1266,6 +1272,13 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ + + warnings.warn("from_items is deprecated. 
Please use " + "DataFrame.from_dict(dict()) instead. " + "DataFrame.from_dict(OrderedDict()) may be used to " + "preserve the key order.", + FutureWarning, stacklevel=2) + keys, values = lzip(*items) if orient == 'columns': diff --git a/pandas/io/stata.py b/pandas/io/stata.py index b409cf20e9a09..0922a4a9c3e9b 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -13,6 +13,7 @@ import datetime import struct import sys +from collections import OrderedDict import numpy as np from dateutil.relativedelta import relativedelta @@ -1571,7 +1572,7 @@ def read(self, nrows=None, convert_dates=None, else: data_formatted.append((col, data[col])) if requires_type_conversion: - data = DataFrame.from_items(data_formatted) + data = DataFrame.from_dict(OrderedDict(data_formatted)) del data_formatted self._do_convert_missing(data, convert_missing) @@ -1609,7 +1610,7 @@ def read(self, nrows=None, convert_dates=None, convert = True retyped_data.append((col, data[col].astype(dtype))) if convert: - data = DataFrame.from_items(retyped_data) + data = DataFrame.from_dict(OrderedDict(retyped_data)) if index_col is not None: data = data.set_index(data.pop(index_col)) @@ -1722,7 +1723,7 @@ def _do_convert_categoricals(self, data, value_label_dict, lbllist, cat_converted_data.append((col, cat_data)) else: cat_converted_data.append((col, data[col])) - data = DataFrame.from_items(cat_converted_data) + data = DataFrame.from_dict(OrderedDict(cat_converted_data)) return data def data_label(self): @@ -1997,7 +1998,7 @@ def _prepare_categoricals(self, data): data_formatted.append((col, values)) else: data_formatted.append((col, data[col])) - return DataFrame.from_items(data_formatted) + return DataFrame.from_dict(OrderedDict(data_formatted)) def _replace_nans(self, data): # return data diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b24ae22162a34..8abd88d8a379c 100644 --- a/pandas/tests/frame/test_constructors.py +++ 
b/pandas/tests/frame/test_constructors.py @@ -871,7 +871,7 @@ def __len__(self, n): # GH 4297 # support Array import array - result = DataFrame.from_items([('A', array.array('i', range(10)))]) + result = DataFrame({'A': array.array('i', range(10))}) expected = DataFrame({'A': list(range(10))}) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -1175,28 +1175,35 @@ def test_constructor_manager_resize(self): def test_constructor_from_items(self): items = [(c, self.frame[c]) for c in self.frame.columns] - recons = DataFrame.from_items(items) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items) tm.assert_frame_equal(recons, self.frame) # pass some columns - recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(items, columns=['C', 'B', 'A']) tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']]) # orient='index' row_items = [(idx, self.mixed_frame.xs(idx)) for idx in self.mixed_frame.index] - - recons = DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert recons['A'].dtype == np.float64 with tm.assert_raises_regex(TypeError, "Must pass columns with " "orient='index'"): - DataFrame.from_items(row_items, orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items(row_items, orient='index') # orient='index', but thar be tuples arr = construct_1d_object_array_from_listlike( @@ -1204,15 +1211,19 @@ def test_constructor_from_items(self): self.mixed_frame['foo'] = arr row_items = [(idx, list(self.mixed_frame.xs(idx))) for idx in self.mixed_frame.index] - recons = 
DataFrame.from_items(row_items, - columns=self.mixed_frame.columns, - orient='index') + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + recons = DataFrame.from_items(row_items, + columns=self.mixed_frame.columns, + orient='index') tm.assert_frame_equal(recons, self.mixed_frame) assert isinstance(recons['foo'][0], tuple) - rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], - orient='index', - columns=['one', 'two', 'three']) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + orient='index', + columns=['one', 'two', 'three']) xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'], columns=['one', 'two', 'three']) tm.assert_frame_equal(rs, xp) @@ -1222,12 +1233,28 @@ def test_constructor_from_items_scalars(self): with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 4)]) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 4)]) with tm.assert_raises_regex(ValueError, r'The value in each \(key, value\) ' 'pair must be an array, Series, or dict'): - DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'], + orient='index') + + def test_from_items_deprecation(self): + # GH 17320 + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])]) + + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], + columns=['col1', 'col2', 'col3'], orient='index') def test_constructor_mix_series_nonseries(self): @@ -1256,13 +1283,13 @@ def test_constructor_column_duplicates(self): 
tm.assert_frame_equal(df, edf) - idf = DataFrame.from_items( - [('a', [8]), ('a', [5])], columns=['a', 'a']) + idf = DataFrame.from_records([(8, 5)], + columns=['a', 'a']) + tm.assert_frame_equal(idf, edf) - pytest.raises(ValueError, DataFrame.from_items, - [('a', [8]), ('a', [5]), ('b', [6])], - columns=['b', 'a', 'a']) + pytest.raises(ValueError, DataFrame.from_dict, + OrderedDict([('b', 8), ('a', 5), ('a', 6)])) def test_constructor_empty_with_string_dtype(self): # GH 9428 diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index f0a21cde4fbd9..36465db78361f 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -214,9 +214,10 @@ def check(result, expected=None): for index in [df.index, pd.Index(list('edcba'))]: this_df = df.copy() expected_ser = pd.Series(index.values, index=this_df.index) - expected_df = DataFrame.from_items([('A', expected_ser), - ('B', this_df['B']), - ('A', expected_ser)]) + expected_df = DataFrame({'A': expected_ser, + 'B': this_df['B'], + 'A': expected_ser}, + columns=['A', 'B', 'A']) this_df['A'] = index check(this_df, expected_df) diff --git a/pandas/tests/io/parser/common.py b/pandas/tests/io/parser/common.py index 8525cb42c2455..f677b356a77a5 100644 --- a/pandas/tests/io/parser/common.py +++ b/pandas/tests/io/parser/common.py @@ -8,6 +8,7 @@ import re import sys from datetime import datetime +from collections import OrderedDict import pytest import numpy as np @@ -924,8 +925,9 @@ def test_float_parser(self): def test_scientific_no_exponent(self): # see gh-12215 - df = DataFrame.from_items([('w', ['2e']), ('x', ['3E']), - ('y', ['42e']), ('z', ['632E'])]) + df = DataFrame.from_dict(OrderedDict([('w', ['2e']), ('x', ['3E']), + ('y', ['42e']), + ('z', ['632E'])])) data = df.to_csv(index=False) for prec in self.float_precision_choices: df_roundtrip = self.read_csv( diff --git a/pandas/tests/io/test_excel.py 
b/pandas/tests/io/test_excel.py index efbabcfd8fc4c..ebb8424b78ed4 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -6,6 +6,7 @@ from distutils.version import LooseVersion from functools import partial from warnings import catch_warnings +from collections import OrderedDict import numpy as np import pytest @@ -315,7 +316,7 @@ def test_excel_table(self): def test_reader_special_dtypes(self): - expected = DataFrame.from_items([ + expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, 4, 0]), ("FloatCol", [1.25, 2.25, 1.83, 1.92, 0.0000000005]), ("BoolCol", [True, False, True, True, False]), @@ -325,8 +326,7 @@ def test_reader_special_dtypes(self): ("DateCol", [datetime(2013, 10, 30), datetime(2013, 10, 31), datetime(1905, 1, 1), datetime(2013, 12, 14), datetime(2015, 3, 14)]) - ]) - + ])) basename = 'test_types' # should read in correctly and infer types @@ -363,12 +363,12 @@ def test_reader_converters(self): basename = 'test_converters' - expected = DataFrame.from_items([ + expected = DataFrame.from_dict(OrderedDict([ ("IntCol", [1, 2, -3, -1000, 0]), ("FloatCol", [12.5, np.nan, 18.3, 19.2, 0.000000005]), ("BoolCol", ['Found', 'Found', 'Found', 'Not found', 'Found']), ("StrCol", ['1', np.nan, '3', '4', '5']), - ]) + ])) converters = {'IntCol': lambda x: int(x) if x != '' else -1000, 'FloatCol': lambda x: 10 * x if x else np.nan, @@ -718,32 +718,30 @@ def test_reader_seconds(self): if LooseVersion(xlrd.__VERSION__) >= LooseVersion("0.9.3"): # Xlrd >= 0.9.3 can handle Excel milliseconds. 
- expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56, 100000), - time(4, 29, 49, 200000), - time(6, 13, 42, 300000), - time(7, 57, 35, 400000), - time(9, 41, 28, 500000), - time(11, 25, 21, 600000), - time(13, 9, 14, 700000), - time(14, 53, 7, 800000), - time(16, 37, 0, 900000), - time(18, 20, 54)])]) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56, 100000), + time(4, 29, 49, 200000), + time(6, 13, 42, 300000), + time(7, 57, 35, 400000), + time(9, 41, 28, 500000), + time(11, 25, 21, 600000), + time(13, 9, 14, 700000), + time(14, 53, 7, 800000), + time(16, 37, 0, 900000), + time(18, 20, 54)]}) else: # Xlrd < 0.9.3 rounds Excel milliseconds. - expected = DataFrame.from_items([("Time", - [time(1, 2, 3), - time(2, 45, 56), - time(4, 29, 49), - time(6, 13, 42), - time(7, 57, 35), - time(9, 41, 29), - time(11, 25, 22), - time(13, 9, 15), - time(14, 53, 8), - time(16, 37, 1), - time(18, 20, 54)])]) + expected = DataFrame.from_dict({"Time": [time(1, 2, 3), + time(2, 45, 56), + time(4, 29, 49), + time(6, 13, 42), + time(7, 57, 35), + time(9, 41, 29), + time(11, 25, 22), + time(13, 9, 15), + time(14, 53, 8), + time(16, 37, 1), + time(18, 20, 54)]}) actual = self.get_exceldf('times_1900', 'Sheet1') tm.assert_frame_equal(actual, expected) @@ -1988,7 +1986,7 @@ def test_datetimes(self): datetime(2013, 1, 13, 18, 20, 52)] with ensure_clean(self.ext) as path: - write_frame = DataFrame.from_items([('A', datetimes)]) + write_frame = DataFrame({'A': datetimes}) write_frame.to_excel(path, 'Sheet1') read_frame = read_excel(path, 'Sheet1', header=0) diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index d0d7f881b37d0..89d76061329a3 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -8,6 +8,7 @@ import warnings from datetime import datetime from distutils.version import LooseVersion +from collections import OrderedDict import numpy as np import pandas as pd @@ -945,7 +946,7 @@ 
def test_categorical_order(self, file): cols.append((col, pd.Categorical.from_codes(codes, labels))) else: cols.append((col, pd.Series(labels, dtype=np.float32))) - expected = DataFrame.from_items(cols) + expected = DataFrame.from_dict(OrderedDict(cols)) # Read with and with out categoricals, ensure order is identical file = getattr(self, file)