Skip to content

MAINT: refactor from_items() using from_dict() #22094

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,7 @@ Removal of prior version deprecations/changes

- The ``LongPanel`` and ``WidePanel`` classes have been removed (:issue:`10892`)
- Several private functions were removed from the (non-public) module ``pandas.core.common`` (:issue:`22001`)
- :meth:`~pandas.DataFrame.from_items` has been refactored and the deprecation warning from v0.23.0 has been removed (i.e. it is no longer deprecated)
-
-

Expand Down
61 changes: 11 additions & 50 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@
ensure_int64,
ensure_platform_int,
is_list_like,
is_nested_list_like,
is_iterator,
is_sequence,
is_named_tuple)
Expand Down Expand Up @@ -1445,17 +1444,13 @@ def to_records(self, index=True, convert_datetime64=None):
def from_items(cls, items, columns=None, orient='columns'):
"""Construct a dataframe from a list of tuples

.. deprecated:: 0.23.0
`from_items` is deprecated and will be removed in a future version.
Use :meth:`DataFrame.from_dict(dict(items)) <DataFrame.from_dict>`
instead.
:meth:`DataFrame.from_dict(OrderedDict(items)) <DataFrame.from_dict>`
may be used to preserve the key order.

Convert (key, value) pairs to DataFrame. The keys will be the axis
index (usually the columns, but depends on the specified
orientation). The values should be arrays or Series.

`from_items(items)` is equivalent to
:meth:`DataFrame.from_dict(OrderedDict(items)) <DataFrame.from_dict>`.

Parameters
----------
items : sequence of (key, value) pairs
Expand All @@ -1473,57 +1468,23 @@ def from_items(cls, items, columns=None, orient='columns'):
frame : DataFrame
"""

warnings.warn("from_items is deprecated. Please use "
"DataFrame.from_dict(dict(items), ...) instead. "
"DataFrame.from_dict(OrderedDict(items)) may be used to "
"preserve the key order.",
FutureWarning, stacklevel=2)

keys, values = lzip(*items)
odict = collections.OrderedDict(items)

if orient == 'columns':
if columns is not None:
columns = ensure_index(columns)

idict = dict(items)
if len(idict) < len(items):
if not columns.equals(ensure_index(keys)):
raise ValueError('With non-unique item names, passed '
'columns must be identical')
arrays = values
else:
arrays = [idict[k] for k in columns if k in idict]
return cls.from_dict(odict).reindex(columns=columns)
else:
columns = ensure_index(keys)
arrays = values

# GH 17312
# Provide more informative error msg when scalar values passed
try:
return cls._from_arrays(arrays, columns, None)

except ValueError:
if not is_nested_list_like(values):
raise ValueError('The value in each (key, value) pair '
'must be an array, Series, or dict')
return cls.from_dict(odict, orient)

elif orient == 'index':
if columns is None:
# we can produce a DataFrame even in this case,
# but raise for consistency with previous versions
raise TypeError("Must pass columns with orient='index'")

keys = ensure_index(keys)

# GH 17312
# Provide more informative error msg when scalar values passed
try:
arr = np.array(values, dtype=object).T
data = [lib.maybe_convert_objects(v) for v in arr]
return cls._from_arrays(data, columns, keys)

except TypeError:
if not is_nested_list_like(values):
raise ValueError('The value in each (key, value) pair '
'must be an array, Series, or dict')
# reindex will not be needed once GH 8425 is fixed
idx = odict.keys()
return cls.from_dict(odict, orient, columns=columns).reindex(idx)

else: # pragma: no cover
raise ValueError("'orient' must be either 'columns' or 'index'")
Expand Down
65 changes: 17 additions & 48 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1283,87 +1283,56 @@ def test_constructor_manager_resize(self):

def test_constructor_from_items(self):
items = [(c, self.frame[c]) for c in self.frame.columns]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
recons = DataFrame.from_items(items)
recons = DataFrame.from_items(items)
tm.assert_frame_equal(recons, self.frame)

# pass some columns
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
recons = DataFrame.from_items(items, columns=['C', 'B', 'A'])
recons = DataFrame.from_items(items, columns=['C', 'B', 'A'])
tm.assert_frame_equal(recons, self.frame.loc[:, ['C', 'B', 'A']])

# orient='index'

row_items = [(idx, self.mixed_frame.xs(idx))
for idx in self.mixed_frame.index]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
recons = DataFrame.from_items(row_items,
columns=self.mixed_frame.columns,
orient='index')
recons = DataFrame.from_items(row_items,
columns=self.mixed_frame.columns,
orient='index')
tm.assert_frame_equal(recons, self.mixed_frame)
assert recons['A'].dtype == np.float64

with tm.assert_raises_regex(TypeError,
"Must pass columns with "
"orient='index'"):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
DataFrame.from_items(row_items, orient='index')
DataFrame.from_items(row_items, orient='index')

# orient='index', but some of the row values are tuples
arr = construct_1d_object_array_from_listlike(
[('bar', 'baz')] * len(self.mixed_frame))
self.mixed_frame['foo'] = arr
row_items = [(idx, list(self.mixed_frame.xs(idx)))
for idx in self.mixed_frame.index]
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
recons = DataFrame.from_items(row_items,
columns=self.mixed_frame.columns,
orient='index')
recons = DataFrame.from_items(row_items,
columns=self.mixed_frame.columns,
orient='index')
tm.assert_frame_equal(recons, self.mixed_frame)
assert isinstance(recons['foo'][0], tuple)

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
orient='index',
columns=['one', 'two', 'three'])
rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
orient='index',
columns=['one', 'two', 'three'])
xp = DataFrame([[1, 2, 3], [4, 5, 6]], index=['A', 'B'],
columns=['one', 'two', 'three'])
tm.assert_frame_equal(rs, xp)

def test_constructor_from_items_scalars(self):
# GH 17312
with tm.assert_raises_regex(ValueError,
r'The value in each \(key, value\) '
'pair must be an array, Series, or dict'):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
DataFrame.from_items([('A', 1), ('B', 4)])

with tm.assert_raises_regex(ValueError,
r'The value in each \(key, value\) '
'pair must be an array, Series, or dict'):
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'],
orient='index')

def test_from_items_deprecation(self):
# GH 17320
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])
r'If using all scalar values, '
'you must pass an index'):
DataFrame.from_items([('A', 1), ('B', 4)])

with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
columns=['col1', 'col2', 'col3'],
orient='index')
DataFrame.from_items([('A', 1), ('B', 2)], columns=['col1'],
orient='index')

def test_constructor_mix_series_nonseries(self):
df = DataFrame({'A': self.frame['A'],
Expand Down