From db65af97012dfeff9af53d1b2ac48e26f896bf23 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Tue, 20 Feb 2018 21:48:12 +0000 Subject: [PATCH 1/2] ENH: Add columns parameter to from_dict --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 15 ++++++++++++--- pandas/tests/frame/test_constructors.py | 18 ++++++++++++++++++ 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index a4b943f995a33..03bf0426f9184 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -589,6 +589,7 @@ Other API Changes - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) .. _whatsnew_0230.deprecations: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d81d22173bfbd..d006fd755cd35 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -876,7 +876,7 @@ def dot(self, other): # IO methods (to / from other formats) @classmethod - def from_dict(cls, data, orient='columns', dtype=None): + def from_dict(cls, data, orient='columns', dtype=None, columns=None): """ Construct DataFrame from dict of array-like or dicts @@ -890,12 +890,17 @@ def from_dict(cls, data, orient='columns', dtype=None): (default). Otherwise if the keys should be rows, pass 'index'. dtype : dtype, default None Data type to force, otherwise infer + columns: list, default None + Column labels to use when orient='index'. 
Raises a ValueError + if used with orient='columns' + + .. versionadded:: 0.23.0 Returns ------- DataFrame """ - index, columns = None, None + index = None orient = orient.lower() if orient == 'index': if len(data) > 0: @@ -904,7 +909,11 @@ def from_dict(cls, data, orient='columns', dtype=None): data = _from_nested_dict(data) else: data, index = list(data.values()), list(data.keys()) - elif orient != 'columns': # pragma: no cover + elif orient == 'columns': + if columns is not None: + raise ValueError("cannot use columns parameter with " + "orient='columns'") + else: # pragma: no cover raise ValueError('only recognize index or columns for orient') return cls(data, index=index, columns=columns, dtype=dtype) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 8abd88d8a379c..c5734e78da755 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1091,6 +1091,24 @@ def test_constructor_orient(self): xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) tm.assert_frame_equal(rs, xp) + def test_from_dict_columns_parameter(self): + # GH 18529 + # Test new columns parameter for from_dict that was added to make + # from_items(..., orient='index', columns=[...]) easier to replicate + result = DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + orient='index', columns=['one', 'two']) + expected = DataFrame([[1, 2], [4, 5]], index=['A', 'B'], + columns=['one', 'two']) + tm.assert_frame_equal(result, expected) + + msg = "cannot use columns parameter with orient='columns'" + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + orient='columns', columns=['one', 'two']) + with tm.assert_raises_regex(ValueError, msg): + DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + columns=['one', 'two']) + def test_constructor_Series_named(self): a = Series([1, 2, 3], index=['a', 'b', 'c'], name='x') df = DataFrame(a) From 
e7554624b2a1716b644714267bdd99d974512bb4 Mon Sep 17 00:00:00 2001 From: reidy-p Date: Wed, 21 Feb 2018 19:29:02 +0000 Subject: [PATCH 2/2] Move whatsnew note and use OrderedDict in test --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/frame/test_constructors.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 03bf0426f9184..f3dca23d1039f 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -295,6 +295,7 @@ Other Enhancements - ``IntervalIndex.astype`` now supports conversions between subtypes when passed an ``IntervalDtype`` (:issue:`19197`) - :class:`IntervalIndex` and its associated constructor methods (``from_arrays``, ``from_breaks``, ``from_tuples``) have gained a ``dtype`` parameter (:issue:`19262`) - Added :func:`SeriesGroupBy.is_monotonic_increasing` and :func:`SeriesGroupBy.is_monotonic_decreasing` (:issue:`17015`) +- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) .. _whatsnew_0230.api_breaking: @@ -589,7 +590,6 @@ Other API Changes - :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`) - Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`) - :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`) -- :func:`DataFrame.from_dict` now accepts a ``columns`` argument that can be used to specify the column names when ``orient='index'`` is used (:issue:`18529`) .. 
_whatsnew_0230.deprecations: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c5734e78da755..394997201f320 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1095,7 +1095,8 @@ def test_from_dict_columns_parameter(self): # GH 18529 # Test new columns parameter for from_dict that was added to make # from_items(..., orient='index', columns=[...]) easier to replicate - result = DataFrame.from_dict(dict([('A', [1, 2]), ('B', [4, 5])]), + result = DataFrame.from_dict(OrderedDict([('A', [1, 2]), + ('B', [4, 5])]), orient='index', columns=['one', 'two']) expected = DataFrame([[1, 2], [4, 5]], index=['A', 'B'], columns=['one', 'two'])