Skip to content

COMPAT: Objects construction compat with xarray.Dataset #12400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import numpy as np
import pandas.lib as lib
import pandas.tslib as tslib

import pandas as pd
from pandas import compat
from pandas.compat import long, zip, iteritems
from pandas.core.config import get_option
Expand Down Expand Up @@ -159,7 +161,6 @@ def _get_info_slice(obj, indexer):


def _maybe_box(indexer, values, obj, key):

# if we have multiples coming back, box em
if isinstance(values, np.ndarray):
return obj[indexer.get_loc(key)]
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
is_list_like,
is_iterator,
is_sequence,
is_named_tuple)
is_named_tuple,
is_dict_like)
from pandas.types.missing import isnull, notnull

from pandas.core.common import (PandasError, _try_sort,
Expand All @@ -64,11 +65,11 @@
_dict_compat)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
check_bool_indexer)
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.indexing import (
maybe_droplevels, convert_to_index_sliceable, check_bool_indexer)
from pandas.core.internals import (
BlockManager, create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.series import Series
from pandas.core.categorical import Categorical
import pandas.computation.expressions as expressions
Expand Down Expand Up @@ -259,11 +260,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
if isinstance(data, DataFrame):
data = data._data

if hasattr(data, 'to_dataframe'): # xr.Dataset
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would slightly rather skip this special check. In theory, to_dataframe might return something else.

I don't think it will be that much slower to use the generic dict path, though we then run into the issue that DataFrame._init_dict does a special check for OrderedDict when deciding whether or not to order the keys. Sadly there's no way to check whether an arbitrary Mapping type has intentionally ordered keys or not (Python-ideas discussed adding collections.abc.Ordered but I don't think it was implemented).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The broader question is whether we want objects to be able to define their own conversion to a DataFrame - I think the main options are:

  1. Everything is treated as a dict
  2. We build the code within the DataFrame constructor - i.e. that if statement checks for DataSet (and anything else we want to check)
  3. There is some duck-like method such as to_dataframe that classes can define themselves

Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy with a duck-like method, but we should probably call it something very explicit like _to_pandas_dataframe_. This would be useful for a lot of other projects, not just xarray.

if index or columns or dtype or copy:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be an `elif`

raise ValueError("Supply only a Dataset if supplying a "
"Dataset")
data = data.to_dataframe()._data

if isinstance(data, BlockManager):
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
dtype=dtype, copy=copy)
elif isinstance(data, dict):
mgr = self._init_dict(data, index, columns, dtype=dtype)

elif isinstance(data, ma.MaskedArray):
import numpy.ma.mrecords as mrecords
# masked recarray
Expand Down Expand Up @@ -295,6 +301,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif is_dict_like(data):
mgr = self._init_dict(data, index, columns, dtype=dtype)
elif isinstance(data, (list, types.GeneratorType)):
if isinstance(data, types.GeneratorType):
data = list(data)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.types.cast import (_infer_dtype_from_scalar,
_possibly_cast_item)
from pandas.types.common import (is_integer, is_list_like,
is_string_like, is_scalar)
is_string_like, is_scalar, is_dict_like)
from pandas.types.missing import notnull

import pandas.computation.expressions as expressions
Expand Down Expand Up @@ -164,7 +164,7 @@ def _init_data(self, data, copy, dtype, **kwargs):
axes = [x if x is not None else y
for x, y in zip(passed_axes, data.axes)]
mgr = data
elif isinstance(data, dict):
elif is_dict_like(data):
mgr = self._init_dict(data, passed_axes, dtype=dtype)
copy = False
dtype = None
Expand Down Expand Up @@ -200,9 +200,8 @@ def _init_dict(self, data, axes, dtype=None):
ks = _try_sort(ks)
haxis = Index(ks)

for k, v in compat.iteritems(data):
if isinstance(v, dict):
data[k] = self._constructor_sliced(v)
data = {k: self._constructor_sliced(v)
for k, v in compat.iteritems(data) if is_dict_like(v)}

# extract axis for remaining axes & create the slicemap
raxes = [self._extract_axis(self, data, axis=i) if a is None else a
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,13 @@ def wrapper(self):

return wrapper


# ----------------------------------------------------------------------
# Series class


class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
generic.NDFrame,):
generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).

Expand Down Expand Up @@ -174,7 +175,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
else:
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
elif is_dict_like(data):
if index is None:
if isinstance(data, OrderedDict):
index = Index(data)
Expand Down Expand Up @@ -2127,10 +2128,9 @@ def map_f(values, f):
else:
map_f = lib.map_infer

if isinstance(arg, (dict, Series)):
if isinstance(arg, dict):
arg = self._constructor(arg, index=arg.keys())

if is_dict_like(arg):
arg = self._constructor(arg, index=arg.keys())
if isinstance(arg, Series):
indexer = arg.index.get_indexer(values)
new_values = algos.take_1d(arg._values, indexer)
else:
Expand Down Expand Up @@ -2737,6 +2737,7 @@ def _dir_additions(self):
Series._add_series_or_dataframe_operations()
_INDEX_TYPES = ndarray, Index, list, tuple


# -----------------------------------------------------------------------------
# Supplementary functions

Expand Down Expand Up @@ -2928,6 +2929,7 @@ def __init__(self, *args, **kwargs):

super(TimeSeries, self).__init__(*args, **kwargs)


# ----------------------------------------------------------------------
# Add plotting methods to Series

Expand Down
30 changes: 26 additions & 4 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _make_mixed_dtypes_df(typ, ad=None):

zipper = lzip(dtypes, arrays)
for d, a in zipper:
assert(a.dtype == d)
assert (a.dtype == d)
if ad is None:
ad = dict()
ad.update(dict([(d, a) for d, a in zipper]))
Expand All @@ -134,7 +134,7 @@ def _check_mixed_dtypes(df, dtypes=None):
dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES
for d in dtypes:
if d in df:
assert(df.dtypes[d] == d)
assert (df.dtypes[d] == d)

# mixed floating and integer coexist in the same frame
df = _make_mixed_dtypes_df('float')
Expand Down Expand Up @@ -516,6 +516,15 @@ def test_nested_dict_frame_constructor(self):
result = DataFrame(data, index=rng).T
tm.assert_frame_equal(result, df)

def test_constructor_mapping(self):
    # GH 12400: the DataFrame constructor should accept any Mapping,
    # not only dict.  MappingMock yields keys 4 and 5 and returns
    # key * base for each lookup.
    expected = DataFrame({4: [0, 4, 8], 5: [0, 5, 10]})
    result = DataFrame(tm.MappingMock(base=Series([0, 1, 2])))
    tm.assert_frame_equal(result, expected)

def _check_basic_constructor(self, empty):
# mat: 2d matrix with shape (3, 2) to input. empty - makes sized
# objects
Expand Down Expand Up @@ -826,7 +835,6 @@ def test_constructor_sequence_like(self):
import collections

class DummyContainer(collections.Sequence):

def __init__(self, lst):
self._lst = lst

Expand Down Expand Up @@ -988,6 +996,7 @@ def test_constructor_list_of_series(self):
def test_constructor_list_of_derived_dicts(self):
class CustomDict(dict):
pass

d = {'a': 1.5, 'b': 3}

data_custom = [CustomDict(d)]
Expand Down Expand Up @@ -1473,6 +1482,7 @@ def check(df):

def f():
df.loc[:, np.nan]

self.assertRaises(TypeError, f)

df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan])
Expand Down Expand Up @@ -1624,6 +1634,7 @@ def test_from_records_set_index_name(self):
def create_dict(order_id):
return {'order_id': order_id, 'quantity': np.random.randint(1, 10),
'price': np.random.randint(1, 10)}

documents = [create_dict(i) for i in range(10)]
# demo missing data
documents.append({'order_id': 10, 'quantity': 5})
Expand Down Expand Up @@ -1849,7 +1860,6 @@ def test_from_records_bad_index_column(self):

def test_from_records_non_tuple(self):
class Record(object):

def __init__(self, *args):
self.args = args

Expand All @@ -1875,6 +1885,18 @@ def test_from_records_len0_with_columns(self):
self.assertEqual(len(result), 0)
self.assertEqual(result.index.name, 'foo')

def test_constructor_xarray_dataset(self):
    # round-trip: DataFrame -> xarray.Dataset -> DataFrame
    tm._skip_if_no_xarray()

    expected = DataFrame({'a': [4, 5], 'b': [8, 10]},
                         index=pd.Index(['x', 'y'], name='z'))
    result = DataFrame(expected.to_xarray())
    tm.assert_frame_equal(result, expected)


class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData):

Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,15 @@ def test_constructor_subclass_dict(self):
refseries = Series(dict(compat.iteritems(data)))
assert_series_equal(refseries, series)

def test_constructor_mapping(self):
    # GH 12400: the Series constructor should accept any Mapping, not
    # only dict.  MappingMock(base=2) maps key -> key * 2 for keys 4, 5.
    expected = pd.Series([8, 10], index=[4, 5])
    result = Series(tm.MappingMock(base=2))
    assert_series_equal(result, expected)

def test_constructor_dict_datetime64_index(self):
# GH 9456

Expand Down Expand Up @@ -769,6 +778,27 @@ def f():
s = Series([pd.NaT, np.nan, '1 Day'])
self.assertEqual(s.dtype, 'timedelta64[ns]')

def test_constructor_dict_numpy_0d_arrays(self):

data = [np.asarray(i) for i in range(4)]

result = Series(data)
expected = Series(range(4))

# disabled for the moment (will remove from PR)
# assert_series_equal(result, expected)

def test_constructor_xarray_dataset(self):
    tm._skip_if_no_xarray()
    import xarray as xr

    d = {'a': 5, 'b': 10}
    result = Series(xr.Dataset(d))
    expected = Series(d)

    # disabled because of the issues with 0-d arrays discussed in the
    # issue (will remove from PR)
    # assert_series_equal(result, expected)

def test_constructor_name_hashable(self):
for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]:
for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]:
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,9 +1081,21 @@ def test_constructor_dict_mixed(self):
data['ItemB'] = self.panel['ItemB'].values[:, :-1]
self.assertRaises(Exception, Panel, data)

def test_constructor_mapping(self):
    # GH 12400: the Panel constructor should accept any Mapping, not
    # only dict.  Each key k maps to k * base, i.e. the base frame
    # scaled element-wise.
    base = DataFrame({1: [0, 1], 2: [0, 1]})
    expected = Panel({k: base * k for k in (4, 5)})
    result = Panel(tm.MappingMock(base=base))
    assert_panel_equal(result, expected)

def test_ctor_orderedDict(self):
    # 50 unique random integer keys, in whatever order set() yields
    keys = list(set(np.random.randint(0, 5000, 100)))[:50]
    frames = OrderedDict((k, mkdf(10, 5)) for k in keys)
    panel = Panel(frames)
    self.assertTrue(list(panel.items) == keys)
Expand Down Expand Up @@ -2147,6 +2159,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
pprint_thing("Failed with axis_number %d and aliases: %s" %
(axis_number, aliases))
raise

# Items
expected = Panel({"One": df})
check_drop('Two', 0, ['items'], expected)
Expand Down
24 changes: 21 additions & 3 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from functools import wraps, partial
from contextlib import contextmanager
from distutils.version import LooseVersion
from collections import Mapping

from numpy.random import randn, rand
from numpy.testing.decorators import slow # noqa
Expand Down Expand Up @@ -1960,9 +1961,7 @@ def add_nans_panel4d(panel4d):


class TestSubDict(dict):

def __init__(self, *args, **kwargs):
dict.__init__(self, *args, **kwargs)
pass


# Dependency checks. Copied this from Nipy/Nipype (Copyright of
Expand Down Expand Up @@ -2726,6 +2725,25 @@ def patch(ob, attr, value):
setattr(ob, attr, old)


class MappingMock(Mapping):
"""
Mock class to represent a Mapping
Takes a base, and returns that multiplied by whatever key is passed in
"""

def __init__(self, base):
self.base = base

def __getitem__(self, key):
return key * self.base

def __iter__(self):
return iter([4, 5])

def __len__(self):
return 2


@contextmanager
def set_timezone(tz):
"""Context manager for temporarily setting a timezone.
Expand Down