From d1b6c62d2549c97bb5d4a67ccea435fdb701e215 Mon Sep 17 00:00:00 2001 From: Maximilian Roos Date: Tue, 22 Mar 2016 23:11:27 -0400 Subject: [PATCH] dataframe & series take Mappings & xr.Datasets --- pandas/core/common.py | 3 ++- pandas/core/frame.py | 24 ++++++++++++------- pandas/core/panel.py | 9 ++++--- pandas/core/series.py | 14 ++++++----- pandas/tests/frame/test_constructors.py | 30 ++++++++++++++++++++---- pandas/tests/series/test_constructors.py | 30 ++++++++++++++++++++++++ pandas/tests/test_panel.py | 17 ++++++++++++-- pandas/util/testing.py | 24 ++++++++++++++++--- 8 files changed, 122 insertions(+), 29 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 341bd3b4cc845..50392e2f5b947 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -10,6 +10,8 @@ import numpy as np import pandas.lib as lib import pandas.tslib as tslib + +import pandas as pd from pandas import compat from pandas.compat import long, zip, iteritems from pandas.core.config import get_option @@ -159,7 +161,6 @@ def _get_info_slice(obj, indexer): def _maybe_box(indexer, values, obj, key): - # if we have multiples coming back, box em if isinstance(values, np.ndarray): return obj[indexer.get_loc(key)] diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 501f4e443b1fc..3f803fc7326ef 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -54,7 +54,8 @@ is_list_like, is_iterator, is_sequence, - is_named_tuple) + is_named_tuple, + is_dict_like) from pandas.types.missing import isnull, notnull from pandas.core.common import (PandasError, _try_sort, @@ -64,11 +65,11 @@ _dict_compat) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, - check_bool_indexer) -from pandas.core.internals import (BlockManager, - create_block_manager_from_arrays, - create_block_manager_from_blocks) +from pandas.core.indexing import ( + maybe_droplevels, convert_to_index_sliceable, check_bool_indexer) +from pandas.core.internals import ( + BlockManager, create_block_manager_from_arrays, + create_block_manager_from_blocks) from pandas.core.series import Series from pandas.core.categorical import Categorical import pandas.computation.expressions as expressions @@ -259,11 +260,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, if isinstance(data, DataFrame): data = data._data + if hasattr(data, 'to_dataframe'): # xr.Dataset + if index or columns or dtype or copy: + raise ValueError("Supply only a Dataset if supplying a " + "Dataset") + data = data.to_dataframe()._data + if isinstance(data, BlockManager): mgr = self._init_mgr(data, axes=dict(index=index, columns=columns), dtype=dtype, copy=copy) - elif isinstance(data, dict): - mgr = self._init_dict(data, index, columns, dtype=dtype) + elif isinstance(data, ma.MaskedArray): import numpy.ma.mrecords as mrecords # masked recarray @@ -295,6 +301,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, else: mgr = self._init_ndarray(data, index, columns, dtype=dtype, copy=copy) + elif is_dict_like(data): + mgr = self._init_dict(data, index, columns, dtype=dtype) elif isinstance(data, (list, types.GeneratorType)): if isinstance(data, types.GeneratorType): data = list(data) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index b2f318d825db6..cbfa282c904e2 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -11,7 +11,7 @@ from pandas.types.cast import (_infer_dtype_from_scalar, _possibly_cast_item) from pandas.types.common import (is_integer, is_list_like, - is_string_like, is_scalar) + is_string_like, is_scalar, is_dict_like) from pandas.types.missing import notnull import pandas.computation.expressions as expressions @@ -164,7 +164,7 @@ def _init_data(self, data, copy, dtype, **kwargs): axes = [x if x is not None else y for x, y in zip(passed_axes, data.axes)] mgr = data - elif isinstance(data, dict): + elif is_dict_like(data): mgr = self._init_dict(data, passed_axes, dtype=dtype) copy = False dtype = None @@ -200,9 +200,8 @@ def _init_dict(self, data, axes, dtype=None): ks = _try_sort(ks) haxis = Index(ks) - for k, v in compat.iteritems(data): - if isinstance(v, dict): - data[k] = self._constructor_sliced(v) + data = {k: self._constructor_sliced(v) + for k, v in compat.iteritems(data) if is_dict_like(v)} # extract axis for remaining axes & create the slicemap raxes = [self._extract_axis(self, data, axis=i) if a is None else a diff --git a/pandas/core/series.py b/pandas/core/series.py index 7979a230eed84..d6c255fc88790 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -92,12 +92,13 @@ def wrapper(self): return wrapper + # ---------------------------------------------------------------------- # Series class class Series(base.IndexOpsMixin, strings.StringAccessorMixin, - generic.NDFrame,): + generic.NDFrame): """ One-dimensional ndarray with axis labels (including time series). @@ -174,7 +175,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None, else: data = data.reindex(index, copy=copy) data = data._data - elif isinstance(data, dict): + elif is_dict_like(data): if index is None: if isinstance(data, OrderedDict): index = Index(data) @@ -2127,10 +2128,9 @@ def map_f(values, f): else: map_f = lib.map_infer - if isinstance(arg, (dict, Series)): - if isinstance(arg, dict): - arg = self._constructor(arg, index=arg.keys()) - + if is_dict_like(arg): + arg = self._constructor(arg, index=arg.keys()) + if isinstance(arg, Series): indexer = arg.index.get_indexer(values) new_values = algos.take_1d(arg._values, indexer) else: @@ -2737,6 +2737,7 @@ def _dir_additions(self): Series._add_series_or_dataframe_operations() _INDEX_TYPES = ndarray, Index, list, tuple + # ----------------------------------------------------------------------------- # Supplementary functions @@ -2928,6 +2929,7 @@ def __init__(self, *args, **kwargs): super(TimeSeries, self).__init__(*args, **kwargs) + # ---------------------------------------------------------------------- # Add plotting methods to Series diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d21db5ba52a45..5b9ca3a1bed45 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -123,7 +123,7 @@ def _make_mixed_dtypes_df(typ, ad=None): zipper = lzip(dtypes, arrays) for d, a in zipper: - assert(a.dtype == d) + assert (a.dtype == d) if ad is None: ad = dict() ad.update(dict([(d, a) for d, a in zipper])) @@ -134,7 +134,7 @@ def _check_mixed_dtypes(df, dtypes=None): dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES for d in dtypes: if d in df: - assert(df.dtypes[d] == d) + assert (df.dtypes[d] == d) # mixed floating and integer coexinst in the same frame df = _make_mixed_dtypes_df('float') @@ -516,6 +516,15 @@ def test_nested_dict_frame_constructor(self): result = DataFrame(data, index=rng).T tm.assert_frame_equal(result, df) + def test_constructor_mapping(self): + + mapping = tm.MappingMock(base=Series([0, 1, 2])) + + result = DataFrame(mapping) + expected = DataFrame({4: [0, 4, 8], 5: [0, 5, 10]}) + + tm.assert_frame_equal(result, expected) + def _check_basic_constructor(self, empty): # mat: 2d matrix with shpae (3, 2) to input. empty - makes sized # objects @@ -826,7 +835,6 @@ def test_constructor_sequence_like(self): import collections class DummyContainer(collections.Sequence): - def __init__(self, lst): self._lst = lst @@ -988,6 +996,7 @@ def test_constructor_list_of_series(self): def test_constructor_list_of_derived_dicts(self): class CustomDict(dict): pass + d = {'a': 1.5, 'b': 3} data_custom = [CustomDict(d)] @@ -1473,6 +1482,7 @@ def check(df): def f(): df.loc[:, np.nan] + self.assertRaises(TypeError, f) df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan]) @@ -1624,6 +1634,7 @@ def test_from_records_set_index_name(self): def create_dict(order_id): return {'order_id': order_id, 'quantity': np.random.randint(1, 10), 'price': np.random.randint(1, 10)} + documents = [create_dict(i) for i in range(10)] # demo missing data documents.append({'order_id': 10, 'quantity': 5}) @@ -1849,7 +1860,6 @@ def test_from_records_bad_index_column(self): def test_from_records_non_tuple(self): class Record(object): - def __init__(self, *args): self.args = args @@ -1875,6 +1885,18 @@ def test_from_records_len0_with_columns(self): self.assertEqual(len(result), 0) self.assertEqual(result.index.name, 'foo') + def test_constructor_xarray_dataset(self): + tm._skip_if_no_xarray() + + index = pd.Index(['x', 'y'], name='z') + expected = DataFrame( + dict(a=[4, 5], b=[8, 10]), + index=index) + + result = DataFrame(expected.to_xarray()) + + tm.assert_frame_equal(result, expected) + class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ed7b0fda19cb7..af9f5780b6743 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -596,6 +596,15 @@ def test_constructor_subclass_dict(self): refseries = Series(dict(compat.iteritems(data))) assert_series_equal(refseries, series) + def test_constructor_mapping(self): + + mapping = tm.MappingMock(base=2) + + result = Series(mapping) + expected = pd.Series([8, 10], index=[4, 5]) + + assert_series_equal(result, expected) + def test_constructor_dict_datetime64_index(self): # GH 9456 @@ -769,6 +778,27 @@ def f(): s = Series([pd.NaT, np.nan, '1 Day']) self.assertEqual(s.dtype, 'timedelta64[ns]') + def test_constructor_dict_numpy_0d_arrays(self): + + data = [np.asarray(i) for i in range(4)] + + result = Series(data) + expected = Series(range(4)) + + # disabled for the moment (will remove from PR) + # assert_series_equal(result, expected) + + def test_constructor_xarray_dataset(self): + tm._skip_if_no_xarray() + import xarray as xr + + d = {'a': 5, 'b': 10} + result = Series(xr.Dataset(d)) + expected = Series(d) + + # disabled for the moment (will remove from PR) + # assert_series_equal(result, expected) + def test_constructor_name_hashable(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]: for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]: diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 10a6693525590..71aae7e814932 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1081,9 +1081,21 @@ def test_constructor_dict_mixed(self): data['ItemB'] = self.panel['ItemB'].values[:, :-1] self.assertRaises(Exception, Panel, data) + def test_constructor_mapping(self): + + mapping = tm.MappingMock(base=DataFrame({1: [0, 1], 2: [0, 1]})) + + result = Panel(mapping) + expected = Panel({ + 4: DataFrame({1: [0, 4], 2: [0, 4]}), + 5: DataFrame({1: [0, 5], 2: [0, 5]}) + }) + + assert_panel_equal(result, expected) + def test_ctor_orderedDict(self): - keys = list(set(np.random.randint(0, 5000, 100)))[ - :50] # unique random int keys + # unique random int keys + keys = list(set(np.random.randint(0, 5000, 100)))[:50] d = OrderedDict([(k, mkdf(10, 5)) for k in keys]) p = Panel(d) self.assertTrue(list(p.items) == keys) @@ -2147,6 +2159,7 @@ def check_drop(drop_val, axis_number, aliases, expected): pprint_thing("Failed with axis_number %d and aliases: %s" % (axis_number, aliases)) raise + # Items expected = Panel({"One": df}) check_drop('Two', 0, ['items'], expected) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2d1d88b69941b..44728f8fbde01 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -17,6 +17,7 @@ from functools import wraps, partial from contextlib import contextmanager from distutils.version import LooseVersion +from collections import Mapping from numpy.random import randn, rand from numpy.testing.decorators import slow # noqa @@ -1960,9 +1961,7 @@ def add_nans_panel4d(panel4d): class TestSubDict(dict): - - def __init__(self, *args, **kwargs): - dict.__init__(self, *args, **kwargs) + pass # Dependency checks. Copied this from Nipy/Nipype (Copyright of @@ -2726,6 +2725,25 @@ def patch(ob, attr, value): setattr(ob, attr, old) +class MappingMock(Mapping): + """ + Mock class to represent a Mapping + Takes a base, and returns that multiplied by whatever key is passed in + """ + + def __init__(self, base): + self.base = base + + def __getitem__(self, key): + return key * self.base + + def __iter__(self): + return iter([4, 5]) + + def __len__(self): + return 2 + + @contextmanager def set_timezone(tz): """Context manager for temporarily setting a timezone.