Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit d1b6c62

Browse files
committed Aug 25, 2016
dataframe & series take Mappings & xr.Datasets
1 parent e23e6f1 commit d1b6c62

File tree

8 files changed

+122
-29
lines changed

8 files changed

+122
-29
lines changed
 

‎pandas/core/common.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import pandas.lib as lib
1212
import pandas.tslib as tslib
13+
14+
import pandas as pd
1315
from pandas import compat
1416
from pandas.compat import long, zip, iteritems
1517
from pandas.core.config import get_option
@@ -159,7 +161,6 @@ def _get_info_slice(obj, indexer):
159161

160162

161163
def _maybe_box(indexer, values, obj, key):
162-
163164
# if we have multiples coming back, box em
164165
if isinstance(values, np.ndarray):
165166
return obj[indexer.get_loc(key)]

‎pandas/core/frame.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,8 @@
5454
is_list_like,
5555
is_iterator,
5656
is_sequence,
57-
is_named_tuple)
57+
is_named_tuple,
58+
is_dict_like)
5859
from pandas.types.missing import isnull, notnull
5960

6061
from pandas.core.common import (PandasError, _try_sort,
@@ -64,11 +65,11 @@
6465
_dict_compat)
6566
from pandas.core.generic import NDFrame, _shared_docs
6667
from pandas.core.index import Index, MultiIndex, _ensure_index
67-
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
68-
check_bool_indexer)
69-
from pandas.core.internals import (BlockManager,
70-
create_block_manager_from_arrays,
71-
create_block_manager_from_blocks)
68+
from pandas.core.indexing import (
69+
maybe_droplevels, convert_to_index_sliceable, check_bool_indexer)
70+
from pandas.core.internals import (
71+
BlockManager, create_block_manager_from_arrays,
72+
create_block_manager_from_blocks)
7273
from pandas.core.series import Series
7374
from pandas.core.categorical import Categorical
7475
import pandas.computation.expressions as expressions
@@ -259,11 +260,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
259260
if isinstance(data, DataFrame):
260261
data = data._data
261262

263+
if hasattr(data, 'to_dataframe'): # xr.Dataset
264+
if index or columns or dtype or copy:
265+
raise ValueError("Supply only a Dataset if supplying a "
266+
"Dataset")
267+
data = data.to_dataframe()._data
268+
262269
if isinstance(data, BlockManager):
263270
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
264271
dtype=dtype, copy=copy)
265-
elif isinstance(data, dict):
266-
mgr = self._init_dict(data, index, columns, dtype=dtype)
272+
267273
elif isinstance(data, ma.MaskedArray):
268274
import numpy.ma.mrecords as mrecords
269275
# masked recarray
@@ -295,6 +301,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
295301
else:
296302
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
297303
copy=copy)
304+
elif is_dict_like(data):
305+
mgr = self._init_dict(data, index, columns, dtype=dtype)
298306
elif isinstance(data, (list, types.GeneratorType)):
299307
if isinstance(data, types.GeneratorType):
300308
data = list(data)

‎pandas/core/panel.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
from pandas.types.cast import (_infer_dtype_from_scalar,
1212
_possibly_cast_item)
1313
from pandas.types.common import (is_integer, is_list_like,
14-
is_string_like, is_scalar)
14+
is_string_like, is_scalar, is_dict_like)
1515
from pandas.types.missing import notnull
1616

1717
import pandas.computation.expressions as expressions
@@ -164,7 +164,7 @@ def _init_data(self, data, copy, dtype, **kwargs):
164164
axes = [x if x is not None else y
165165
for x, y in zip(passed_axes, data.axes)]
166166
mgr = data
167-
elif isinstance(data, dict):
167+
elif is_dict_like(data):
168168
mgr = self._init_dict(data, passed_axes, dtype=dtype)
169169
copy = False
170170
dtype = None
@@ -200,9 +200,8 @@ def _init_dict(self, data, axes, dtype=None):
200200
ks = _try_sort(ks)
201201
haxis = Index(ks)
202202

203-
for k, v in compat.iteritems(data):
204-
if isinstance(v, dict):
205-
data[k] = self._constructor_sliced(v)
203+
data = {k: self._constructor_sliced(v)
204+
for k, v in compat.iteritems(data) if is_dict_like(v)}
206205

207206
# extract axis for remaining axes & create the slicemap
208207
raxes = [self._extract_axis(self, data, axis=i) if a is None else a

‎pandas/core/series.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,13 @@ def wrapper(self):
9292

9393
return wrapper
9494

95+
9596
# ----------------------------------------------------------------------
9697
# Series class
9798

9899

99100
class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
100-
generic.NDFrame,):
101+
generic.NDFrame):
101102
"""
102103
One-dimensional ndarray with axis labels (including time series).
103104
@@ -174,7 +175,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
174175
else:
175176
data = data.reindex(index, copy=copy)
176177
data = data._data
177-
elif isinstance(data, dict):
178+
elif is_dict_like(data):
178179
if index is None:
179180
if isinstance(data, OrderedDict):
180181
index = Index(data)
@@ -2127,10 +2128,9 @@ def map_f(values, f):
21272128
else:
21282129
map_f = lib.map_infer
21292130

2130-
if isinstance(arg, (dict, Series)):
2131-
if isinstance(arg, dict):
2132-
arg = self._constructor(arg, index=arg.keys())
2133-
2131+
if is_dict_like(arg):
2132+
arg = self._constructor(arg, index=arg.keys())
2133+
if isinstance(arg, Series):
21342134
indexer = arg.index.get_indexer(values)
21352135
new_values = algos.take_1d(arg._values, indexer)
21362136
else:
@@ -2737,6 +2737,7 @@ def _dir_additions(self):
27372737
Series._add_series_or_dataframe_operations()
27382738
_INDEX_TYPES = ndarray, Index, list, tuple
27392739

2740+
27402741
# -----------------------------------------------------------------------------
27412742
# Supplementary functions
27422743

@@ -2928,6 +2929,7 @@ def __init__(self, *args, **kwargs):
29282929

29292930
super(TimeSeries, self).__init__(*args, **kwargs)
29302931

2932+
29312933
# ----------------------------------------------------------------------
29322934
# Add plotting methods to Series
29332935

‎pandas/tests/frame/test_constructors.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def _make_mixed_dtypes_df(typ, ad=None):
123123

124124
zipper = lzip(dtypes, arrays)
125125
for d, a in zipper:
126-
assert(a.dtype == d)
126+
assert (a.dtype == d)
127127
if ad is None:
128128
ad = dict()
129129
ad.update(dict([(d, a) for d, a in zipper]))
@@ -134,7 +134,7 @@ def _check_mixed_dtypes(df, dtypes=None):
134134
dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES
135135
for d in dtypes:
136136
if d in df:
137-
assert(df.dtypes[d] == d)
137+
assert (df.dtypes[d] == d)
138138

139139
# mixed floating and integer coexist in the same frame
140140
df = _make_mixed_dtypes_df('float')
@@ -516,6 +516,15 @@ def test_nested_dict_frame_constructor(self):
516516
result = DataFrame(data, index=rng).T
517517
tm.assert_frame_equal(result, df)
518518

519+
def test_constructor_mapping(self):
520+
521+
mapping = tm.MappingMock(base=Series([0, 1, 2]))
522+
523+
result = DataFrame(mapping)
524+
expected = DataFrame({4: [0, 4, 8], 5: [0, 5, 10]})
525+
526+
tm.assert_frame_equal(result, expected)
527+
519528
def _check_basic_constructor(self, empty):
520529
# mat: 2d matrix with shape (3, 2) to input. empty - makes sized
521530
# objects
@@ -826,7 +835,6 @@ def test_constructor_sequence_like(self):
826835
import collections
827836

828837
class DummyContainer(collections.Sequence):
829-
830838
def __init__(self, lst):
831839
self._lst = lst
832840

@@ -988,6 +996,7 @@ def test_constructor_list_of_series(self):
988996
def test_constructor_list_of_derived_dicts(self):
989997
class CustomDict(dict):
990998
pass
999+
9911000
d = {'a': 1.5, 'b': 3}
9921001

9931002
data_custom = [CustomDict(d)]
@@ -1473,6 +1482,7 @@ def check(df):
14731482

14741483
def f():
14751484
df.loc[:, np.nan]
1485+
14761486
self.assertRaises(TypeError, f)
14771487

14781488
df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan])
@@ -1624,6 +1634,7 @@ def test_from_records_set_index_name(self):
16241634
def create_dict(order_id):
16251635
return {'order_id': order_id, 'quantity': np.random.randint(1, 10),
16261636
'price': np.random.randint(1, 10)}
1637+
16271638
documents = [create_dict(i) for i in range(10)]
16281639
# demo missing data
16291640
documents.append({'order_id': 10, 'quantity': 5})
@@ -1849,7 +1860,6 @@ def test_from_records_bad_index_column(self):
18491860

18501861
def test_from_records_non_tuple(self):
18511862
class Record(object):
1852-
18531863
def __init__(self, *args):
18541864
self.args = args
18551865

@@ -1875,6 +1885,18 @@ def test_from_records_len0_with_columns(self):
18751885
self.assertEqual(len(result), 0)
18761886
self.assertEqual(result.index.name, 'foo')
18771887

1888+
def test_constructor_xarray_dataset(self):
1889+
tm._skip_if_no_xarray()
1890+
1891+
index = pd.Index(['x', 'y'], name='z')
1892+
expected = DataFrame(
1893+
dict(a=[4, 5], b=[8, 10]),
1894+
index=index)
1895+
1896+
result = DataFrame(expected.to_xarray())
1897+
1898+
tm.assert_frame_equal(result, expected)
1899+
18781900

18791901
class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData):
18801902

‎pandas/tests/series/test_constructors.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -596,6 +596,15 @@ def test_constructor_subclass_dict(self):
596596
refseries = Series(dict(compat.iteritems(data)))
597597
assert_series_equal(refseries, series)
598598

599+
def test_constructor_mapping(self):
600+
601+
mapping = tm.MappingMock(base=2)
602+
603+
result = Series(mapping)
604+
expected = pd.Series([8, 10], index=[4, 5])
605+
606+
assert_series_equal(result, expected)
607+
599608
def test_constructor_dict_datetime64_index(self):
600609
# GH 9456
601610

@@ -769,6 +778,27 @@ def f():
769778
s = Series([pd.NaT, np.nan, '1 Day'])
770779
self.assertEqual(s.dtype, 'timedelta64[ns]')
771780

781+
def test_constructor_dict_numpy_0d_arrays(self):
782+
783+
data = [np.asarray(i) for i in range(4)]
784+
785+
result = Series(data)
786+
expected = Series(range(4))
787+
788+
# disabled for the moment (will remove from PR)
789+
# assert_series_equal(result, expected)
790+
791+
def test_constructor_xarray_dataset(self):
792+
tm._skip_if_no_xarray()
793+
import xarray as xr
794+
795+
d = {'a': 5, 'b': 10}
796+
result = Series(xr.Dataset(d))
797+
expected = Series(d)
798+
799+
# disabled for the moment (will remove from PR)
800+
# assert_series_equal(result, expected)
801+
772802
def test_constructor_name_hashable(self):
773803
for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]:
774804
for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]:

‎pandas/tests/test_panel.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1081,9 +1081,21 @@ def test_constructor_dict_mixed(self):
10811081
data['ItemB'] = self.panel['ItemB'].values[:, :-1]
10821082
self.assertRaises(Exception, Panel, data)
10831083

1084+
def test_constructor_mapping(self):
1085+
1086+
mapping = tm.MappingMock(base=DataFrame({1: [0, 1], 2: [0, 1]}))
1087+
1088+
result = Panel(mapping)
1089+
expected = Panel({
1090+
4: DataFrame({1: [0, 4], 2: [0, 4]}),
1091+
5: DataFrame({1: [0, 5], 2: [0, 5]})
1092+
})
1093+
1094+
assert_panel_equal(result, expected)
1095+
10841096
def test_ctor_orderedDict(self):
1085-
keys = list(set(np.random.randint(0, 5000, 100)))[
1086-
:50] # unique random int keys
1097+
# unique random int keys
1098+
keys = list(set(np.random.randint(0, 5000, 100)))[:50]
10871099
d = OrderedDict([(k, mkdf(10, 5)) for k in keys])
10881100
p = Panel(d)
10891101
self.assertTrue(list(p.items) == keys)
@@ -2147,6 +2159,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
21472159
pprint_thing("Failed with axis_number %d and aliases: %s" %
21482160
(axis_number, aliases))
21492161
raise
2162+
21502163
# Items
21512164
expected = Panel({"One": df})
21522165
check_drop('Two', 0, ['items'], expected)

‎pandas/util/testing.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from functools import wraps, partial
1818
from contextlib import contextmanager
1919
from distutils.version import LooseVersion
20+
from collections import Mapping
2021

2122
from numpy.random import randn, rand
2223
from numpy.testing.decorators import slow # noqa
@@ -1960,9 +1961,7 @@ def add_nans_panel4d(panel4d):
19601961

19611962

19621963
class TestSubDict(dict):
1963-
1964-
def __init__(self, *args, **kwargs):
1965-
dict.__init__(self, *args, **kwargs)
1964+
pass
19661965

19671966

19681967
# Dependency checks. Copied this from Nipy/Nipype (Copyright of
@@ -2726,6 +2725,25 @@ def patch(ob, attr, value):
27262725
setattr(ob, attr, old)
27272726

27282727

2728+
class MappingMock(Mapping):
    """
    Mock class to represent a Mapping.

    Takes a base and, for every key the mapping contains, returns that base
    multiplied by the key.

    Parameters
    ----------
    base : object
        Value that is multiplied by the requested key in ``__getitem__``.
    keys : iterable, default (4, 5)
        Keys exposed through iteration, ``len`` and membership tests.
    """

    def __init__(self, base, keys=(4, 5)):
        self.base = base
        # frozen copy so iteration order and length stay stable
        self._keys = tuple(keys)

    def __getitem__(self, key):
        # The Mapping mixins (``__contains__``, ``get``, ``items`` ...) rely
        # on KeyError to detect missing keys; without it every key would be
        # reported as present even though ``__iter__`` never yields it.
        if key not in self._keys:
            raise KeyError(key)
        return key * self.base

    def __iter__(self):
        return iter(self._keys)

    def __len__(self):
        return len(self._keys)
2745+
2746+
27292747
@contextmanager
27302748
def set_timezone(tz):
27312749
"""Context manager for temporarily setting a timezone.

0 commit comments

Comments
 (0)
Please sign in to comment.