Skip to content

COMPAT: Objects construction compat with xarray.Dataset #12400

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import numpy as np
import pandas.lib as lib
import pandas.tslib as tslib

import pandas as pd
from pandas import compat
from pandas.compat import long, zip, iteritems
from pandas.core.config import get_option
Expand Down Expand Up @@ -159,7 +161,6 @@ def _get_info_slice(obj, indexer):


def _maybe_box(indexer, values, obj, key):

# if we have multiples coming back, box em
if isinstance(values, np.ndarray):
return obj[indexer.get_loc(key)]
Expand Down
24 changes: 16 additions & 8 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@
is_list_like,
is_iterator,
is_sequence,
is_named_tuple)
is_named_tuple,
is_dict_like)
from pandas.types.missing import isnull, notnull

from pandas.core.common import (PandasError, _try_sort,
Expand All @@ -64,11 +65,11 @@
_dict_compat)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
check_bool_indexer)
from pandas.core.internals import (BlockManager,
create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.indexing import (
maybe_droplevels, convert_to_index_sliceable, check_bool_indexer)
from pandas.core.internals import (
BlockManager, create_block_manager_from_arrays,
create_block_manager_from_blocks)
from pandas.core.series import Series
from pandas.core.categorical import Categorical
import pandas.computation.expressions as expressions
Expand Down Expand Up @@ -259,11 +260,16 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
if isinstance(data, DataFrame):
data = data._data

if hasattr(data, 'to_dataframe'): # xr.Dataset
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would slightly rather skip this special check. In theory, to_dataframe might return something else.

I don't think it will be that much slower to use the generic dict path, though we then run into the issue that DataFrame._init_dict does a special check for OrderedDict when deciding whether or not to order the keys. Sadly there's no way to check whether an arbitrary Mapping type has intentionally ordered keys or not (Python-ideas discussed adding collections.abc.Ordered but I don't think it was implemented).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The broader question is whether we want objects to be able to define their own conversion to a DataFrame - I think the main options are:

  1. Everything is treated as a dict
  2. We build the code within the DataFrame constructor - i.e. that if statement checks for DataSet (and anything else we want to check)
  3. There is some duck-like method such as to_dataframe that classes can define themselves

Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm happy with a duck-like method, but we should probably call it something very explicit like _to_pandas_dataframe_. This would be useful for a lot of other projects, not just xarray.

if index or columns or dtype or copy:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could be an `elif`

raise ValueError("Supply only a Dataset if supplying a "
"Dataset")
data = data.to_dataframe()._data

if isinstance(data, BlockManager):
mgr = self._init_mgr(data, axes=dict(index=index, columns=columns),
dtype=dtype, copy=copy)
elif isinstance(data, dict):
mgr = self._init_dict(data, index, columns, dtype=dtype)

elif isinstance(data, ma.MaskedArray):
import numpy.ma.mrecords as mrecords
# masked recarray
Expand Down Expand Up @@ -295,6 +301,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
else:
mgr = self._init_ndarray(data, index, columns, dtype=dtype,
copy=copy)
elif is_dict_like(data):
mgr = self._init_dict(data, index, columns, dtype=dtype)
elif isinstance(data, (list, types.GeneratorType)):
if isinstance(data, types.GeneratorType):
data = list(data)
Expand Down
9 changes: 4 additions & 5 deletions pandas/core/panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pandas.types.cast import (_infer_dtype_from_scalar,
_possibly_cast_item)
from pandas.types.common import (is_integer, is_list_like,
is_string_like, is_scalar)
is_string_like, is_scalar, is_dict_like)
from pandas.types.missing import notnull

import pandas.computation.expressions as expressions
Expand Down Expand Up @@ -164,7 +164,7 @@ def _init_data(self, data, copy, dtype, **kwargs):
axes = [x if x is not None else y
for x, y in zip(passed_axes, data.axes)]
mgr = data
elif isinstance(data, dict):
elif is_dict_like(data):
mgr = self._init_dict(data, passed_axes, dtype=dtype)
copy = False
dtype = None
Expand Down Expand Up @@ -200,9 +200,8 @@ def _init_dict(self, data, axes, dtype=None):
ks = _try_sort(ks)
haxis = Index(ks)

for k, v in compat.iteritems(data):
if isinstance(v, dict):
data[k] = self._constructor_sliced(v)
data = {k: self._constructor_sliced(v)
for k, v in compat.iteritems(data) if is_dict_like(v)}

# extract axis for remaining axes & create the slicemap
raxes = [self._extract_axis(self, data, axis=i) if a is None else a
Expand Down
14 changes: 8 additions & 6 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,13 @@ def wrapper(self):

return wrapper


# ----------------------------------------------------------------------
# Series class


class Series(base.IndexOpsMixin, strings.StringAccessorMixin,
generic.NDFrame,):
generic.NDFrame):
"""
One-dimensional ndarray with axis labels (including time series).

Expand Down Expand Up @@ -174,7 +175,7 @@ def __init__(self, data=None, index=None, dtype=None, name=None,
else:
data = data.reindex(index, copy=copy)
data = data._data
elif isinstance(data, dict):
elif is_dict_like(data):
if index is None:
if isinstance(data, OrderedDict):
index = Index(data)
Expand Down Expand Up @@ -2127,10 +2128,9 @@ def map_f(values, f):
else:
map_f = lib.map_infer

if isinstance(arg, (dict, Series)):
if isinstance(arg, dict):
arg = self._constructor(arg, index=arg.keys())

if is_dict_like(arg):
arg = self._constructor(arg, index=arg.keys())
if isinstance(arg, Series):
indexer = arg.index.get_indexer(values)
new_values = algos.take_1d(arg._values, indexer)
else:
Expand Down Expand Up @@ -2737,6 +2737,7 @@ def _dir_additions(self):
Series._add_series_or_dataframe_operations()
_INDEX_TYPES = ndarray, Index, list, tuple


# -----------------------------------------------------------------------------
# Supplementary functions

Expand Down Expand Up @@ -2928,6 +2929,7 @@ def __init__(self, *args, **kwargs):

super(TimeSeries, self).__init__(*args, **kwargs)


# ----------------------------------------------------------------------
# Add plotting methods to Series

Expand Down
30 changes: 26 additions & 4 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ def _make_mixed_dtypes_df(typ, ad=None):

zipper = lzip(dtypes, arrays)
for d, a in zipper:
assert(a.dtype == d)
assert (a.dtype == d)
if ad is None:
ad = dict()
ad.update(dict([(d, a) for d, a in zipper]))
Expand All @@ -134,7 +134,7 @@ def _check_mixed_dtypes(df, dtypes=None):
dtypes = MIXED_FLOAT_DTYPES + MIXED_INT_DTYPES
for d in dtypes:
if d in df:
assert(df.dtypes[d] == d)
assert (df.dtypes[d] == d)

# mixed floating and integer coexist in the same frame
df = _make_mixed_dtypes_df('float')
Expand Down Expand Up @@ -516,6 +516,15 @@ def test_nested_dict_frame_constructor(self):
result = DataFrame(data, index=rng).T
tm.assert_frame_equal(result, df)

def test_constructor_mapping(self):
    # GH 12400: the DataFrame constructor should accept any Mapping,
    # not only dict.  MappingMock yields keys 4 and 5 and returns
    # key * base for each lookup.
    expected = DataFrame({4: [0, 4, 8], 5: [0, 5, 10]})
    result = DataFrame(tm.MappingMock(base=Series([0, 1, 2])))
    tm.assert_frame_equal(result, expected)

def _check_basic_constructor(self, empty):
# mat: 2d matrix with shape (3, 2) to input. empty - makes sized
# objects
Expand Down Expand Up @@ -826,7 +835,6 @@ def test_constructor_sequence_like(self):
import collections

class DummyContainer(collections.Sequence):

def __init__(self, lst):
self._lst = lst

Expand Down Expand Up @@ -988,6 +996,7 @@ def test_constructor_list_of_series(self):
def test_constructor_list_of_derived_dicts(self):
class CustomDict(dict):
pass

d = {'a': 1.5, 'b': 3}

data_custom = [CustomDict(d)]
Expand Down Expand Up @@ -1473,6 +1482,7 @@ def check(df):

def f():
df.loc[:, np.nan]

self.assertRaises(TypeError, f)

df = DataFrame([[1, 2, 3], [4, 5, 6]], index=[1, np.nan])
Expand Down Expand Up @@ -1624,6 +1634,7 @@ def test_from_records_set_index_name(self):
def create_dict(order_id):
return {'order_id': order_id, 'quantity': np.random.randint(1, 10),
'price': np.random.randint(1, 10)}

documents = [create_dict(i) for i in range(10)]
# demo missing data
documents.append({'order_id': 10, 'quantity': 5})
Expand Down Expand Up @@ -1849,7 +1860,6 @@ def test_from_records_bad_index_column(self):

def test_from_records_non_tuple(self):
class Record(object):

def __init__(self, *args):
self.args = args

Expand All @@ -1875,6 +1885,18 @@ def test_from_records_len0_with_columns(self):
self.assertEqual(len(result), 0)
self.assertEqual(result.index.name, 'foo')

def test_constructor_xarray_dataset(self):
    # round-trip: DataFrame -> xarray.Dataset -> DataFrame
    tm._skip_if_no_xarray()

    expected = DataFrame({'a': [4, 5], 'b': [8, 10]},
                         index=pd.Index(['x', 'y'], name='z'))
    result = DataFrame(expected.to_xarray())
    tm.assert_frame_equal(result, expected)


class TestDataFrameConstructorWithDatetimeTZ(tm.TestCase, TestData):

Expand Down
30 changes: 30 additions & 0 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -596,6 +596,15 @@ def test_constructor_subclass_dict(self):
refseries = Series(dict(compat.iteritems(data)))
assert_series_equal(refseries, series)

def test_constructor_mapping(self):
    # GH 12400: the Series constructor should accept any Mapping, not
    # only dict.  MappingMock(base=2) maps key -> key * 2 for keys 4, 5.
    expected = pd.Series([8, 10], index=[4, 5])
    result = Series(tm.MappingMock(base=2))
    assert_series_equal(result, expected)

def test_constructor_dict_datetime64_index(self):
# GH 9456

Expand Down Expand Up @@ -769,6 +778,27 @@ def f():
s = Series([pd.NaT, np.nan, '1 Day'])
self.assertEqual(s.dtype, 'timedelta64[ns]')

def test_constructor_dict_numpy_0d_arrays(self):

data = [np.asarray(i) for i in range(4)]

result = Series(data)
expected = Series(range(4))

# disabled for the moment (will remove from PR)
# assert_series_equal(result, expected)

def test_constructor_xarray_dataset(self):
    tm._skip_if_no_xarray()
    import xarray as xr

    d = {'a': 5, 'b': 10}
    result = Series(xr.Dataset(d))
    expected = Series(d)

    # disabled because of the issues with 0-d arrays discussed in the
    # issue (will remove from PR)
    # assert_series_equal(result, expected)

def test_constructor_name_hashable(self):
for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]:
for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]:
Expand Down
17 changes: 15 additions & 2 deletions pandas/tests/test_panel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,9 +1081,21 @@ def test_constructor_dict_mixed(self):
data['ItemB'] = self.panel['ItemB'].values[:, :-1]
self.assertRaises(Exception, Panel, data)

def test_constructor_mapping(self):
    # GH 12400: the Panel constructor should accept any Mapping, not
    # only dict.  Each key k maps to k * base, i.e. the base frame
    # scaled element-wise.
    base = DataFrame({1: [0, 1], 2: [0, 1]})
    expected = Panel({k: base * k for k in (4, 5)})
    result = Panel(tm.MappingMock(base=base))
    assert_panel_equal(result, expected)

def test_ctor_orderedDict(self):
    # 50 unique random integer keys, in whatever order set() yields
    keys = list(set(np.random.randint(0, 5000, 100)))[:50]
    frames = OrderedDict((k, mkdf(10, 5)) for k in keys)
    panel = Panel(frames)
    self.assertTrue(list(panel.items) == keys)
Expand Down Expand Up @@ -2147,6 +2159,7 @@ def check_drop(drop_val, axis_number, aliases, expected):
pprint_thing("Failed with axis_number %d and aliases: %s" %
(axis_number, aliases))
raise

# Items
expected = Panel({"One": df})
check_drop('Two', 0, ['items'], expected)
Expand Down
24 changes: 21 additions & 3 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from functools import wraps, partial
from contextlib import contextmanager
from distutils.version import LooseVersion
from collections import Mapping

from numpy.random import randn, rand
from numpy.testing.decorators import slow # noqa
Expand Down Expand Up @@ -1960,9 +1961,7 @@ def add_nans_panel4d(panel4d):


class TestSubDict(dict):

def __init__(self, *args, **kwargs):
dict.__init__(self, *args, **kwargs)
pass


# Dependency checks. Copied this from Nipy/Nipype (Copyright of
Expand Down Expand Up @@ -2726,6 +2725,25 @@ def patch(ob, attr, value):
setattr(ob, attr, old)


class MappingMock(Mapping):
"""
Mock class to represent a Mapping
Takes a base, and returns that multiplied by whatever key is passed in
"""

def __init__(self, base):
self.base = base

def __getitem__(self, key):
return key * self.base

def __iter__(self):
return iter([4, 5])

def __len__(self):
return 2


@contextmanager
def set_timezone(tz):
"""Context manager for temporarily setting a timezone.
Expand Down