-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: add to_xarray conversion method #11972
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,3 +20,4 @@ html5lib=1.0b2 | |
beautiful-soup=4.2.1 | ||
statsmodels | ||
jinja2=2.8 | ||
xarray |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,7 @@ bottleneck | |
sqlalchemy | ||
pymysql | ||
psycopg2 | ||
xarray | ||
|
||
# incompat with conda ATM | ||
# beautiful-soup |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -271,6 +271,7 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t | |
s | ||
s.dt.round('D') | ||
|
||
<<<<<<< 6693a723aa2a8a53a071860a43804c173a7f92c6 | ||
|
||
Formatting of integer in FloatIndex | ||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||
|
@@ -312,6 +313,35 @@ New Behavior: | |
s.index | ||
print(s.to_csv(path=None)) | ||
|
||
.. _whatsnew_0180.enhancements.xarray: | ||
|
||
to_xarray | ||
^^^^^^^^^ | ||
|
||
In a future version of pandas, we will be deprecating ``Panel`` and other objects with more than 2 dimensions. In order to provide continuity, | ||
all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which have | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. "which have" -> "which has" |
||
a pandas-like interface for > 2 ndim. | ||
|
||
See the `full xarray documentation <http://xarray.pydata.org/en/stable/>`__. | ||
|
||
.. code-block:: python | ||
|
||
In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4)) | ||
|
||
In [2]: p.to_xarray() | ||
Out[2]: | ||
<xarray.DataArray (items: 2, major_axis: 3, minor_axis: 4)> | ||
array([[[ 0, 1, 2, 3], | ||
[ 4, 5, 6, 7], | ||
[ 8, 9, 10, 11]], | ||
|
||
[[12, 13, 14, 15], | ||
[16, 17, 18, 19], | ||
[20, 21, 22, 23]]]) | ||
Coordinates: | ||
* items (items) int64 0 1 | ||
* major_axis (major_axis) int64 0 1 2 | ||
* minor_axis (minor_axis) int64 0 1 2 3 | ||
|
||
.. _whatsnew_0180.enhancements.other: | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1039,6 +1039,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs): | |
from pandas.io import clipboard | ||
clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs) | ||
|
||
def to_xarray(self): | ||
""" | ||
Return an xarray object from the pandas object. | ||
|
||
Returns | ||
------- | ||
a DataArray for a Series | ||
a Dataset for a DataFrame | ||
a DataArray for higher dims | ||
|
||
See Also | ||
-------- | ||
`xarray docs <http://xarray.pydata.org/en/stable/>`__ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Possibly this is the line it is choking on, since a "See Also" entry should be a Python object. I would just make it a "Notes" section instead of "See Also", and then say "See also the xarray docs ..". There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
""" | ||
import xarray | ||
if self.ndim == 1: | ||
return xarray.DataArray.from_series(self) | ||
elif self.ndim == 2: | ||
return xarray.Dataset.from_dataframe(self) | ||
|
||
# > 2 dims | ||
coords = [(a, self._get_axis(a)) for a in self._AXIS_ORDERS] | ||
return xarray.DataArray(self, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I wonder if we should add Panel4D support to the DataArray constructor so this could just be [code snippet lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, it's no big deal. You could always add it later if it's really useful. |
||
coords=coords, | ||
) | ||
|
||
# ---------------------------------------------------------------------- | ||
# Fancy Indexing | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,7 @@ | |
|
||
from distutils.version import LooseVersion | ||
from pandas import (Index, Series, DataFrame, Panel, isnull, | ||
date_range, period_range) | ||
date_range, period_range, Panel4D) | ||
from pandas.core.index import MultiIndex | ||
|
||
import pandas.core.common as com | ||
|
@@ -18,6 +18,8 @@ | |
from pandas.util.testing import (assert_series_equal, | ||
assert_frame_equal, | ||
assert_panel_equal, | ||
assert_panel4d_equal, | ||
assert_almost_equal, | ||
assert_equal) | ||
import pandas.util.testing as tm | ||
|
||
|
@@ -1057,6 +1059,52 @@ def test_describe_none(self): | |
expected = Series([0, 0], index=['count', 'unique'], name='None') | ||
assert_series_equal(noneSeries.describe(), expected) | ||
|
||
def test_to_xarray(self): | ||
|
||
tm._skip_if_no_xarray() | ||
from xarray import DataArray | ||
|
||
s = Series([]) | ||
s.index.name = 'foo' | ||
result = s.to_xarray() | ||
self.assertEqual(len(result), 0) | ||
self.assertEqual(len(result.coords), 1) | ||
assert_almost_equal(list(result.coords.keys()), ['foo']) | ||
self.assertIsInstance(result, DataArray) | ||
|
||
def testit(index, check_index_type=True): | ||
s = Series(range(6), index=index(6)) | ||
s.index.name = 'foo' | ||
result = s.to_xarray() | ||
repr(result) | ||
self.assertEqual(len(result), 6) | ||
self.assertEqual(len(result.coords), 1) | ||
assert_almost_equal(list(result.coords.keys()), ['foo']) | ||
self.assertIsInstance(result, DataArray) | ||
|
||
# idempotency | ||
assert_series_equal(result.to_series(), s, | ||
check_index_type=check_index_type) | ||
|
||
for index in [tm.makeFloatIndex, tm.makeIntIndex, | ||
tm.makeStringIndex, tm.makeUnicodeIndex, | ||
tm.makeDateIndex, tm.makePeriodIndex, | ||
tm.makeTimedeltaIndex]: | ||
testit(index) | ||
|
||
# not idempotent | ||
testit(tm.makeCategoricalIndex, check_index_type=False) | ||
|
||
s = Series(range(6)) | ||
s.index.name = 'foo' | ||
s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)], | ||
names=['one', 'two']) | ||
result = s.to_xarray() | ||
self.assertEqual(len(result), 2) | ||
assert_almost_equal(list(result.coords.keys()), ['one', 'two']) | ||
self.assertIsInstance(result, DataArray) | ||
assert_series_equal(result.to_series(), s) | ||
|
||
|
||
class TestDataFrame(tm.TestCase, Generic): | ||
_typ = DataFrame | ||
|
@@ -1777,11 +1825,103 @@ def test_pct_change(self): | |
|
||
self.assert_frame_equal(result, expected) | ||
|
||
def test_to_xarray(self): | ||
|
||
tm._skip_if_no_xarray() | ||
from xarray import Dataset | ||
|
||
df = DataFrame({'a': list('abc'), | ||
'b': list(range(1, 4)), | ||
'c': np.arange(3, 6).astype('u1'), | ||
'd': np.arange(4.0, 7.0, dtype='float64'), | ||
'e': [True, False, True], | ||
'f': pd.Categorical(list('abc')), | ||
'g': pd.date_range('20130101', periods=3), | ||
'h': pd.date_range('20130101', | ||
periods=3, | ||
tz='US/Eastern')} | ||
) | ||
|
||
df.index.name = 'foo' | ||
result = df[0:0].to_xarray() | ||
self.assertEqual(result.dims['foo'], 0) | ||
self.assertIsInstance(result, Dataset) | ||
|
||
for index in [tm.makeFloatIndex, tm.makeIntIndex, | ||
tm.makeStringIndex, tm.makeUnicodeIndex, | ||
tm.makeDateIndex, tm.makePeriodIndex, | ||
tm.makeCategoricalIndex, tm.makeTimedeltaIndex]: | ||
df.index = index(3) | ||
df.index.name = 'foo' | ||
df.columns.name = 'bar' | ||
result = df.to_xarray() | ||
self.assertEqual(result.dims['foo'], 3) | ||
self.assertEqual(len(result.coords), 1) | ||
self.assertEqual(len(result.data_vars), 8) | ||
assert_almost_equal(list(result.coords.keys()), ['foo']) | ||
self.assertIsInstance(result, Dataset) | ||
|
||
# idempotency | ||
# categoricals are not preserved | ||
# datetimes w/tz are not preserved | ||
# column names are lost | ||
expected = df.copy() | ||
expected['f'] = expected['f'].astype(object) | ||
expected['h'] = expected['h'].astype('datetime64[ns]') | ||
expected.columns.name = None | ||
assert_frame_equal(result.to_dataframe(), | ||
expected, | ||
check_index_type=False) | ||
|
||
# not implemented | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. What am I doing wrong here?
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @shoyer ? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'm going to blame this one (in part) on pandas's Categorical: [code snippet lost in extraction]
Instead of erroring, it ignores the reshape argument (to 2D). This certainly needs a fix in xarray, too, though -- we should use [code snippet lost in extraction]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, going to merge as is then. Do you want me to create an issue on xarray for this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, let's merge -- I'll fix that in the next xarray bug-fix release. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yep, thanks. |
||
df.index = pd.MultiIndex.from_product([['a'], range(3)], | ||
names=['one', 'two']) | ||
self.assertRaises(ValueError, lambda: df.to_xarray()) | ||
|
||
|
||
class TestPanel(tm.TestCase, Generic): | ||
_typ = Panel | ||
_comparator = lambda self, x, y: assert_panel_equal(x, y) | ||
|
||
def test_to_xarray(self): | ||
|
||
tm._skip_if_no_xarray() | ||
from xarray import DataArray | ||
|
||
p = tm.makePanel() | ||
|
||
result = p.to_xarray() | ||
self.assertIsInstance(result, DataArray) | ||
self.assertEqual(len(result.coords), 3) | ||
assert_almost_equal(list(result.coords.keys()), | ||
['items', 'major_axis', 'minor_axis']) | ||
self.assertEqual(len(result.dims), 3) | ||
|
||
# idempotency | ||
assert_panel_equal(result.to_pandas(), p) | ||
|
||
|
||
class TestPanel4D(tm.TestCase, Generic): | ||
_typ = Panel4D | ||
_comparator = lambda self, x, y: assert_panel4d_equal(x, y) | ||
|
||
def test_to_xarray(self): | ||
|
||
tm._skip_if_no_xarray() | ||
from xarray import DataArray | ||
|
||
p = tm.makePanel4D() | ||
|
||
result = p.to_xarray() | ||
self.assertIsInstance(result, DataArray) | ||
self.assertEqual(len(result.coords), 4) | ||
assert_almost_equal(list(result.coords.keys()), | ||
['labels', 'items', 'major_axis', 'minor_axis']) | ||
self.assertEqual(len(result.dims), 4) | ||
|
||
# non-convertible | ||
self.assertRaises(ValueError, lambda: result.to_pandas()) | ||
|
||
|
||
class TestNDFrame(tm.TestCase): | ||
# tests that don't fit elsewhere | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
merge conflict
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yep fixing as I am merging
thxs