Skip to content

Commit 85de0b7

Browse files
committed
ENH: add to_xarray conversion method
closes pandas-dev#11972
1 parent 67730dd commit 85de0b7

File tree

9 files changed

+223
-1
lines changed

9 files changed

+223
-1
lines changed

ci/requirements-2.7.run

+1
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,4 @@ html5lib=1.0b2
2020
beautiful-soup=4.2.1
2121
statsmodels
2222
jinja2=2.8
23+
xarray

ci/requirements-3.5.run

+1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ bottleneck
1717
sqlalchemy
1818
pymysql
1919
psycopg2
20+
xarray
2021

2122
# incompat with conda ATM
2223
# beautiful-soup

doc/source/api.rst

+10
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,7 @@ Serialization / IO / Conversion
684684
Series.to_csv
685685
Series.to_dict
686686
Series.to_frame
687+
Series.to_xarray
687688
Series.to_hdf
688689
Series.to_sql
689690
Series.to_msgpack
@@ -918,6 +919,7 @@ Reshaping, sorting, transposing
918919
DataFrame.unstack
919920
DataFrame.T
920921
DataFrame.to_panel
922+
DataFrame.to_xarray
921923
DataFrame.transpose
922924

923925
Combining / joining / merging
@@ -1216,6 +1218,7 @@ Serialization / IO / Conversion
12161218
Panel.to_json
12171219
Panel.to_sparse
12181220
Panel.to_frame
1221+
Panel.to_xarray
12191222
Panel.to_clipboard
12201223

12211224
.. _api.panel4d:
@@ -1230,6 +1233,13 @@ Constructor
12301233

12311234
Panel4D
12321235

1236+
Serialization / IO / Conversion
1237+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
1238+
.. autosummary::
1239+
:toctree: generated/
1240+
1241+
Panel4D.to_xarray
1242+
12331243
Attributes and underlying data
12341244
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
12351245
**Axes**

doc/source/install.rst

+1
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ Optional Dependencies
244244
* `Cython <http://www.cython.org>`__: Only necessary to build development
245245
version. Version 0.19.1 or higher.
246246
* `SciPy <http://www.scipy.org>`__: miscellaneous statistical functions
247+
* `xarray <http://xarray.readthedocs.org>`__: pandas-like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended.
247248
* `PyTables <http://www.pytables.org>`__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended.
248249
* `SQLAlchemy <http://www.sqlalchemy.org>`__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs <http://docs.sqlalchemy.org/en/latest/dialects/index.html>`__. Some common drivers are:
249250

doc/source/whatsnew/v0.18.0.txt

+30
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,7 @@ In addition, ``.round()``, ``.floor()`` and ``.ceil()`` will be available thru t
271271
s
272272
s.dt.round('D')
273273

274+
<<<<<<< 6693a723aa2a8a53a071860a43804c173a7f92c6
274275

275276
Formatting of integer in FloatIndex
276277
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -312,6 +313,35 @@ New Behavior:
312313
s.index
313314
print(s.to_csv(path=None))
314315

316+
.. _whatsnew_0180.enhancements.xarray:
317+
318+
to_xarray
319+
^^^^^^^^^
320+
321+
In a future version of pandas, we will be deprecating ``Panel`` and other > 2 ndim objects. To provide continuity,
322+
all ``NDFrame`` objects have gained the ``.to_xarray()`` method in order to convert to ``xarray`` objects, which have
323+
a pandas-like interface for > 2 ndim.
324+
325+
See the `full xarray documentation here <http://xarray.pydata.org/en/stable/>`__.
326+
327+
.. code-block:: python
328+
329+
In [1]: p = Panel(np.arange(2*3*4).reshape(2,3,4))
330+
331+
In [2]: p.to_xarray()
332+
Out[2]:
333+
<xarray.DataArray (items: 2, major_axis: 3, minor_axis: 4)>
334+
array([[[ 0, 1, 2, 3],
335+
[ 4, 5, 6, 7],
336+
[ 8, 9, 10, 11]],
337+
338+
[[12, 13, 14, 15],
339+
[16, 17, 18, 19],
340+
[20, 21, 22, 23]]])
341+
Coordinates:
342+
* items (items) int64 0 1
343+
* major_axis (major_axis) int64 0 1 2
344+
* minor_axis (minor_axis) int64 0 1 2 3
315345

316346
.. _whatsnew_0180.enhancements.other:
317347

pandas/core/generic.py

+26
Original file line numberDiff line numberDiff line change
@@ -1039,6 +1039,32 @@ def to_clipboard(self, excel=None, sep=None, **kwargs):
10391039
from pandas.io import clipboard
10401040
clipboard.to_clipboard(self, excel=excel, sep=sep, **kwargs)
10411041

1042+
def to_xarray(self):
    """
    Convert this pandas object into the corresponding xarray object.

    Returns
    -------
    a DataArray for a Series
    a Dataset for a DataFrame
    a DataArray for higher dims

    See Also
    --------
    `xarray docs <http://xarray.pydata.org/en/stable/>`__
    """
    # imported inside the method so that importing pandas itself does
    # not require xarray
    import xarray

    ndim = self.ndim
    if ndim == 1:
        return xarray.DataArray.from_series(self)
    if ndim == 2:
        return xarray.Dataset.from_dataframe(self)

    # > 2 dims: pair each axis name with that axis' labels, in axis order
    axis_coords = []
    for axis_name in self._AXIS_ORDERS:
        axis_coords.append((axis_name, self._get_axis(axis_name)))
    return xarray.DataArray(self, coords=axis_coords)
1067+
10421068
# ----------------------------------------------------------------------
10431069
# Fancy Indexing
10441070

pandas/tests/test_generic.py

+141-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99
from distutils.version import LooseVersion
1010
from pandas import (Index, Series, DataFrame, Panel, isnull,
11-
date_range, period_range)
11+
date_range, period_range, Panel4D)
1212
from pandas.core.index import MultiIndex
1313

1414
import pandas.core.common as com
@@ -18,6 +18,8 @@
1818
from pandas.util.testing import (assert_series_equal,
1919
assert_frame_equal,
2020
assert_panel_equal,
21+
assert_panel4d_equal,
22+
assert_almost_equal,
2123
assert_equal)
2224
import pandas.util.testing as tm
2325

@@ -1057,6 +1059,52 @@ def test_describe_none(self):
10571059
expected = Series([0, 0], index=['count', 'unique'], name='None')
10581060
assert_series_equal(noneSeries.describe(), expected)
10591061

1062+
def test_to_xarray(self):
    """Check Series.to_xarray() for empty, flat-indexed and MultiIndexed
    Series, including the conversion back to a Series."""

    tm._skip_if_no_xarray()
    from xarray import DataArray

    # empty Series: still yields a DataArray with the single named coord
    s = Series([])
    s.index.name = 'foo'
    result = s.to_xarray()
    self.assertEqual(len(result), 0)
    self.assertEqual(len(result.coords), 1)
    assert_almost_equal(list(result.coords.keys()), ['foo'])
    self.assertIsInstance(result, DataArray)

    def testit(index, check_index_type=True):
        # `index` is one of the tm.make*Index factories
        s = Series(range(6), index=index(6))
        s.index.name = 'foo'
        result = s.to_xarray()
        # smoke test: repr must not raise
        repr(result)
        self.assertEqual(len(result), 6)
        self.assertEqual(len(result.coords), 1)
        assert_almost_equal(list(result.coords.keys()), ['foo'])
        self.assertIsInstance(result, DataArray)

        # round trip back to a Series
        assert_series_equal(result.to_series(), s,
                            check_index_type=check_index_type)

    for index in [tm.makeFloatIndex, tm.makeIntIndex,
                  tm.makeStringIndex, tm.makeUnicodeIndex,
                  tm.makeDateIndex, tm.makePeriodIndex,
                  tm.makeTimedeltaIndex]:
        testit(index)

    # the index type does not survive the round trip for categoricals
    testit(tm.makeCategoricalIndex, check_index_type=False)

    # MultiIndex: each level becomes its own coordinate
    s = Series(range(6))
    s.index = pd.MultiIndex.from_product([['a', 'b'], range(3)],
                                         names=['one', 'two'])
    result = s.to_xarray()
    self.assertEqual(len(result), 2)
    assert_almost_equal(list(result.coords.keys()), ['one', 'two'])
    self.assertIsInstance(result, DataArray)
    assert_series_equal(result.to_series(), s)
1107+
10601108

10611109
class TestDataFrame(tm.TestCase, Generic):
10621110
_typ = DataFrame
@@ -1777,11 +1825,103 @@ def test_pct_change(self):
17771825

17781826
self.assert_frame_equal(result, expected)
17791827

1828+
def test_to_xarray(self):
    """Check DataFrame.to_xarray() over a mixed-dtype frame and a range
    of index types, plus the conversion back to a DataFrame."""

    tm._skip_if_no_xarray()
    from xarray import Dataset

    columns = {'a': list('abc'),
               'b': list(range(1, 4)),
               'c': np.arange(3, 6).astype('u1'),
               'd': np.arange(4.0, 7.0, dtype='float64'),
               'e': [True, False, True],
               'f': pd.Categorical(list('abc')),
               'g': pd.date_range('20130101', periods=3),
               'h': pd.date_range('20130101',
                                  periods=3,
                                  tz='US/Eastern')}
    df = DataFrame(columns)

    # zero-row slice
    df.index.name = 'foo'
    result = df[0:0].to_xarray()
    self.assertEqual(result.dims['foo'], 0)
    self.assertIsInstance(result, Dataset)

    index_makers = [tm.makeFloatIndex, tm.makeIntIndex,
                    tm.makeStringIndex, tm.makeUnicodeIndex,
                    tm.makeDateIndex, tm.makePeriodIndex,
                    tm.makeCategoricalIndex, tm.makeTimedeltaIndex]
    for make_index in index_makers:
        df.index = make_index(3)
        df.index.name = 'foo'
        df.columns.name = 'bar'
        result = df.to_xarray()
        self.assertEqual(result.dims['foo'], 3)
        self.assertEqual(len(result.coords), 1)
        self.assertEqual(len(result.data_vars), 8)
        assert_almost_equal(list(result.coords.keys()), ['foo'])
        self.assertIsInstance(result, Dataset)

        # converting back:
        #   categoricals are not preserved
        #   datetimes w/tz are not preserved
        #   column names are lost
        expected = df.copy()
        expected['f'] = expected['f'].astype(object)
        expected['h'] = expected['h'].astype('datetime64[ns]')
        expected.columns.name = None
        roundtripped = result.to_dataframe()
        assert_frame_equal(roundtripped,
                           expected,
                           check_index_type=False)

    # not implemented
    multi = pd.MultiIndex.from_product([['a'], range(3)],
                                       names=['one', 'two'])
    df.index = multi
    self.assertRaises(ValueError, df.to_xarray)
1880+
17801881

17811882
class TestPanel(tm.TestCase, Generic):
17821883
_typ = Panel
17831884
_comparator = lambda self, x, y: assert_panel_equal(x, y)
17841885

1886+
def test_to_xarray(self):
    """Check Panel.to_xarray() gives a 3-d DataArray that converts back
    to an equal Panel."""

    tm._skip_if_no_xarray()
    from xarray import DataArray

    panel = tm.makePanel()
    converted = panel.to_xarray()

    self.assertIsInstance(converted, DataArray)
    self.assertEqual(len(converted.coords), 3)
    coord_names = list(converted.coords.keys())
    assert_almost_equal(coord_names,
                        ['items', 'major_axis', 'minor_axis'])
    self.assertEqual(len(converted.dims), 3)

    # converting back yields the original Panel
    assert_panel_equal(converted.to_pandas(), panel)
1902+
1903+
1904+
class TestPanel4D(tm.TestCase, Generic):
    """Generic NDFrame tests specialised for Panel4D."""

    _typ = Panel4D

    def _comparator(self, x, y):
        # equality check used for this type
        return assert_panel4d_equal(x, y)

    def test_to_xarray(self):
        """Check Panel4D.to_xarray() gives a 4-d DataArray."""

        tm._skip_if_no_xarray()
        from xarray import DataArray

        panel4d = tm.makePanel4D()
        converted = panel4d.to_xarray()

        self.assertIsInstance(converted, DataArray)
        self.assertEqual(len(converted.coords), 4)
        coord_names = list(converted.coords.keys())
        assert_almost_equal(coord_names,
                            ['labels', 'items', 'major_axis', 'minor_axis'])
        self.assertEqual(len(converted.dims), 4)

        # non-convertible
        self.assertRaises(ValueError, converted.to_pandas)
1924+
17851925

17861926
class TestNDFrame(tm.TestCase):
17871927
# tests that don't fit elsewhere

pandas/util/print_versions.py

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def show_versions(as_json=False):
6868
("numpy", lambda mod: mod.version.version),
6969
("scipy", lambda mod: mod.version.version),
7070
("statsmodels", lambda mod: mod.__version__),
71+
("xarray", lambda mod: mod.__version__),
7172
("IPython", lambda mod: mod.__version__),
7273
("sphinx", lambda mod: mod.__version__),
7374
("patsy", lambda mod: mod.__version__),

pandas/util/testing.py

+12
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,18 @@ def _skip_if_scipy_0_17():
224224
import nose
225225
raise nose.SkipTest("scipy 0.17")
226226

227+
def _skip_if_no_xarray():
    """Skip the calling test unless xarray >= 0.7.0 can be imported.

    Raises
    ------
    nose.SkipTest
        If xarray is not installed, or the installed version is older
        than 0.7.0.
    """
    try:
        import xarray
    except ImportError:
        import nose
        raise nose.SkipTest("xarray not installed")

    v = xarray.__version__
    # compare parsed versions on both sides rather than a raw string
    # against a LooseVersion
    if LooseVersion(v) < LooseVersion('0.7.0'):
        import nose
        raise nose.SkipTest("xarray version is too low: {0}".format(v))
238+
227239
def _skip_if_no_pytz():
228240
try:
229241
import pytz

0 commit comments

Comments
 (0)