Skip to content

Commit 8afa1b5

Browse files
committed
ENH: refactor Concatenator to work for ndim > 2, add join-multiple to Panel, #115
1 parent 66d9bad commit 8afa1b5

File tree

7 files changed

+258
-166
lines changed

7 files changed

+258
-166
lines changed

pandas/core/frame.py

+12 -6
Original file line number | Diff line number | Diff line change
@@ -299,6 +299,15 @@ def __init__(self, data=None, index=None, columns=None, dtype=None,
299299

300300
NDFrame.__init__(self, mgr)
301301

302+
@classmethod
303+
def _from_axes(cls, data, axes):
304+
# for construction from BlockManager
305+
if isinstance(data, BlockManager):
306+
return cls(data)
307+
else:
308+
columns, index = axes
309+
return cls(data, index=index, columns=columns, copy=False)
310+
302311
def _init_mgr(self, mgr, index, columns, dtype=None, copy=False):
303312
if columns is not None:
304313
mgr = mgr.reindex_axis(columns, axis=0, copy=False)
@@ -2751,9 +2760,6 @@ def append(self, other, ignore_index=False, verify_integrity=True):
27512760
return concat(to_concat, ignore_index=ignore_index,
27522761
verify_integrity=verify_integrity)
27532762

2754-
def _get_raw_column(self, col):
2755-
return self._data.get(col)
2756-
27572763
def join(self, other, on=None, how='left', lsuffix='', rsuffix=''):
27582764
"""
27592765
Join columns with other DataFrame either on index or on a key
@@ -2815,12 +2821,12 @@ def _join_compat(self, other, on=None, how='left', lsuffix='', rsuffix=''):
28152821
# join indexes only using concat
28162822
if how == 'left':
28172823
how = 'outer'
2818-
join_index = self.index
2824+
join_axes = [self.index]
28192825
else:
2820-
join_index = None
2826+
join_axes = None
28212827

28222828
return concat([self] + list(other), axis=1, join=how,
2823-
join_index=join_index, verify_integrity=True)
2829+
join_axes=join_axes, verify_integrity=True)
28242830

28252831
def merge(self, right, how='inner', on=None, left_on=None, right_on=None,
28262832
left_index=False, right_index=False, sort=True,

pandas/core/groupby.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1144,7 +1144,7 @@ def transform(self, func, *args, **kwargs):
11441144
applied.append(res)
11451145

11461146
concat_index = obj.columns if self.axis == 0 else obj.index
1147-
concatenated = concat(applied, join_index=concat_index,
1147+
concatenated = concat(applied, join_axes=[concat_index],
11481148
axis=self.axis, verify_integrity=False)
11491149
return concatenated.reindex_like(obj)
11501150

pandas/core/panel.py

+36 -12
Original file line number | Diff line number | Diff line change
@@ -232,6 +232,16 @@ def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
232232

233233
NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)
234234

235+
@classmethod
236+
def _from_axes(cls, data, axes):
237+
# for construction from BlockManager
238+
if isinstance(data, BlockManager):
239+
return cls(data)
240+
else:
241+
items, major, minor = axes
242+
return cls(data, items=items, major_axis=major,
243+
minor_axis=minor, copy=False)
244+
235245
def _init_dict(self, data, axes, dtype=None):
236246
items, major, minor = axes
237247

@@ -1067,13 +1077,13 @@ def truncate(self, before=None, after=None, axis='major'):
10671077

10681078
return self.reindex(**{axis : new_index})
10691079

1070-
def join(self, other, how=None, lsuffix='', rsuffix=''):
1080+
def join(self, other, how='left', lsuffix='', rsuffix=''):
10711081
"""
10721082
Join items with other Panel either on major and minor axes column
10731083
10741084
Parameters
10751085
----------
1076-
other : Panel
1086+
other : Panel or list of Panels
10771087
Index should be similar to one of the columns in this one
10781088
how : {'left', 'right', 'outer', 'inner'}
10791089
How to handle indexes of the two objects. Default: 'left'
@@ -1091,16 +1101,30 @@ def join(self, other, how=None, lsuffix='', rsuffix=''):
10911101
-------
10921102
joined : Panel
10931103
"""
1094-
if how is None:
1095-
how = 'left'
1096-
return self._join_index(other, how, lsuffix, rsuffix)
1097-
1098-
def _join_index(self, other, how, lsuffix, rsuffix):
1099-
join_major, join_minor = self._get_join_index(other, how)
1100-
this = self.reindex(major=join_major, minor=join_minor)
1101-
other = other.reindex(major=join_major, minor=join_minor)
1102-
merged_data = this._data.merge(other._data, lsuffix, rsuffix)
1103-
return self._constructor(merged_data)
1104+
from pandas.tools.merge import concat
1105+
1106+
if isinstance(other, Panel):
1107+
join_major, join_minor = self._get_join_index(other, how)
1108+
this = self.reindex(major=join_major, minor=join_minor)
1109+
other = other.reindex(major=join_major, minor=join_minor)
1110+
merged_data = this._data.merge(other._data, lsuffix, rsuffix)
1111+
return self._constructor(merged_data)
1112+
else:
1113+
if lsuffix or rsuffix:
1114+
raise ValueError('Suffixes not supported when passing multiple '
1115+
'panels')
1116+
1117+
if how == 'left':
1118+
how = 'outer'
1119+
join_axes = [self.major_axis, self.minor_axis]
1120+
elif how == 'right':
1121+
raise ValueError('Right join not supported with multiple '
1122+
'panels')
1123+
else:
1124+
join_axes = None
1125+
1126+
return concat([self] + list(other), axis=0, join=how,
1127+
join_axes=join_axes, verify_integrity=True)
11041128

11051129
def _get_join_index(self, other, how):
11061130
if how == 'left':

pandas/sparse/frame.py

+23 -4
Original file line number | Diff line number | Diff line change
@@ -3,7 +3,7 @@
33
float64 data
44
"""
55

6-
# pylint: disable=E1101,E1103,W0231
6+
# pylint: disable=E1101,E1103,W0231,E0202
77

88
from numpy import nan
99
import numpy as np
@@ -13,10 +13,24 @@
1313
from pandas.core.series import Series
1414
from pandas.core.frame import (DataFrame, extract_index, _prep_ndarray,
1515
_default_index)
16+
from pandas.util.decorators import cache_readonly
1617
import pandas.core.datetools as datetools
1718

1819
from pandas.sparse.series import SparseSeries
1920

21+
22+
class _SparseMockBlockManager(object):
23+
24+
def __init__(self, sp_frame):
25+
self.sp_frame = sp_frame
26+
27+
def get(self, item):
28+
return self.sp_frame[item].values
29+
30+
@property
31+
def axes(self):
32+
return [self.sp_frame.columns, self.sp_frame.index]
33+
2034
class SparseDataFrame(DataFrame):
2135
"""
2236
DataFrame containing sparse floating point data in the form of SparseSeries
@@ -71,6 +85,14 @@ def __init__(self, data=None, index=None, columns=None,
7185
self.columns = columns
7286
self.index = index
7387

88+
def _from_axes(self, data, axes):
89+
columns, index = axes
90+
return self._constructor(data, index=index, columns=columns)
91+
92+
@cache_readonly
93+
def _data(self):
94+
return _SparseMockBlockManager(self)
95+
7496
def _get_numeric_columns(self):
7597
# everything is necessarily float64
7698
return self.columns
@@ -512,9 +534,6 @@ def _rename_columns_inplace(self, mapper):
512534
self.columns = new_columns
513535
self._series = new_series
514536

515-
def _get_raw_column(self, col):
516-
return self._series[col].values
517-
518537
def add_prefix(self, prefix):
519538
f = (('%s' % prefix) + '%s').__mod__
520539
return self.rename(columns=f)

pandas/tests/test_panel.py

-42
Original file line number | Diff line number | Diff line change
@@ -902,48 +902,6 @@ def test_shift(self):
902902

903903
self.assertRaises(Exception, self.panel.shift, 1, axis='items')
904904

905-
def test_join(self):
906-
p1 = self.panel.ix[:2, :10, :3]
907-
p2 = self.panel.ix[2:, 5:, 2:]
908-
909-
# left join
910-
result = p1.join(p2)
911-
expected = p1.copy()
912-
expected['ItemC'] = p2['ItemC']
913-
assert_panel_equal(result, expected)
914-
915-
# right join
916-
result = p1.join(p2, how='right')
917-
expected = p2.copy()
918-
expected['ItemA'] = p1['ItemA']
919-
expected['ItemB'] = p1['ItemB']
920-
expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC'])
921-
assert_panel_equal(result, expected)
922-
923-
# inner join
924-
result = p1.join(p2, how='inner')
925-
expected = self.panel.ix[:, 5:10, 2:3]
926-
assert_panel_equal(result, expected)
927-
928-
# outer join
929-
result = p1.join(p2, how='outer')
930-
expected = p1.reindex(major=self.panel.major_axis,
931-
minor=self.panel.minor_axis)
932-
expected = expected.join(p2.reindex(major=self.panel.major_axis,
933-
minor=self.panel.minor_axis))
934-
assert_panel_equal(result, expected)
935-
936-
def test_join_overlap(self):
937-
p1 = self.panel.ix[['ItemA', 'ItemB', 'ItemC']]
938-
p2 = self.panel.ix[['ItemB', 'ItemC']]
939-
940-
joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2')
941-
p1_suf = p1.ix[['ItemB', 'ItemC']].add_suffix('_p1')
942-
p2_suf = p2.ix[['ItemB', 'ItemC']].add_suffix('_p2')
943-
no_overlap = self.panel.ix[['ItemA']]
944-
expected = p1_suf.join(p2_suf).join(no_overlap)
945-
assert_panel_equal(joined, expected)
946-
947905
def test_repr_empty(self):
948906
empty = Panel()
949907
repr(empty)

0 commit comments

Comments
 (0)