Skip to content

Commit 93947df

Browse files
committed
REF: more refactoring for LongPanel removal
1 parent 04edff4 commit 93947df

File tree

4 files changed

+93
-51
lines changed

4 files changed

+93
-51
lines changed

pandas/core/frame.py

+34-19
Original file line numberDiff line numberDiff line change
@@ -100,11 +100,11 @@ def _add_stat_doc(f, name, shortname, na_action=_doc_exclude_na,
100100
f.__doc__ = doc
101101

102102
def _arith_method(func, name, default_axis='columns'):
103-
def f(self, other, axis=default_axis, fill_value=None):
103+
def f(self, other, axis=default_axis, level=None, fill_value=None):
104104
if isinstance(other, DataFrame): # Another DataFrame
105-
return self._combine_frame(other, func, fill_value)
105+
return self._combine_frame(other, func, fill_value, level)
106106
elif isinstance(other, Series):
107-
return self._combine_series(other, func, fill_value, axis)
107+
return self._combine_series(other, func, fill_value, axis, level)
108108
else:
109109
return self._combine_const(other, func)
110110

@@ -1258,7 +1258,7 @@ def xs(self, key, axis=0, copy=True):
12581258
#----------------------------------------------------------------------
12591259
# Reindexing and alignment
12601260

1261-
def align(self, other, join='outer', axis=None, copy=True):
1261+
def align(self, other, join='outer', axis=None, level=None, copy=True):
12621262
"""
12631263
Align two DataFrame objects on their index and columns with the specified
12641264
join method for each axis Index
@@ -1276,13 +1276,16 @@ def align(self, other, join='outer', axis=None, copy=True):
12761276
Aligned Series
12771277
"""
12781278
if isinstance(other, DataFrame):
1279-
return self._align_frame(other, join=join, axis=axis, copy=copy)
1279+
return self._align_frame(other, join=join, axis=axis, level=level,
1280+
copy=copy)
12801281
elif isinstance(other, Series):
1281-
return self._align_series(other, join=join, axis=axis, copy=copy)
1282+
return self._align_series(other, join=join, axis=axis, level=level,
1283+
copy=copy)
12821284
else: # pragma: no cover
12831285
raise TypeError('unsupported type: %s' % type(other))
12841286

1285-
def _align_frame(self, other, join='outer', axis=None, copy=True):
1287+
def _align_frame(self, other, join='outer', axis=None, level=None,
1288+
copy=True):
12861289
# defaults
12871290
join_index = self.index
12881291
join_columns = self.columns
@@ -1291,14 +1294,14 @@ def _align_frame(self, other, join='outer', axis=None, copy=True):
12911294

12921295
if axis is None or axis == 0:
12931296
if not self.index.equals(other.index):
1294-
join_index, ilidx, iridx = self.index.join(other.index, how=join,
1295-
return_indexers=True)
1297+
join_index, ilidx, iridx = \
1298+
self.index.join(other.index, how=join, return_indexers=True)
12961299

12971300
if axis is None or axis == 1:
12981301
if not self.columns.equals(other.columns):
1299-
join_columns, clidx, cridx = self.columns.join(other.columns,
1300-
how=join,
1301-
return_indexers=True)
1302+
join_columns, clidx, cridx = \
1303+
self.columns.join(other.columns, how=join,
1304+
return_indexers=True)
13021305

13031306
def _align(frame, row_idx, col_idx):
13041307
new_data = frame._data
@@ -1318,7 +1321,8 @@ def _align(frame, row_idx, col_idx):
13181321
right = _align(other, iridx, cridx)
13191322
return left, right
13201323

1321-
def _align_series(self, other, join='outer', axis=None, copy=True):
1324+
def _align_series(self, other, join='outer', axis=None, level=None,
1325+
copy=True):
13221326
fdata = self._data
13231327
if axis == 0:
13241328
join_index = self.index
@@ -1348,7 +1352,8 @@ def _align_series(self, other, join='outer', axis=None, copy=True):
13481352
right_result = other if ridx is None else other.reindex(join_index)
13491353
return left_result, right_result
13501354

1351-
def reindex(self, index=None, columns=None, method=None, copy=True):
1355+
def reindex(self, index=None, columns=None, method=None, level=None,
1356+
copy=True):
13521357
"""Conform DataFrame to new index with optional filling logic, placing
13531358
NA/NaN in locations having no value in the previous index. A new object
13541359
is produced unless the new index is equivalent to the current one and
@@ -1381,15 +1386,15 @@ def reindex(self, index=None, columns=None, method=None, copy=True):
13811386

13821387
if index is not None:
13831388
index = _ensure_index(index)
1384-
frame = frame._reindex_index(index, method, copy)
1389+
frame = frame._reindex_index(index, method, copy, level)
13851390

13861391
if columns is not None:
13871392
columns = _ensure_index(columns)
1388-
frame = frame._reindex_columns(columns, copy)
1393+
frame = frame._reindex_columns(columns, copy, level)
13891394

13901395
return frame
13911396

1392-
def _reindex_index(self, new_index, method, copy):
1397+
def _reindex_index(self, new_index, method, copy, level):
13931398
if new_index.equals(self.index):
13941399
if copy:
13951400
result = self.copy()
@@ -1916,8 +1921,8 @@ def _rename_columns_inplace(self, mapper):
19161921
#----------------------------------------------------------------------
19171922
# Arithmetic / combination related
19181923

1919-
def _combine_frame(self, other, func, fill_value=None):
1920-
this, other = self.align(other, join='outer', copy=False)
1924+
def _combine_frame(self, other, func, fill_value=None, level=None):
1925+
this, other = self.align(other, join='outer', level=level, copy=False)
19211926
new_index, new_columns = this.index, this.columns
19221927

19231928
this_vals = this.values
@@ -3654,6 +3659,16 @@ def _is_sequence(x):
36543659
except Exception:
36553660
return False
36563661

3662+
def _align_level(frame, multi_index, level, axis=0):
3663+
levnum = multi_index._get_level_number(level)
3664+
3665+
data = frame.reindex(multi_index.levels[levnum], copy=False)._data
3666+
3667+
mgr_axis = 0 if axis == 1 else 1
3668+
new_data = data.reindex_indexer(multi_index, multi_index.labels[levnum],
3669+
axis=mgr_axis)
3670+
return DataFrame(new_data)
3671+
36573672
def install_ipython_completers(): # pragma: no cover
36583673
"""Register the DataFrame type with IPython's tab completion machinery, so
36593674
that it knows about accessing column names as attributes."""

pandas/core/internals.py

+36
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from collections import defaultdict
12
import itertools
23

34
from numpy import nan
@@ -685,6 +686,17 @@ def reindex_indexer(self, new_axis, indexer, axis=1):
685686

686687
return BlockManager(new_blocks, new_axes)
687688

689+
def _reindex_indexer_items(new_axis, indexer):
690+
from collections import defaultdict
691+
692+
dtypes = self.item_dtypes
693+
result_dtypes = dtypes.take(indexer)
694+
counts = defaultdict(int)
695+
for t in result_dtypes:
696+
counts[t] += 1
697+
698+
699+
688700
def reindex_items(self, new_items):
689701
"""
690702
@@ -871,6 +883,17 @@ def block_id_vector(self):
871883
assert((result >= 0).all())
872884
return result
873885

886+
@property
def item_dtypes(self):
    """
    dtype name of every item, positioned as in ``self.items``.

    Returns
    -------
    dtypes : ndarray of object
        Entry i is ``values.dtype.name`` of the block holding item i

    Raises
    ------
    AssertionError
        If some item is not covered by any block
    """
    n_items = len(self.items)
    result = np.empty(n_items, dtype='O')
    # Records which positions were written, so an item that belongs to no
    # block is caught instead of leaving an uninitialised slot behind.
    mask = np.zeros(n_items, dtype=bool)
    for blk in self.blocks:
        indexer = self.items.get_indexer(blk.items)
        result.put(indexer, blk.values.dtype.name)
        mask.put(indexer, 1)
    assert(mask.all())
    return result
896+
874897
def form_blocks(data, axes):
875898
# pre-filter out items if we passed it
876899
items = axes[0]
@@ -1242,3 +1265,16 @@ def _upcast_blocks(blocks):
12421265
# use any ref_items
12431266
return _consolidate(new_blocks, newb.ref_items)
12441267

1268+
def _make_block_indexers(blocks, indexer, block_ids, block_locs, block_dtypes,
1269+
ref_items):
1270+
counts = defaultdict(int)
1271+
for dtype_name in block_dtypes.take(indexer):
1272+
counts[dtype_name] += 1
1273+
1274+
findexer = np.empty(counts['float64'], dtype='i4')
1275+
bindexer = np.empty(counts['bool'], dtype='i4')
1276+
oindexer = np.empty(counts['object'], dtype='i4')
1277+
iindexer = np.empty(counts['int64'], dtype='i4')
1278+
1279+
for idx in indexer:
1280+
pass

pandas/io/pytables.py

+20-32
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,7 @@
99
import time
1010

1111
import numpy as np
12-
from pandas import (Series, TimeSeries, DataFrame, Panel, LongPanel,
13-
Index, MultiIndex)
12+
from pandas import Series, TimeSeries, DataFrame, Panel, Index, MultiIndex
1413
from pandas.core.common import adjoin
1514
import pandas.core.common as com
1615
import pandas._tseries as lib
@@ -20,8 +19,7 @@
2019
Series : 'series',
2120
TimeSeries : 'series',
2221
DataFrame : 'frame',
23-
Panel : 'wide',
24-
LongPanel : 'long'
22+
Panel : 'wide'
2523
}
2624

2725
_NAME_MAP = {
@@ -32,7 +30,6 @@
3230
'wide' : 'Panel',
3331
'wide_table' : 'Panel (Table)',
3432
'long' : 'LongPanel',
35-
3633
# legacy h5 files
3734
'Series' : 'Series',
3835
'TimeSeries' : 'TimeSeries',
@@ -244,7 +241,7 @@ def put(self, key, value, table=False, append=False,
244241
Parameters
245242
----------
246243
key : object
247-
value : {Series, DataFrame, Panel, LongPanel}
244+
value : {Series, DataFrame, Panel}
248245
table : boolean, default False
249246
Write as a PyTables Table structure which may perform worse but
250247
allow more flexible operations like searching / selecting subsets of
@@ -294,7 +291,7 @@ def append(self, key, value):
294291
Parameters
295292
----------
296293
key : object
297-
value : {Series, DataFrame, Panel, LongPanel}
294+
value : {Series, DataFrame, Panel}
298295
299296
Notes
300297
-----
@@ -340,6 +337,22 @@ def _write_frame(self, group, df):
340337
def _read_frame(self, group, where=None):
341338
return DataFrame(self._read_block_manager(group))
342339

340+
def _write_long(self, group, panel):
341+
if len(panel.values) == 0:
342+
raise ValueError('Can not write empty structure, data length was 0')
343+
self._write_block_manager(group, panel._data)
344+
345+
def _read_long(self, group, where=None):
    """
    Rebuild a MultiIndex-ed DataFrame from the 'items', 'major_axis',
    'minor_axis', 'major_labels', 'minor_labels' and 'values' nodes of
    ``group``.

    ``where`` is accepted but not used.

    NOTE(review): this layout differs from what the new ``_write_long``
    stores (a block manager) -- presumably kept for files written in the
    older format; confirm.
    """
    items = self._read_index(group, 'items')
    levels = [self._read_index(group, name)
              for name in ('major_axis', 'minor_axis')]
    labels = [_read_array(group, name)
              for name in ('major_labels', 'minor_labels')]
    values = _read_array(group, 'values')

    index = MultiIndex(levels=levels, labels=labels)
    return DataFrame(values, index=index, columns=items)
355+
343356
def _write_block_manager(self, group, data):
344357
if not data.is_consolidated():
345358
data = data.consolidate()
@@ -404,31 +417,6 @@ def _write_wide_table(self, group, panel, append=False, comp=None):
404417
def _read_wide_table(self, group, where=None):
405418
return self._read_panel_table(group, where)
406419

407-
def _write_long(self, group, panel, append=False):
408-
if len(panel.values) == 0:
409-
raise ValueError('Can not write empty structure, data length was 0')
410-
411-
self._write_index(group, 'major_axis', panel.major_axis)
412-
self._write_index(group, 'minor_axis', panel.minor_axis)
413-
self._write_index(group, 'items', panel.items)
414-
self._write_array(group, 'major_labels', panel.major_labels)
415-
self._write_array(group, 'minor_labels', panel.minor_labels)
416-
self._write_array(group, 'values', panel.values)
417-
418-
def _read_long(self, group, where=None):
419-
from pandas.core.index import MultiIndex
420-
421-
items = self._read_index(group, 'items')
422-
major_axis = self._read_index(group, 'major_axis')
423-
minor_axis = self._read_index(group, 'minor_axis')
424-
major_labels = _read_array(group, 'major_labels')
425-
minor_labels = _read_array(group, 'minor_labels')
426-
values = _read_array(group, 'values')
427-
428-
index = MultiIndex(levels=[major_axis, minor_axis],
429-
labels=[major_labels, minor_labels])
430-
return LongPanel(values, index=index, columns=items)
431-
432420
def _write_index(self, group, key, index):
433421
if len(index) == 0:
434422
raise ValueError('Can not write empty structure, axis length was 0')

pandas/tests/test_multilevel.py

+3
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ def test_append(self):
5454
result = a['A'].append(b['A'])
5555
tm.assert_series_equal(result, self.frame['A'])
5656

57+
def test_align_level(self):
    # Placeholder: no assertions have been written for level alignment yet
    return None
59+
5760
def test_pickle(self):
5861
import cPickle
5962
def _test_roundtrip(frame):

0 commit comments

Comments
 (0)