Skip to content

Commit 8af2745

Browse files
committed
tinkering with more general block structure
1 parent 90b7611 commit 8af2745

File tree

1 file changed

+87
-29
lines changed

1 file changed

+87
-29
lines changed

pandas/core/internals.py

+87-29
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,85 @@ def fillna(self, value):
146146
new_values.flat[mask] = value
147147
return make_block(new_values, self.columns, self.ref_columns)
148148

149+
class NDBlock(Block):
    """
    Block variant whose columns are laid out along axis 0 of ``values``

    ``get``, ``set``, ``delete`` and ``reindex_columns_from`` all address a
    column as ``values[loc]``, i.e. one entry along the first axis per column
    label.
    """

    def __init__(self, values, columns, ref_columns):
        """
        Parameters
        ----------
        values : ndarray
            Data held by the block, one column per entry along axis 0
        columns : sequence or Index
            Labels of the columns stored in this block
        ref_columns : sequence or Index
            Reference columns this block is positioned against
        """
        # NOTE(review): the orientation _convert_if_1d gives to 1-d input is
        # not visible here -- confirm it agrees with the axis-0 layout.
        values = _convert_if_1d(values)
        if issubclass(values.dtype.type, basestring):
            # Re-store string-typed data with object dtype
            values = np.array(values, dtype=object)

        self.values = values
        # Columns live on axis 0 (see get / set / delete), so that is the axis
        # whose length must agree with the number of column labels.
        assert(len(columns) == values.shape[0])
        self.columns = _ensure_index(columns)
        self.ref_columns = _ensure_index(ref_columns)

    def merge(self, other):
        """
        Merge this block with another block that has equal ref_columns

        Returns
        -------
        merged : result of _merge_blocks on [self, other]
        """
        assert(self.ref_columns.equals(other.ref_columns))

        # Not sure whether to allow this or not
        # if not union_ref.equals(other.ref_columns):
        #     union_ref = self.ref_columns + other.ref_columns
        # NOTE(review): _merge_blocks2 stacks values along axis 0; check
        # whether that is the helper intended for this layout.
        return _merge_blocks([self, other], self.ref_columns)

    def reindex_axis(self, indexer, notmask, needs_masking, axis=0):
        """
        Reindex using pre-computed indexer information

        Parameters
        ----------
        indexer : array-like
            Positions handed to ``values.take`` along ``axis``
        notmask : array-like
            Forwarded to ``common.null_out_axis`` when masking is needed
        needs_masking : bool
            Whether any of the taken positions has to be nulled out
        axis : int, default 0

        Returns
        -------
        reindexed : Block (new object)
        """
        new_values = self.values.take(indexer, axis=axis)
        if needs_masking:
            # Cast first so the array is able to hold the null marker
            # (presumably bool / int -> a NaN-capable dtype; see
            # _cast_if_bool_int)
            new_values = _cast_if_bool_int(new_values)
            common.null_out_axis(new_values, notmask, axis)
        return make_block(new_values, self.columns, self.ref_columns)

    def reindex_columns_from(self, new_columns):
        """
        Reindex to only those columns contained in the input set of columns

        E.g. if you have ['a', 'b'], and the input columns is ['b', 'c', 'd'],
        then the resulting columns will be ['b']

        Returns
        -------
        reindexed : Block
        """
        indexer, mask = self.columns.get_indexer(new_columns)
        # Keep only the positions flagged by the mask
        masked_idx = indexer[mask]
        new_values = self.values.take(masked_idx, axis=0)
        new_cols = self.columns.take(masked_idx)
        return make_block(new_values, new_cols, new_columns)

    def get(self, col):
        """
        Return the values stored for column ``col`` (``values[loc]``)
        """
        loc = self.columns.get_loc(col)
        return self.values[loc]

    def set(self, col, value):
        """
        Modify Block in-place with new column value

        Returns
        -------
        None
        """
        loc = self.columns.get_loc(col)
        self.values[loc] = value

    def delete(self, col):
        """
        Remove column ``col`` without modifying this block

        Returns
        -------
        y : Block (new object)
        """
        loc = self.columns.get_loc(col)
        new_cols = np.delete(np.asarray(self.columns), loc)
        new_values = np.delete(self.values, loc, 0)
        return make_block(new_values, new_cols, self.ref_columns)

    def fillna(self, value):
        """
        Replace null entries with ``value``

        Returns
        -------
        filled : Block (new object, built from a copy of the values)
        """
        new_values = self.values.copy()
        # Locate nulls on the flattened data so the mask lines up with .flat
        mask = common.isnull(new_values.ravel())
        new_values.flat[mask] = value
        return make_block(new_values, self.columns, self.ref_columns)
227+
149228
def _insert_into_columns(columns, col, loc):
150229
columns = np.asarray(columns)
151230
new_columns = np.insert(columns, loc, col)
@@ -188,7 +267,7 @@ def can_store(self, value):
188267
return not issubclass(value.dtype.type,
189268
(np.integer, np.floating, np.bool_))
190269

191-
def make_block(values, columns, ref_columns):
270+
def make_block(values, columns, ref_columns, ndim=2):
192271
dtype = values.dtype
193272
vtype = dtype.type
194273

@@ -598,21 +677,6 @@ def _stack_dict(dct):
598677
stacked = np.vstack([dct[k].values for k in columns]).T
599678
return columns, stacked
600679

601-
# def _float_blockify(dct, index, columns):
602-
# n = len(index)
603-
# k = len(columns)
604-
# values = np.empty((n, k), dtype=np.float64)
605-
# values.fill(nan)
606-
607-
# if len(dct) > 0:
608-
# dict_columns, stacked = _stack_dict(dct)
609-
# indexer, mask = columns.get_indexer(dict_columns)
610-
# assert(mask.all())
611-
# values[:, indexer] = stacked
612-
613-
# # do something with dtype?
614-
# return make_block(values, columns)
615-
616680
def add_na_columns(new_columns, index, ref_columns):
617681
# create new block, then consolidate
618682
values = _nan_array(index, new_columns)
@@ -629,9 +693,6 @@ def _slice_blocks(blocks, slice_obj):
629693
def _blocks_to_series_dict(blocks, index=None):
630694
series_dict = {}
631695

632-
# if index is None:
633-
# index = Index(np.arange(len(blocks[0])))
634-
635696
for block in blocks:
636697
for col, vec in zip(block.columns, block.values.T):
637698
series_dict[col] = Series(vec, index=index)
@@ -648,21 +709,12 @@ def _interleave(blocks, columns):
648709
result = np.empty((len(blocks[0]), len(columns)), dtype=dtype)
649710
colmask = np.zeros(len(columns), dtype=bool)
650711

712+
# By construction, all of the columns should be covered by one of the blocks
651713
for block in blocks:
652714
indexer, mask = columns.get_indexer(block.columns)
653715
assert(mask.all())
654716
result[:, indexer] = block.values
655-
656-
# may not need this
657-
# if mask.all():
658-
# result[:, indexer] = block.values
659-
# else:
660-
# indexer = indexer[mask]
661-
# result[:, indexer] = block.values[:, mask]
662-
663717
colmask[indexer] = 1
664-
665-
# By construction, all of the column should be covered by one of the blocks
666718
assert(colmask.all())
667719
return result
668720

@@ -720,6 +772,12 @@ def _merge_blocks(blocks, columns):
720772
new_block = make_block(new_values, new_cols, columns)
721773
return new_block.reindex_columns_from(columns)
722774

775+
def _merge_blocks2(blocks, columns):
    """
    Combine blocks whose values are stacked along axis 0

    The values of every block are stacked vertically and their column labels
    concatenated in the same order; the combined block is then reindexed
    against ``columns``.

    Returns
    -------
    merged : result of ``reindex_columns_from(columns)`` on the combined block
    """
    stacked_values = np.vstack([blk.values for blk in blocks])
    stacked_labels = np.concatenate([blk.columns for blk in blocks])
    combined = make_block(stacked_values, stacked_labels, columns)
    return combined.reindex_columns_from(columns)
780+
723781
def _union_block_columns(blocks):
724782
seen = None
725783
for block in blocks:

0 commit comments

Comments
 (0)