@@ -141,9 +141,37 @@ def __init__(
141
141
if do_integrity_check :
142
142
self ._verify_integrity ()
143
143
144
+ # Populate known_consolidated, blknos, and blklocs lazily
144
145
self ._known_consolidated = False
146
+ self ._blknos = None
147
+ self ._blklocs = None
145
148
146
- self ._rebuild_blknos_and_blklocs ()
149
+ @property
150
+ def blknos (self ):
151
+ """
152
+ Suppose we want to find the array corresponding to our i'th column.
153
+
154
+ blknos[i] identifies the block from self.blocks that contains this column.
155
+
156
+ blklocs[i] identifies the column of interest within
157
+ self.blocks[self.blknos[i]]
158
+ """
159
+ if self ._blknos is None :
160
+ # Note: these can be altered by other BlockManager methods.
161
+ self ._rebuild_blknos_and_blklocs ()
162
+
163
+ return self ._blknos
164
+
165
+ @property
166
+ def blklocs (self ):
167
+ """
168
+ See blknos.__doc__
169
+ """
170
+ if self ._blklocs is None :
171
+ # Note: these can be altered by other BlockManager methods.
172
+ self ._rebuild_blknos_and_blklocs ()
173
+
174
+ return self ._blklocs
147
175
148
176
def make_empty (self , axes = None ) -> "BlockManager" :
149
177
""" return an empty BlockManager with the items axis of len 0 """
@@ -230,6 +258,7 @@ def _rebuild_blknos_and_blklocs(self) -> None:
230
258
new_blklocs [rl .indexer ] = np .arange (len (rl ))
231
259
232
260
if (new_blknos == - 1 ).any ():
261
+ # TODO: can we avoid this check? Scanning the whole array isn't cheap
233
262
raise AssertionError ("Gaps in blk ref_locs" )
234
263
235
264
self ._blknos = new_blknos
@@ -253,7 +282,7 @@ def get_dtype_counts(self):
253
282
254
283
def get_dtypes (self ):
255
284
dtypes = np .array ([blk .dtype for blk in self .blocks ])
256
- return algos .take_1d (dtypes , self ._blknos , allow_fill = False )
285
+ return algos .take_1d (dtypes , self .blknos , allow_fill = False )
257
286
258
287
def __getstate__ (self ):
259
288
block_values = [b .values for b in self .blocks ]
@@ -951,8 +980,8 @@ def iget(self, i: int) -> "SingleBlockManager":
951
980
"""
952
981
Return the data as a SingleBlockManager.
953
982
"""
954
- block = self .blocks [self ._blknos [i ]]
955
- values = block .iget (self ._blklocs [i ])
983
+ block = self .blocks [self .blknos [i ]]
984
+ values = block .iget (self .blklocs [i ])
956
985
957
986
# shortcut for select a single-dim from a 2-dim BM
958
987
return SingleBlockManager (
@@ -980,7 +1009,7 @@ def delete(self, item):
980
1009
else :
981
1010
affected_start = is_deleted .nonzero ()[0 ][0 ]
982
1011
983
- for blkno , _ in _fast_count_smallints (self ._blknos [affected_start :]):
1012
+ for blkno , _ in _fast_count_smallints (self .blknos [affected_start :]):
984
1013
blk = self .blocks [blkno ]
985
1014
bml = blk .mgr_locs
986
1015
blk_del = is_deleted [bml .indexer ].nonzero ()[0 ]
@@ -1026,6 +1055,8 @@ def iset(self, loc: Union[int, slice, np.ndarray], value):
1026
1055
"""
1027
1056
# FIXME: refactor, clearly separate broadcasting & zip-like assignment
1028
1057
# can prob also fix the various if tests for sparse/categorical
1058
+ if self ._blklocs is None and self .ndim > 1 :
1059
+ self ._rebuild_blknos_and_blklocs ()
1029
1060
1030
1061
value_is_extension_type = is_extension_array_dtype (value )
1031
1062
@@ -1055,8 +1086,9 @@ def value_getitem(placement):
1055
1086
if isinstance (loc , int ):
1056
1087
loc = [loc ]
1057
1088
1058
- blknos = self ._blknos [loc ]
1059
- blklocs = self ._blklocs [loc ].copy ()
1089
+ # Accessing the public blknos property ensures the lazily built private _blknos/_blklocs are initialized
1090
+ blknos = self .blknos [loc ]
1091
+ blklocs = self .blklocs [loc ].copy ()
1060
1092
1061
1093
unfit_mgr_locs = []
1062
1094
unfit_val_locs = []
@@ -1161,7 +1193,7 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False):
1161
1193
1162
1194
block = make_block (values = value , ndim = self .ndim , placement = slice (loc , loc + 1 ))
1163
1195
1164
- for blkno , count in _fast_count_smallints (self ._blknos [loc :]):
1196
+ for blkno , count in _fast_count_smallints (self .blknos [loc :]):
1165
1197
blk = self .blocks [blkno ]
1166
1198
if count == len (blk .mgr_locs ):
1167
1199
blk .mgr_locs = blk .mgr_locs .add (1 )
@@ -1170,7 +1202,8 @@ def insert(self, loc: int, item, value, allow_duplicates: bool = False):
1170
1202
new_mgr_locs [new_mgr_locs >= loc ] += 1
1171
1203
blk .mgr_locs = new_mgr_locs
1172
1204
1173
- if loc == self ._blklocs .shape [0 ]:
1205
+ # Accessing the public blklocs property ensures the private _blklocs/_blknos are initialized before they are reassigned below
1206
+ if loc == self .blklocs .shape [0 ]:
1174
1207
# np.append is a lot faster, let's use it if we can.
1175
1208
self ._blklocs = np .append (self ._blklocs , 0 )
1176
1209
self ._blknos = np .append (self ._blknos , len (self .blocks ))
@@ -1301,14 +1334,14 @@ def _slice_take_blocks_ax0(self, slice_or_indexer, fill_tuple=None):
1301
1334
]
1302
1335
1303
1336
if sl_type in ("slice" , "mask" ):
1304
- blknos = self ._blknos [slobj ]
1305
- blklocs = self ._blklocs [slobj ]
1337
+ blknos = self .blknos [slobj ]
1338
+ blklocs = self .blklocs [slobj ]
1306
1339
else :
1307
1340
blknos = algos .take_1d (
1308
- self ._blknos , slobj , fill_value = - 1 , allow_fill = allow_fill
1341
+ self .blknos , slobj , fill_value = - 1 , allow_fill = allow_fill
1309
1342
)
1310
1343
blklocs = algos .take_1d (
1311
- self ._blklocs , slobj , fill_value = - 1 , allow_fill = allow_fill
1344
+ self .blklocs , slobj , fill_value = - 1 , allow_fill = allow_fill
1312
1345
)
1313
1346
1314
1347
# When filling blknos, make sure blknos is updated before appending to
0 commit comments