@@ -17,8 +17,7 @@ class Block(object):
17
17
"""
18
18
__slots__ = ['items' , 'ref_items' , '_ref_locs' , 'values' , 'ndim' ]
19
19
20
- def __init__ (self , values , items , ref_items , ndim = 2 ,
21
- do_integrity_check = False ):
20
+ def __init__ (self , values , items , ref_items , ndim = 2 ):
22
21
if issubclass (values .dtype .type , basestring ):
23
22
values = np .array (values , dtype = object )
24
23
@@ -31,15 +30,6 @@ def __init__(self, values, items, ref_items, ndim=2,
31
30
self .items = _ensure_index (items )
32
31
self .ref_items = _ensure_index (ref_items )
33
32
34
- if do_integrity_check :
35
- self ._check_integrity ()
36
-
37
- def _check_integrity (self ):
38
- if len (self .items ) < 2 :
39
- return
40
- # monotonicity
41
- return (self .ref_locs [1 :] > self .ref_locs [:- 1 ]).all ()
42
-
43
33
@property
44
34
def ref_locs (self ):
45
35
if self ._ref_locs is None :
@@ -400,13 +390,11 @@ def should_store(self, value):
400
390
class DatetimeBlock (Block ):
401
391
_can_hold_na = True
402
392
403
- def __init__ (self , values , items , ref_items , ndim = 2 ,
404
- do_integrity_check = False ):
393
+ def __init__ (self , values , items , ref_items , ndim = 2 ):
405
394
if values .dtype != _NS_DTYPE :
406
395
values = lib .cast_to_nanoseconds (values )
407
396
408
- Block .__init__ (self , values , items , ref_items , ndim = ndim ,
409
- do_integrity_check = do_integrity_check )
397
+ Block .__init__ (self , values , items , ref_items , ndim = ndim )
410
398
411
399
def _can_hold_element (self , element ):
412
400
return com .is_integer (element ) or isinstance (element , datetime )
@@ -443,7 +431,7 @@ def get_values(self, dtype):
443
431
return self .values
444
432
445
433
446
- def make_block (values , items , ref_items , do_integrity_check = False ):
434
+ def make_block (values , items , ref_items ):
447
435
dtype = values .dtype
448
436
vtype = dtype .type
449
437
@@ -462,8 +450,7 @@ def make_block(values, items, ref_items, do_integrity_check=False):
462
450
else :
463
451
klass = ObjectBlock
464
452
465
- return klass (values , items , ref_items , ndim = values .ndim ,
466
- do_integrity_check = do_integrity_check )
453
+ return klass (values , items , ref_items , ndim = values .ndim )
467
454
468
455
# TODO: flexible with index=None and/or items=None
469
456
@@ -548,8 +535,7 @@ def __setstate__(self, state):
548
535
549
536
blocks = []
550
537
for values , items in zip (bvalues , bitems ):
551
- blk = make_block (values , items , self .axes [0 ],
552
- do_integrity_check = True )
538
+ blk = make_block (values , items , self .axes [0 ])
553
539
blocks .append (blk )
554
540
self .blocks = blocks
555
541
@@ -1079,8 +1065,7 @@ def _make_na_block(self, items, ref_items, fill_value=np.nan):
1079
1065
dtype = com ._infer_dtype (fill_value )
1080
1066
block_values = np .empty (block_shape , dtype = dtype )
1081
1067
block_values .fill (fill_value )
1082
- na_block = make_block (block_values , items , ref_items ,
1083
- do_integrity_check = True )
1068
+ na_block = make_block (block_values , items , ref_items )
1084
1069
return na_block
1085
1070
1086
1071
def take (self , indexer , axis = 1 ):
@@ -1236,69 +1221,66 @@ def item_dtypes(self):
1236
1221
assert (mask .all ())
1237
1222
return result
1238
1223
1239
- def form_blocks (data , axes ):
1224
+ def form_blocks (arrays , names , axes ):
1240
1225
# pre-filter out items if we passed it
1241
1226
items = axes [0 ]
1242
1227
1243
- if len (data ) < len (items ):
1244
- extra_items = items - Index (data . keys () )
1228
+ if len (arrays ) < len (items ):
1229
+ extra_items = items - Index (names )
1245
1230
else :
1246
1231
extra_items = []
1247
1232
1248
1233
# put "leftover" items in float bucket, where else?
1249
1234
# generalize?
1250
- float_dict = {}
1251
- complex_dict = {}
1252
- int_dict = {}
1253
- bool_dict = {}
1254
- object_dict = {}
1255
- datetime_dict = {}
1256
- for k , v in data . iteritems ( ):
1235
+ float_items = []
1236
+ complex_items = []
1237
+ int_items = []
1238
+ bool_items = []
1239
+ object_items = []
1240
+ datetime_items = []
1241
+ for k , v in zip ( names , arrays ):
1257
1242
if issubclass (v .dtype .type , np .floating ):
1258
- float_dict [ k ] = v
1243
+ float_items . append (( k , v ))
1259
1244
elif issubclass (v .dtype .type , np .complexfloating ):
1260
- complex_dict [ k ] = v
1245
+ complex_items . append (( k , v ))
1261
1246
elif issubclass (v .dtype .type , np .datetime64 ):
1262
1247
if v .dtype != _NS_DTYPE :
1263
1248
v = lib .cast_to_nanoseconds (v )
1264
- datetime_dict [k ] = v
1249
+
1250
+ if hasattr (v , 'tz' ) and v .tz is not None :
1251
+ object_items .append ((k , v ))
1252
+ else :
1253
+ datetime_items .append ((k , v ))
1265
1254
elif issubclass (v .dtype .type , np .integer ):
1266
- int_dict [ k ] = v
1255
+ int_items . append (( k , v ))
1267
1256
elif v .dtype == np .bool_ :
1268
- bool_dict [ k ] = v
1257
+ bool_items . append (( k , v ))
1269
1258
else :
1270
- object_dict [ k ] = v
1259
+ object_items . append (( k , v ))
1271
1260
1272
1261
blocks = []
1273
- if len (float_dict ):
1274
- float_block = _simple_blockify (float_dict , items , np .float64 )
1262
+ if len (float_items ):
1263
+ float_block = _simple_blockify (float_items , items , np .float64 )
1275
1264
blocks .append (float_block )
1276
1265
1277
- if len (complex_dict ):
1278
- complex_block = _simple_blockify (complex_dict , items , np .complex128 )
1266
+ if len (complex_items ):
1267
+ complex_block = _simple_blockify (complex_items , items , np .complex128 )
1279
1268
blocks .append (complex_block )
1280
1269
1281
- if len (int_dict ):
1282
- int_block = _simple_blockify (int_dict , items , np .int64 )
1270
+ if len (int_items ):
1271
+ int_block = _simple_blockify (int_items , items , np .int64 )
1283
1272
blocks .append (int_block )
1284
1273
1285
- for k , v in list (datetime_dict .items ()):
1286
- # hackeroo
1287
- if hasattr (v , 'tz' ) and v .tz is not None :
1288
- del datetime_dict [k ]
1289
- object_dict [k ] = v .asobject
1290
-
1291
- if len (datetime_dict ):
1292
- datetime_block = _simple_blockify (datetime_dict , items ,
1293
- np .dtype ('M8[ns]' ))
1274
+ if len (datetime_items ):
1275
+ datetime_block = _simple_blockify (datetime_items , items , _NS_DTYPE )
1294
1276
blocks .append (datetime_block )
1295
1277
1296
- if len (bool_dict ):
1297
- bool_block = _simple_blockify (bool_dict , items , np .bool_ )
1278
+ if len (bool_items ):
1279
+ bool_block = _simple_blockify (bool_items , items , np .bool_ )
1298
1280
blocks .append (bool_block )
1299
1281
1300
- if len (object_dict ) > 0 :
1301
- object_block = _simple_blockify (object_dict , items , np .object_ )
1282
+ if len (object_items ) > 0 :
1283
+ object_block = _simple_blockify (object_items , items , np .object_ )
1302
1284
blocks .append (object_block )
1303
1285
1304
1286
if len (extra_items ):
@@ -1309,22 +1291,21 @@ def form_blocks(data, axes):
1309
1291
1310
1292
block_values .fill (nan )
1311
1293
1312
- na_block = make_block (block_values , extra_items , items ,
1313
- do_integrity_check = True )
1294
+ na_block = make_block (block_values , extra_items , items )
1314
1295
blocks .append (na_block )
1315
1296
blocks = _consolidate (blocks , items )
1316
1297
1317
1298
return blocks
1318
1299
1319
- def _simple_blockify (dct , ref_items , dtype ):
1320
- block_items , values = _stack_dict ( dct , ref_items , dtype )
1300
+ def _simple_blockify (tuples , ref_items , dtype ):
1301
+ block_items , values = _stack_arrays ( tuples , ref_items , dtype )
1321
1302
# CHECK DTYPE?
1322
1303
if values .dtype != dtype : # pragma: no cover
1323
1304
values = values .astype (dtype )
1324
1305
1325
- return make_block (values , block_items , ref_items , do_integrity_check = True )
1306
+ return make_block (values , block_items , ref_items )
1326
1307
1327
- def _stack_dict ( dct , ref_items , dtype ):
1308
+ def _stack_arrays ( tuples , ref_items , dtype ):
1328
1309
from pandas .core .series import Series
1329
1310
1330
1311
# fml
@@ -1342,17 +1323,18 @@ def _shape_compat(x):
1342
1323
else :
1343
1324
return x .shape
1344
1325
1326
+ names , arrays = zip (* tuples )
1327
+
1345
1328
# index may box values
1346
- items = ref_items [[ x in dct for x in ref_items ] ]
1329
+ items = ref_items [ref_items . isin ( names ) ]
1347
1330
1348
- first = dct [ items [ 0 ] ]
1349
- shape = (len (dct ),) + _shape_compat (first )
1331
+ first = arrays [ 0 ]
1332
+ shape = (len (arrays ),) + _shape_compat (first )
1350
1333
1351
1334
stacked = np .empty (shape , dtype = dtype )
1352
- for i , item in enumerate (items ):
1353
- stacked [i ] = _asarray_compat (dct [ item ] )
1335
+ for i , arr in enumerate (arrays ):
1336
+ stacked [i ] = _asarray_compat (arr )
1354
1337
1355
- # stacked = np.vstack([_asarray_compat(dct[k]) for k in items])
1356
1338
return items , stacked
1357
1339
1358
1340
def _blocks_to_series_dict (blocks , index = None ):
@@ -1419,8 +1401,7 @@ def _merge_blocks(blocks, items):
1419
1401
return blocks [0 ]
1420
1402
new_values = _vstack ([b .values for b in blocks ])
1421
1403
new_items = blocks [0 ].items .append ([b .items for b in blocks [1 :]])
1422
- new_block = make_block (new_values , new_items , items ,
1423
- do_integrity_check = True )
1404
+ new_block = make_block (new_values , new_items , items )
1424
1405
return new_block .reindex_items_from (items )
1425
1406
1426
1407
def _union_block_items (blocks ):
0 commit comments