12
12
from pandas import (
13
13
Series , TimeSeries , DataFrame , Panel , Index , MultiIndex , Int64Index
14
14
)
15
+ from pandas .sparse .api import SparseSeries , SparseDataFrame , SparsePanel
16
+ from pandas .sparse .array import BlockIndex , IntIndex
15
17
from pandas .tseries .api import PeriodIndex , DatetimeIndex
16
18
from pandas .core .common import adjoin
17
19
from pandas .core .algorithms import match , unique
28
30
# reading and writing the full object in one go
29
31
# Maps each supported container class to the string tag used for it.
# The tags line up with the suffixes of the ``_write_<tag>`` /
# ``_read_<tag>`` methods visible in this module (e.g. 'sparse_series');
# NOTE(review): the dispatch code itself is outside this view -- confirm.
_TYPE_MAP = {
    # one-dimensional
    Series: 'series',
    SparseSeries: 'sparse_series',
    TimeSeries: 'series',
    # two-dimensional
    DataFrame: 'frame',
    SparseDataFrame: 'sparse_frame',
    # three-dimensional
    Panel: 'wide',
    SparsePanel: 'sparse_panel',
}
35
40
36
41
_NAME_MAP = {
37
42
'series' : 'Series' ,
38
43
'time_series' : 'TimeSeries' ,
44
+ 'sparse_series' : 'SparseSeries' ,
39
45
'frame' : 'DataFrame' ,
46
+ 'sparse_frame' : 'SparseDataFrame' ,
40
47
'frame_table' : 'DataFrame (Table)' ,
41
48
'wide' : 'Panel' ,
49
+ 'sparse_panel' : 'SparsePanel' ,
42
50
'wide_table' : 'Panel (Table)' ,
43
51
'long' : 'LongPanel' ,
44
52
# legacy h5 files
@@ -406,6 +414,78 @@ def _write_series(self, group, series):
406
414
self ._write_array (group , 'values' , series .values )
407
415
group ._v_attrs .name = series .name
408
416
417
def _write_sparse_series(self, group, series):
    """Store a sparse series inside ``group``.

    The series' ``index`` and ``sp_index`` are written through
    ``_write_index`` (under the keys 'index' and 'sp_index'), its
    ``sp_values`` through ``_write_array``, and its ``name``,
    ``fill_value`` and ``kind`` are recorded as attributes on
    ``group._v_attrs``.
    """
    # Index-like pieces first, then the stored values.
    for key in ('index', 'sp_index'):
        self._write_index(group, key, getattr(series, key))
    self._write_array(group, 'sp_values', series.sp_values)

    # Scalar metadata is kept as attributes of the group node.
    for attr in ('name', 'fill_value', 'kind'):
        setattr(group._v_attrs, attr, getattr(series, attr))
424
+
425
def _read_sparse_series(self, group, where=None):
    """Rebuild a ``SparseSeries`` from the contents of ``group``.

    Reads the 'index' and 'sp_index' entries via ``_read_index`` and the
    'sp_values' array via ``_read_array``.  Missing group attributes fall
    back to ``name=None``, ``fill_value=None`` and ``kind='block'``.

    ``where`` is accepted but not used by this method.
    """
    labels = self._read_index(group, 'index')
    values = _read_array(group, 'sp_values')
    sparse_index = self._read_index(group, 'sp_index')

    # Optional metadata, each with the fallback used when it is absent.
    meta = group._v_attrs
    options = {
        'name': getattr(meta, 'name', None),
        'fill_value': getattr(meta, 'fill_value', None),
        'kind': getattr(meta, 'kind', 'block'),
    }
    return SparseSeries(values, index=labels, sparse_index=sparse_index,
                        **options)
435
+
436
def _write_sparse_frame(self, group, sdf):
    """Store a sparse frame inside ``group``, one child group per column.

    Each ``(name, series)`` pair from ``sdf.iteritems()`` is written with
    ``_write_sparse_series`` into a child group named
    ``'sparse_series_<name>'``; an existing child of that name is reused,
    otherwise one is created through ``self.handle.createGroup``.  The
    frame's ``default_fill_value`` and ``default_kind`` are recorded as
    group attributes and its ``columns`` are written via ``_write_index``.
    """
    for col_name, col in sdf.iteritems():
        child_key = 'sparse_series_%s' % col_name
        if child_key in group._v_children:
            # Reuse the node left by an earlier write.
            child = getattr(group, child_key)
        else:
            child = self.handle.createGroup(group, child_key)
        self._write_sparse_series(child, col)

    group._v_attrs.default_fill_value = sdf.default_fill_value
    group._v_attrs.default_kind = sdf.default_kind
    self._write_index(group, 'columns', sdf.columns)
449
+
450
def _read_sparse_frame(self, group, where=None):
    """Rebuild a ``SparseDataFrame`` from the contents of ``group``.

    The 'columns' index is read first; for every column ``c`` the child
    group ``'sparse_series_<c>'`` is read with ``_read_sparse_series``.
    ``default_kind`` and ``default_fill_value`` are taken from the group
    attributes (an ``AttributeError`` propagates if either is missing).

    ``where`` is accepted but not used by this method.
    """
    columns = self._read_index(group, 'columns')

    # Column label -> sparse series read from its own child group.
    series_by_column = dict(
        (label,
         self._read_sparse_series(getattr(group,
                                          'sparse_series_%s' % label)))
        for label in columns)

    meta = group._v_attrs
    kind = meta.default_kind
    fill = meta.default_fill_value
    return SparseDataFrame(series_by_column, columns=columns,
                           default_kind=kind,
                           default_fill_value=fill)
462
+
463
def _write_sparse_panel(self, group, swide):
    """Store a sparse panel inside ``group``, one child group per item.

    The panel's ``default_fill_value`` and ``default_kind`` are recorded
    as group attributes and its ``items`` are written via
    ``_write_index``.  Each ``(name, frame)`` pair from
    ``swide.iteritems()`` is then written with ``_write_sparse_frame``
    into a child group named ``'sparse_frame_<name>'``, reusing an
    existing child of that name or creating one through
    ``self.handle.createGroup``.
    """
    group._v_attrs.default_fill_value = swide.default_fill_value
    group._v_attrs.default_kind = swide.default_kind
    self._write_index(group, 'items', swide.items)

    for item_name, frame in swide.iteritems():
        child_key = 'sparse_frame_%s' % item_name
        if child_key in group._v_children:
            # Reuse the node left by an earlier write.
            child = getattr(group, child_key)
        else:
            child = self.handle.createGroup(group, child_key)
        self._write_sparse_frame(child, frame)
475
+
476
def _read_sparse_panel(self, group, where=None):
    """Rebuild a ``SparsePanel`` from the contents of ``group``.

    ``default_fill_value`` and ``default_kind`` come from the group
    attributes (an ``AttributeError`` propagates if either is missing),
    the 'items' index is read via ``_read_index``, and for every item the
    child group ``'sparse_frame_<item>'`` is read with
    ``_read_sparse_frame``.

    ``where`` is accepted but not used by this method.
    """
    meta = group._v_attrs
    fill = meta.default_fill_value
    kind = meta.default_kind
    items = self._read_index(group, 'items')

    # Item label -> sparse frame read from its own child group.
    frames = dict(
        (label,
         self._read_sparse_frame(getattr(group,
                                         'sparse_frame_%s' % label)))
        for label in items)

    return SparsePanel(frames, items=items, default_kind=kind,
                       default_fill_value=fill)
488
+
409
489
def _write_frame (self , group , df ):
410
490
self ._write_block_manager (group , df ._data )
411
491
@@ -474,21 +554,32 @@ def _read_wide_table(self, group, where=None):
474
554
return self ._read_panel_table (group , where )
475
555
476
556
def _write_index (self , group , key , index ):
477
- if len (index ) == 0 :
478
- raise ValueError ('Can not write empty structure, axis length was 0' )
479
-
480
557
if isinstance (index , MultiIndex ):
558
+ if len (index ) == 0 :
559
+ raise ValueError ('Can not write empty structure, '
560
+ 'axis length was 0' )
561
+
481
562
setattr (group ._v_attrs , '%s_variety' % key , 'multi' )
482
563
self ._write_multi_index (group , key , index )
564
+ elif isinstance (index , BlockIndex ):
565
+ setattr (group ._v_attrs , '%s_variety' % key , 'block' )
566
+ self ._write_block_index (group , key , index )
567
+ elif isinstance (index , IntIndex ):
568
+ setattr (group ._v_attrs , '%s_variety' % key , 'sparseint' )
569
+ self ._write_sparse_intindex (group , key , index )
483
570
else :
571
+ if len (index ) == 0 :
572
+ raise ValueError ('Can not write empty structure, '
573
+ 'axis length was 0' )
574
+
484
575
setattr (group ._v_attrs , '%s_variety' % key , 'regular' )
485
576
converted , kind , _ = _convert_index (index )
486
577
self ._write_array (group , key , converted )
487
578
node = getattr (group , key )
488
579
node ._v_attrs .kind = kind
489
580
node ._v_attrs .name = index .name
490
581
491
- if isinstance (index , (DatetimeIndex , PeriodIndex )):
582
+ if isinstance (index , (DatetimeIndex , PeriodIndex , IntIndex )):
492
583
node ._v_attrs .index_class = type (index )
493
584
494
585
if hasattr (index , 'freq' ):
@@ -499,12 +590,36 @@ def _read_index(self, group, key):
499
590
500
591
if variety == 'multi' :
501
592
return self ._read_multi_index (group , key )
593
+ elif variety == 'block' :
594
+ return self ._read_block_index (group , key )
595
+ elif variety == 'sparseint' :
596
+ return self ._read_sparse_intindex (group , key )
502
597
elif variety == 'regular' :
503
598
_ , index = self ._read_index_node (getattr (group , key ))
504
599
return index
505
600
else : # pragma: no cover
506
601
raise Exception ('unrecognized index variety: %s' % variety )
507
602
603
def _write_block_index(self, group, key, index):
    """Store a block-style sparse index under ``key`` in ``group``.

    ``index.blocs`` and ``index.blengths`` are written as the arrays
    ``'<key>_blocs'`` and ``'<key>_blengths'``; ``index.length`` is kept
    as the group attribute ``'<key>_length'``.
    """
    array_fields = (
        ('%s_blocs', index.blocs),
        ('%s_blengths', index.blengths),
    )
    for template, data in array_fields:
        self._write_array(group, template % key, data)

    length_attr = '%s_length' % key
    setattr(group._v_attrs, length_attr, index.length)
607
+
608
def _read_block_index(self, group, key):
    """Rebuild a ``BlockIndex`` stored under ``key`` in ``group``.

    Reads the group attribute ``'<key>_length'`` and the arrays
    ``'<key>_blocs'`` and ``'<key>_blengths'``, then passes them to
    ``BlockIndex`` in that order.
    """
    total_length = getattr(group._v_attrs, '%s_length' % key)
    block_starts = _read_array(group, '%s_blocs' % key)
    block_sizes = _read_array(group, '%s_blengths' % key)
    rebuilt = BlockIndex(total_length, block_starts, block_sizes)
    return rebuilt
613
+
614
def _write_sparse_intindex(self, group, key, index):
    """Store an integer-style sparse index under ``key`` in ``group``.

    ``index.indices`` is written as the array ``'<key>_indices'`` and
    ``index.length`` is kept as the group attribute ``'<key>_length'``.
    """
    indices_key = '%s_indices' % key
    self._write_array(group, indices_key, index.indices)

    length_attr = '%s_length' % key
    setattr(group._v_attrs, length_attr, index.length)
617
+
618
def _read_sparse_intindex(self, group, key):
    """Rebuild an ``IntIndex`` stored under ``key`` in ``group``.

    Reads the group attribute ``'<key>_length'`` and the array
    ``'<key>_indices'``, then passes them to ``IntIndex`` in that order.
    """
    total_length = getattr(group._v_attrs, '%s_length' % key)
    positions = _read_array(group, '%s_indices' % key)
    rebuilt = IntIndex(total_length, positions)
    return rebuilt
622
+
508
623
def _write_multi_index (self , group , key , index ):
509
624
setattr (group ._v_attrs , '%s_nlevels' % key , index .nlevels )
510
625
0 commit comments