@@ -470,6 +470,75 @@ Storing Attributes to a group node
470
470
store.close()
471
471
os.remove(' test.h5' )
472
472
473
+
474
+ .. _cookbook.binary:
475
+
476
+ Binary Files
477
+ ~~~~~~~~~~~~
478
+
479
+ Pandas readily accepts numpy record arrays, if you need to read in a binary
480
+ file consisting of an array of C structs. For example, given this C program
481
+ in a file called ``main.c`` compiled with ``gcc main.c -std=gnu99`` on a
482
+ 64-bit machine,
483
+
484
+ .. code-block:: c
485
+
486
+ #include <stdio.h>
487
+ #include <stdint.h>
488
+
489
+ typedef struct _Data
490
+ {
491
+ int32_t count;
492
+ double avg;
493
+ float scale;
494
+ } Data;
495
+
496
+ int main(int argc, const char *argv[])
497
+ {
498
+ size_t n = 10;
499
+ Data d[n];
500
+
501
+ for (int i = 0; i < n; ++i)
502
+ {
503
+ d[i].count = i;
504
+ d[i].avg = i + 1.0;
505
+ d[i].scale = (float) i + 2.0f;
506
+ }
507
+
508
+ FILE *file = fopen("binary.dat", "wb");
509
+ fwrite(&d, sizeof(Data), n, file);
510
+ fclose(file);
511
+
512
+ return 0;
513
+ }
514
+
515
+ the following Python code will read the binary file ``'binary.dat'`` into a
516
+ pandas ``DataFrame``, where each element of the struct corresponds to a column
517
+ in the frame:
518
+
519
+ .. code-block:: python
520
+
521
+ import numpy as np
522
+ from pandas import DataFrame
523
+
524
+ names = ' count' , ' avg' , ' scale'
525
+
526
+ # note that the offsets are larger than the size of the type because of
527
+ # struct padding
528
+ offsets = 0 , 8 , 16
529
+ formats = ' i4' , ' f8' , ' f4'
530
+ dt = np.dtype({' names' : names, ' offsets' : offsets, ' formats' : formats},
531
+ align = True )
532
+ df = DataFrame(np.fromfile(' binary.dat' , dt))
533
+
534
+ .. note::
535
+
536
+ The offsets of the structure elements may be different depending on the
537
+ architecture of the machine on which the file was created. Using a raw
538
+ binary file format like this for general data storage is not recommended, as
539
+ it is not cross-platform. We recommend either HDF5 or msgpack, both of
540
+ which are supported by pandas' IO facilities.
541
+
473
542
Computation
474
543
-----------
475
544
0 commit comments