@@ -417,272 +417,12 @@ struct bch_set {
417
417
struct bch_val v ;
418
418
};
419
419
420
- /* Extents */
421
-
422
- /*
423
- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
424
- * preceded by checksum/compression information (bch_extent_crc32 or
425
- * bch_extent_crc64).
426
- *
427
- * One major determining factor in the format of extents is how we handle and
428
- * represent extents that have been partially overwritten and thus trimmed:
429
- *
430
- * If an extent is not checksummed or compressed, when the extent is trimmed we
431
- * don't have to remember the extent we originally allocated and wrote: we can
432
- * merely adjust ptr->offset to point to the start of the data that is currently
433
- * live. The size field in struct bkey records the current (live) size of the
434
- * extent, and is also used to mean "size of region on disk that we point to" in
435
- * this case.
436
- *
437
- * Thus an extent that is not checksummed or compressed will consist only of a
438
- * list of bch_extent_ptrs, with none of the fields in
439
- * bch_extent_crc32/bch_extent_crc64.
440
- *
441
- * When an extent is checksummed or compressed, it's not possible to read only
442
- * the data that is currently live: we have to read the entire extent that was
443
- * originally written, and then return only the part of the extent that is
444
- * currently live.
445
- *
446
- * Thus, in addition to the current size of the extent in struct bkey, we need
447
- * to store the size of the originally allocated space - this is the
448
- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
449
- * when the extent is trimmed, instead of modifying the offset field of the
450
- * pointer, we keep a second smaller offset field - "offset into the original
451
- * extent of the currently live region".
452
- *
453
- * The other major determining factor is replication and data migration:
454
- *
455
- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
456
- * write, we will initially write all the replicas in the same format, with the
457
- * same checksum type and compression format - however, when copygc runs later (or
458
- * tiering/cache promotion, anything that moves data), it is not in general
459
- * going to rewrite all the pointers at once - one of the replicas may be in a
460
- * bucket on one device that has very little fragmentation while another lives
461
- * in a bucket that has become heavily fragmented, and thus is being rewritten
462
- * sooner than the rest.
463
- *
464
- * Thus it will only move a subset of the pointers (or in the case of
465
- * tiering/cache promotion perhaps add a single pointer without dropping any
466
- * current pointers), and if the extent has been partially overwritten it must
467
- * write only the currently live portion (or copygc would not be able to reduce
468
- * fragmentation!) - which necessitates a different bch_extent_crc format for
469
- * the new pointer.
470
- *
471
- * But in the interests of space efficiency, we don't want to store one
472
- * bch_extent_crc for each pointer if we don't have to.
473
- *
474
- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
475
- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
476
- * type of a given entry with a scheme similar to utf8 (except we're encoding a
477
- * type, not a size), encoding the type in the position of the first set bit:
478
- *
479
- * bch_extent_crc32 - 0b1
480
- * bch_extent_ptr - 0b10
481
- * bch_extent_crc64 - 0b100
482
- *
483
- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
484
- * bch_extent_crc64 is the least constrained).
485
- *
486
- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
487
- * until the next bch_extent_crc32/64.
488
- *
489
- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
490
- * is neither checksummed nor compressed.
491
- */
492
-
493
420
/* 128 bits, sufficient for cryptographic MACs: */
494
421
struct bch_csum {
495
422
__le64 lo ;
496
423
__le64 hi ;
497
424
} __packed __aligned (8 );
498
425
499
- #define BCH_EXTENT_ENTRY_TYPES () \
500
- x(ptr, 0) \
501
- x(crc32, 1) \
502
- x(crc64, 2) \
503
- x(crc128, 3) \
504
- x(stripe_ptr, 4) \
505
- x(rebalance, 5)
506
- #define BCH_EXTENT_ENTRY_MAX 6
507
-
508
- enum bch_extent_entry_type {
509
- #define x(f, n) BCH_EXTENT_ENTRY_##f = n,
510
- BCH_EXTENT_ENTRY_TYPES ()
511
- #undef x
512
- };
513
-
514
- /* Compressed/uncompressed size are stored biased by 1: */
515
- struct bch_extent_crc32 {
516
- #if defined(__LITTLE_ENDIAN_BITFIELD )
517
- __u32 type :2 ,
518
- _compressed_size :7 ,
519
- _uncompressed_size :7 ,
520
- offset :7 ,
521
- _unused :1 ,
522
- csum_type :4 ,
523
- compression_type :4 ;
524
- __u32 csum ;
525
- #elif defined (__BIG_ENDIAN_BITFIELD )
526
- __u32 csum ;
527
- __u32 compression_type :4 ,
528
- csum_type :4 ,
529
- _unused :1 ,
530
- offset :7 ,
531
- _uncompressed_size :7 ,
532
- _compressed_size :7 ,
533
- type :2 ;
534
- #endif
535
- } __packed __aligned (8 );
536
-
537
- #define CRC32_SIZE_MAX (1U << 7)
538
- #define CRC32_NONCE_MAX 0
539
-
540
- struct bch_extent_crc64 {
541
- #if defined(__LITTLE_ENDIAN_BITFIELD )
542
- __u64 type :3 ,
543
- _compressed_size :9 ,
544
- _uncompressed_size :9 ,
545
- offset :9 ,
546
- nonce :10 ,
547
- csum_type :4 ,
548
- compression_type :4 ,
549
- csum_hi :16 ;
550
- #elif defined (__BIG_ENDIAN_BITFIELD )
551
- __u64 csum_hi :16 ,
552
- compression_type :4 ,
553
- csum_type :4 ,
554
- nonce :10 ,
555
- offset :9 ,
556
- _uncompressed_size :9 ,
557
- _compressed_size :9 ,
558
- type :3 ;
559
- #endif
560
- __u64 csum_lo ;
561
- } __packed __aligned (8 );
562
-
563
- #define CRC64_SIZE_MAX (1U << 9)
564
- #define CRC64_NONCE_MAX ((1U << 10) - 1)
565
-
566
- struct bch_extent_crc128 {
567
- #if defined(__LITTLE_ENDIAN_BITFIELD )
568
- __u64 type :4 ,
569
- _compressed_size :13 ,
570
- _uncompressed_size :13 ,
571
- offset :13 ,
572
- nonce :13 ,
573
- csum_type :4 ,
574
- compression_type :4 ;
575
- #elif defined (__BIG_ENDIAN_BITFIELD )
576
- __u64 compression_type :4 ,
577
- csum_type :4 ,
578
- nonce :13 ,
579
- offset :13 ,
580
- _uncompressed_size :13 ,
581
- _compressed_size :13 ,
582
- type :4 ;
583
- #endif
584
- struct bch_csum csum ;
585
- } __packed __aligned (8 );
586
-
587
- #define CRC128_SIZE_MAX (1U << 13)
588
- #define CRC128_NONCE_MAX ((1U << 13) - 1)
589
-
590
- /*
591
- * @reservation - pointer hasn't been written to, just reserved
592
- */
593
- struct bch_extent_ptr {
594
- #if defined(__LITTLE_ENDIAN_BITFIELD )
595
- __u64 type :1 ,
596
- cached :1 ,
597
- unused :1 ,
598
- unwritten :1 ,
599
- offset :44 , /* 8 petabytes */
600
- dev :8 ,
601
- gen :8 ;
602
- #elif defined (__BIG_ENDIAN_BITFIELD )
603
- __u64 gen :8 ,
604
- dev :8 ,
605
- offset :44 ,
606
- unwritten :1 ,
607
- unused :1 ,
608
- cached :1 ,
609
- type :1 ;
610
- #endif
611
- } __packed __aligned (8 );
612
-
613
- struct bch_extent_stripe_ptr {
614
- #if defined(__LITTLE_ENDIAN_BITFIELD )
615
- __u64 type :5 ,
616
- block :8 ,
617
- redundancy :4 ,
618
- idx :47 ;
619
- #elif defined (__BIG_ENDIAN_BITFIELD )
620
- __u64 idx :47 ,
621
- redundancy :4 ,
622
- block :8 ,
623
- type :5 ;
624
- #endif
625
- };
626
-
627
- struct bch_extent_rebalance {
628
- #if defined(__LITTLE_ENDIAN_BITFIELD )
629
- __u64 type :6 ,
630
- unused :34 ,
631
- compression :8 , /* enum bch_compression_opt */
632
- target :16 ;
633
- #elif defined (__BIG_ENDIAN_BITFIELD )
634
- __u64 target :16 ,
635
- compression :8 ,
636
- unused :34 ,
637
- type :6 ;
638
- #endif
639
- };
640
-
641
- union bch_extent_entry {
642
- #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
643
- unsigned long type ;
644
- #elif __BITS_PER_LONG == 32
645
- struct {
646
- unsigned long pad ;
647
- unsigned long type ;
648
- };
649
- #else
650
- #error edit for your odd byteorder.
651
- #endif
652
-
653
- #define x (f , n ) struct bch_extent_##f f;
654
- BCH_EXTENT_ENTRY_TYPES ()
655
- #undef x
656
- };
657
-
658
- struct bch_btree_ptr {
659
- struct bch_val v ;
660
-
661
- __u64 _data [0 ];
662
- struct bch_extent_ptr start [];
663
- } __packed __aligned (8 );
664
-
665
- struct bch_btree_ptr_v2 {
666
- struct bch_val v ;
667
-
668
- __u64 mem_ptr ;
669
- __le64 seq ;
670
- __le16 sectors_written ;
671
- __le16 flags ;
672
- struct bpos min_key ;
673
- __u64 _data [0 ];
674
- struct bch_extent_ptr start [];
675
- } __packed __aligned (8 );
676
-
677
- LE16_BITMASK (BTREE_PTR_RANGE_UPDATED , struct bch_btree_ptr_v2 , flags , 0 , 1 );
678
-
679
- struct bch_extent {
680
- struct bch_val v ;
681
-
682
- __u64 _data [0 ];
683
- union bch_extent_entry start [];
684
- } __packed __aligned (8 );
685
-
686
426
struct bch_reservation {
687
427
struct bch_val v ;
688
428
@@ -691,25 +431,6 @@ struct bch_reservation {
691
431
__u8 pad [3 ];
692
432
} __packed __aligned (8 );
693
433
694
- /* Maximum size (in u64s) a single pointer could be: */
695
- #define BKEY_EXTENT_PTR_U64s_MAX \
696
- ((sizeof(struct bch_extent_crc128) + \
697
- sizeof(struct bch_extent_ptr)) / sizeof(__u64))
698
-
699
- /* Maximum possible size of an entire extent value: */
700
- #define BKEY_EXTENT_VAL_U64s_MAX \
701
- (1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
702
-
703
- /* * Maximum possible size of an entire extent, key + value: */
704
- #define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
705
-
706
- /* Btree pointers don't carry around checksums: */
707
- #define BKEY_BTREE_PTR_VAL_U64s_MAX \
708
- ((sizeof(struct bch_btree_ptr_v2) + \
709
- sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
710
- #define BKEY_BTREE_PTR_U64s_MAX \
711
- (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
712
-
713
434
struct bch_backpointer {
714
435
struct bch_val v ;
715
436
__u8 btree_id ;
@@ -720,6 +441,8 @@ struct bch_backpointer {
720
441
struct bpos pos ;
721
442
} __packed __aligned (8 );
722
443
444
+ #include "extents_format.h"
445
+
723
446
/* Reflink: */
724
447
725
448
struct bch_reflink_p {
0 commit comments