
Commit b2fa1b6

Kent Overstreet authored and committed

bcachefs: extents_format.h

Signed-off-by: Kent Overstreet <[email protected]>

1 parent 0560eb9 · commit b2fa1b6

File tree: 2 files changed (+284, -279 lines)


fs/bcachefs/bcachefs_format.h

Lines changed: 2 additions & 279 deletions
@@ -417,272 +417,12 @@ struct bch_set {
 	struct bch_val		v;
 };
 
-/* Extents */
-
-/*
- * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally
- * preceded by checksum/compression information (bch_extent_crc32 or
- * bch_extent_crc64).
- *
- * One major determining factor in the format of extents is how we handle and
- * represent extents that have been partially overwritten and thus trimmed:
- *
- * If an extent is not checksummed or compressed, when the extent is trimmed we
- * don't have to remember the extent we originally allocated and wrote: we can
- * merely adjust ptr->offset to point to the start of the data that is currently
- * live. The size field in struct bkey records the current (live) size of the
- * extent, and is also used to mean "size of region on disk that we point to" in
- * this case.
- *
- * Thus an extent that is not checksummed or compressed will consist only of a
- * list of bch_extent_ptrs, with none of the fields in
- * bch_extent_crc32/bch_extent_crc64.
- *
- * When an extent is checksummed or compressed, it's not possible to read only
- * the data that is currently live: we have to read the entire extent that was
- * originally written, and then return only the part of the extent that is
- * currently live.
- *
- * Thus, in addition to the current size of the extent in struct bkey, we need
- * to store the size of the originally allocated space - this is the
- * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also,
- * when the extent is trimmed, instead of modifying the offset field of the
- * pointer, we keep a second smaller offset field - "offset into the original
- * extent of the currently live region".
- *
- * The other major determining factor is replication and data migration:
- *
- * Each pointer may have its own bch_extent_crc32/64. When doing a replicated
- * write, we will initially write all the replicas in the same format, with the
- * same checksum type and compression format - however, when copygc runs later (or
- * tiering/cache promotion, anything that moves data), it is not in general
- * going to rewrite all the pointers at once - one of the replicas may be in a
- * bucket on one device that has very little fragmentation while another lives
- * in a bucket that has become heavily fragmented, and thus is being rewritten
- * sooner than the rest.
- *
- * Thus it will only move a subset of the pointers (or in the case of
- * tiering/cache promotion perhaps add a single pointer without dropping any
- * current pointers), and if the extent has been partially overwritten it must
- * write only the currently live portion (or copygc would not be able to reduce
- * fragmentation!) - which necessitates a different bch_extent_crc format for
- * the new pointer.
- *
- * But in the interests of space efficiency, we don't want to store one
- * bch_extent_crc for each pointer if we don't have to.
- *
- * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and
- * bch_extent_ptrs appended arbitrarily one after the other. We determine the
- * type of a given entry with a scheme similar to utf8 (except we're encoding a
- * type, not a size), encoding the type in the position of the first set bit:
- *
- * bch_extent_crc32	- 0b1
- * bch_extent_ptr	- 0b10
- * bch_extent_crc64	- 0b100
- *
- * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and
- * bch_extent_crc64 is the least constrained).
- *
- * Then, each bch_extent_crc32/64 applies to the pointers that follow after it,
- * until the next bch_extent_crc32/64.
- *
- * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer
- * is neither checksummed nor compressed.
- */
-
 /* 128 bits, sufficient for cryptographic MACs: */
 struct bch_csum {
 	__le64		lo;
 	__le64		hi;
 } __packed __aligned(8);
 
-#define BCH_EXTENT_ENTRY_TYPES()	\
-	x(ptr,		0)		\
-	x(crc32,	1)		\
-	x(crc64,	2)		\
-	x(crc128,	3)		\
-	x(stripe_ptr,	4)		\
-	x(rebalance,	5)
-#define BCH_EXTENT_ENTRY_MAX	6
-
-enum bch_extent_entry_type {
-#define x(f, n) BCH_EXTENT_ENTRY_##f = n,
-	BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-/* Compressed/uncompressed size are stored biased by 1: */
-struct bch_extent_crc32 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u32		type:2,
-			_compressed_size:7,
-			_uncompressed_size:7,
-			offset:7,
-			_unused:1,
-			csum_type:4,
-			compression_type:4;
-	__u32		csum;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u32		csum;
-	__u32		compression_type:4,
-			csum_type:4,
-			_unused:1,
-			offset:7,
-			_uncompressed_size:7,
-			_compressed_size:7,
-			type:2;
-#endif
-} __packed __aligned(8);
-
-#define CRC32_SIZE_MAX		(1U << 7)
-#define CRC32_NONCE_MAX		0
-
-struct bch_extent_crc64 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64		type:3,
-			_compressed_size:9,
-			_uncompressed_size:9,
-			offset:9,
-			nonce:10,
-			csum_type:4,
-			compression_type:4,
-			csum_hi:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64		csum_hi:16,
-			compression_type:4,
-			csum_type:4,
-			nonce:10,
-			offset:9,
-			_uncompressed_size:9,
-			_compressed_size:9,
-			type:3;
-#endif
-	__u64		csum_lo;
-} __packed __aligned(8);
-
-#define CRC64_SIZE_MAX		(1U << 9)
-#define CRC64_NONCE_MAX		((1U << 10) - 1)
-
-struct bch_extent_crc128 {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64		type:4,
-			_compressed_size:13,
-			_uncompressed_size:13,
-			offset:13,
-			nonce:13,
-			csum_type:4,
-			compression_type:4;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64		compression_type:4,
-			csum_type:4,
-			nonce:13,
-			offset:13,
-			_uncompressed_size:13,
-			_compressed_size:13,
-			type:4;
-#endif
-	struct bch_csum	csum;
-} __packed __aligned(8);
-
-#define CRC128_SIZE_MAX		(1U << 13)
-#define CRC128_NONCE_MAX	((1U << 13) - 1)
-
-/*
- * @reservation - pointer hasn't been written to, just reserved
- */
-struct bch_extent_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64		type:1,
-			cached:1,
-			unused:1,
-			unwritten:1,
-			offset:44, /* 8 petabytes */
-			dev:8,
-			gen:8;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64		gen:8,
-			dev:8,
-			offset:44,
-			unwritten:1,
-			unused:1,
-			cached:1,
-			type:1;
-#endif
-} __packed __aligned(8);
-
-struct bch_extent_stripe_ptr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64		type:5,
-			block:8,
-			redundancy:4,
-			idx:47;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64		idx:47,
-			redundancy:4,
-			block:8,
-			type:5;
-#endif
-};
-
-struct bch_extent_rebalance {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-	__u64		type:6,
-			unused:34,
-			compression:8, /* enum bch_compression_opt */
-			target:16;
-#elif defined (__BIG_ENDIAN_BITFIELD)
-	__u64		target:16,
-			compression:8,
-			unused:34,
-			type:6;
-#endif
-};
-
-union bch_extent_entry {
-#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ || __BITS_PER_LONG == 64
-	unsigned long			type;
-#elif __BITS_PER_LONG == 32
-	struct {
-		unsigned long		pad;
-		unsigned long		type;
-	};
-#else
-#error edit for your odd byteorder.
-#endif
-
-#define x(f, n) struct bch_extent_##f	f;
-	BCH_EXTENT_ENTRY_TYPES()
-#undef x
-};
-
-struct bch_btree_ptr {
-	struct bch_val		v;
-
-	__u64			_data[0];
-	struct bch_extent_ptr	start[];
-} __packed __aligned(8);
-
-struct bch_btree_ptr_v2 {
-	struct bch_val		v;
-
-	__u64			mem_ptr;
-	__le64			seq;
-	__le16			sectors_written;
-	__le16			flags;
-	struct bpos		min_key;
-	__u64			_data[0];
-	struct bch_extent_ptr	start[];
-} __packed __aligned(8);
-
-LE16_BITMASK(BTREE_PTR_RANGE_UPDATED,	struct bch_btree_ptr_v2, flags, 0, 1);
-
-struct bch_extent {
-	struct bch_val		v;
-
-	__u64			_data[0];
-	union bch_extent_entry	start[];
-} __packed __aligned(8);
-
 struct bch_reservation {
 	struct bch_val		v;
 
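The comment removed above describes the layout this file had accumulated: crc and ptr entries are appended back to back in an extent value, and each entry's type is encoded in the position of the first set bit of its type word. A minimal sketch of what that decoding looks like, assuming the definitions being moved here (union bch_extent_entry, the entry structs and BCH_EXTENT_ENTRY_MAX) are in scope; the helper names are illustrative, not the in-tree ones:

/*
 * Illustrative only: the first set bit of an entry's type word encodes the
 * enum bch_extent_entry_type value, so counting trailing zeroes recovers it,
 * and the entry's size (and therefore the next entry) follows from the type.
 */
static inline enum bch_extent_entry_type
sketch_extent_entry_type(const union bch_extent_entry *e)
{
	/* a zero type word has no set bit; treat it as invalid */
	return e->type ? __builtin_ctzl(e->type) : BCH_EXTENT_ENTRY_MAX;
}

static inline unsigned sketch_extent_entry_bytes(const union bch_extent_entry *e)
{
	switch (sketch_extent_entry_type(e)) {
	case BCH_EXTENT_ENTRY_ptr:		return sizeof(struct bch_extent_ptr);
	case BCH_EXTENT_ENTRY_crc32:		return sizeof(struct bch_extent_crc32);
	case BCH_EXTENT_ENTRY_crc64:		return sizeof(struct bch_extent_crc64);
	case BCH_EXTENT_ENTRY_crc128:		return sizeof(struct bch_extent_crc128);
	case BCH_EXTENT_ENTRY_stripe_ptr:	return sizeof(struct bch_extent_stripe_ptr);
	case BCH_EXTENT_ENTRY_rebalance:	return sizeof(struct bch_extent_rebalance);
	default:				return 0;
	}
}

/* entries are packed one after the other, so walking a value is just this: */
static inline const union bch_extent_entry *
sketch_extent_entry_next(const union bch_extent_entry *e)
{
	return (const void *) ((const __u64 *) e +
			       sketch_extent_entry_bytes(e) / sizeof(__u64));
}

Per the removed comment, any crc entry seen while walking applies to the pointers that follow it until the next crc entry, which is what lets crc and ptr entries be mixed freely within one value.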
@@ -691,25 +431,6 @@ struct bch_reservation {
 	__u8			pad[3];
 } __packed __aligned(8);
 
-/* Maximum size (in u64s) a single pointer could be: */
-#define BKEY_EXTENT_PTR_U64s_MAX\
-	((sizeof(struct bch_extent_crc128) +		\
-	  sizeof(struct bch_extent_ptr)) / sizeof(__u64))
-
-/* Maximum possible size of an entire extent value: */
-#define BKEY_EXTENT_VAL_U64s_MAX			\
-	(1 + BKEY_EXTENT_PTR_U64s_MAX * (BCH_REPLICAS_MAX + 1))
-
-/* * Maximum possible size of an entire extent, key + value: */
-#define BKEY_EXTENT_U64s_MAX	(BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX)
-
-/* Btree pointers don't carry around checksums: */
-#define BKEY_BTREE_PTR_VAL_U64s_MAX			\
-	((sizeof(struct bch_btree_ptr_v2) +		\
-	  sizeof(struct bch_extent_ptr) * BCH_REPLICAS_MAX) / sizeof(__u64))
-#define BKEY_BTREE_PTR_U64s_MAX				\
-	(BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX)
-
 struct bch_backpointer {
 	struct bch_val		v;
 	__u8			btree_id;
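The macros removed in this hunk are plain arithmetic over the struct layouts from the previous hunk. A couple of illustrative compile-time checks, assuming those definitions and macros are still in scope, and assuming BCH_REPLICAS_MAX is 4 (its value elsewhere in this header; treat that as an assumption here):

/* bch_extent_crc128 is one __u64 bitfield plus a 16-byte bch_csum;
 * bch_extent_ptr is a single packed __u64. */
_Static_assert(sizeof(struct bch_extent_crc128) == 24, "8 + 16 bytes");
_Static_assert(sizeof(struct bch_extent_ptr) == 8, "one __u64");

/* so the largest single pointer is (24 + 8) / 8 == 4 u64s... */
_Static_assert(BKEY_EXTENT_PTR_U64s_MAX == 4, "crc128 + ptr");

/* ...and, if BCH_REPLICAS_MAX is 4, the largest extent value is
 * 1 + 4 * (4 + 1) == 21 u64s. */
_Static_assert(BKEY_EXTENT_VAL_U64s_MAX == 21, "assumes BCH_REPLICAS_MAX == 4");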
@@ -720,6 +441,8 @@ struct bch_backpointer {
 	struct bpos		pos;
 } __packed __aligned(8);
 
+#include "extents_format.h"
+
 /* Reflink: */
 
 struct bch_reflink_p {
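One detail worth keeping in mind as these definitions move to extents_format.h: the bch_extent_crc32/64/128 structs store their compressed/uncompressed sizes biased by 1, so for example the 7-bit crc32 fields cover 1..128 sectors (CRC32_SIZE_MAX == 1U << 7). A pair of hypothetical accessors, written only to illustrate the encoding, not taken from the tree:

/* hypothetical: a stored _compressed_size of 0 means 1 sector */
static inline unsigned sketch_crc32_compressed_size(const struct bch_extent_crc32 *crc)
{
	return crc->_compressed_size + 1;
}

static inline void sketch_crc32_set_compressed_size(struct bch_extent_crc32 *crc,
						    unsigned sectors)
{
	/* caller must ensure 1 <= sectors <= CRC32_SIZE_MAX */
	crc->_compressed_size = sectors - 1;
}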
