Merge tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux

Pull btrfs updates from David Sterba:
 "This round there are a lot of cleanups and moved code so the diffstat
  looks huge, otherwise there are some nice performance improvements and
  an update to raid56 reliability.

  User visible features:

   - raid56 reliability vs performance trade off:
      - fix destructive RMW for raid5 data (raid6 still needs work): do
        full checksum verification for all data during RMW cycle, this
        should prevent rewriting potentially corrupted data without
        notice
      - stripes are cached in memory which should reduce the performance
        impact but still can hurt some workloads
      - checksums are verified after repair again
      - this is the last option without introducing additional features
        (write intent bitmap, journal, another tree), the extra checksum
        read/verification was supposed to be avoided by the original
        implementation exactly for performance reasons but that caused
        all the reliability problems

   - discard=async by default for devices that support it

   - implement emergency flush reserve to avoid almost all unnecessary
     transaction aborts due to ENOSPC in cases where there are too many
     delayed refs or delayed allocation

   - skip block group synchronization if there's no change in used
     bytes, can reduce transaction commit count for some workloads

  Performance improvements:

   - fiemap and lseek:
      - overall speedup due to skipping unnecessary or duplicate
        searches (-40% run time)
      - cache some data structures and sharedness of extents (-30% run
        time)

   - send:
      - faster backref resolution when finding clones
      - cached leaf to root mapping for faster backref walking
      - improved clone/sharing detection
      - overall run time improvements (-70%)

  Core:

   - module initialization converted to a table of function pointers run
     in a sequence

   - preparation for fscrypt, extend passing file names across calls,
     dir item can store encryption status

   - raid56 updates:
      - more accurate error tracking of sectors within stripe
      - simplify recovery path and remove dedicated endio worker kthread
      - simplify scrub call paths
      - refactoring to support the extra data checksum verification
        during RMW cycle

   - tree block parentness checks consolidated and done at metadata read
     time

   - improved error handling

   - cleanups:
      - move a lot of code for better synchronization between kernel and
        user space sources, split big files
      - enum cleanups
      - GFP flag cleanups
      - header file cleanups, prototypes, dependencies
      - redundant parameter cleanups
      - inline extent handling simplifications
      - inode parameter conversion
      - data structure cleanups, reductions, renames, merges"

* tag 'for-6.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (249 commits)
  btrfs: print transaction aborted messages with an error level
  btrfs: sync some cleanups from progs into uapi/btrfs.h
  btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range
  btrfs: fix extent map use-after-free when handling missing device in read_one_chunk
  btrfs: remove outdated logic from overwrite_item() and add assertion
  btrfs: unify overwrite_item() and do_overwrite_item()
  btrfs: replace strncpy() with strscpy()
  btrfs: fix uninitialized variable in find_first_clear_extent_bit
  btrfs: fix uninitialized parent in insert_state
  btrfs: add might_sleep() annotations
  btrfs: add stack helpers for a few btrfs items
  btrfs: add nr_global_roots to the super block definition
  btrfs: remove BTRFS_LEAF_DATA_OFFSET
  btrfs: add helpers for manipulating leaf items and data
  btrfs: add eb to btrfs_node_key_ptr_offset
  btrfs: pass the extent buffer for the btrfs_item_nr helpers
  btrfs: move the csum helpers into ctree.h
  btrfs: move eb offset helpers into extent_io.h
  btrfs: move file_extent_item helpers into file-item.h
  btrfs: move leaf_data_end into ctree.c
  ...
This commit is contained in:
Linus Torvalds
2022-12-12 20:47:51 -08:00
121 changed files with 11247 additions and 9489 deletions

View File

@@ -19,8 +19,14 @@
#ifndef _UAPI_LINUX_BTRFS_H
#define _UAPI_LINUX_BTRFS_H
#ifdef __cplusplus
extern "C" {
#endif
#include <linux/types.h>
#include <linux/ioctl.h>
#include <linux/fs.h>
#define BTRFS_IOCTL_MAGIC 0x94
#define BTRFS_VOL_NAME_MAX 255
@@ -333,6 +339,12 @@ struct btrfs_ioctl_feature_flags {
*/
struct btrfs_balance_args {
__u64 profiles;
/*
* usage filter
* BTRFS_BALANCE_ARGS_USAGE with a single value means '0..N'
* BTRFS_BALANCE_ARGS_USAGE_RANGE - range syntax, min..max
*/
union {
__u64 usage;
struct {
@@ -549,7 +561,7 @@ struct btrfs_ioctl_search_header {
__u64 offset;
__u32 type;
__u32 len;
};
} __attribute__ ((__may_alias__));
#define BTRFS_SEARCH_ARGS_BUFSIZE (4096 - sizeof(struct btrfs_ioctl_search_key))
/*
@@ -562,6 +574,10 @@ struct btrfs_ioctl_search_args {
char buf[BTRFS_SEARCH_ARGS_BUFSIZE];
};
/*
* Extended version of TREE_SEARCH ioctl that can return more than 4k of bytes.
* The allocated size of the buffer is set in buf_size.
*/
struct btrfs_ioctl_search_args_v2 {
struct btrfs_ioctl_search_key key; /* in/out - search parameters */
__u64 buf_size; /* in - size of buffer
@@ -570,10 +586,11 @@ struct btrfs_ioctl_search_args_v2 {
__u64 buf[]; /* out - found items */
};
/* With a @src_length of zero, the range from @src_offset->EOF is cloned! */
struct btrfs_ioctl_clone_range_args {
__s64 src_fd;
__u64 src_offset, src_length;
__u64 dest_offset;
__s64 src_fd;
__u64 src_offset, src_length;
__u64 dest_offset;
};
/*
@@ -677,8 +694,11 @@ struct btrfs_ioctl_logical_ino_args {
/* struct btrfs_data_container *inodes; out */
__u64 inodes;
};
/* Return every ref to the extent, not just those containing logical block.
* Requires logical == extent bytenr. */
/*
* Return every ref to the extent, not just those containing logical block.
* Requires logical == extent bytenr.
*/
#define BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET (1ULL << 0)
enum btrfs_dev_stat_values {
@@ -1144,4 +1164,8 @@ enum btrfs_err_code {
#define BTRFS_IOC_ENCODED_WRITE _IOW(BTRFS_IOCTL_MAGIC, 64, \
struct btrfs_ioctl_encoded_io_args)
#ifdef __cplusplus
}
#endif
#endif /* _UAPI_LINUX_BTRFS_H */

View File

@@ -10,6 +10,23 @@
#include <stddef.h>
#endif
/* ASCII for _BHRfS_M, no terminating nul */
#define BTRFS_MAGIC 0x4D5F53665248425FULL
#define BTRFS_MAX_LEVEL 8
/*
* We can actually store much bigger names, but lets not confuse the rest of
* linux.
*/
#define BTRFS_NAME_LEN 255
/*
* Theoretical limit is larger, but we keep this down to a sane value. That
* should limit greatly the possibility of collisions on inode ref items.
*/
#define BTRFS_LINK_MAX 65535U
/*
* This header contains the structure definitions and constants used
* by file system objects that can be retrieved using
@@ -359,6 +376,50 @@ enum btrfs_csum_type {
#define BTRFS_FT_SYMLINK 7
#define BTRFS_FT_XATTR 8
#define BTRFS_FT_MAX 9
/* Directory contains encrypted data */
#define BTRFS_FT_ENCRYPTED 0x80
static inline __u8 btrfs_dir_flags_to_ftype(__u8 flags)
{
return flags & ~BTRFS_FT_ENCRYPTED;
}
/*
* Inode flags
*/
#define BTRFS_INODE_NODATASUM (1U << 0)
#define BTRFS_INODE_NODATACOW (1U << 1)
#define BTRFS_INODE_READONLY (1U << 2)
#define BTRFS_INODE_NOCOMPRESS (1U << 3)
#define BTRFS_INODE_PREALLOC (1U << 4)
#define BTRFS_INODE_SYNC (1U << 5)
#define BTRFS_INODE_IMMUTABLE (1U << 6)
#define BTRFS_INODE_APPEND (1U << 7)
#define BTRFS_INODE_NODUMP (1U << 8)
#define BTRFS_INODE_NOATIME (1U << 9)
#define BTRFS_INODE_DIRSYNC (1U << 10)
#define BTRFS_INODE_COMPRESS (1U << 11)
#define BTRFS_INODE_ROOT_ITEM_INIT (1U << 31)
#define BTRFS_INODE_FLAG_MASK \
(BTRFS_INODE_NODATASUM | \
BTRFS_INODE_NODATACOW | \
BTRFS_INODE_READONLY | \
BTRFS_INODE_NOCOMPRESS | \
BTRFS_INODE_PREALLOC | \
BTRFS_INODE_SYNC | \
BTRFS_INODE_IMMUTABLE | \
BTRFS_INODE_APPEND | \
BTRFS_INODE_NODUMP | \
BTRFS_INODE_NOATIME | \
BTRFS_INODE_DIRSYNC | \
BTRFS_INODE_COMPRESS | \
BTRFS_INODE_ROOT_ITEM_INIT)
#define BTRFS_INODE_RO_VERITY (1U << 0)
#define BTRFS_INODE_RO_FLAG_MASK (BTRFS_INODE_RO_VERITY)
/*
* The key defines the order in the tree, and so it also defines (optimal)
@@ -389,6 +450,109 @@ struct btrfs_key {
__u64 offset;
} __attribute__ ((__packed__));
/*
* Every tree block (leaf or node) starts with this header.
*/
struct btrfs_header {
/* These first four must match the super block */
__u8 csum[BTRFS_CSUM_SIZE];
/* FS specific uuid */
__u8 fsid[BTRFS_FSID_SIZE];
/* Which block this node is supposed to live in */
__le64 bytenr;
__le64 flags;
/* Allowed to be different from the super from here on down */
__u8 chunk_tree_uuid[BTRFS_UUID_SIZE];
__le64 generation;
__le64 owner;
__le32 nritems;
__u8 level;
} __attribute__ ((__packed__));
/*
* This is a very generous portion of the super block, giving us room to
* translate 14 chunks with 3 stripes each.
*/
#define BTRFS_SYSTEM_CHUNK_ARRAY_SIZE 2048
/*
* Just in case we somehow lose the roots and are not able to mount, we store
* an array of the roots from previous transactions in the super.
*/
#define BTRFS_NUM_BACKUP_ROOTS 4
struct btrfs_root_backup {
__le64 tree_root;
__le64 tree_root_gen;
__le64 chunk_root;
__le64 chunk_root_gen;
__le64 extent_root;
__le64 extent_root_gen;
__le64 fs_root;
__le64 fs_root_gen;
__le64 dev_root;
__le64 dev_root_gen;
__le64 csum_root;
__le64 csum_root_gen;
__le64 total_bytes;
__le64 bytes_used;
__le64 num_devices;
/* future */
__le64 unused_64[4];
__u8 tree_root_level;
__u8 chunk_root_level;
__u8 extent_root_level;
__u8 fs_root_level;
__u8 dev_root_level;
__u8 csum_root_level;
/* future and to align */
__u8 unused_8[10];
} __attribute__ ((__packed__));
/*
* A leaf is full of items. offset and size tell us where to find the item in
* the leaf (relative to the start of the data area)
*/
struct btrfs_item {
struct btrfs_disk_key key;
__le32 offset;
__le32 size;
} __attribute__ ((__packed__));
/*
* Leaves have an item area and a data area:
* [item0, item1....itemN] [free space] [dataN...data1, data0]
*
* The data is separate from the items to get the keys closer together during
* searches.
*/
struct btrfs_leaf {
struct btrfs_header header;
struct btrfs_item items[];
} __attribute__ ((__packed__));
/*
* All non-leaf blocks are nodes, they hold only keys and pointers to other
* blocks.
*/
struct btrfs_key_ptr {
struct btrfs_disk_key key;
__le64 blockptr;
__le64 generation;
} __attribute__ ((__packed__));
struct btrfs_node {
struct btrfs_header header;
struct btrfs_key_ptr ptrs[];
} __attribute__ ((__packed__));
struct btrfs_dev_item {
/* the internal btrfs device id */
__le64 devid;
@@ -472,6 +636,69 @@ struct btrfs_chunk {
/* additional stripes go here */
} __attribute__ ((__packed__));
/*
* The super block basically lists the main trees of the FS.
*/
struct btrfs_super_block {
/* The first 4 fields must match struct btrfs_header */
__u8 csum[BTRFS_CSUM_SIZE];
/* FS specific UUID, visible to user */
__u8 fsid[BTRFS_FSID_SIZE];
/* This block number */
__le64 bytenr;
__le64 flags;
/* Allowed to be different from the btrfs_header from here own down */
__le64 magic;
__le64 generation;
__le64 root;
__le64 chunk_root;
__le64 log_root;
/*
* This member has never been utilized since the very beginning, thus
* it's always 0 regardless of kernel version. We always use
* generation + 1 to read log tree root. So here we mark it deprecated.
*/
__le64 __unused_log_root_transid;
__le64 total_bytes;
__le64 bytes_used;
__le64 root_dir_objectid;
__le64 num_devices;
__le32 sectorsize;
__le32 nodesize;
__le32 __unused_leafsize;
__le32 stripesize;
__le32 sys_chunk_array_size;
__le64 chunk_root_generation;
__le64 compat_flags;
__le64 compat_ro_flags;
__le64 incompat_flags;
__le16 csum_type;
__u8 root_level;
__u8 chunk_root_level;
__u8 log_root_level;
struct btrfs_dev_item dev_item;
char label[BTRFS_LABEL_SIZE];
__le64 cache_generation;
__le64 uuid_tree_generation;
/* The UUID written into btree blocks */
__u8 metadata_uuid[BTRFS_FSID_SIZE];
__u64 nr_global_roots;
/* Future expansion */
__le64 reserved[27];
__u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
/* Padded to 4096 bytes */
__u8 padding[565];
} __attribute__ ((__packed__));
#define BTRFS_FREE_SPACE_EXTENT 1
#define BTRFS_FREE_SPACE_BITMAP 2
@@ -526,6 +753,14 @@ struct btrfs_extent_item_v0 {
/* use full backrefs for extent pointers in the block */
#define BTRFS_BLOCK_FLAG_FULL_BACKREF (1ULL << 8)
#define BTRFS_BACKREF_REV_MAX 256
#define BTRFS_BACKREF_REV_SHIFT 56
#define BTRFS_BACKREF_REV_MASK (((u64)BTRFS_BACKREF_REV_MAX - 1) << \
BTRFS_BACKREF_REV_SHIFT)
#define BTRFS_OLD_BACKREF_REV 0
#define BTRFS_MIXED_BACKREF_REV 1
/*
* this flag is only used internally by scrub and may be changed at any time
* it is only declared here to avoid collisions