btrfs: enable large data folio support under CONFIG_BTRFS_EXPERIMENTAL

With all the preparation patches already merged, it's pretty easy to
enable large data folios:

- Remove the ASSERT() on folio size in btrfs_end_repair_bio()

- Add a helper to properly set the max folio order
  Currently, because several call sites fetch the bitmap content
  directly into an unsigned long, we can only support BITS_PER_LONG
  blocks for each bitmap, which caps the folio order accordingly (see
  the worked example after this list).

- Call the helper when reading/creating an inode

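As a worked example of that cap (assuming a 4K block size, 4K pages and
64-bit longs, i.e. common x86_64 defaults, which the patch itself does
not spell out):

    max bytes per bitmap = BITS_PER_LONG << sectorsize_bits
                         = 64 << 12 = 256K
    max folio order      = ilog2(256K >> PAGE_SHIFT)
                         = ilog2(64) = 6

so the helper below ends up allowing folio orders 0 through 6 via
mapping_set_folio_order_range() for such a filesystem.
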
The support has the following limitations:

- No large folios for the data reloc inode
  The relocation code still requires page-sized folios, but that path
  is neither hot nor common compared to regular buffered I/O.

  Will be improved in the future.

- Requires CONFIG_BTRFS_EXPERIMENTAL

- Will require all folio related operations to check whether the extra
  btrfs_subpage structure is needed
  Any folio larger than the block size now needs btrfs_subpage handling
  (see the example after this list).

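To illustrate the last point, here is a minimal sketch based on the
updated btrfs_is_subpage() in the subpage.h hunk below (the 16K folio
size is just an example, not something from the patch):

    /* 4K block size, 16K (order-2) data folio, even with 4K pages */
    if (btrfs_is_subpage(fs_info, folio)) {
            /*
             * True: fs_info->sectorsize (4K) < folio_size(folio) (16K),
             * so per-block state is tracked in struct btrfs_subpage.
             */
    }
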
Unfortunately I do not have a physical machine for performance testing,
but if this behaves like XFS/EXT4, it should bring a single-digit
percentage performance improvement in real-world workloads.

Although I believe there are still quite a few optimizations to be done,
let's focus on testing the current large data folio support first.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Author:       Qu Wenruo <wqu@suse.com>
Date:         2025-04-23 18:28:02 +09:30
Committed by: David Sterba
Parent:       b769777d92
Commit:       cc38d178ff

5 changed files with 21 additions and 20 deletions

diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig

@@ -114,6 +114,8 @@ config BTRFS_EXPERIMENTAL
 	  - extent tree v2 - complex rework of extent tracking
+	  - large folio support
+
 	  If unsure, say N.
 
 config BTRFS_FS_REF_VERIFY

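For anyone who wants to test this, the option is off by default; a
minimal .config fragment enabling it could look like the following (the
"=m" choice is only an example):

    CONFIG_BTRFS_FS=m
    CONFIG_BTRFS_EXPERIMENTAL=y
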
diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c

@@ -165,12 +165,6 @@ static void btrfs_end_repair_bio(struct btrfs_bio *repair_bbio,
 	struct bio_vec *bv = bio_first_bvec_all(&repair_bbio->bio);
 	int mirror = repair_bbio->mirror_num;
 
-	/*
-	 * We can only trigger this for data bio, which doesn't support larger
-	 * folios yet.
-	 */
-	ASSERT(folio_order(page_folio(bv->bv_page)) == 0);
-
 	if (repair_bbio->bio.bi_status ||
 	    !btrfs_data_csum_ok(repair_bbio, dev, 0, bv)) {
 		bio_reset(&repair_bbio->bio, NULL, REQ_OP_READ);

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h

@@ -525,6 +525,23 @@ static inline void btrfs_update_inode_mapping_flags(struct btrfs_inode *inode)
 		mapping_set_stable_writes(inode->vfs_inode.i_mapping);
 }
 
+static inline void btrfs_set_inode_mapping_order(struct btrfs_inode *inode)
+{
+	/* Metadata inode should not reach here. */
+	ASSERT(is_data_inode(inode));
+
+	/* For data reloc inode, it still requires page sized folio. */
+	if (unlikely(btrfs_is_data_reloc_root(inode->root)))
+		return;
+
+	/* We only allow BITS_PER_LONGS blocks for each bitmap. */
+#ifdef CONFIG_BTRFS_EXPERIMENTAL
+	mapping_set_folio_order_range(inode->vfs_inode.i_mapping, 0,
+			ilog2(((BITS_PER_LONG << inode->root->fs_info->sectorsize_bits)
+				>> PAGE_SHIFT)));
+#endif
+}
+
 /* Array of bytes with variable length, hexadecimal format 0x1234 */
 #define CSUM_FMT			"0x%*phN"
 #define CSUM_FMT_VALUE(size, bytes)	size, bytes

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

@@ -3946,6 +3946,7 @@ static int btrfs_read_locked_inode(struct btrfs_inode *inode, struct btrfs_path
 	btrfs_inode_split_flags(btrfs_inode_flags(leaf, inode_item),
 				&inode->flags, &inode->ro_flags);
 	btrfs_update_inode_mapping_flags(inode);
+	btrfs_set_inode_mapping_order(inode);
 
 cache_index:
 	/*
@@ -6463,6 +6464,7 @@ int btrfs_create_new_inode(struct btrfs_trans_handle *trans,
 			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW |
 				BTRFS_INODE_NODATASUM;
 		btrfs_update_inode_mapping_flags(BTRFS_I(inode));
+		btrfs_set_inode_mapping_order(BTRFS_I(inode));
 	}
 
 	ret = btrfs_insert_inode_locked(inode);

diff --git a/fs/btrfs/subpage.h b/fs/btrfs/subpage.h

@@ -92,7 +92,6 @@ enum btrfs_folio_type {
 	BTRFS_SUBPAGE_DATA,
 };
 
-#if PAGE_SIZE > BTRFS_MIN_BLOCKSIZE
 /*
  * Subpage support for metadata is more complex, as we can have dummy extent
  * buffers, where folios have no mapping to determine the owning inode.
@@ -113,19 +112,6 @@ static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
 		ASSERT(is_data_inode(BTRFS_I(folio->mapping->host)));
 	return fs_info->sectorsize < folio_size(folio);
 }
-#else
-static inline bool btrfs_meta_is_subpage(const struct btrfs_fs_info *fs_info)
-{
-	return false;
-}
-static inline bool btrfs_is_subpage(const struct btrfs_fs_info *fs_info,
-				    struct folio *folio)
-{
-	if (folio->mapping && folio->mapping->host)
-		ASSERT(is_data_inode(BTRFS_I(folio->mapping->host)));
-	return false;
-}
-#endif
 
 int btrfs_attach_folio_state(const struct btrfs_fs_info *fs_info,
 			     struct folio *folio, enum btrfs_folio_type type);