mirror of
https://github.com/raspberrypi/linux.git
synced 2025-12-27 04:22:58 +00:00
Add a metadir path to select the realtime refcount btree inode and load it at mount time. The rtrefcountbt inode will have a unique extent format code, which means that we also have to update the inode validation and flush routines to look for it. Signed-off-by: "Darrick J. Wong" <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
422 lines
11 KiB
C
422 lines
11 KiB
C
// SPDX-License-Identifier: GPL-2.0-or-later
|
|
/*
|
|
* Copyright (c) 2021-2024 Oracle. All Rights Reserved.
|
|
* Author: Darrick J. Wong <djwong@kernel.org>
|
|
*/
|
|
#include "xfs.h"
|
|
#include "xfs_fs.h"
|
|
#include "xfs_shared.h"
|
|
#include "xfs_format.h"
|
|
#include "xfs_log_format.h"
|
|
#include "xfs_trans_resv.h"
|
|
#include "xfs_bit.h"
|
|
#include "xfs_sb.h"
|
|
#include "xfs_mount.h"
|
|
#include "xfs_defer.h"
|
|
#include "xfs_inode.h"
|
|
#include "xfs_trans.h"
|
|
#include "xfs_alloc.h"
|
|
#include "xfs_btree.h"
|
|
#include "xfs_btree_staging.h"
|
|
#include "xfs_rtrefcount_btree.h"
|
|
#include "xfs_refcount.h"
|
|
#include "xfs_trace.h"
|
|
#include "xfs_cksum.h"
|
|
#include "xfs_error.h"
|
|
#include "xfs_extent_busy.h"
|
|
#include "xfs_rtgroup.h"
|
|
#include "xfs_rtbitmap.h"
|
|
#include "xfs_metafile.h"
|
|
|
|
/*
 * Slab cache that rt refcount btree cursors are allocated from.  It is created
 * by xfs_rtrefcountbt_init_cur_cache(), handed to xfs_btree_alloc_cursor() in
 * xfs_rtrefcountbt_init_cursor(), and torn down (and reset to NULL) by
 * xfs_rtrefcountbt_destroy_cur_cache().
 */
static struct kmem_cache *xfs_rtrefcountbt_cur_cache;
|
|
|
|
/*
 * Realtime Reference Count btree.
 *
 * This is a btree used to track the reference counts (i.e. how many owners
 * share a given extent) of extents in the realtime device.  See the comments
 * in xfs_refcount_btree.c for more information.
 *
 * This tree is basically the same as the regular refcount btree except that
 * it's rooted in an inode.
 */
|
|
|
|
static struct xfs_btree_cur *
|
|
xfs_rtrefcountbt_dup_cursor(
|
|
struct xfs_btree_cur *cur)
|
|
{
|
|
return xfs_rtrefcountbt_init_cursor(cur->bc_tp, to_rtg(cur->bc_group));
|
|
}
|
|
|
|
STATIC int
|
|
xfs_rtrefcountbt_get_minrecs(
|
|
struct xfs_btree_cur *cur,
|
|
int level)
|
|
{
|
|
if (level == cur->bc_nlevels - 1) {
|
|
struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
|
|
|
|
return xfs_rtrefcountbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
|
|
level == 0) / 2;
|
|
}
|
|
|
|
return cur->bc_mp->m_rtrefc_mnr[level != 0];
|
|
}
|
|
|
|
STATIC int
|
|
xfs_rtrefcountbt_get_maxrecs(
|
|
struct xfs_btree_cur *cur,
|
|
int level)
|
|
{
|
|
if (level == cur->bc_nlevels - 1) {
|
|
struct xfs_ifork *ifp = xfs_btree_ifork_ptr(cur);
|
|
|
|
return xfs_rtrefcountbt_maxrecs(cur->bc_mp, ifp->if_broot_bytes,
|
|
level == 0);
|
|
}
|
|
|
|
return cur->bc_mp->m_rtrefc_mxr[level != 0];
|
|
}
|
|
|
|
STATIC void
|
|
xfs_rtrefcountbt_init_key_from_rec(
|
|
union xfs_btree_key *key,
|
|
const union xfs_btree_rec *rec)
|
|
{
|
|
key->refc.rc_startblock = rec->refc.rc_startblock;
|
|
}
|
|
|
|
STATIC void
|
|
xfs_rtrefcountbt_init_high_key_from_rec(
|
|
union xfs_btree_key *key,
|
|
const union xfs_btree_rec *rec)
|
|
{
|
|
__u32 x;
|
|
|
|
x = be32_to_cpu(rec->refc.rc_startblock);
|
|
x += be32_to_cpu(rec->refc.rc_blockcount) - 1;
|
|
key->refc.rc_startblock = cpu_to_be32(x);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_rtrefcountbt_init_rec_from_cur(
|
|
struct xfs_btree_cur *cur,
|
|
union xfs_btree_rec *rec)
|
|
{
|
|
const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
|
|
uint32_t start;
|
|
|
|
start = xfs_refcount_encode_startblock(irec->rc_startblock,
|
|
irec->rc_domain);
|
|
rec->refc.rc_startblock = cpu_to_be32(start);
|
|
rec->refc.rc_blockcount = cpu_to_be32(cur->bc_rec.rc.rc_blockcount);
|
|
rec->refc.rc_refcount = cpu_to_be32(cur->bc_rec.rc.rc_refcount);
|
|
}
|
|
|
|
STATIC void
|
|
xfs_rtrefcountbt_init_ptr_from_cur(
|
|
struct xfs_btree_cur *cur,
|
|
union xfs_btree_ptr *ptr)
|
|
{
|
|
ptr->l = 0;
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_rtrefcountbt_key_diff(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *key)
|
|
{
|
|
const struct xfs_refcount_key *kp = &key->refc;
|
|
const struct xfs_refcount_irec *irec = &cur->bc_rec.rc;
|
|
uint32_t start;
|
|
|
|
start = xfs_refcount_encode_startblock(irec->rc_startblock,
|
|
irec->rc_domain);
|
|
return (int64_t)be32_to_cpu(kp->rc_startblock) - start;
|
|
}
|
|
|
|
STATIC int64_t
|
|
xfs_rtrefcountbt_diff_two_keys(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2,
|
|
const union xfs_btree_key *mask)
|
|
{
|
|
ASSERT(!mask || mask->refc.rc_startblock);
|
|
|
|
return (int64_t)be32_to_cpu(k1->refc.rc_startblock) -
|
|
be32_to_cpu(k2->refc.rc_startblock);
|
|
}
|
|
|
|
static xfs_failaddr_t
|
|
xfs_rtrefcountbt_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
struct xfs_mount *mp = bp->b_target->bt_mount;
|
|
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
|
|
xfs_failaddr_t fa;
|
|
int level;
|
|
|
|
if (!xfs_verify_magic(bp, block->bb_magic))
|
|
return __this_address;
|
|
|
|
if (!xfs_has_reflink(mp))
|
|
return __this_address;
|
|
fa = xfs_btree_fsblock_v5hdr_verify(bp, XFS_RMAP_OWN_UNKNOWN);
|
|
if (fa)
|
|
return fa;
|
|
level = be16_to_cpu(block->bb_level);
|
|
if (level > mp->m_rtrefc_maxlevels)
|
|
return __this_address;
|
|
|
|
return xfs_btree_fsblock_verify(bp, mp->m_rtrefc_mxr[level != 0]);
|
|
}
|
|
|
|
static void
|
|
xfs_rtrefcountbt_read_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
xfs_failaddr_t fa;
|
|
|
|
if (!xfs_btree_fsblock_verify_crc(bp))
|
|
xfs_verifier_error(bp, -EFSBADCRC, __this_address);
|
|
else {
|
|
fa = xfs_rtrefcountbt_verify(bp);
|
|
if (fa)
|
|
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
|
}
|
|
|
|
if (bp->b_error)
|
|
trace_xfs_btree_corrupt(bp, _RET_IP_);
|
|
}
|
|
|
|
static void
|
|
xfs_rtrefcountbt_write_verify(
|
|
struct xfs_buf *bp)
|
|
{
|
|
xfs_failaddr_t fa;
|
|
|
|
fa = xfs_rtrefcountbt_verify(bp);
|
|
if (fa) {
|
|
trace_xfs_btree_corrupt(bp, _RET_IP_);
|
|
xfs_verifier_error(bp, -EFSCORRUPTED, fa);
|
|
return;
|
|
}
|
|
xfs_btree_fsblock_calc_crc(bp);
|
|
|
|
}
|
|
|
|
const struct xfs_buf_ops xfs_rtrefcountbt_buf_ops = {
|
|
.name = "xfs_rtrefcountbt",
|
|
.magic = { 0, cpu_to_be32(XFS_RTREFC_CRC_MAGIC) },
|
|
.verify_read = xfs_rtrefcountbt_read_verify,
|
|
.verify_write = xfs_rtrefcountbt_write_verify,
|
|
.verify_struct = xfs_rtrefcountbt_verify,
|
|
};
|
|
|
|
STATIC int
|
|
xfs_rtrefcountbt_keys_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *k1,
|
|
const union xfs_btree_key *k2)
|
|
{
|
|
return be32_to_cpu(k1->refc.rc_startblock) <
|
|
be32_to_cpu(k2->refc.rc_startblock);
|
|
}
|
|
|
|
STATIC int
|
|
xfs_rtrefcountbt_recs_inorder(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_rec *r1,
|
|
const union xfs_btree_rec *r2)
|
|
{
|
|
return be32_to_cpu(r1->refc.rc_startblock) +
|
|
be32_to_cpu(r1->refc.rc_blockcount) <=
|
|
be32_to_cpu(r2->refc.rc_startblock);
|
|
}
|
|
|
|
STATIC enum xbtree_key_contig
|
|
xfs_rtrefcountbt_keys_contiguous(
|
|
struct xfs_btree_cur *cur,
|
|
const union xfs_btree_key *key1,
|
|
const union xfs_btree_key *key2,
|
|
const union xfs_btree_key *mask)
|
|
{
|
|
ASSERT(!mask || mask->refc.rc_startblock);
|
|
|
|
return xbtree_key_contig(be32_to_cpu(key1->refc.rc_startblock),
|
|
be32_to_cpu(key2->refc.rc_startblock));
|
|
}
|
|
|
|
const struct xfs_btree_ops xfs_rtrefcountbt_ops = {
|
|
.name = "rtrefcount",
|
|
.type = XFS_BTREE_TYPE_INODE,
|
|
.geom_flags = XFS_BTGEO_IROOT_RECORDS,
|
|
|
|
.rec_len = sizeof(struct xfs_refcount_rec),
|
|
.key_len = sizeof(struct xfs_refcount_key),
|
|
.ptr_len = XFS_BTREE_LONG_PTR_LEN,
|
|
|
|
.lru_refs = XFS_REFC_BTREE_REF,
|
|
.statoff = XFS_STATS_CALC_INDEX(xs_rtrefcbt_2),
|
|
|
|
.dup_cursor = xfs_rtrefcountbt_dup_cursor,
|
|
.alloc_block = xfs_btree_alloc_metafile_block,
|
|
.free_block = xfs_btree_free_metafile_block,
|
|
.get_minrecs = xfs_rtrefcountbt_get_minrecs,
|
|
.get_maxrecs = xfs_rtrefcountbt_get_maxrecs,
|
|
.init_key_from_rec = xfs_rtrefcountbt_init_key_from_rec,
|
|
.init_high_key_from_rec = xfs_rtrefcountbt_init_high_key_from_rec,
|
|
.init_rec_from_cur = xfs_rtrefcountbt_init_rec_from_cur,
|
|
.init_ptr_from_cur = xfs_rtrefcountbt_init_ptr_from_cur,
|
|
.key_diff = xfs_rtrefcountbt_key_diff,
|
|
.buf_ops = &xfs_rtrefcountbt_buf_ops,
|
|
.diff_two_keys = xfs_rtrefcountbt_diff_two_keys,
|
|
.keys_inorder = xfs_rtrefcountbt_keys_inorder,
|
|
.recs_inorder = xfs_rtrefcountbt_recs_inorder,
|
|
.keys_contiguous = xfs_rtrefcountbt_keys_contiguous,
|
|
};
|
|
|
|
/* Allocate a new rt refcount btree cursor. */
|
|
struct xfs_btree_cur *
|
|
xfs_rtrefcountbt_init_cursor(
|
|
struct xfs_trans *tp,
|
|
struct xfs_rtgroup *rtg)
|
|
{
|
|
struct xfs_inode *ip = rtg_refcount(rtg);
|
|
struct xfs_mount *mp = rtg_mount(rtg);
|
|
struct xfs_btree_cur *cur;
|
|
|
|
xfs_assert_ilocked(ip, XFS_ILOCK_SHARED | XFS_ILOCK_EXCL);
|
|
|
|
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rtrefcountbt_ops,
|
|
mp->m_rtrefc_maxlevels, xfs_rtrefcountbt_cur_cache);
|
|
|
|
cur->bc_ino.ip = ip;
|
|
cur->bc_refc.nr_ops = 0;
|
|
cur->bc_refc.shape_changes = 0;
|
|
cur->bc_group = xfs_group_hold(rtg_group(rtg));
|
|
cur->bc_nlevels = be16_to_cpu(ip->i_df.if_broot->bb_level) + 1;
|
|
cur->bc_ino.forksize = xfs_inode_fork_size(ip, XFS_DATA_FORK);
|
|
cur->bc_ino.whichfork = XFS_DATA_FORK;
|
|
return cur;
|
|
}
|
|
|
|
/*
|
|
* Install a new rt reverse mapping btree root. Caller is responsible for
|
|
* invalidating and freeing the old btree blocks.
|
|
*/
|
|
void
|
|
xfs_rtrefcountbt_commit_staged_btree(
|
|
struct xfs_btree_cur *cur,
|
|
struct xfs_trans *tp)
|
|
{
|
|
struct xbtree_ifakeroot *ifake = cur->bc_ino.ifake;
|
|
struct xfs_ifork *ifp;
|
|
int flags = XFS_ILOG_CORE | XFS_ILOG_DBROOT;
|
|
|
|
ASSERT(cur->bc_flags & XFS_BTREE_STAGING);
|
|
ASSERT(ifake->if_fork->if_format == XFS_DINODE_FMT_META_BTREE);
|
|
|
|
/*
|
|
* Free any resources hanging off the real fork, then shallow-copy the
|
|
* staging fork's contents into the real fork to transfer everything
|
|
* we just built.
|
|
*/
|
|
ifp = xfs_ifork_ptr(cur->bc_ino.ip, XFS_DATA_FORK);
|
|
xfs_idestroy_fork(ifp);
|
|
memcpy(ifp, ifake->if_fork, sizeof(struct xfs_ifork));
|
|
|
|
cur->bc_ino.ip->i_projid = cur->bc_group->xg_gno;
|
|
xfs_trans_log_inode(tp, cur->bc_ino.ip, flags);
|
|
xfs_btree_commit_ifakeroot(cur, tp, XFS_DATA_FORK);
|
|
}
|
|
|
|
/* Calculate number of records in a realtime refcount btree block. */
|
|
static inline unsigned int
|
|
xfs_rtrefcountbt_block_maxrecs(
|
|
unsigned int blocklen,
|
|
bool leaf)
|
|
{
|
|
|
|
if (leaf)
|
|
return blocklen / sizeof(struct xfs_refcount_rec);
|
|
return blocklen / (sizeof(struct xfs_refcount_key) +
|
|
sizeof(xfs_rtrefcount_ptr_t));
|
|
}
|
|
|
|
/*
|
|
* Calculate number of records in an refcount btree block.
|
|
*/
|
|
unsigned int
|
|
xfs_rtrefcountbt_maxrecs(
|
|
struct xfs_mount *mp,
|
|
unsigned int blocklen,
|
|
bool leaf)
|
|
{
|
|
blocklen -= XFS_RTREFCOUNT_BLOCK_LEN;
|
|
return xfs_rtrefcountbt_block_maxrecs(blocklen, leaf);
|
|
}
|
|
|
|
/* Compute the max possible height for realtime refcount btrees. */
|
|
unsigned int
|
|
xfs_rtrefcountbt_maxlevels_ondisk(void)
|
|
{
|
|
unsigned int minrecs[2];
|
|
unsigned int blocklen;
|
|
|
|
blocklen = XFS_MIN_CRC_BLOCKSIZE - XFS_BTREE_LBLOCK_CRC_LEN;
|
|
|
|
minrecs[0] = xfs_rtrefcountbt_block_maxrecs(blocklen, true) / 2;
|
|
minrecs[1] = xfs_rtrefcountbt_block_maxrecs(blocklen, false) / 2;
|
|
|
|
/* We need at most one record for every block in an rt group. */
|
|
return xfs_btree_compute_maxlevels(minrecs, XFS_MAX_RGBLOCKS);
|
|
}
|
|
|
|
int __init
|
|
xfs_rtrefcountbt_init_cur_cache(void)
|
|
{
|
|
xfs_rtrefcountbt_cur_cache = kmem_cache_create("xfs_rtrefcountbt_cur",
|
|
xfs_btree_cur_sizeof(
|
|
xfs_rtrefcountbt_maxlevels_ondisk()),
|
|
0, 0, NULL);
|
|
|
|
if (!xfs_rtrefcountbt_cur_cache)
|
|
return -ENOMEM;
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
xfs_rtrefcountbt_destroy_cur_cache(void)
|
|
{
|
|
kmem_cache_destroy(xfs_rtrefcountbt_cur_cache);
|
|
xfs_rtrefcountbt_cur_cache = NULL;
|
|
}
|
|
|
|
/* Compute the maximum height of a realtime refcount btree. */
|
|
void
|
|
xfs_rtrefcountbt_compute_maxlevels(
|
|
struct xfs_mount *mp)
|
|
{
|
|
unsigned int d_maxlevels, r_maxlevels;
|
|
|
|
if (!xfs_has_rtreflink(mp)) {
|
|
mp->m_rtrefc_maxlevels = 0;
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* The realtime refcountbt lives on the data device, which means that
|
|
* its maximum height is constrained by the size of the data device and
|
|
* the height required to store one refcount record for each rtextent
|
|
* in an rt group.
|
|
*/
|
|
d_maxlevels = xfs_btree_space_to_height(mp->m_rtrefc_mnr,
|
|
mp->m_sb.sb_dblocks);
|
|
r_maxlevels = xfs_btree_compute_maxlevels(mp->m_rtrefc_mnr,
|
|
mp->m_sb.sb_rgextents);
|
|
|
|
/* Add one level to handle the inode root level. */
|
|
mp->m_rtrefc_maxlevels = min(d_maxlevels, r_maxlevels) + 1;
|
|
}
|