bpf: Handle in-place update for full LPM trie correctly

When an LPM trie is full, in-place updates of existing elements
incorrectly return -ENOSPC.

Fix this by deferring the check of trie->n_entries. For new insertions,
n_entries must not exceed max_entries. However, in-place updates are
allowed even when the trie is full.

Fixes: b95a5c4db0 ("bpf: add a longest prefix match trie map implementation")
Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20241206110622.1161752-5-houtao@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
Hou Tao
2024-12-06 19:06:17 +08:00
committed by Alexei Starovoitov
parent eae6a075e9
commit 532d6b36b2

View File

@@ -310,6 +310,16 @@ static struct lpm_trie_node *lpm_trie_node_alloc(const struct lpm_trie *trie,
return node; return node;
} }
/*
 * Decide whether a brand-new element may be inserted into @trie and, if so,
 * account for it.
 *
 * Returns -ENOENT when @flags is BPF_EXIST (the caller asked to update an
 * existing element only, but this path is taken when an element would be
 * added), -ENOSPC when n_entries has already reached map.max_entries, and 0
 * after incrementing n_entries otherwise. The counter is left untouched on
 * either error.
 */
static int trie_check_add_elem(struct lpm_trie *trie, u64 flags)
{
	int err = 0;

	if (flags == BPF_EXIST)
		err = -ENOENT;
	else if (trie->n_entries == trie->map.max_entries)
		err = -ENOSPC;
	else
		trie->n_entries++;

	return err;
}
/* Called from syscall or from eBPF program */ /* Called from syscall or from eBPF program */
static long trie_update_elem(struct bpf_map *map, static long trie_update_elem(struct bpf_map *map,
void *_key, void *value, u64 flags) void *_key, void *value, u64 flags)
@@ -333,20 +343,12 @@ static long trie_update_elem(struct bpf_map *map,
spin_lock_irqsave(&trie->lock, irq_flags); spin_lock_irqsave(&trie->lock, irq_flags);
/* Allocate and fill a new node */ /* Allocate and fill a new node */
if (trie->n_entries == trie->map.max_entries) {
ret = -ENOSPC;
goto out;
}
new_node = lpm_trie_node_alloc(trie, value); new_node = lpm_trie_node_alloc(trie, value);
if (!new_node) { if (!new_node) {
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
trie->n_entries++;
new_node->prefixlen = key->prefixlen; new_node->prefixlen = key->prefixlen;
RCU_INIT_POINTER(new_node->child[0], NULL); RCU_INIT_POINTER(new_node->child[0], NULL);
RCU_INIT_POINTER(new_node->child[1], NULL); RCU_INIT_POINTER(new_node->child[1], NULL);
@@ -375,10 +377,10 @@ static long trie_update_elem(struct bpf_map *map,
* simply assign the @new_node to that slot and be done. * simply assign the @new_node to that slot and be done.
*/ */
if (!node) { if (!node) {
if (flags == BPF_EXIST) { ret = trie_check_add_elem(trie, flags);
ret = -ENOENT; if (ret)
goto out; goto out;
}
rcu_assign_pointer(*slot, new_node); rcu_assign_pointer(*slot, new_node);
goto out; goto out;
} }
@@ -392,10 +394,10 @@ static long trie_update_elem(struct bpf_map *map,
ret = -EEXIST; ret = -EEXIST;
goto out; goto out;
} }
trie->n_entries--; } else {
} else if (flags == BPF_EXIST) { ret = trie_check_add_elem(trie, flags);
ret = -ENOENT; if (ret)
goto out; goto out;
} }
new_node->child[0] = node->child[0]; new_node->child[0] = node->child[0];
@@ -407,10 +409,9 @@ static long trie_update_elem(struct bpf_map *map,
goto out; goto out;
} }
if (flags == BPF_EXIST) { ret = trie_check_add_elem(trie, flags);
ret = -ENOENT; if (ret)
goto out; goto out;
}
/* If the new node matches the prefix completely, it must be inserted /* If the new node matches the prefix completely, it must be inserted
* as an ancestor. Simply insert it between @node and *@slot. * as an ancestor. Simply insert it between @node and *@slot.
@@ -424,6 +425,7 @@ static long trie_update_elem(struct bpf_map *map,
im_node = lpm_trie_node_alloc(trie, NULL); im_node = lpm_trie_node_alloc(trie, NULL);
if (!im_node) { if (!im_node) {
trie->n_entries--;
ret = -ENOMEM; ret = -ENOMEM;
goto out; goto out;
} }
@@ -445,12 +447,8 @@ static long trie_update_elem(struct bpf_map *map,
rcu_assign_pointer(*slot, im_node); rcu_assign_pointer(*slot, im_node);
out: out:
if (ret) { if (ret)
if (new_node)
trie->n_entries--;
kfree(new_node); kfree(new_node);
}
spin_unlock_irqrestore(&trie->lock, irq_flags); spin_unlock_irqrestore(&trie->lock, irq_flags);
kfree_rcu(free_node, rcu); kfree_rcu(free_node, rcu);