Add sched_ext_ops operations to init/exit cgroups, and track task migrations
and config changes. A BPF scheduler may implement no cgroup features, or
only a subset of them. The implemented features can be indicated using
%SCX_OPS_HAS_CGROUP_* flags. If cgroup configuration makes use of features
that are not implemented, a warning is triggered.
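As a minimal sketch of what this looks like from the BPF side, a scheduler
advertising cgroup weight support might declare the flag alongside the
matching callback (the "mysched" names here are hypothetical; the flag
follows the %SCX_OPS_HAS_CGROUP_* naming above):

void BPF_STRUCT_OPS(mysched_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{
	/* record @cgrp's new @weight, e.g. in a BPF map */
}

SEC(".struct_ops.link")
struct sched_ext_ops mysched_ops = {
	.cgroup_set_weight	= mysched_cgroup_set_weight,
	.flags			= SCX_OPS_HAS_CGROUP_WEIGHT,
	.name			= "mysched",
};

With the flag set, cpu.weight changes are forwarded to
ops.cgroup_set_weight(); without it, configuring cpu.weight under this
scheduler triggers the warning described above.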
While a BPF scheduler is being enabled or disabled, relevant cgroup
operations are locked out using scx_cgroup_rwsem. This avoids situations
like task prep taking place while the task is being moved across cgroups,
making things easier for BPF schedulers.
v7: - cgroup interface file visibility toggling is dropped in favor of just
warning messages. Dynamically changing interface visibility caused more
confusion than it helped.
v6: - Updated to reflect the removal of SCX_KF_SLEEPABLE.
- Updated to use CONFIG_GROUP_SCHED_WEIGHT and fixes for
!CONFIG_FAIR_GROUP_SCHED && CONFIG_EXT_GROUP_SCHED.
v5: - Flipped the locking order between scx_cgroup_rwsem and
cpus_read_lock() to avoid locking order conflict w/ cpuset. Better
documentation around locking.
- sched_move_task() takes an early exit if the source and destination
are identical. This triggered the warning in scx_cgroup_can_attach()
as it left p->scx.cgrp_moving_from uncleared. Updated the cgroup
migration path so that ops.cgroup_prep_move() is skipped for identity
migrations so that its invocations always match ops.cgroup_move()
one-to-one.
v4: - Example schedulers moved into their own patches.
- Fix build failure when !CONFIG_CGROUP_SCHED, reported by Andrea Righi.
v3: - Make scx_example_pair switch all tasks by default.
- Convert to BPF inline iterators.
- scx_bpf_task_cgroup() is added to determine a task's current cgroup from
the CPU controller's POV. This allows BPF schedulers to accurately track
CPU cgroup membership (see the sketch after this changelog).
- scx_example_flatcg added. This demonstrates a flattened-hierarchy
implementation of CPU cgroup control and shows significant performance
improvement when cgroups nested multiple levels deep are competing.
v2: - Build fixes for different CONFIG combinations.
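As a sketch of the scx_bpf_task_cgroup() usage pattern referenced in the v3
notes above (modeled on the flatcg example; the enqueue callback is
hypothetical and assumes a per-cgroup DSQ keyed by the cgroup ID was created
in ops.cgroup_init()):

void BPF_STRUCT_OPS(mysched_enqueue, struct task_struct *p, u64 enq_flags)
{
	/* cgroup from the CPU controller's POV; must be released */
	struct cgroup *cgrp = scx_bpf_task_cgroup(p);
	u64 cgid = cgrp->kn->id;

	bpf_cgroup_release(cgrp);
	scx_bpf_dispatch(p, cgid, SCX_SLICE_DFL, enq_flags);
}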
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: David Vernet <dvernet@meta.com>
Acked-by: Josh Don <joshdon@google.com>
Acked-by: Hao Luo <haoluo@google.com>
Acked-by: Barret Rhoden <brho@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Andrea Righi <andrea.righi@canonical.com>
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * A scheduler with every callback defined.
 *
 * This scheduler defines every callback.
 *
 * Copyright (c) 2024 Meta Platforms, Inc. and affiliates.
 * Copyright (c) 2024 David Vernet <dvernet@meta.com>
 */

#include <scx/common.bpf.h>

char _license[] SEC("license") = "GPL";

s32 BPF_STRUCT_OPS(maximal_select_cpu, struct task_struct *p, s32 prev_cpu,
		   u64 wake_flags)
{
	return prev_cpu;
}

void BPF_STRUCT_OPS(maximal_enqueue, struct task_struct *p, u64 enq_flags)
{
	scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags);
}

void BPF_STRUCT_OPS(maximal_dequeue, struct task_struct *p, u64 deq_flags)
{}

void BPF_STRUCT_OPS(maximal_dispatch, s32 cpu, struct task_struct *prev)
{
	scx_bpf_consume(SCX_DSQ_GLOBAL);
}

void BPF_STRUCT_OPS(maximal_runnable, struct task_struct *p, u64 enq_flags)
{}

void BPF_STRUCT_OPS(maximal_running, struct task_struct *p)
{}

void BPF_STRUCT_OPS(maximal_stopping, struct task_struct *p, bool runnable)
{}

void BPF_STRUCT_OPS(maximal_quiescent, struct task_struct *p, u64 deq_flags)
{}

bool BPF_STRUCT_OPS(maximal_yield, struct task_struct *from,
		    struct task_struct *to)
{
	return false;
}

bool BPF_STRUCT_OPS(maximal_core_sched_before, struct task_struct *a,
		    struct task_struct *b)
{
	return false;
}

void BPF_STRUCT_OPS(maximal_set_weight, struct task_struct *p, u32 weight)
{}

void BPF_STRUCT_OPS(maximal_set_cpumask, struct task_struct *p,
		    const struct cpumask *cpumask)
{}

void BPF_STRUCT_OPS(maximal_update_idle, s32 cpu, bool idle)
{}

void BPF_STRUCT_OPS(maximal_cpu_acquire, s32 cpu,
		    struct scx_cpu_acquire_args *args)
{}

void BPF_STRUCT_OPS(maximal_cpu_release, s32 cpu,
		    struct scx_cpu_release_args *args)
{}

void BPF_STRUCT_OPS(maximal_cpu_online, s32 cpu)
{}

void BPF_STRUCT_OPS(maximal_cpu_offline, s32 cpu)
{}

s32 BPF_STRUCT_OPS(maximal_init_task, struct task_struct *p,
		   struct scx_init_task_args *args)
{
	return 0;
}

void BPF_STRUCT_OPS(maximal_enable, struct task_struct *p)
{}

void BPF_STRUCT_OPS(maximal_exit_task, struct task_struct *p,
		    struct scx_exit_task_args *args)
{}

void BPF_STRUCT_OPS(maximal_disable, struct task_struct *p)
{}

s32 BPF_STRUCT_OPS(maximal_cgroup_init, struct cgroup *cgrp,
		   struct scx_cgroup_init_args *args)
{
	return 0;
}

void BPF_STRUCT_OPS(maximal_cgroup_exit, struct cgroup *cgrp)
{}

s32 BPF_STRUCT_OPS(maximal_cgroup_prep_move, struct task_struct *p,
		   struct cgroup *from, struct cgroup *to)
{
	return 0;
}

void BPF_STRUCT_OPS(maximal_cgroup_move, struct task_struct *p,
		    struct cgroup *from, struct cgroup *to)
{}

void BPF_STRUCT_OPS(maximal_cgroup_cancel_move, struct task_struct *p,
		    struct cgroup *from, struct cgroup *to)
{}

void BPF_STRUCT_OPS(maximal_cgroup_set_weight, struct cgroup *cgrp, u32 weight)
{}

s32 BPF_STRUCT_OPS_SLEEPABLE(maximal_init)
{
	return 0;
}

void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info)
{}

SEC(".struct_ops.link")
struct sched_ext_ops maximal_ops = {
	.select_cpu		= maximal_select_cpu,
	.enqueue		= maximal_enqueue,
	.dequeue		= maximal_dequeue,
	.dispatch		= maximal_dispatch,
	.runnable		= maximal_runnable,
	.running		= maximal_running,
	.stopping		= maximal_stopping,
	.quiescent		= maximal_quiescent,
	.yield			= maximal_yield,
	.core_sched_before	= maximal_core_sched_before,
	.set_weight		= maximal_set_weight,
	.set_cpumask		= maximal_set_cpumask,
	.update_idle		= maximal_update_idle,
	.cpu_acquire		= maximal_cpu_acquire,
	.cpu_release		= maximal_cpu_release,
	.cpu_online		= maximal_cpu_online,
	.cpu_offline		= maximal_cpu_offline,
	.init_task		= maximal_init_task,
	.enable			= maximal_enable,
	.exit_task		= maximal_exit_task,
	.disable		= maximal_disable,
	.cgroup_init		= maximal_cgroup_init,
	.cgroup_exit		= maximal_cgroup_exit,
	.cgroup_prep_move	= maximal_cgroup_prep_move,
	.cgroup_move		= maximal_cgroup_move,
	.cgroup_cancel_move	= maximal_cgroup_cancel_move,
	.cgroup_set_weight	= maximal_cgroup_set_weight,
	.init			= maximal_init,
	.exit			= maximal_exit,
	.name			= "maximal",
};
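For reference, a struct_ops map declared with SEC(".struct_ops.link") as
above is enabled from userspace by attaching it as a BPF link. A minimal
sketch using libbpf (the skeleton header and generated names assume the
usual bpftool-generated skeleton for this file):

#include <bpf/libbpf.h>
#include "maximal.bpf.skel.h"	/* assumed: generated by bpftool gen skeleton */

int main(void)
{
	struct maximal *skel = maximal__open_and_load();
	struct bpf_link *link;

	if (!skel)
		return 1;
	/* attaching the struct_ops map switches scheduling to the BPF scheduler */
	link = bpf_map__attach_struct_ops(skel->maps.maximal_ops);
	if (!link)
		return 1;
	/* ... the scheduler stays active until the link is destroyed ... */
	bpf_link__destroy(link);
	maximal__destroy(skel);
	return 0;
}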