[PATCH 6/8] sched_ext: Require an arena for cid-form schedulers
Emil Tsalapatis
emil at etsalapatis.com
Wed May 20 21:15:26 PDT 2026
On Wed May 20, 2026 at 7:50 PM EDT, Tejun Heo wrote:
> Upcoming patches will let the kernel place arena-resident scratch shared
> with the BPF program (e.g. per-CPU set_cmask cmask) so the BPF side can
> dereference it directly via __arena pointers, replacing the current
> cmask_copy_from_kernel() probe-read loop. That requires each cid-form
> scheduler to expose its arena to the kernel. Kernel-side accesses are
> recovered by the per-arena scratch-page mechanism.
>
> bpf_scx_reg_cid() walks the struct_ops member progs via
> bpf_struct_ops_for_each_prog() and reads each prog's arena via
> bpf_prog_arena(). The verifier enforces one arena per program, so each
> member prog contributes at most one arena. All non-NULL contributions must
> match and at least one member prog must use an arena. The map ref is held on
> scx_sched and dropped on sched destroy. cpu-form schedulers (bpf_scx_reg)
> are unchanged - no arena requirement.
>
> Signed-off-by: Tejun Heo <tj at kernel.org>
> ---
> kernel/sched/ext.c | 56 ++++++++++++++++++++++++++++++++++++-
> kernel/sched/ext_internal.h | 8 ++++++
> 2 files changed, 63 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 9c458552d14f..56f94ac32ba0 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -5003,6 +5003,8 @@ static void scx_sched_free_rcu_work(struct work_struct *work)
>
> rhashtable_free_and_destroy(&sch->dsq_hash, NULL, NULL);
> free_exit_info(sch->exit_info);
> + if (sch->arena_map)
> + bpf_map_put(sch->arena_map);
> kfree(sch);
> }
>
> @@ -6746,6 +6748,7 @@ struct scx_enable_cmd {
> struct sched_ext_ops_cid *ops_cid;
> };
> bool is_cid_type;
> + struct bpf_map *arena_map; /* arena ref to transfer to sch */
> int ret;
> };
>
> @@ -6913,6 +6916,15 @@ static struct scx_sched *scx_alloc_and_add_sched(struct scx_enable_cmd *cmd,
> return ERR_PTR(ret);
> }
> #endif /* CONFIG_EXT_SUB_SCHED */
> +
> + /*
> + * Consume the arena_map ref bpf_scx_reg_cid() took. Defer to here so
> + * earlier failure paths leave cmd->arena_map set and bpf_scx_reg_cid
> + * drops the ref. After this point, sch owns the ref and any cleanup
> + * runs through scx_sched_free_rcu_work() which puts it.
> + */
> + sch->arena_map = cmd->arena_map;
> + cmd->arena_map = NULL;
> return sch;
>
> #ifdef CONFIG_EXT_SUB_SCHED
> @@ -7898,11 +7910,53 @@ static int bpf_scx_reg(void *kdata, struct bpf_link *link)
> return scx_enable(&cmd, link);
> }
>
> +struct scx_arena_scan {
> + struct bpf_map *arena;
> + int err;
Can we skip the int err here...
> +};
> +
> +/*
> + * The verifier enforces one arena per BPF program, so each struct_ops
> + * member prog contributes at most one arena via bpf_prog_arena().
> + * Require all non-NULL contributions to match.
> + */
> +static int scx_arena_scan_prog(struct bpf_prog *prog, void *data)
> +{
> + struct scx_arena_scan *s = data;
> + struct bpf_map *arena = bpf_prog_arena(prog);
> +
> + if (!arena)
> + return 0;
> + if (s->arena && s->arena != arena) {
> + s->err = -EINVAL;
...and just directly return -EINVAL here? bpf_struct_ops_for_each_prog()
breaks out of its loop when we return non-zero, so do we need the extra
scx_arena_scan struct?
> + return 1;
> + }
> + s->arena = arena;
> + return 0;
> +}
> +
> static int bpf_scx_reg_cid(void *kdata, struct bpf_link *link)
> {
> struct scx_enable_cmd cmd = { .ops_cid = kdata, .is_cid_type = true };
> + struct scx_arena_scan scan = {};
> + int ret;
>
> - return scx_enable(&cmd, link);
> + bpf_struct_ops_for_each_prog(kdata, scx_arena_scan_prog, &scan);
> + if (scan.err) {
> + pr_err("sched_ext: cid-form scheduler uses multiple arena maps\n");
> + return scan.err;
> + }
> + if (!scan.arena) {
> + pr_err("sched_ext: cid-form scheduler must use a BPF arena map\n");
> + return -EINVAL;
> + }
> +
> + bpf_map_inc(scan.arena);
> + cmd.arena_map = scan.arena;
> + ret = scx_enable(&cmd, link);
> + if (cmd.arena_map) /* not consumed by scx_alloc_and_add_sched() */
> + bpf_map_put(cmd.arena_map);
> + return ret;
> }
>
> static void bpf_scx_unreg(void *kdata, struct bpf_link *link)
> diff --git a/kernel/sched/ext_internal.h b/kernel/sched/ext_internal.h
> index 7258aea94b9f..d40cfd29ddaa 100644
> --- a/kernel/sched/ext_internal.h
> +++ b/kernel/sched/ext_internal.h
> @@ -1111,6 +1111,14 @@ struct scx_sched {
> struct sched_ext_ops_cid ops_cid;
> };
> bool is_cid_type; /* true if registered via bpf_sched_ext_ops_cid */
> +
> + /*
> + * Arena map auto-discovered from member progs at struct_ops attach.
> + * cid-form schedulers must use exactly one arena across all member
> + * progs. NULL on cpu-form.
> + */
> + struct bpf_map *arena_map;
> +
> DECLARE_BITMAP(has_op, SCX_OPI_END);
>
> /*
More information about the linux-arm-kernel
mailing list