[openwrt/openwrt] netifd: rewrite packet steering script

LEDE Commits lede-commits at lists.infradead.org
Sun Apr 14 07:48:28 PDT 2024


nbd pushed a commit to openwrt/openwrt.git, branch main:
https://git.openwrt.org/a205a5734eda4604a19b6cb0f65909ef69730699

commit a205a5734eda4604a19b6cb0f65909ef69730699
Author: Felix Fietkau <nbd at nbd.name>
AuthorDate: Fri Apr 12 21:51:50 2024 +0200

    netifd: rewrite packet steering script
    
    The new script uses a different strategy compared to the previous one.
    Instead of trying to split flows by hash and spread them to all CPUs,
    use RPS to redirect packets to a single core only.
    Try to spread NAPI thread and RPS target CPUs across available CPUs
    and try to ensure that the NAPI thread is on a different CPU than the
    RPS target. This significantly reduces cycles wasted on the scheduler.
    
    Signed-off-by: Felix Fietkau <nbd at nbd.name>
---
 package/network/config/netifd/Makefile             |   2 +-
 .../config/netifd/files/etc/init.d/packet_steering |   7 +-
 .../files/usr/libexec/network/packet-steering.sh   |  70 --------
 .../files/usr/libexec/network/packet-steering.uc   | 200 +++++++++++++++++++++
 4 files changed, 207 insertions(+), 72 deletions(-)

diff --git a/package/network/config/netifd/Makefile b/package/network/config/netifd/Makefile
index 35b5c0b277..d80c2eeed6 100644
--- a/package/network/config/netifd/Makefile
+++ b/package/network/config/netifd/Makefile
@@ -21,7 +21,7 @@ include $(INCLUDE_DIR)/cmake.mk
 define Package/netifd
   SECTION:=base
   CATEGORY:=Base system
-  DEPENDS:=+libuci +libnl-tiny +libubus +ubus +ubusd +jshn +libubox +libudebug
+  DEPENDS:=+libuci +libnl-tiny +libubus +ubus +ubusd +jshn +libubox +libudebug +ucode +ucode-mod-fs
   TITLE:=OpenWrt Network Interface Configuration Daemon
 endef
 
diff --git a/package/network/config/netifd/files/etc/init.d/packet_steering b/package/network/config/netifd/files/etc/init.d/packet_steering
index 9d8f791e23..d6f6afc2e1 100755
--- a/package/network/config/netifd/files/etc/init.d/packet_steering
+++ b/package/network/config/netifd/files/etc/init.d/packet_steering
@@ -14,5 +14,10 @@ service_triggers() {
 }
 
 reload_service() {
-	/usr/libexec/network/packet-steering.sh
+	packet_steering="$(uci get "network.@globals[0].packet_steering")"
+	if [ -e "/usr/libexec/platform/packet-steering.sh" ]; then
+		/usr/libexec/platform/packet-steering.sh "$packet_steering"
+	else
+		/usr/libexec/network/packet-steering.uc "$packet_steering"
+	fi
 }
diff --git a/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh b/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh
deleted file mode 100755
index 799c080805..0000000000
--- a/package/network/config/netifd/files/usr/libexec/network/packet-steering.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/bin/sh
-NPROCS="$(grep -c "^processor.*:" /proc/cpuinfo)"
-[ "$NPROCS" -gt 1 ] || exit
-
-PROC_MASK="$(( (1 << $NPROCS) - 1 ))"
-
-find_irq_cpu() {
-	local dev="$1"
-	local match="$(grep -m 1 "$dev\$" /proc/interrupts)"
-	local cpu=0
-
-	[ -n "$match" ] && {
-		set -- $match
-		shift
-		for cur in $(seq 1 $NPROCS); do
-			[ "$1" -gt 0 ] && {
-				cpu=$(($cur - 1))
-				break
-			}
-			shift
-		done
-	}
-
-	echo "$cpu"
-}
-
-set_hex_val() {
-	local file="$1"
-	local val="$2"
-	val="$(printf %x "$val")"
-	[ -n "$DEBUG" ] && echo "$file = $val"
-	echo "$val" > "$file"
-}
-
-packet_steering="$(uci get "network.@globals[0].packet_steering")"
-[ "$packet_steering" != 1 ] && exit 0
-
-exec 512>/var/lock/smp_tune.lock
-flock 512 || exit 1
-
-[ -e "/usr/libexec/platform/packet-steering.sh" ] && {
-	/usr/libexec/platform/packet-steering.sh
-	exit 0
-}
-
-for dev in /sys/class/net/*; do
-	[ -d "$dev" ] || continue
-
-	# ignore virtual interfaces
-	[ -n "$(ls "${dev}/" | grep '^lower_')" ] && continue
-	[ -d "${dev}/device" ] || continue
-
-	device="$(readlink "${dev}/device")"
-	device="$(basename "$device")"
-	irq_cpu="$(find_irq_cpu "$device")"
-	irq_cpu_mask="$((1 << $irq_cpu))"
-
-	for q in ${dev}/queues/tx-*; do
-		set_hex_val "$q/xps_cpus" "$PROC_MASK"
-	done
-
-	# ignore dsa slave ports for RPS
-	subsys="$(readlink "${dev}/device/subsystem")"
-	subsys="$(basename "$subsys")"
-	[ "$subsys" = "mdio_bus" ] && continue
-
-	for q in ${dev}/queues/rx-*; do
-		set_hex_val "$q/rps_cpus" "$PROC_MASK"
-	done
-done
diff --git a/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc b/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc
new file mode 100755
index 0000000000..f146a96e41
--- /dev/null
+++ b/package/network/config/netifd/files/usr/libexec/network/packet-steering.uc
@@ -0,0 +1,200 @@
+#!/usr/bin/env ucode
+'use strict';
+import { glob, basename, dirname, readlink, readfile, realpath, writefile, error, open } from "fs";
+
+let napi_weight = 1.0; // load charged to the CPU picked for a device's NAPI tasks
+let cpu_thread_weight = 0.75; // share of any charged load that is also added to CPUs with the same core_id
+let rx_weight = 0.75; // load charged to the CPU picked as a device's RPS target
+let eth_bias = 2.0; // extra load put on ethernet NAPI CPUs before WLAN devices are assigned
+let debug = 0, do_nothing = 0; // counters bumped by the -d and -n arguments
+let disable; // set when the argument "0" is passed
+let cpus; // filled further down from /sys/bus/cpu/devices
+
+for (let arg in ARGV) { // each arg is compared as an argument string
+	switch (arg) {
+	case "-d":
+		debug++; // log the taskset/echo actions; a second -d also dumps the final state
+		break;
+	case "-n":
+		do_nothing++; // log the actions but do not apply them
+		break;
+	case '0':
+		disable = true; // undo steering: tasks may use all CPUs, rps_cpus is written as 0
+		break;
+	}
+}
+
+function task_name(pid) // returns the trimmed text after the first tab on line one of /proc/<pid>/status, or "" if it cannot be read
+{
+	let stat = open(`/proc/${pid}/status`, "r"); // null when the task exited after /proc was listed
+	let line = stat ? stat.read("line") : null; // never call read() on a missing handle
+	if (stat) stat.close();
+	return trim(split(line ?? "", "\t", 2)[1] ?? ""); // no line or no tab yields "", which matches no device
+}
+
+function set_task_cpu(pid, cpu) { // set the CPU affinity of task <pid> to <cpu> by running taskset
+	if (disable)
+		cpu = join(",", map(cpus, (cpu) => cpu.id)); // when disabled, allow every known CPU id instead
+	if (debug || do_nothing)
+		warn(`taskset -p -c ${cpu} ${task_name(pid)}\n`); // the log line shows the task name in place of the pid
+	if (!do_nothing)
+		system(`taskset -p -c ${cpu} ${pid}`);
+}
+
+function set_netdev_cpu(dev, cpu) { // write a single-CPU mask to the rps_cpus file of every rx queue of netdev <dev>
+	let queues = glob(`/sys/class/net/${dev}/queues/rx-*/rps_cpus`);
+	let val = sprintf("%x", (1 << int(cpu))); // hex mask with only bit <cpu> set
+	if (disable)
+		val = 0; // a zero mask is written when disabled
+	for (let queue in queues) {
+		if (debug || do_nothing)
+			warn(`echo ${val} > ${queue}\n`); // log the equivalent shell command
+		if (!do_nothing)
+			writefile(queue, `${val}`);
+	}
+}
+
+function task_device_match(name, device) // true when the kernel task called <name> belongs to the physical device record <device>
+{
+	let napi_match = match(name, /napi\/([^-]+)-\d+/); // capture the whole name before "-<digits>"; the quantifier used to sit inside the class, so only one-character names matched
+	if (!napi_match)
+		napi_match = match(name, /mt76-tx (phy\d+)/); // otherwise try the mt76 tx task name, capturing "phyN"
+	if (napi_match &&
+	    (index(device.phy, napi_match[1]) >= 0 ||
+	     index(device.netdev, napi_match[1]) >= 0)) // captured name must be one of the device's phy or netdev names
+		return true;
+
+	if (device.driver == "mtk_soc_eth" && match(name, /napi\/mtk_eth-/)) // mtk_soc_eth tasks are matched by driver, not by netdev name
+		return true;
+
+	return false;
+}
+
+cpus = map(glob("/sys/bus/cpu/devices/*"), (path) => { // one record per CPU device directory
+	return {
+		id: int(match(path, /.*cpu(\d+)/)[1]), // number taken from the "cpuN" part of the path
+		core: int(trim(readfile(`${path}/topology/core_id`))), // CPUs with equal core_id are treated as siblings
+		load: 0.0, // accumulated weight, used to pick the least loaded CPU
+	};
+});
+
+cpus = slice(cpus, 0, 64); // keep at most 64 entries — presumably so the RPS bit mask fits in 64 bits; confirm
+if (length(cpus) < 2)
+	exit(0); // nothing to spread with fewer than two CPUs
+
+function cpu_add_weight(cpu_id, weight) // charge <weight> to the CPU whose id is <cpu_id> and a reduced share to CPUs with the same core_id
+{
+	let cpu = filter(cpus, (c) => c.id == cpu_id)[0]; // look up by id: glob order is lexical (cpu10 before cpu2), so array position is not the CPU id
+	cpu.load += weight;
+	for (let sibling in cpus) {
+		if (sibling == cpu || sibling.core != cpu.core)
+			continue; // skip the CPU itself and CPUs on other cores
+		sibling.load += weight * cpu_thread_weight;
+	}
+}
+
+function get_next_cpu(weight, prev_cpu) // returns the id of the least loaded CPU (the runner-up if that is prev_cpu) after charging <weight> to it
+{
+	if (disable)
+		return 0; // no balancing when disabled
+
+	let sort_cpus = sort(slice(cpus), (a, b) => a.load - b.load); // lowest load first; NOTE(review): differences are fractional — confirm sort() does not truncate them to 0
+	let idx = 0;
+
+	if (prev_cpu != null && sort_cpus[idx].id == prev_cpu)
+		idx++; // take the next entry so the result differs from prev_cpu
+
+	let cpu = sort_cpus[idx].id;
+	cpu_add_weight(cpu, weight);
+	return cpu;
+}
+
+let phys_devs = {}; // resolved device path -> record describing that physical device
+let netdev_phys = {}; // netdev name -> the physical device record it belongs to
+let netdevs = map(glob("/sys/class/net/*"), (dev) => basename(dev));
+
+for (let dev in netdevs) {
+	let pdev_path = realpath(`/sys/class/net/${dev}/device`);
+	if (!pdev_path)
+		continue; // no resolvable "device" entry
+
+	if (length(glob(`/sys/class/net/${dev}/lower_*`)) > 0)
+		continue; // skip netdevs that have lower_* entries
+
+	let pdev = phys_devs[pdev_path];
+	if (!pdev) { // first netdev seen for this device path: create its record
+		pdev = phys_devs[pdev_path] = {
+			path: pdev_path,
+			driver: basename(readlink(`${pdev_path}/driver`)),
+			netdev: [], // netdev names sharing this device
+			phy: [], // "phyN" names built from phy80211/index
+			tasks: [], // pids of kernel tasks matched to this device
+		};
+	}
+
+	let phyidx = trim(readfile(`/sys/class/net/${dev}/phy80211/index`));
+	if (phyidx != null) { // presumably null when the netdev has no phy80211 entry — confirm
+		let phy = `phy${phyidx}`;
+		if (index(pdev.phy, phy) < 0)
+			push(pdev.phy, phy); // record each phy name once
+	}
+
+	push(pdev.netdev, dev);
+	netdev_phys[dev] = pdev;
+}
+
+for (let path in glob("/proc/*/exe")) {
+	readlink(path);
+	if (error() != "No such file or directory")
+		continue; // keep only tasks whose exe link fails this way — presumably kernel threads; confirm
+
+	let pid = basename(dirname(path));
+	let name = task_name(pid);
+	for (let devname in phys_devs) { // devname is a key of phys_devs
+		let dev = phys_devs[devname];
+		if (!task_device_match(name, dev))
+			continue;
+
+		push(dev.tasks, pid);
+		break; // a task is attached to the first matching device only
+	}
+}
+
+function assign_dev_cpu(dev) { // choose and apply the NAPI task CPU and the RPS target CPU for one device record
+	if (length(dev.tasks) > 0) {
+		let cpu = dev.napi_cpu = get_next_cpu(napi_weight); // remembered on the record for the RPS choice and the later bias pass
+		for (let task in dev.tasks)
+			set_task_cpu(task, cpu);
+	}
+
+	if (length(dev.netdev) > 0) {
+		let cpu = dev.rx_cpu = get_next_cpu(rx_weight, dev.napi_cpu); // passing napi_cpu steers the RPS target away from the NAPI CPU
+		for (let netdev in dev.netdev)
+			set_netdev_cpu(netdev, cpu);
+	}
+}
+
+// Assign ethernet devices first
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (!length(dev.phy)) // no phy names recorded, so not a WLAN device
+		assign_dev_cpu(dev);
+}
+
+// Add bias to avoid assigning other tasks to CPUs with ethernet NAPI
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (!length(dev.tasks) || dev.napi_cpu == null)
+		continue; // only devices that were given a NAPI CPU above add bias
+	cpu_add_weight(dev.napi_cpu, eth_bias);
+}
+
+// Assign WLAN devices
+for (let devname in phys_devs) {
+	let dev = phys_devs[devname];
+	if (length(dev.phy) > 0)
+		assign_dev_cpu(dev);
+}
+
+if (debug > 1)
+	warn(sprintf("devices: %.J\ncpus: %.J\n", phys_devs, cpus)); // dump the final state when -d is given at least twice




More information about the lede-commits mailing list