[PATCH bpf-next 4/5] selftests/bpf: Add benchmark for bpf_csum_diff() helper

Puranjay Mohan puranjay at kernel.org
Mon Oct 21 05:21:11 PDT 2024


Add a microbenchmark for bpf_csum_diff() helper. This benchmark works by
filling a 4KB buffer with random data and calculating the internet
checksum on different parts of this buffer using bpf_csum_diff().

Example run using ./benchs/run_bench_csum_diff.sh on x86_64:

[bpf]$ ./benchs/run_bench_csum_diff.sh
4                    2.296 ± 0.066M/s (drops 0.000 ± 0.000M/s)
8                    2.320 ± 0.003M/s (drops 0.000 ± 0.000M/s)
16                   2.315 ± 0.001M/s (drops 0.000 ± 0.000M/s)
20                   2.318 ± 0.001M/s (drops 0.000 ± 0.000M/s)
32                   2.308 ± 0.003M/s (drops 0.000 ± 0.000M/s)
40                   2.300 ± 0.029M/s (drops 0.000 ± 0.000M/s)
64                   2.286 ± 0.001M/s (drops 0.000 ± 0.000M/s)
128                  2.250 ± 0.001M/s (drops 0.000 ± 0.000M/s)
256                  2.173 ± 0.001M/s (drops 0.000 ± 0.000M/s)
512                  2.023 ± 0.055M/s (drops 0.000 ± 0.000M/s)

Signed-off-by: Puranjay Mohan <puranjay at kernel.org>
---
 tools/testing/selftests/bpf/Makefile          |   2 +
 tools/testing/selftests/bpf/bench.c           |   4 +
 .../selftests/bpf/benchs/bench_csum_diff.c    | 164 ++++++++++++++++++
 .../bpf/benchs/run_bench_csum_diff.sh         |  10 ++
 .../selftests/bpf/progs/csum_diff_bench.c     |  25 +++
 5 files changed, 205 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_csum_diff.c
 create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_csum_diff.sh
 create mode 100644 tools/testing/selftests/bpf/progs/csum_diff_bench.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 28a76baa854d3..a0d86dd453e16 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -809,6 +809,7 @@ $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
 $(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
 $(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
 $(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
+$(OUTPUT)/bench_csum_diff.o: $(OUTPUT)/csum_diff_bench.skel.h
 $(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
 $(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
@@ -829,6 +830,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 		 $(OUTPUT)/bench_bloom_filter_map.o \
 		 $(OUTPUT)/bench_bpf_loop.o \
 		 $(OUTPUT)/bench_strncmp.o \
+		 $(OUTPUT)/bench_csum_diff.o \
 		 $(OUTPUT)/bench_bpf_hashmap_full_update.o \
 		 $(OUTPUT)/bench_local_storage.o \
 		 $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1bd403a5ef7b3..29bd6f4498ebc 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -278,6 +278,7 @@ extern struct argp bench_bpf_loop_argp;
 extern struct argp bench_local_storage_argp;
 extern struct argp bench_local_storage_rcu_tasks_trace_argp;
 extern struct argp bench_strncmp_argp;
+extern struct argp bench_csum_diff_argp;
 extern struct argp bench_hashmap_lookup_argp;
 extern struct argp bench_local_storage_create_argp;
 extern struct argp bench_htab_mem_argp;
@@ -290,6 +291,7 @@ static const struct argp_child bench_parsers[] = {
 	{ &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
 	{ &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
 	{ &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+	{ &bench_csum_diff_argp, 0, "bpf_csum_diff helper benchmark", 0 },
 	{ &bench_local_storage_rcu_tasks_trace_argp, 0,
 		"local_storage RCU Tasks Trace slowdown benchmark", 0 },
 	{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
@@ -539,6 +541,7 @@ extern const struct bench bench_hashmap_with_bloom;
 extern const struct bench bench_bpf_loop;
 extern const struct bench bench_strncmp_no_helper;
 extern const struct bench bench_strncmp_helper;
+extern const struct bench bench_csum_diff;
 extern const struct bench bench_bpf_hashmap_full_update;
 extern const struct bench bench_local_storage_cache_seq_get;
 extern const struct bench bench_local_storage_cache_interleaved_get;
@@ -599,6 +602,7 @@ static const struct bench *benchs[] = {
 	&bench_bpf_loop,
 	&bench_strncmp_no_helper,
 	&bench_strncmp_helper,
+	&bench_csum_diff,
 	&bench_bpf_hashmap_full_update,
 	&bench_local_storage_cache_seq_get,
 	&bench_local_storage_cache_interleaved_get,
diff --git a/tools/testing/selftests/bpf/benchs/bench_csum_diff.c b/tools/testing/selftests/bpf/benchs/bench_csum_diff.c
new file mode 100644
index 0000000000000..2c30c8b54d9bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_csum_diff.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates */
+#include <argp.h>
+#include "bench.h"
+#include "csum_diff_bench.skel.h"
+
+static struct csum_diff_ctx {
+	struct csum_diff_bench *skel;
+	int pfd;
+} ctx;
+
+static struct csum_diff_args {
+	u32 buff_len;
+} args = {
+	.buff_len = 32,
+};
+
+enum {
+	ARG_BUFF_LEN = 5000,
+};
+
+static const struct argp_option opts[] = {
+	{ "buff-len", ARG_BUFF_LEN, "BUFF_LEN", 0,
+	  "Set the length of the buffer" },
+	{},
+};
+
+static error_t csum_diff_parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_BUFF_LEN:
+		args.buff_len = strtoul(arg, NULL, 10);
+		if (!args.buff_len ||
+		    args.buff_len >= sizeof(ctx.skel->rodata->buff)) {
+			fprintf(stderr, "Invalid buff len (limit %zu)\n",
+				sizeof(ctx.skel->rodata->buff));
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+const struct argp bench_csum_diff_argp = {
+	.options = opts,
+	.parser = csum_diff_parse_arg,
+};
+
+static void csum_diff_validate(void)
+{
+	if (env.consumer_cnt != 0) {
+		fprintf(stderr, "csum_diff benchmark doesn't support consumer!\n");
+		exit(1);
+	}
+}
+
+static void csum_diff_setup(void)
+{
+	int err;
+	char *buff;
+	size_t i, sz;
+
+	sz = sizeof(ctx.skel->rodata->buff);
+
+	setup_libbpf();
+
+	ctx.skel = csum_diff_bench__open();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	srandom(time(NULL));
+	buff = ctx.skel->rodata->buff;
+
+	/*
+	 * Set first 8 bytes of buffer to 0xdeadbeefdeadbeef, this is later used to verify the
+	 * correctness of the helper by comparing the checksum result for 0xdeadbeefdeadbeef that
+	 * should be 0x3b3b
+	 */
+
+	*(u64 *)buff = 0xdeadbeefdeadbeef;
+
+	for (i = 8; i < sz; i++)
+		buff[i] = '1' + random() % 9;
+
+	ctx.skel->rodata->buff_len = args.buff_len;
+
+	err = csum_diff_bench__load(ctx.skel);
+	if (err) {
+		fprintf(stderr, "failed to load skeleton\n");
+		csum_diff_bench__destroy(ctx.skel);
+		exit(1);
+	}
+}
+
+static void csum_diff_helper_setup(void)
+{
+	u8 tmp_out[64 << 2] = {};
+	u8 tmp_in[64] = {};
+	int err, saved_errno;
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = tmp_in,
+		.data_size_in = sizeof(tmp_in),
+		.data_out = tmp_out,
+		.data_size_out = sizeof(tmp_out),
+		.repeat = 1,
+	);
+	csum_diff_setup();
+	ctx.pfd = bpf_program__fd(ctx.skel->progs.compute_checksum);
+
+	err = bpf_prog_test_run_opts(ctx.pfd, &topts);
+	saved_errno = errno;
+
+	if (err) {
+		fprintf(stderr, "failed to run setup prog: err %d, result %d, serror %d\n",
+			err, ctx.skel->bss->result, saved_errno);
+		csum_diff_bench__destroy(ctx.skel);
+		exit(1);
+	}
+
+	/* Sanity check for correctness of helper */
+	if (args.buff_len == 8 && ctx.skel->bss->result != 0x3b3b) {
+		fprintf(stderr, "csum_diff helper broken: buff: %lx, result: %x, expected: %x\n",
+			*(u64 *)ctx.skel->rodata->buff, ctx.skel->bss->result, 0x3b3b);
+	}
+}
+
+static void *csum_diff_producer(void *unused)
+{
+	u8 tmp_out[64 << 2] = {};
+	u8 tmp_in[64] = {};
+
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = tmp_in,
+		.data_size_in = sizeof(tmp_in),
+		.data_out = tmp_out,
+		.data_size_out = sizeof(tmp_out),
+		.repeat = 64,
+	);
+	while (true)
+		(void)bpf_prog_test_run_opts(ctx.pfd, &topts);
+	return NULL;
+}
+
+static void csum_diff_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+const struct bench bench_csum_diff = {
+	.name = "csum-diff-helper",
+	.argp = &bench_csum_diff_argp,
+	.validate = csum_diff_validate,
+	.setup = csum_diff_helper_setup,
+	.producer_thread = csum_diff_producer,
+	.measure = csum_diff_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_csum_diff.sh b/tools/testing/selftests/bpf/benchs/run_bench_csum_diff.sh
new file mode 100755
index 0000000000000..c4e147fbf2f98
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_csum_diff.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for s in 4 8 16 20 32 40 64 128 256 512; do
+	summarize ${s} "$($RUN_BENCH --buff-len=$s csum-diff-helper)"
+done
diff --git a/tools/testing/selftests/bpf/progs/csum_diff_bench.c b/tools/testing/selftests/bpf/progs/csum_diff_bench.c
new file mode 100644
index 0000000000000..85245edd6f9dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/csum_diff_bench.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates */
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define BUFF_SZ 4096
+
+/* Will be updated by benchmark before program loading */
+const char buff[BUFF_SZ];
+const volatile unsigned int buff_len = 4;
+
+long hits = 0;
+short result;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int compute_checksum(void *ctx)
+{
+	result = bpf_csum_diff(0, 0, (void *)buff, buff_len, 0);
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
-- 
2.40.1




More information about the linux-riscv mailing list