[PATCH 1/1] xdp: Sample xdp program implementing ip forward
Daniel Borkmann
daniel at iogearbox.net
Tue Oct 3 08:54:10 PDT 2017
On 10/03/2017 09:37 AM, cjacob wrote:
> Implements port to port forwarding with route table and arp table
> lookup for ipv4 packets using bpf_redirect helper function and
> lpm_trie map.
>
> Signed-off-by: cjacob <Christina.Jacob at cavium.com>
Thanks for the patch, just few minor comments below!
Note, should be full name, e.g.:
Signed-off-by: Christina Jacob <Christina.Jacob at cavium.com>
Also you From: only shows 'cjacob' as can be seen from the cover letter
as well, so perhaps check your git settings to make that full name:
cjacob (1):
xdp: Sample xdp program implementing ip forward
If there's one single patch, then cover letter is not needed, only
for >1 sets.
[...]
> +#define KBUILD_MODNAME "foo"
> +#include <uapi/linux/bpf.h>
> +#include <linux/in.h>
> +#include <linux/if_ether.h>
> +#include <linux/if_packet.h>
> +#include <linux/if_vlan.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include "bpf_helpers.h"
> +#include <linux/slab.h>
> +#include <net/ip_fib.h>
> +
> +struct trie_value {
> + __u8 prefix[4];
> + long value;
> + int gw;
> + int ifindex;
> + int metric;
> +};
> +
> +union key_4 {
> + u32 b32[2];
> + u8 b8[8];
> +};
> +
> +struct arp_entry {
> + int dst;
> + long mac;
> +};
> +
> +struct direct_map {
> + long mac;
> + int ifindex;
> + struct arp_entry arp;
> +};
> +
> +/* Map for trie implementation*/
> +struct bpf_map_def SEC("maps") lpm_map = {
> + .type = BPF_MAP_TYPE_LPM_TRIE,
> + .key_size = 8,
> + .value_size =
> + sizeof(struct trie_value),
(Nit: there are couple of such breaks throughout the patch, can we
just use single line for such cases where reasonable?)
> + .max_entries = 50,
> + .map_flags = BPF_F_NO_PREALLOC,
> +};
> +
> +/* Map for counter*/
> +struct bpf_map_def SEC("maps") rxcnt = {
> + .type = BPF_MAP_TYPE_PERCPU_ARRAY,
> + .key_size = sizeof(u32),
> + .value_size = sizeof(long),
> + .max_entries = 256,
> +};
> +
> +/* Map for ARP table*/
> +struct bpf_map_def SEC("maps") arp_table = {
> + .type = BPF_MAP_TYPE_HASH,
> + .key_size = sizeof(int),
> + .value_size = sizeof(long),
Perhaps these should be proper structs here, such that it
becomes easier to read/handle later on lookup.
> + .max_entries = 50,
> +};
> +
> +/* Map to keep the exact match entries in the route table*/
> +struct bpf_map_def SEC("maps") exact_match = {
> + .type = BPF_MAP_TYPE_HASH,
> + .key_size = sizeof(int),
> + .value_size = sizeof(struct direct_map),
> + .max_entries = 50,
> +};
> +
> +/**
> + * Function to set source and destination mac of the packet
> + */
> +static inline void set_src_dst_mac(void *data, void *src, void *dst)
> +{
> + unsigned short *p = data;
> + unsigned short *dest = dst;
> + unsigned short *source = src;
> +
> + p[3] = source[0];
> + p[4] = source[1];
> + p[5] = source[2];
> + p[0] = dest[0];
> + p[1] = dest[1];
> + p[2] = dest[2];
You could just use __builtin_memcpy() given length is
constant anyway, so LLVM will do the inlining.
> +}
> +
> +/**
> + * Parse IPV4 packet to get SRC, DST IP and protocol
> + */
> +static inline int parse_ipv4(void *data, u64 nh_off, void *data_end,
> + unsigned int *src, unsigned int *dest)
> +{
> + struct iphdr *iph = data + nh_off;
> +
> + if (iph + 1 > data_end)
> + return 0;
> + *src = (unsigned int)iph->saddr;
> + *dest = (unsigned int)iph->daddr;
Why not stay with __be32 types?
> + return iph->protocol;
> +}
> +
> +SEC("xdp3")
> +int xdp_prog3(struct xdp_md *ctx)
> +{
> + void *data_end = (void *)(long)ctx->data_end;
> + void *data = (void *)(long)ctx->data;
> + struct ethhdr *eth = data;
> + int rc = XDP_DROP, forward_to;
> + long *value;
> + struct trie_value *prefix_value;
> + long *dest_mac = NULL, *src_mac = NULL;
> + u16 h_proto;
> + u64 nh_off;
> + u32 ipproto;
> + union key_4 key4;
> +
> + nh_off = sizeof(*eth);
> + if (data + nh_off > data_end)
> + return rc;
> +
> + h_proto = eth->h_proto;
> +
> + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) {
> + struct vlan_hdr *vhdr;
> +
> + vhdr = data + nh_off;
> + nh_off += sizeof(struct vlan_hdr);
> + if (data + nh_off > data_end)
> + return rc;
> + h_proto = vhdr->h_vlan_encapsulated_proto;
> + }
> + if (h_proto == htons(ETH_P_ARP)) {
> + return XDP_PASS;
> + } else if (h_proto == htons(ETH_P_IP)) {
> + int src_ip = 0, dest_ip = 0;
> + struct direct_map *direct_entry;
> +
> + ipproto = parse_ipv4(data, nh_off, data_end, &src_ip, &dest_ip);
> + direct_entry = (struct direct_map *)bpf_map_lookup_elem
> + (&exact_match, &dest_ip);
> + /*check for exact match, this would give a faster lookup*/
> + if (direct_entry && direct_entry->mac &&
> + direct_entry->arp.mac) {
> + src_mac = &direct_entry->mac;
> + dest_mac = &direct_entry->arp.mac;
> + forward_to = direct_entry->ifindex;
> + } else {
> + /*Look up in the trie for lpm*/
> + // Key for trie
Nit: please check style throughout the patch.
> + key4.b32[0] = 32;
> + key4.b8[4] = dest_ip % 0x100;
> + key4.b8[5] = (dest_ip >> 8) % 0x100;
> + key4.b8[6] = (dest_ip >> 16) % 0x100;
> + key4.b8[7] = (dest_ip >> 24) % 0x100;
> + prefix_value =
> + ((struct trie_value *)bpf_map_lookup_elem
> + (&lpm_map, &key4));
For key, please use proper struct bpf_lpm_trie_key, see also
usage example in tools/testing/selftests/bpf/test_lpm_map.c
for LPM handling.
> + if (!prefix_value) {
> + return XDP_DROP;
> + } else {
> + src_mac = &prefix_value->value;
> + if (src_mac) {
> + dest_mac = (long *)bpf_map_lookup_elem
> + (&arp_table, &dest_ip);
> + if (!dest_mac) {
> + if (prefix_value->gw) {
> + dest_ip = *(unsigned int *)(&(prefix_value->gw));
> + dest_mac = (long *)bpf_map_lookup_elem
> + (&arp_table, &dest_ip);
> + } else {
> + return XDP_DROP;
> + }
> + }
> + forward_to = prefix_value->ifindex;
> + } else {
> + return XDP_DROP;
> + }
> + }
> + }
> + } else {
> + ipproto = 0;
> + }
> + if (src_mac && dest_mac) {
> + set_src_dst_mac(data, src_mac,
> + dest_mac);
> + value = bpf_map_lookup_elem
> + (&rxcnt, &ipproto);
> + if (value)
> + *value += 1;
> + return bpf_redirect(
> + forward_to,
> + 0);
> + }
> + return rc;
More information about the linux-arm-kernel
mailing list