[openwrt/openwrt] netfilter: add a xt_FLOWOFFLOAD target for NAT/routing offload support

LEDE Commits lede-commits at lists.infradead.org
Wed Feb 21 11:13:12 PST 2018


nbd pushed a commit to openwrt/openwrt.git, branch master:
https://git.lede-project.org/820f03099894bd48638fb5be326b5c551f0f2b98

commit 820f03099894bd48638fb5be326b5c551f0f2b98
Author: Felix Fietkau <nbd at nbd.name>
AuthorDate: Tue Feb 20 15:58:42 2018 +0100

    netfilter: add a xt_FLOWOFFLOAD target for NAT/routing offload support
    
    This makes it possible to add an iptables rule that offloads routing/NAT
    packet processing to a software fast path. This fast path is much
    quicker than running packets through the regular tables/chains.
    
    Requires Linux 4.14
    
    Signed-off-by: Felix Fietkau <nbd at nbd.name>
---
 include/netfilter.mk                               |   3 +
 package/kernel/linux/modules/netfilter.mk          |  13 +-
 .../iptables/patches/800-flowoffload_target.patch  |  18 +
 .../650-netfilter-add-xt_OFFLOAD-target.patch      | 446 +++++++++++++++++++++
 4 files changed, 479 insertions(+), 1 deletion(-)

diff --git a/include/netfilter.mk b/include/netfilter.mk
index bad599b..c4e43a8 100644
--- a/include/netfilter.mk
+++ b/include/netfilter.mk
@@ -143,6 +143,8 @@ $(eval $(call nf_add,IPT_IPSEC,CONFIG_IP_NF_MATCH_AH, $(P_V4)ipt_ah))
 $(eval $(call nf_add,IPT_IPSEC,CONFIG_NETFILTER_XT_MATCH_ESP, $(P_XT)xt_esp))
 $(eval $(call nf_add,IPT_IPSEC,CONFIG_NETFILTER_XT_MATCH_POLICY, $(P_XT)xt_policy))
 
+# flow offload support
+$(eval $(call nf_add,IPT_FLOW,CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD, $(P_XT)xt_FLOWOFFLOAD))
 
 # IPv6
 
@@ -370,6 +372,7 @@ IPT_BUILTIN += $(IPT_CONNTRACK-y)
 IPT_BUILTIN += $(IPT_CONNTRACK_EXTRA-y)
 IPT_BUILTIN += $(IPT_EXTRA-y)
 IPT_BUILTIN += $(IPT_FILTER-y)
+IPT_BUILTIN += $(IPT_FLOW-y) $(IPT_FLOW-m)
 IPT_BUILTIN += $(IPT_IPOPT-y)
 IPT_BUILTIN += $(IPT_IPRANGE-y)
 IPT_BUILTIN += $(IPT_CLUSTER-y)
diff --git a/package/kernel/linux/modules/netfilter.mk b/package/kernel/linux/modules/netfilter.mk
index 57d68d4..f296a90 100644
--- a/package/kernel/linux/modules/netfilter.mk
+++ b/package/kernel/linux/modules/netfilter.mk
@@ -147,7 +147,7 @@ define KernelPackage/nf-flow
 	CONFIG_NETFILTER_INGRESS=y \
 	CONFIG_NF_FLOW_TABLE \
 	CONFIG_NF_FLOW_TABLE_HW
-  DEPENDS:=+kmod-nf-conntrack +kmod-nft-core @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
+  DEPENDS:=+kmod-nf-conntrack @!LINUX_3_18 @!LINUX_4_4 @!LINUX_4_9
   FILES:= \
 	$(LINUX_DIR)/net/netfilter/nf_flow_table.ko \
 	$(LINUX_DIR)/net/netfilter/nf_flow_table_hw.ko
@@ -237,6 +237,17 @@ endef
 $(eval $(call KernelPackage,ipt-filter))
 
 
+define KernelPackage/ipt-offload
+  TITLE:=Netfilter routing/NAT offload support
+  KCONFIG:=CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD
+  FILES:=$(foreach mod,$(IPT_FLOW-m),$(LINUX_DIR)/net/$(mod).ko)
+  AUTOLOAD:=$(call AutoProbe,$(notdir $(IPT_FLOW-m)))
+  $(call AddDepends/ipt,+kmod-nf-flow)
+endef
+
+$(eval $(call KernelPackage,ipt-offload))
+
+
 define KernelPackage/ipt-ipopt
   TITLE:=Modules for matching/changing IP packet options
   KCONFIG:=$(KCONFIG_IPT_IPOPT)
diff --git a/package/network/utils/iptables/patches/800-flowoffload_target.patch b/package/network/utils/iptables/patches/800-flowoffload_target.patch
new file mode 100644
index 0000000..c6fe65c
--- /dev/null
+++ b/package/network/utils/iptables/patches/800-flowoffload_target.patch
@@ -0,0 +1,18 @@
+--- /dev/null
++++ b/extensions/libxt_FLOWOFFLOAD.c
+@@ -0,0 +1,15 @@
++#include <xtables.h>
++
++static struct xtables_target offload_tg_reg[] = {
++	{
++		.family        = NFPROTO_UNSPEC,
++		.name          = "FLOWOFFLOAD",
++		.revision      = 0,
++		.version       = XTABLES_VERSION,
++	},
++};
++
++void _init(void)
++{
++	xtables_register_targets(offload_tg_reg, ARRAY_SIZE(offload_tg_reg));
++}
diff --git a/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch b/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch
new file mode 100644
index 0000000..cb05cf8
--- /dev/null
+++ b/target/linux/generic/hack-4.14/650-netfilter-add-xt_OFFLOAD-target.patch
@@ -0,0 +1,446 @@
+From: Felix Fietkau <nbd at nbd.name>
+Date: Tue, 20 Feb 2018 15:56:02 +0100
+Subject: [PATCH] netfilter: add xt_OFFLOAD target
+
+Signed-off-by: Felix Fietkau <nbd at nbd.name>
+---
+ create mode 100644 net/netfilter/xt_OFFLOAD.c
+
+--- a/net/ipv4/netfilter/Kconfig
++++ b/net/ipv4/netfilter/Kconfig
+@@ -75,8 +75,6 @@ config NF_TABLES_ARP
+ 	help
+ 	  This option enables the ARP support for nf_tables.
+ 
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_IPV4
+ 	tristate "Netfilter flow table IPv4 module"
+ 	depends on NF_FLOW_TABLE
+@@ -85,6 +83,8 @@ config NF_FLOW_TABLE_IPV4
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_DUP_IPV4
+ 	tristate "Netfilter IPv4 packet duplication to alternate destination"
+ 	depends on !NF_CONNTRACK || NF_CONNTRACK
+--- a/net/ipv6/netfilter/Kconfig
++++ b/net/ipv6/netfilter/Kconfig
+@@ -69,7 +69,6 @@ config NFT_FIB_IPV6
+ 	  multicast or blackhole.
+ 
+ endif # NF_TABLES_IPV6
+-endif # NF_TABLES
+ 
+ config NF_FLOW_TABLE_IPV6
+ 	tristate "Netfilter flow table IPv6 module"
+@@ -79,6 +78,8 @@ config NF_FLOW_TABLE_IPV6
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_DUP_IPV6
+ 	tristate "Netfilter IPv6 packet duplication to alternate destination"
+ 	depends on !NF_CONNTRACK || NF_CONNTRACK
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -665,8 +665,6 @@ config NFT_FIB_NETDEV
+ 
+ endif # NF_TABLES_NETDEV
+ 
+-endif # NF_TABLES
+-
+ config NF_FLOW_TABLE_INET
+ 	tristate "Netfilter flow table mixed IPv4/IPv6 module"
+ 	depends on NF_FLOW_TABLE
+@@ -675,11 +673,12 @@ config NF_FLOW_TABLE_INET
+ 
+ 	  To compile it as a module, choose M here.
+ 
++endif # NF_TABLES
++
+ config NF_FLOW_TABLE
+ 	tristate "Netfilter flow table module"
+ 	depends on NETFILTER_INGRESS
+ 	depends on NF_CONNTRACK
+-	depends on NF_TABLES
+ 	help
+ 	  This option adds the flow table core infrastructure.
+ 
+@@ -968,6 +967,15 @@ config NETFILTER_XT_TARGET_NOTRACK
+ 	depends on NETFILTER_ADVANCED
+ 	select NETFILTER_XT_TARGET_CT
+ 
++config NETFILTER_XT_TARGET_FLOWOFFLOAD
++	tristate '"FLOWOFFLOAD" target support'
++	depends on NF_FLOW_TABLE
++	depends on NETFILTER_INGRESS
++	help
++	  This option adds a `FLOWOFFLOAD' target, which uses the nf_flow_offload
++	  module to speed up processing of packets by bypassing the usual
++	  netfilter chains
++
+ config NETFILTER_XT_TARGET_RATEEST
+ 	tristate '"RATEEST" target support'
+ 	depends on NETFILTER_ADVANCED
+--- a/net/netfilter/Makefile
++++ b/net/netfilter/Makefile
+@@ -134,6 +134,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIF
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
++obj-$(CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD) += xt_FLOWOFFLOAD.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+ obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
+--- /dev/null
++++ b/net/netfilter/xt_FLOWOFFLOAD.c
+@@ -0,0 +1,335 @@
++/*
++ * Copyright (C) 2018 Felix Fietkau <nbd at nbd.name>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License version 2 as
++ * published by the Free Software Foundation.
++ */
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/netfilter.h>
++#include <net/ip.h>
++#include <net/netfilter/nf_conntrack.h>
++#include <net/netfilter/nf_flow_table.h>
++
++static struct nf_flowtable nf_flowtable;
++static HLIST_HEAD(hooks);
++static DEFINE_SPINLOCK(hooks_lock);
++static struct delayed_work hook_work;
++
++struct xt_flowoffload_hook {
++	struct hlist_node list;
++	struct nf_hook_ops ops;
++	bool registered;
++	bool used;
++};
++
++static unsigned int
++xt_flowoffload_net_hook(void *priv, struct sk_buff *skb,
++			  const struct nf_hook_state *state)
++{
++	switch (skb->protocol) {
++	case htons(ETH_P_IP):
++		return nf_flow_offload_ip_hook(priv, skb, state);
++	case htons(ETH_P_IPV6):
++		return nf_flow_offload_ipv6_hook(priv, skb, state);
++	}
++
++	return NF_ACCEPT;
++}
++
++static int
++xt_flowoffload_create_hook(struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++	struct nf_hook_ops *ops;
++
++	hook = kzalloc(sizeof(*hook), GFP_ATOMIC);
++	if (!hook)
++		return -ENOMEM;
++
++	ops = &hook->ops;
++	ops->pf = NFPROTO_NETDEV;
++	ops->hooknum = NF_NETDEV_INGRESS;
++	ops->priority = 10;
++	ops->priv = &nf_flowtable;
++	ops->hook = xt_flowoffload_net_hook;
++	ops->dev = dev;
++
++	hlist_add_head(&hook->list, &hooks);
++	mod_delayed_work(system_power_efficient_wq, &hook_work, 0);
++
++	return 0;
++}
++
++static struct xt_flowoffload_hook *
++flow_offload_lookup_hook(struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++
++	hlist_for_each_entry(hook, &hooks, list) {
++		if (hook->ops.dev == dev)
++			return hook;
++	}
++
++	return NULL;
++}
++
++static void
++xt_flowoffload_check_device(struct net_device *dev)
++{
++	struct xt_flowoffload_hook *hook;
++
++	spin_lock_bh(&hooks_lock);
++	hook = flow_offload_lookup_hook(dev);
++	if (hook)
++		hook->used = true;
++	else
++		xt_flowoffload_create_hook(dev);
++	spin_unlock_bh(&hooks_lock);
++}
++
++static void
++xt_flowoffload_register_hooks(void)
++{
++	struct xt_flowoffload_hook *hook;
++
++restart:
++	hlist_for_each_entry(hook, &hooks, list) {
++		if (hook->registered)
++			continue;
++
++		hook->registered = true;
++		spin_unlock_bh(&hooks_lock);
++		nf_register_net_hook(dev_net(hook->ops.dev), &hook->ops);
++		spin_lock_bh(&hooks_lock);
++		goto restart;
++	}
++
++}
++
++static void
++xt_flowoffload_cleanup_hooks(void)
++{
++	struct xt_flowoffload_hook *hook;
++
++restart:
++	hlist_for_each_entry(hook, &hooks, list) {
++		if (hook->used)
++			continue;
++
++		hlist_del(&hook->list);
++		spin_unlock_bh(&hooks_lock);
++		nf_unregister_net_hook(dev_net(hook->ops.dev), &hook->ops);
++		kfree(hook);
++		spin_lock_bh(&hooks_lock);
++		goto restart;
++	}
++
++}
++
++static void
++xt_flowoffload_check_hook(struct flow_offload *flow, void *data)
++{
++	struct flow_offload_tuple *tuple = &flow->tuplehash[0].tuple;
++	struct xt_flowoffload_hook *hook;
++	bool *found = data;
++
++	spin_lock_bh(&hooks_lock);
++	hlist_for_each_entry(hook, &hooks, list) {
++		if (hook->ops.dev->ifindex != tuple->iifidx &&
++		    hook->ops.dev->ifindex != tuple->oifidx)
++			continue;
++
++		hook->used = true;
++		*found = true;
++	}
++	spin_unlock_bh(&hooks_lock);
++}
++
++static void
++xt_flowoffload_hook_work(struct work_struct *work)
++{
++	struct xt_flowoffload_hook *hook;
++	bool found = false;
++	int err;
++
++	spin_lock_bh(&hooks_lock);
++	xt_flowoffload_register_hooks();
++	hlist_for_each_entry(hook, &hooks, list)
++		hook->used = false;
++	spin_unlock_bh(&hooks_lock);
++
++	err = nf_flow_table_iterate(&nf_flowtable, xt_flowoffload_check_hook,
++				    &found);
++	if (err && err != -EAGAIN)
++	    goto out;
++
++	spin_lock_bh(&hooks_lock);
++	xt_flowoffload_cleanup_hooks();
++	spin_unlock_bh(&hooks_lock);
++
++out:
++	if (found)
++		queue_delayed_work(system_power_efficient_wq, &hook_work, HZ);
++}
++
++static bool
++xt_flowoffload_skip(struct sk_buff *skb)
++{
++	struct ip_options *opt = &(IPCB(skb)->opt);
++
++	if (unlikely(opt->optlen))
++		return true;
++	if (skb_sec_path(skb))
++		return true;
++
++	return false;
++}
++
++static int
++xt_flowoffload_route(struct sk_buff *skb, const struct nf_conn *ct,
++		   const struct xt_action_param *par,
++		   struct nf_flow_route *route, enum ip_conntrack_dir dir)
++{
++	struct dst_entry *this_dst = skb_dst(skb);
++	struct dst_entry *other_dst = NULL;
++	struct flowi fl;
++
++	memset(&fl, 0, sizeof(fl));
++	switch (xt_family(par)) {
++	case NFPROTO_IPV4:
++		fl.u.ip4.daddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
++		break;
++	case NFPROTO_IPV6:
++		fl.u.ip6.daddr = ct->tuplehash[!dir].tuple.dst.u3.in6;
++		break;
++	}
++
++	nf_route(xt_net(par), &other_dst, &fl, false, xt_family(par));
++	if (!other_dst)
++		return -ENOENT;
++
++	route->tuple[dir].dst		= this_dst;
++	route->tuple[dir].ifindex	= xt_in(par)->ifindex;
++	route->tuple[!dir].dst		= other_dst;
++	route->tuple[!dir].ifindex	= xt_out(par)->ifindex;
++
++	return 0;
++}
++
++static unsigned int
++flowoffload_tg(struct sk_buff *skb, const struct xt_action_param *par)
++{
++	enum ip_conntrack_info ctinfo;
++	enum ip_conntrack_dir dir;
++	struct nf_flow_route route;
++	struct flow_offload *flow;
++	struct nf_conn *ct;
++
++	if (xt_flowoffload_skip(skb))
++		return XT_CONTINUE;
++
++	ct = nf_ct_get(skb, &ctinfo);
++	if (ct == NULL)
++		return XT_CONTINUE;
++
++	switch (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum) {
++	case IPPROTO_TCP:
++	case IPPROTO_UDP:
++		break;
++	default:
++		return XT_CONTINUE;
++	}
++
++	if (test_bit(IPS_HELPER_BIT, &ct->status))
++		return XT_CONTINUE;
++
++	if (ctinfo == IP_CT_NEW ||
++	    ctinfo == IP_CT_RELATED)
++		return XT_CONTINUE;
++
++	if (!xt_in(par) || !xt_out(par))
++		return XT_CONTINUE;
++
++	if (test_and_set_bit(IPS_OFFLOAD_BIT, &ct->status))
++		return XT_CONTINUE;
++
++	dir = CTINFO2DIR(ctinfo);
++
++	if (xt_flowoffload_route(skb, ct, par, &route, dir) < 0)
++		goto err_flow_route;
++
++	flow = flow_offload_alloc(ct, &route);
++	if (!flow)
++		goto err_flow_alloc;
++
++	if (flow_offload_add(&nf_flowtable, flow) < 0)
++		goto err_flow_add;
++
++	xt_flowoffload_check_device(xt_in(par));
++	xt_flowoffload_check_device(xt_out(par));
++
++	return XT_CONTINUE;
++
++err_flow_add:
++	flow_offload_free(flow);
++err_flow_alloc:
++	dst_release(route.tuple[!dir].dst);
++err_flow_route:
++	clear_bit(IPS_OFFLOAD_BIT, &ct->status);
++	return XT_CONTINUE;
++}
++
++
++static int flowoffload_chk(const struct xt_tgchk_param *par)
++{
++	return 0;
++}
++
++static struct xt_target offload_tg_reg __read_mostly = {
++	.family		= NFPROTO_UNSPEC,
++	.name		= "FLOWOFFLOAD",
++	.revision	= 0,
++	.checkentry	= flowoffload_chk,
++	.target		= flowoffload_tg,
++	.me		= THIS_MODULE,
++};
++
++static int xt_flowoffload_table_init(struct nf_flowtable *table)
++{
++	nf_flow_table_init(table);
++	return 0;
++}
++
++static void xt_flowoffload_table_cleanup(struct nf_flowtable *table)
++{
++	nf_flow_table_free(table);
++}
++
++static int __init xt_flowoffload_tg_init(void)
++{
++	int ret;
++
++	INIT_DELAYED_WORK(&hook_work, xt_flowoffload_hook_work);
++
++	ret = xt_flowoffload_table_init(&nf_flowtable);
++	if (ret)
++		return ret;
++
++	ret = xt_register_target(&offload_tg_reg);
++	if (ret)
++		xt_flowoffload_table_cleanup(&nf_flowtable);
++
++	return ret;
++}
++
++static void __exit xt_flowoffload_tg_exit(void)
++{
++	xt_unregister_target(&offload_tg_reg);
++	xt_flowoffload_table_cleanup(&nf_flowtable);
++}
++
++MODULE_LICENSE("GPL");
++module_init(xt_flowoffload_tg_init);
++module_exit(xt_flowoffload_tg_exit);
+--- a/net/netfilter/nf_flow_table_core.c
++++ b/net/netfilter/nf_flow_table_core.c
+@@ -6,7 +6,6 @@
+ #include <linux/netdevice.h>
+ #include <net/ip.h>
+ #include <net/ip6_route.h>
+-#include <net/netfilter/nf_tables.h>
+ #include <net/netfilter/nf_flow_table.h>
+ #include <net/netfilter/nf_conntrack.h>
+ #include <net/netfilter/nf_conntrack_core.h>



More information about the lede-commits mailing list