From b11a632c442eef34a0afeba61fab923241f317e9 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 5 Feb 2018 10:17:43 -0800
Subject: net: add a UID to use for ULP socket assignment

Create a UID field and enum that can be used to assign ULPs to
sockets. This saves a set of string comparisons if the ULP id
is known.

For sockmap, which is added in the next patches, a ULP is used to
hook into TCP sockets close state. In this case the ULP being added
is done at map insert time and the ULP is known and done on the kernel
side. In this case the named lookup is not needed. Because we don't
want to expose psock internals to user space socket options a user
visible flag is also added. For TLS this is set for BPF it will be
cleared.

Alos remove pr_notice, user gets an error code back and should check
that rather than rely on logs.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tcp.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 58278669cc55..a58292d31e12 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1983,6 +1983,10 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
 #define TCP_ULP_MAX		128
 #define TCP_ULP_BUF_MAX		(TCP_ULP_NAME_MAX*TCP_ULP_MAX)
 
+enum {
+	TCP_ULP_TLS,
+};
+
 struct tcp_ulp_ops {
 	struct list_head	list;
 
@@ -1991,7 +1995,9 @@ struct tcp_ulp_ops {
 	/* cleanup ulp */
 	void (*release)(struct sock *sk);
 
+	int		uid;
 	char		name[TCP_ULP_NAME_MAX];
+	bool		user_visible;
 	struct module	*owner;
 };
 int tcp_register_ulp(struct tcp_ulp_ops *type);
-- 
cgit v1.2.3


From 1aa12bdf1bfb95db7e75bfecf0e39a65f4e8fbf8 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Mon, 5 Feb 2018 10:17:49 -0800
Subject: bpf: sockmap, add sock close() hook to remove socks

The selftests test_maps program was leaving dangling BPF sockmap
programs around because not all psock elements were removed from
the map. The elements in turn hold a reference on the BPF program
they are attached to causing BPF programs to stay open even after
test_maps has completed.

The original intent was that sk_state_change() would be called
when TCP socks went through TCP_CLOSE state. However, because
socks may be in SOCK_DEAD state or the sock may be a listening
socket the event is not always triggered.

To resolve this use the ULP infrastructure and register our own
proto close() handler. This fixes the above case.

Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/tcp.h    |   2 +
 kernel/bpf/sockmap.c | 168 +++++++++++++++++++++++++++++++--------------------
 2 files changed, 103 insertions(+), 67 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index a58292d31e12..e3fc667f9ac2 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1985,6 +1985,7 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer);
 
 enum {
 	TCP_ULP_TLS,
+	TCP_ULP_BPF,
 };
 
 struct tcp_ulp_ops {
@@ -2003,6 +2004,7 @@ struct tcp_ulp_ops {
 int tcp_register_ulp(struct tcp_ulp_ops *type);
 void tcp_unregister_ulp(struct tcp_ulp_ops *type);
 int tcp_set_ulp(struct sock *sk, const char *name);
+int tcp_set_ulp_id(struct sock *sk, const int ulp);
 void tcp_get_available_ulp(char *buf, size_t len);
 void tcp_cleanup_ulp(struct sock *sk);
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 0314d1783d77..bd4a6d9c6709 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -86,9 +86,10 @@ struct smap_psock {
 	struct work_struct tx_work;
 	struct work_struct gc_work;
 
+	struct proto *sk_proto;
+	void (*save_close)(struct sock *sk, long timeout);
 	void (*save_data_ready)(struct sock *sk);
 	void (*save_write_space)(struct sock *sk);
-	void (*save_state_change)(struct sock *sk);
 };
 
 static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
@@ -96,12 +97,102 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static struct proto tcp_bpf_proto;
+static int bpf_tcp_init(struct sock *sk)
+{
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
+	if (unlikely(psock->sk_proto)) {
+		rcu_read_unlock();
+		return -EBUSY;
+	}
+
+	psock->save_close = sk->sk_prot->close;
+	psock->sk_proto = sk->sk_prot;
+	sk->sk_prot = &tcp_bpf_proto;
+	rcu_read_unlock();
+	return 0;
+}
+
+static void bpf_tcp_release(struct sock *sk)
+{
+	struct smap_psock *psock;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+
+	if (likely(psock)) {
+		sk->sk_prot = psock->sk_proto;
+		psock->sk_proto = NULL;
+	}
+	rcu_read_unlock();
+}
+
+static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+
+static void bpf_tcp_close(struct sock *sk, long timeout)
+{
+	void (*close_fun)(struct sock *sk, long timeout);
+	struct smap_psock_map_entry *e, *tmp;
+	struct smap_psock *psock;
+	struct sock *osk;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock)) {
+		rcu_read_unlock();
+		return sk->sk_prot->close(sk, timeout);
+	}
+
+	/* The psock may be destroyed anytime after exiting the RCU critial
+	 * section so by the time we use close_fun the psock may no longer
+	 * be valid. However, bpf_tcp_close is called with the sock lock
+	 * held so the close hook and sk are still valid.
+	 */
+	close_fun = psock->save_close;
+
+	write_lock_bh(&sk->sk_callback_lock);
+	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
+		osk = cmpxchg(e->entry, sk, NULL);
+		if (osk == sk) {
+			list_del(&e->list);
+			smap_release_sock(psock, sk);
+		}
+	}
+	write_unlock_bh(&sk->sk_callback_lock);
+	rcu_read_unlock();
+	close_fun(sk, timeout);
+}
+
 enum __sk_action {
 	__SK_DROP = 0,
 	__SK_PASS,
 	__SK_REDIRECT,
 };
 
+static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = {
+	.name		= "bpf_tcp",
+	.uid		= TCP_ULP_BPF,
+	.user_visible	= false,
+	.owner		= NULL,
+	.init		= bpf_tcp_init,
+	.release	= bpf_tcp_release,
+};
+
+static int bpf_tcp_ulp_register(void)
+{
+	tcp_bpf_proto = tcp_prot;
+	tcp_bpf_proto.close = bpf_tcp_close;
+	return tcp_register_ulp(&bpf_tcp_ulp_ops);
+}
+
 static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 {
 	struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict);
@@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err)
 	sk->sk_error_report(sk);
 }
 
-static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
-
-/* Called with lock_sock(sk) held */
-static void smap_state_change(struct sock *sk)
-{
-	struct smap_psock_map_entry *e, *tmp;
-	struct smap_psock *psock;
-	struct socket_wq *wq;
-	struct sock *osk;
-
-	rcu_read_lock();
-
-	/* Allowing transitions into an established syn_recv states allows
-	 * for early binding sockets to a smap object before the connection
-	 * is established.
-	 */
-	switch (sk->sk_state) {
-	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:
-	case TCP_ESTABLISHED:
-		break;
-	case TCP_CLOSE_WAIT:
-	case TCP_CLOSING:
-	case TCP_LAST_ACK:
-	case TCP_FIN_WAIT1:
-	case TCP_FIN_WAIT2:
-	case TCP_LISTEN:
-		break;
-	case TCP_CLOSE:
-		/* Only release if the map entry is in fact the sock in
-		 * question. There is a case where the operator deletes
-		 * the sock from the map, but the TCP sock is closed before
-		 * the psock is detached. Use cmpxchg to verify correct
-		 * sock is removed.
-		 */
-		psock = smap_psock_sk(sk);
-		if (unlikely(!psock))
-			break;
-		write_lock_bh(&sk->sk_callback_lock);
-		list_for_each_entry_safe(e, tmp, &psock->maps, list) {
-			osk = cmpxchg(e->entry, sk, NULL);
-			if (osk == sk) {
-				list_del(&e->list);
-				smap_release_sock(psock, sk);
-			}
-		}
-		write_unlock_bh(&sk->sk_callback_lock);
-		break;
-	default:
-		psock = smap_psock_sk(sk);
-		if (unlikely(!psock))
-			break;
-		smap_report_sk_error(psock, EPIPE);
-		break;
-	}
-
-	wq = rcu_dereference(sk->sk_wq);
-	if (skwq_has_sleeper(wq))
-		wake_up_interruptible_all(&wq->wait);
-	rcu_read_unlock();
-}
-
 static void smap_read_sock_strparser(struct strparser *strp,
 				     struct sk_buff *skb)
 {
@@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk)
 		return;
 	sk->sk_data_ready = psock->save_data_ready;
 	sk->sk_write_space = psock->save_write_space;
-	sk->sk_state_change = psock->save_state_change;
 	psock->save_data_ready = NULL;
 	psock->save_write_space = NULL;
-	psock->save_state_change = NULL;
 	strp_stop(&psock->strp);
 	psock->strp_enabled = false;
 }
@@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock)
 	if (psock->refcnt)
 		return;
 
+	tcp_cleanup_ulp(sock);
 	smap_stop_sock(psock, sock);
 	clear_bit(SMAP_TX_RUNNING, &psock->state);
 	rcu_assign_sk_user_data(sock, NULL);
@@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk)
 		return;
 	psock->save_data_ready = sk->sk_data_ready;
 	psock->save_write_space = sk->sk_write_space;
-	psock->save_state_change = sk->sk_state_change;
 	sk->sk_data_ready = smap_data_ready;
 	sk->sk_write_space = smap_write_space;
-	sk->sk_state_change = smap_state_change;
 	psock->strp_enabled = true;
 }
 
@@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
 	if (attr->value_size > KMALLOC_MAX_SIZE)
 		return ERR_PTR(-E2BIG);
 
+	err = bpf_tcp_ulp_register();
+	if (err && err != -EEXIST)
+		return ERR_PTR(err);
+
 	stab = kzalloc(sizeof(*stab), GFP_USER);
 	if (!stab)
 		return ERR_PTR(-ENOMEM);
@@ -754,6 +784,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops,
 			goto out_progs;
 		}
 
+		err = tcp_set_ulp_id(sock, TCP_ULP_BPF);
+		if (err)
+			goto out_progs;
+
 		set_bit(SMAP_TX_RUNNING, &psock->state);
 	}
 
-- 
cgit v1.2.3


From c0ea1bcb39352b57ac5c4b6da8acd65bddeee2c5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 6 Feb 2018 13:22:44 +0100
Subject: netfilter: nft_flow_offload: move flowtable cleanup routines to
 nf_flow_table

Move the flowtable cleanup routines to nf_flow_table and expose the
nf_flow_table_cleanup() helper function.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  3 +++
 net/netfilter/nf_flow_table.c         | 24 ++++++++++++++++++++++++
 net/netfilter/nft_flow_offload.c      | 19 +------------------
 3 files changed, 28 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index b22b22082733..ed49cd169ecf 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,6 +95,9 @@ struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_t
 int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 			  void (*iter)(struct flow_offload *flow, void *data),
 			  void *data);
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
+
 void nf_flow_offload_work_gc(struct work_struct *work);
 extern const struct rhashtable_params nf_flow_offload_rhash_params;
 
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
index 2f5099cb85b8..04c08f6b9015 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -4,6 +4,7 @@
 #include <linux/netfilter.h>
 #include <linux/rhashtable.h>
 #include <linux/netdevice.h>
+#include <net/netfilter/nf_tables.h>
 #include <net/netfilter/nf_flow_table.h>
 #include <net/netfilter/nf_conntrack.h>
 #include <net/netfilter/nf_conntrack_core.h>
@@ -425,5 +426,28 @@ int nf_flow_dnat_port(const struct flow_offload *flow,
 }
 EXPORT_SYMBOL_GPL(nf_flow_dnat_port);
 
+static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
+{
+	struct net_device *dev = data;
+
+	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
+		return;
+
+	flow_offload_dead(flow);
+}
+
+static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
+					  void *data)
+{
+	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, data);
+	flush_delayed_work(&flowtable->gc_work);
+}
+
+void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
+{
+	nft_flow_table_iterate(net, nf_flow_table_iterate_cleanup, dev);
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e5c45c7ac02a..b65829b2be22 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -194,23 +194,6 @@ static struct nft_expr_type nft_flow_offload_type __read_mostly = {
 	.owner		= THIS_MODULE,
 };
 
-static void flow_offload_iterate_cleanup(struct flow_offload *flow, void *data)
-{
-	struct net_device *dev = data;
-
-	if (dev && flow->tuplehash[0].tuple.iifidx != dev->ifindex)
-		return;
-
-	flow_offload_dead(flow);
-}
-
-static void nft_flow_offload_iterate_cleanup(struct nf_flowtable *flowtable,
-					     void *data)
-{
-	nf_flow_table_iterate(flowtable, flow_offload_iterate_cleanup, data);
-	flush_delayed_work(&flowtable->gc_work);
-}
-
 static int flow_offload_netdev_event(struct notifier_block *this,
 				     unsigned long event, void *ptr)
 {
@@ -219,7 +202,7 @@ static int flow_offload_netdev_event(struct notifier_block *this,
 	if (event != NETDEV_DOWN)
 		return NOTIFY_DONE;
 
-	nft_flow_table_iterate(dev_net(dev), nft_flow_offload_iterate_cleanup, dev);
+	nf_flow_table_cleanup(dev_net(dev), dev);
 
 	return NOTIFY_DONE;
 }
-- 
cgit v1.2.3


From b408c5b04f82fe4e20bceb8e4f219453d4f21f02 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 6 Feb 2018 13:22:47 +0100
Subject: netfilter: nf_tables: fix flowtable free

Every flow_offload entry is added into the table twice. Because of this,
rhashtable_free_and_destroy can't be used, since it would call kfree for
each flow_offload object twice.

This patch cleans up the flowtable via nf_flow_table_iterate() to
schedule removal of entries by setting on the dying bit, then there is
an explicitly invocation of the garbage collector to release resources.

Based on patch from Felix Fietkau.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h   |  2 ++
 net/ipv4/netfilter/nf_flow_table_ipv4.c |  1 +
 net/ipv6/netfilter/nf_flow_table_ipv6.c |  1 +
 net/netfilter/nf_flow_table.c           | 25 +++++++++++++++++++------
 net/netfilter/nf_flow_table_inet.c      |  1 +
 net/netfilter/nf_tables_api.c           |  9 ++-------
 6 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index ed49cd169ecf..020ae903066f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -14,6 +14,7 @@ struct nf_flowtable_type {
 	struct list_head		list;
 	int				family;
 	void				(*gc)(struct work_struct *work);
+	void				(*free)(struct nf_flowtable *ft);
 	const struct rhashtable_params	*params;
 	nf_hookfn			*hook;
 	struct module			*owner;
@@ -98,6 +99,7 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 
 void nf_flow_table_cleanup(struct net *net, struct net_device *dev);
 
+void nf_flow_table_free(struct nf_flowtable *flow_table);
 void nf_flow_offload_work_gc(struct work_struct *work);
 extern const struct rhashtable_params nf_flow_offload_rhash_params;
 
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index b2d01eb25f2c..25d2975da156 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -260,6 +260,7 @@ static struct nf_flowtable_type flowtable_ipv4 = {
 	.family		= NFPROTO_IPV4,
 	.params		= &nf_flow_offload_rhash_params,
 	.gc		= nf_flow_offload_work_gc,
+	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ip_hook,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index fff21602875a..d346705d6ee6 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -253,6 +253,7 @@ static struct nf_flowtable_type flowtable_ipv6 = {
 	.family		= NFPROTO_IPV6,
 	.params		= &nf_flow_offload_rhash_params,
 	.gc		= nf_flow_offload_work_gc,
+	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_ipv6_hook,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
index 04c08f6b9015..c17f1af42daa 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -232,19 +232,16 @@ static inline bool nf_flow_is_dying(const struct flow_offload *flow)
 	return flow->flags & FLOW_OFFLOAD_DYING;
 }
 
-void nf_flow_offload_work_gc(struct work_struct *work)
+static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 {
 	struct flow_offload_tuple_rhash *tuplehash;
-	struct nf_flowtable *flow_table;
 	struct rhashtable_iter hti;
 	struct flow_offload *flow;
 	int err;
 
-	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
-
 	err = rhashtable_walk_init(&flow_table->rhashtable, &hti, GFP_KERNEL);
 	if (err)
-		goto schedule;
+		return 0;
 
 	rhashtable_walk_start(&hti);
 
@@ -270,7 +267,16 @@ void nf_flow_offload_work_gc(struct work_struct *work)
 out:
 	rhashtable_walk_stop(&hti);
 	rhashtable_walk_exit(&hti);
-schedule:
+
+	return 1;
+}
+
+void nf_flow_offload_work_gc(struct work_struct *work)
+{
+	struct nf_flowtable *flow_table;
+
+	flow_table = container_of(work, struct nf_flowtable, gc_work.work);
+	nf_flow_offload_gc_step(flow_table);
 	queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
 }
 EXPORT_SYMBOL_GPL(nf_flow_offload_work_gc);
@@ -449,5 +455,12 @@ void nf_flow_table_cleanup(struct net *net, struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
 
+void nf_flow_table_free(struct nf_flowtable *flow_table)
+{
+	nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
+	WARN_ON(!nf_flow_offload_gc_step(flow_table));
+}
+EXPORT_SYMBOL_GPL(nf_flow_table_free);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 281209aeba8f..375a1881d93d 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -24,6 +24,7 @@ static struct nf_flowtable_type flowtable_inet = {
 	.family		= NFPROTO_INET,
 	.params		= &nf_flow_offload_rhash_params,
 	.gc		= nf_flow_offload_work_gc,
+	.free		= nf_flow_table_free,
 	.hook		= nf_flow_offload_inet_hook,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 07dd1fac78a8..8b9fe30de0cd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -5399,17 +5399,12 @@ err:
 	nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
 }
 
-static void nft_flowtable_destroy(void *ptr, void *arg)
-{
-	kfree(ptr);
-}
-
 static void nf_tables_flowtable_destroy(struct nft_flowtable *flowtable)
 {
 	cancel_delayed_work_sync(&flowtable->data.gc_work);
 	kfree(flowtable->name);
-	rhashtable_free_and_destroy(&flowtable->data.rhashtable,
-				    nft_flowtable_destroy, NULL);
+	flowtable->data.type->free(&flowtable->data);
+	rhashtable_destroy(&flowtable->data.rhashtable);
 	module_put(flowtable->data.type->owner);
 }
 
-- 
cgit v1.2.3


From d8ed9600581d40d818ae417b3086a333841b0559 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 7 Feb 2018 11:50:41 +0900
Subject: netfilter: remove useless prototype

prototype nf_ct_nat_offset is not used anymore.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
---
 include/net/netfilter/nf_conntrack.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index f5223bf2c420..062dc19b5840 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -213,11 +213,6 @@ static inline bool nf_ct_kill(struct nf_conn *ct)
 	return nf_ct_delete(ct, 0, 0);
 }
 
-/* These are for NAT.  Icky. */
-extern s32 (*nf_ct_nat_offset)(const struct nf_conn *ct,
-			       enum ip_conntrack_dir dir,
-			       u32 seq);
-
 /* Set all unconfirmed conntrack as dying */
 void nf_ct_unconfirmed_destroy(struct net *);
 
-- 
cgit v1.2.3


From 0ff90b6c20340e57616a51ae1a1bf18156d6638a Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 7 Feb 2018 09:49:02 +0100
Subject: netfilter: nf_flow_offload: fix use-after-free and a resource leak

flow_offload_del frees the flow, so all associated resource must be
freed before.

Since the ct entry in struct flow_offload_entry was allocated by
flow_offload_alloc, it should be freed by flow_offload_free to take care
of the error handling path when flow_offload_add fails.

While at it, make flow_offload_del static, since it should never be
called directly, only from the gc step

Signed-off-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_flow_table.h |  1 -
 net/netfilter/nf_flow_table.c         | 27 +++++++--------------------
 2 files changed, 7 insertions(+), 21 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 020ae903066f..833752dd0c58 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -90,7 +90,6 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct,
 void flow_offload_free(struct flow_offload *flow);
 
 int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
-void flow_offload_del(struct nf_flowtable *flow_table, struct flow_offload *flow);
 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
 						     struct flow_offload_tuple *tuple);
 int nf_flow_table_iterate(struct nf_flowtable *flow_table,
diff --git a/net/netfilter/nf_flow_table.c b/net/netfilter/nf_flow_table.c
index c17f1af42daa..ec410cae9307 100644
--- a/net/netfilter/nf_flow_table.c
+++ b/net/netfilter/nf_flow_table.c
@@ -125,7 +125,9 @@ void flow_offload_free(struct flow_offload *flow)
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_cache);
 	dst_release(flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_cache);
 	e = container_of(flow, struct flow_offload_entry, flow);
-	kfree(e);
+	nf_ct_delete(e->ct, 0, 0);
+	nf_ct_put(e->ct);
+	kfree_rcu(e, rcu_head);
 }
 EXPORT_SYMBOL_GPL(flow_offload_free);
 
@@ -149,11 +151,9 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
 }
 EXPORT_SYMBOL_GPL(flow_offload_add);
 
-void flow_offload_del(struct nf_flowtable *flow_table,
-		      struct flow_offload *flow)
+static void flow_offload_del(struct nf_flowtable *flow_table,
+			     struct flow_offload *flow)
 {
-	struct flow_offload_entry *e;
-
 	rhashtable_remove_fast(&flow_table->rhashtable,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].node,
 			       *flow_table->type->params);
@@ -161,10 +161,8 @@ void flow_offload_del(struct nf_flowtable *flow_table,
 			       &flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].node,
 			       *flow_table->type->params);
 
-	e = container_of(flow, struct flow_offload_entry, flow);
-	kfree_rcu(e, rcu_head);
+	flow_offload_free(flow);
 }
-EXPORT_SYMBOL_GPL(flow_offload_del);
 
 struct flow_offload_tuple_rhash *
 flow_offload_lookup(struct nf_flowtable *flow_table,
@@ -175,15 +173,6 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
 }
 EXPORT_SYMBOL_GPL(flow_offload_lookup);
 
-static void nf_flow_release_ct(const struct flow_offload *flow)
-{
-	struct flow_offload_entry *e;
-
-	e = container_of(flow, struct flow_offload_entry, flow);
-	nf_ct_delete(e->ct, 0, 0);
-	nf_ct_put(e->ct);
-}
-
 int nf_flow_table_iterate(struct nf_flowtable *flow_table,
 			  void (*iter)(struct flow_offload *flow, void *data),
 			  void *data)
@@ -259,10 +248,8 @@ static int nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
 		flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
 
 		if (nf_flow_has_expired(flow) ||
-		    nf_flow_is_dying(flow)) {
+		    nf_flow_is_dying(flow))
 			flow_offload_del(flow_table, flow);
-			nf_flow_release_ct(flow);
-		}
 	}
 out:
 	rhashtable_walk_stop(&hti);
-- 
cgit v1.2.3


From d5cc61119343b6f1b8716cfe591d4989ea0c5c28 Mon Sep 17 00:00:00 2001
From: Tobias Schramm <tobleminer@gmail.com>
Date: Tue, 30 Jan 2018 14:06:02 +0200
Subject: PCI: Add Ubiquiti Networks vendor ID

Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Tobias Schramm <tobleminer@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
---
 include/linux/pci_ids.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index ab20dc5db423..35871adfdc86 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -149,6 +149,8 @@
 #define PCI_VENDOR_ID_DYNALINK		0x0675
 #define PCI_DEVICE_ID_DYNALINK_IS64PH	0x1702
 
+#define PCI_VENDOR_ID_UBIQUITI		0x0777
+
 #define PCI_VENDOR_ID_BERKOM			0x0871
 #define PCI_DEVICE_ID_BERKOM_A1T		0xffa1
 #define PCI_DEVICE_ID_BERKOM_T_CONCEPT		0xffa2
-- 
cgit v1.2.3


From e728789c52afccc1275cba1dd812f03abe16ea3c Mon Sep 17 00:00:00 2001
From: Mathieu Malaterre <malat@debian.org>
Date: Wed, 7 Feb 2018 20:35:00 +0100
Subject: net: Extra '_get' in declaration of arch_get_platform_mac_address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In commit c7f5d105495a ("net: Add eth_platform_get_mac_address() helper."),
two declarations were added:

  int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
  unsigned char *arch_get_platform_get_mac_address(void);

An extra '_get' was introduced in arch_get_platform_get_mac_address, remove
it. Fix compile warning using W=1:

  CC      net/ethernet/eth.o
net/ethernet/eth.c:523:24: warning: no previous prototype for ‘arch_get_platform_mac_address’ [-Wmissing-prototypes]
 unsigned char * __weak arch_get_platform_mac_address(void)
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  AR      net/ethernet/built-in.o

Signed-off-by: Mathieu Malaterre <malat@debian.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 263dbcad22fc..79563840c295 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -31,7 +31,7 @@
 #ifdef __KERNEL__
 struct device;
 int eth_platform_get_mac_address(struct device *dev, u8 *mac_addr);
-unsigned char *arch_get_platform_get_mac_address(void);
+unsigned char *arch_get_platform_mac_address(void);
 u32 eth_get_headlen(void *data, unsigned int max_len);
 __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
 extern const struct header_ops eth_header_ops;
-- 
cgit v1.2.3


From 6e6e41c3112276288ccaf80c70916779b84bb276 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 9 Feb 2018 17:45:49 +0800
Subject: ptr_ring: fail early if queue occupies more than KMALLOC_MAX_SIZE

To avoid slab to warn about exceeded size, fail early if queue
occupies more than KMALLOC_MAX_SIZE.

Reported-by: syzbot+e4d4f9ddd4295539735d@syzkaller.appspotmail.com
Fixes: 2e0ab8ca83c12 ("ptr_ring: array based FIFO for pointers")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 1883d6137e9b..6051a5fe6913 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -466,6 +466,8 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
+	if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
+		return NULL;
 	return kcalloc(size, sizeof(void *), gfp);
 }
 
-- 
cgit v1.2.3


From 0bf7800f1799b5b1fd7d4f024e9ece53ac489011 Mon Sep 17 00:00:00 2001
From: Jason Wang <jasowang@redhat.com>
Date: Fri, 9 Feb 2018 17:45:50 +0800
Subject: ptr_ring: try vmalloc() when kmalloc() fails

This patch switch to use kvmalloc_array() for using a vmalloc()
fallback to help in case kmalloc() fails.

Reported-by: syzbot+e4d4f9ddd4295539735d@syzkaller.appspotmail.com
Fixes: 2e0ab8ca83c12 ("ptr_ring: array based FIFO for pointers")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ptr_ring.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h
index 6051a5fe6913..b884b7794187 100644
--- a/include/linux/ptr_ring.h
+++ b/include/linux/ptr_ring.h
@@ -464,11 +464,14 @@ static inline int ptr_ring_consume_batched_bh(struct ptr_ring *r,
 	__PTR_RING_PEEK_CALL_v; \
 })
 
+/* Not all gfp_t flags (besides GFP_KERNEL) are allowed. See
+ * documentation for vmalloc for which of them are legal.
+ */
 static inline void **__ptr_ring_init_queue_alloc(unsigned int size, gfp_t gfp)
 {
 	if (size * sizeof(void *) > KMALLOC_MAX_SIZE)
 		return NULL;
-	return kcalloc(size, sizeof(void *), gfp);
+	return kvmalloc_array(size, sizeof(void *), gfp | __GFP_ZERO);
 }
 
 static inline void __ptr_ring_set_size(struct ptr_ring *r, int size)
@@ -603,7 +606,7 @@ static inline int ptr_ring_resize(struct ptr_ring *r, int size, gfp_t gfp,
 	spin_unlock(&(r)->producer_lock);
 	spin_unlock_irqrestore(&(r)->consumer_lock, flags);
 
-	kfree(old);
+	kvfree(old);
 
 	return 0;
 }
@@ -643,7 +646,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
 	}
 
 	for (i = 0; i < nrings; ++i)
-		kfree(queues[i]);
+		kvfree(queues[i]);
 
 	kfree(queues);
 
@@ -651,7 +654,7 @@ static inline int ptr_ring_resize_multiple(struct ptr_ring **rings,
 
 nomem:
 	while (--i >= 0)
-		kfree(queues[i]);
+		kvfree(queues[i]);
 
 	kfree(queues);
 
@@ -666,7 +669,7 @@ static inline void ptr_ring_cleanup(struct ptr_ring *r, void (*destroy)(void *))
 	if (destroy)
 		while ((ptr = ptr_ring_consume(r)))
 			destroy(ptr);
-	kfree(r->queue);
+	kvfree(r->queue);
 }
 
 #endif /* _LINUX_PTR_RING_H  */
-- 
cgit v1.2.3