summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c35
-rw-r--r--net/core/filter.c61
-rw-r--r--net/core/neighbour.c32
-rw-r--r--net/core/net-sysfs.c9
-rw-r--r--net/core/netpoll.c23
-rw-r--r--net/core/request_sock.c9
-rw-r--r--net/core/rtnetlink.c2
7 files changed, 134 insertions, 37 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bb6470f5b7b..323c04edd779 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2915,9 +2915,11 @@ EXPORT_SYMBOL(xmit_recursion);
/**
* dev_loopback_xmit - loop back @skb
+ * @net: network namespace this loopback is happening in
+ * @sk: sk needed to be a netfilter okfn
* @skb: buffer to transmit
*/
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
{
skb_reset_mac_header(skb);
__skb_pull(skb, skb_network_offset(skb));
@@ -3143,11 +3145,11 @@ out:
return rc;
}
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb)
+int dev_queue_xmit(struct sk_buff *skb)
{
return __dev_queue_xmit(skb, NULL);
}
-EXPORT_SYMBOL(dev_queue_xmit_sk);
+EXPORT_SYMBOL(dev_queue_xmit);
int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
{
@@ -3668,6 +3670,14 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
case TC_ACT_QUEUED:
kfree_skb(skb);
return NULL;
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by cls/act_bpf, so
+ * we can safely push the L2 header back before
+ * redirecting to another netdev
+ */
+ __skb_push(skb, skb->mac_len);
+ skb_do_redirect(skb);
+ return NULL;
default:
break;
}
@@ -3982,13 +3992,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
* NET_RX_SUCCESS: no congestion
* NET_RX_DROP: packet was dropped
*/
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
{
trace_netif_receive_skb_entry(skb);
return netif_receive_skb_internal(skb);
}
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
/* Network device is going away, flush any packets still pending
* Called with irqs disabled.
@@ -4857,8 +4867,7 @@ struct netdev_adjacent {
struct rcu_head rcu;
};
-static struct netdev_adjacent *__netdev_find_adj(struct net_device *dev,
- struct net_device *adj_dev,
+static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
struct list_head *adj_list)
{
struct netdev_adjacent *adj;
@@ -4884,7 +4893,7 @@ bool netdev_has_upper_dev(struct net_device *dev,
{
ASSERT_RTNL();
- return __netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper);
+ return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
}
EXPORT_SYMBOL(netdev_has_upper_dev);
@@ -5146,7 +5155,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
struct netdev_adjacent *adj;
int ret;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (adj) {
adj->ref_nr++;
@@ -5202,7 +5211,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
{
struct netdev_adjacent *adj;
- adj = __netdev_find_adj(dev, adj_dev, dev_list);
+ adj = __netdev_find_adj(adj_dev, dev_list);
if (!adj) {
pr_err("tried to remove device %s from %s\n",
@@ -5323,10 +5332,10 @@ static int __netdev_upper_dev_link(struct net_device *dev,
return -EBUSY;
/* To prevent loops, check if dev is not upper device to upper_dev. */
- if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
+ if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5604,7 +5613,7 @@ void *netdev_lower_dev_get_private(struct net_device *dev,
if (!lower_dev)
return NULL;
- lower = __netdev_find_adj(dev, lower_dev, &dev->adj_list.lower);
+ lower = __netdev_find_adj(lower_dev, &dev->adj_list.lower);
if (!lower)
return NULL;
diff --git a/net/core/filter.c b/net/core/filter.c
index 05a04ea87172..60e3fe7c59c0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1404,9 +1404,6 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
if (unlikely(!dev))
return -EINVAL;
- if (unlikely(!(dev->flags & IFF_UP)))
- return -EINVAL;
-
skb2 = skb_clone(skb, GFP_ATOMIC);
if (unlikely(!skb2))
return -ENOMEM;
@@ -1427,6 +1424,48 @@ const struct bpf_func_proto bpf_clone_redirect_proto = {
.arg3_type = ARG_ANYTHING,
};
+struct redirect_info {
+ u32 ifindex;
+ u32 flags;
+};
+
+static DEFINE_PER_CPU(struct redirect_info, redirect_info);
+static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+
+ ri->ifindex = ifindex;
+ ri->flags = flags;
+ return TC_ACT_REDIRECT;
+}
+
+int skb_do_redirect(struct sk_buff *skb)
+{
+ struct redirect_info *ri = this_cpu_ptr(&redirect_info);
+ struct net_device *dev;
+
+ dev = dev_get_by_index_rcu(dev_net(skb->dev), ri->ifindex);
+ ri->ifindex = 0;
+ if (unlikely(!dev)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (BPF_IS_REDIRECT_INGRESS(ri->flags))
+ return dev_forward_skb(dev, skb);
+
+ skb->dev = dev;
+ return dev_queue_xmit(skb);
+}
+
+const struct bpf_func_proto bpf_redirect_proto = {
+ .func = bpf_redirect,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_ANYTHING,
+};
+
static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
{
return task_get_classid((struct sk_buff *) (unsigned long) r1);
@@ -1607,6 +1646,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
return &bpf_skb_get_tunnel_key_proto;
case BPF_FUNC_skb_set_tunnel_key:
return bpf_get_skb_set_tunnel_key_proto();
+ case BPF_FUNC_redirect:
+ return &bpf_redirect_proto;
default:
return sk_filter_func_proto(func_id);
}
@@ -1632,6 +1673,9 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type)
static bool sk_filter_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, cb[0]) ...
@@ -1648,6 +1692,9 @@ static bool sk_filter_is_valid_access(int off, int size,
static bool tc_cls_act_is_valid_access(int off, int size,
enum bpf_access_type type)
{
+ if (off == offsetof(struct __sk_buff, tc_classid))
+ return type == BPF_WRITE ? true : false;
+
if (type == BPF_WRITE) {
switch (off) {
case offsetof(struct __sk_buff, mark):
@@ -1760,6 +1807,14 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
break;
+ case offsetof(struct __sk_buff, tc_classid):
+ ctx_off -= offsetof(struct __sk_buff, tc_classid);
+ ctx_off += offsetof(struct sk_buff, cb);
+ ctx_off += offsetof(struct qdisc_skb_cb, tc_classid);
+ WARN_ON(type != BPF_WRITE);
+ *insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg, ctx_off);
+ break;
+
case offsetof(struct __sk_buff, tc_index):
#ifdef CONFIG_NET_SCHED
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 2b515ba7e94f..8c57fdf4d68e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2235,14 +2235,42 @@ static void neigh_update_notify(struct neighbour *neigh)
__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
+static bool neigh_master_filtered(struct net_device *dev, int master_idx)
+{
+ struct net_device *master;
+
+ if (!master_idx)
+ return false;
+
+ master = netdev_master_upper_dev_get(dev);
+ if (!master || master->ifindex != master_idx)
+ return true;
+
+ return false;
+}
+
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
+ const struct nlmsghdr *nlh = cb->nlh;
+ struct nlattr *tb[NDA_MAX + 1];
struct neighbour *n;
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
struct neigh_hash_table *nht;
+ int filter_master_idx = 0;
+ unsigned int flags = NLM_F_MULTI;
+ int err;
+
+ err = nlmsg_parse(nlh, sizeof(struct ndmsg), tb, NDA_MAX, NULL);
+ if (!err) {
+ if (tb[NDA_MASTER])
+ filter_master_idx = nla_get_u32(tb[NDA_MASTER]);
+
+ if (filter_master_idx)
+ flags |= NLM_F_DUMP_FILTERED;
+ }
rcu_read_lock_bh();
nht = rcu_dereference_bh(tbl->nht);
@@ -2255,12 +2283,14 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
n = rcu_dereference_bh(n->next)) {
if (!net_eq(dev_net(n->dev), net))
continue;
+ if (neigh_master_filtered(n->dev, filter_master_idx))
+ continue;
if (idx < s_idx)
goto next;
if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
RTM_NEWNEIGH,
- NLM_F_MULTI) < 0) {
+ flags) < 0) {
rc = -1;
goto out;
}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 830f8a7c1cb1..410c6e42bf1f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1003,15 +1003,12 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
}
#ifdef CONFIG_XPS
-static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
+static unsigned int get_netdev_queue_index(struct netdev_queue *queue)
{
struct net_device *dev = queue->dev;
- int i;
-
- for (i = 0; i < dev->num_tx_queues; i++)
- if (queue == &dev->_tx[i])
- break;
+ unsigned int i;
+ i = queue - dev->_tx;
BUG_ON(i >= dev->num_tx_queues);
return i;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8bdada242a7d..94acfc89ad97 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -140,7 +140,7 @@ static void queue_process(struct work_struct *work)
* case. Further, we test the poll_owner to avoid recursion on UP
* systems where the lock doesn't exist.
*/
-static int poll_one_napi(struct napi_struct *napi, int budget)
+static void poll_one_napi(struct napi_struct *napi)
{
int work = 0;
@@ -149,33 +149,33 @@ static int poll_one_napi(struct napi_struct *napi, int budget)
* holding the napi->poll_lock.
*/
if (!test_bit(NAPI_STATE_SCHED, &napi->state))
- return budget;
+ return;
/* If we set this bit but see that it has already been set,
* that indicates that napi has been disabled and we need
* to abort this operation
*/
if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
- goto out;
+ return;
- work = napi->poll(napi, budget);
- WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
+ /* We explicilty pass the polling call a budget of 0 to
+ * indicate that we are clearing the Tx path only.
+ */
+ work = napi->poll(napi, 0);
+ WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
trace_napi_poll(napi);
clear_bit(NAPI_STATE_NPSVC, &napi->state);
-
-out:
- return budget - work;
}
-static void poll_napi(struct net_device *dev, int budget)
+static void poll_napi(struct net_device *dev)
{
struct napi_struct *napi;
list_for_each_entry(napi, &dev->napi_list, dev_list) {
if (napi->poll_owner != smp_processor_id() &&
spin_trylock(&napi->poll_lock)) {
- budget = poll_one_napi(napi, budget);
+ poll_one_napi(napi);
spin_unlock(&napi->poll_lock);
}
}
@@ -185,7 +185,6 @@ static void netpoll_poll_dev(struct net_device *dev)
{
const struct net_device_ops *ops;
struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
- int budget = 0;
/* Don't do any rx activity if the dev_lock mutex is held
* the dev_open/close paths use this to block netpoll activity
@@ -208,7 +207,7 @@ static void netpoll_poll_dev(struct net_device *dev)
/* Process pending work on NIC */
ops->ndo_poll_controller(dev);
- poll_napi(dev, budget);
+ poll_napi(dev);
up(&ni->dev_lock);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index b42f0e26f89e..e22cfa4ed25f 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -59,6 +59,13 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
spin_lock_init(&queue->syn_wait_lock);
+
+ spin_lock_init(&queue->fastopenq.lock);
+ queue->fastopenq.rskq_rst_head = NULL;
+ queue->fastopenq.rskq_rst_tail = NULL;
+ queue->fastopenq.qlen = 0;
+ queue->fastopenq.max_qlen = 0;
+
queue->rskq_accept_head = NULL;
lopt->nr_table_entries = nr_table_entries;
lopt->max_qlen_log = ilog2(nr_table_entries);
@@ -174,7 +181,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
struct sock *lsk = req->rsk_listener;
struct fastopen_queue *fastopenq;
- fastopenq = inet_csk(lsk)->icsk_accept_queue.fastopenq;
+ fastopenq = &inet_csk(lsk)->icsk_accept_queue.fastopenq;
tcp_sk(sk)->fastopen_rsk = NULL;
spin_lock_bh(&fastopenq->lock);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 0ec48403ed68..474a6da3b51a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1272,7 +1272,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
if (!(af = nla_nest_start(skb, af_ops->family)))
goto nla_put_failure;
- err = af_ops->fill_link_af(skb, dev);
+ err = af_ops->fill_link_af(skb, dev, ext_filter_mask);
/*
* Caller may return ENODATA to indicate that there