summaryrefslogtreecommitdiffstats
path: root/net/core/net_namespace.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 20:01:30 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2015-02-10 20:01:30 -0800
commitc5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch)
tree9830baf38832769e1cf621708889111bbe3c93df /net/core/net_namespace.c
parent29afc4e9a408f2304e09c6dd0dbcfbd2356d0faa (diff)
parent9399f0c51489ae8c16d6559b82a452fdc1895e91 (diff)
downloadlinux-c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) More iov_iter conversion work from Al Viro. [ The "crypto: switch af_alg_make_sg() to iov_iter" commit was wrong, and this pull actually adds an extra commit on top of the branch I'm pulling to fix that up, so that the pre-merge state is ok. - Linus ] 2) Various optimizations to the ipv4 forwarding information base trie lookup implementation. From Alexander Duyck. 3) Remove sock_iocb altogether, from CHristoph Hellwig. 4) Allow congestion control algorithm selection via routing metrics. From Daniel Borkmann. 5) Make ipv4 uncached route list per-cpu, from Eric Dumazet. 6) Handle rfs hash collisions more gracefully, also from Eric Dumazet. 7) Add xmit_more support to r8169, e1000, and e1000e drivers. From Florian Westphal. 8) Transparent Ethernet Bridging support for GRO, from Jesse Gross. 9) Add BPF packet actions to packet scheduler, from Jiri Pirko. 10) Add support for uniqu flow IDs to openvswitch, from Joe Stringer. 11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman Kwok. 12) More sanely handle out-of-window dupacks, which can result in serious ACK storms. From Neal Cardwell. 13) Various rhashtable bug fixes and enhancements, from Herbert Xu, Patrick McHardy, and Thomas Graf. 14) Support xmit_more in be2net, from Sathya Perla. 15) Group Policy extensions for vxlan, from Thomas Graf. 16) Remove Checksum Offload support for vxlan, from Tom Herbert. 17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From Vlad Yasevich. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits) crypto: fix af_alg_make_sg() conversion to iov_iter ipv4: Namespecify TCP PMTU mechanism i40e: Fix for stats init function call in Rx setup tcp: don't include Fast Open option in SYN-ACK on pure SYN-data openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set ipv6: Make __ipv6_select_ident static ipv6: Fix fragment id assignment on LE arches. bridge: Fix inability to add non-vlan fdb entry net: Mellanox: Delete unnecessary checks before the function call "vunmap" cxgb4: Add support in cxgb4 to get expansion rom version via ethtool ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version net: dsa: Remove redundant phy_attach() IB/mlx4: Reset flow support for IB kernel ULPs IB/mlx4: Always use the correct port for mirrored multicast attachments net/bonding: Fix potential bad memory access during bonding events tipc: remove tipc_snprintf tipc: nl compat add noop and remove legacy nl framework tipc: convert legacy nl stats show to nl compat tipc: convert legacy nl net id get to nl compat tipc: convert legacy nl net id set to nl compat ...
Diffstat (limited to 'net/core/net_namespace.c')
-rw-r--r--net/core/net_namespace.c213
1 files changed, 213 insertions, 0 deletions
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ce780c722e48..cb5290b8c428 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -15,6 +15,10 @@
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
+#include <linux/net_namespace.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
+#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -144,6 +148,78 @@ static void ops_free_list(const struct pernet_operations *ops,
}
}
+static int alloc_netid(struct net *net, struct net *peer, int reqid)
+{
+ int min = 0, max = 0;
+
+ ASSERT_RTNL();
+
+ if (reqid >= 0) {
+ min = reqid;
+ max = reqid + 1;
+ }
+
+ return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+}
+
+/* This function is used by idr_for_each(). If net is equal to peer, the
+ * function returns the id so that idr_for_each() stops. Because we cannot
+ * returns the id 0 (idr_for_each() will not stop), we return the magic value
+ * NET_ID_ZERO (-1) for it.
+ */
+#define NET_ID_ZERO -1
+static int net_eq_idr(int id, void *net, void *peer)
+{
+ if (net_eq(net, peer))
+ return id ? : NET_ID_ZERO;
+ return 0;
+}
+
+static int __peernet2id(struct net *net, struct net *peer, bool alloc)
+{
+ int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
+
+ ASSERT_RTNL();
+
+ /* Magic value for id 0. */
+ if (id == NET_ID_ZERO)
+ return 0;
+ if (id > 0)
+ return id;
+
+ if (alloc)
+ return alloc_netid(net, peer, -1);
+
+ return -ENOENT;
+}
+
+/* This function returns the id of a peer netns. If no id is assigned, one will
+ * be allocated and returned.
+ */
+int peernet2id(struct net *net, struct net *peer)
+{
+ int id = __peernet2id(net, peer, true);
+
+ return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
+}
+EXPORT_SYMBOL(peernet2id);
+
+struct net *get_net_ns_by_id(struct net *net, int id)
+{
+ struct net *peer;
+
+ if (id < 0)
+ return NULL;
+
+ rcu_read_lock();
+ peer = idr_find(&net->netns_ids, id);
+ if (peer)
+ get_net(peer);
+ rcu_read_unlock();
+
+ return peer;
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -158,6 +234,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
atomic_set(&net->passive, 1);
net->dev_base_seq = 1;
net->user_ns = user_ns;
+ idr_init(&net->netns_ids);
#ifdef NETNS_REFCNT_DEBUG
atomic_set(&net->use_count, 0);
@@ -288,6 +365,14 @@ static void cleanup_net(struct work_struct *work)
list_for_each_entry(net, &net_kill_list, cleanup_list) {
list_del_rcu(&net->list);
list_add_tail(&net->exit_list, &net_exit_list);
+ for_each_net(tmp) {
+ int id = __peernet2id(tmp, net, false);
+
+ if (id >= 0)
+ idr_remove(&tmp->netns_ids, id);
+ }
+ idr_destroy(&net->netns_ids);
+
}
rtnl_unlock();
@@ -361,6 +446,7 @@ struct net *get_net_ns_by_fd(int fd)
return ERR_PTR(-EINVAL);
}
#endif
+EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
struct net *get_net_ns_by_pid(pid_t pid)
{
@@ -402,6 +488,130 @@ static struct pernet_operations __net_initdata net_ns_ops = {
.exit = net_ns_net_exit,
};
+static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
+ [NETNSA_NONE] = { .type = NLA_UNSPEC },
+ [NETNSA_NSID] = { .type = NLA_S32 },
+ [NETNSA_PID] = { .type = NLA_U32 },
+ [NETNSA_FD] = { .type = NLA_U32 },
+};
+
+static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct net *peer;
+ int nsid, err;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (!tb[NETNSA_NSID])
+ return -EINVAL;
+ nsid = nla_get_s32(tb[NETNSA_NSID]);
+
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ if (__peernet2id(net, peer, false) >= 0) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ err = alloc_netid(net, peer, nsid);
+ if (err > 0)
+ err = 0;
+out:
+ put_net(peer);
+ return err;
+}
+
+static int rtnl_net_get_size(void)
+{
+ return NLMSG_ALIGN(sizeof(struct rtgenmsg))
+ + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
+ ;
+}
+
+static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
+ int cmd, struct net *net, struct net *peer)
+{
+ struct nlmsghdr *nlh;
+ struct rtgenmsg *rth;
+ int id;
+
+ ASSERT_RTNL();
+
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rth = nlmsg_data(nlh);
+ rth->rtgen_family = AF_UNSPEC;
+
+ id = __peernet2id(net, peer, false);
+ if (id < 0)
+ id = NETNSA_NSID_NOT_ASSIGNED;
+ if (nla_put_s32(skb, NETNSA_NSID, id))
+ goto nla_put_failure;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+ return -EMSGSIZE;
+}
+
+static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct nlattr *tb[NETNSA_MAX + 1];
+ struct sk_buff *msg;
+ int err = -ENOBUFS;
+ struct net *peer;
+
+ err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
+ rtnl_net_policy);
+ if (err < 0)
+ return err;
+ if (tb[NETNSA_PID])
+ peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
+ else if (tb[NETNSA_FD])
+ peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
+ else
+ return -EINVAL;
+
+ if (IS_ERR(peer))
+ return PTR_ERR(peer);
+
+ msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+ if (!msg) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ RTM_GETNSID, net, peer);
+ if (err < 0)
+ goto err_out;
+
+ err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
+ goto out;
+
+err_out:
+ nlmsg_free(msg);
+out:
+ put_net(peer);
+ return err;
+}
+
static int __init net_ns_init(void)
{
struct net_generic *ng;
@@ -435,6 +645,9 @@ static int __init net_ns_init(void)
register_pernet_subsys(&net_ns_ops);
+ rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
+ rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
+
return 0;
}