diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/netprio_cgroup.c | 260 |
1 files changed, 113 insertions, 147 deletions
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 79285a36035f..bde53da9cd86 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -27,11 +27,7 @@ #include <linux/fdtable.h> -#define PRIOIDX_SZ 128 - -static unsigned long prioidx_map[PRIOIDX_SZ]; -static DEFINE_SPINLOCK(prioidx_map_lock); -static atomic_t max_prioidx = ATOMIC_INIT(0); +#define PRIOMAP_MIN_SZ 128 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) { @@ -39,136 +35,157 @@ static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgr struct cgroup_netprio_state, css); } -static int get_prioidx(u32 *prio) -{ - unsigned long flags; - u32 prioidx; - - spin_lock_irqsave(&prioidx_map_lock, flags); - prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); - if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { - spin_unlock_irqrestore(&prioidx_map_lock, flags); - return -ENOSPC; - } - set_bit(prioidx, prioidx_map); - if (atomic_read(&max_prioidx) < prioidx) - atomic_set(&max_prioidx, prioidx); - spin_unlock_irqrestore(&prioidx_map_lock, flags); - *prio = prioidx; - return 0; -} - -static void put_prioidx(u32 idx) +/* + * Extend @dev->priomap so that it's large enough to accomodate + * @target_idx. @dev->priomap.priomap_len > @target_idx after successful + * return. Must be called under rtnl lock. + */ +static int extend_netdev_table(struct net_device *dev, u32 target_idx) { - unsigned long flags; - - spin_lock_irqsave(&prioidx_map_lock, flags); - clear_bit(idx, prioidx_map); - spin_unlock_irqrestore(&prioidx_map_lock, flags); -} + struct netprio_map *old, *new; + size_t new_sz, new_len; -static int extend_netdev_table(struct net_device *dev, u32 new_len) -{ - size_t new_size = sizeof(struct netprio_map) + - ((sizeof(u32) * new_len)); - struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); - struct netprio_map *old_priomap; + /* is the existing priomap large enough? */ + old = rtnl_dereference(dev->priomap); + if (old && old->priomap_len > target_idx) + return 0; - old_priomap = rtnl_dereference(dev->priomap); + /* + * Determine the new size. Let's keep it power-of-two. We start + * from PRIOMAP_MIN_SZ and double it until it's large enough to + * accommodate @target_idx. + */ + new_sz = PRIOMAP_MIN_SZ; + while (true) { + new_len = (new_sz - offsetof(struct netprio_map, priomap)) / + sizeof(new->priomap[0]); + if (new_len > target_idx) + break; + new_sz *= 2; + /* overflowed? */ + if (WARN_ON(new_sz < PRIOMAP_MIN_SZ)) + return -ENOSPC; + } - if (!new_priomap) { + /* allocate & copy */ + new = kzalloc(new_sz, GFP_KERNEL); + if (!new) { pr_warn("Unable to alloc new priomap!\n"); return -ENOMEM; } - if (old_priomap) - memcpy(new_priomap->priomap, old_priomap->priomap, - old_priomap->priomap_len * - sizeof(old_priomap->priomap[0])); + if (old) + memcpy(new->priomap, old->priomap, + old->priomap_len * sizeof(old->priomap[0])); - new_priomap->priomap_len = new_len; + new->priomap_len = new_len; - rcu_assign_pointer(dev->priomap, new_priomap); - if (old_priomap) - kfree_rcu(old_priomap, rcu); + /* install the new priomap */ + rcu_assign_pointer(dev->priomap, new); + if (old) + kfree_rcu(old, rcu); return 0; } -static int write_update_netdev_table(struct net_device *dev) +/** + * netprio_prio - return the effective netprio of a cgroup-net_device pair + * @cgrp: cgroup part of the target pair + * @dev: net_device part of the target pair + * + * Should be called under RCU read or rtnl lock. + */ +static u32 netprio_prio(struct cgroup *cgrp, struct net_device *dev) +{ + struct netprio_map *map = rcu_dereference_rtnl(dev->priomap); + + if (map && cgrp->id < map->priomap_len) + return map->priomap[cgrp->id]; + return 0; +} + +/** + * netprio_set_prio - set netprio on a cgroup-net_device pair + * @cgrp: cgroup part of the target pair + * @dev: net_device part of the target pair + * @prio: prio to set + * + * Set netprio to @prio on @cgrp-@dev pair. Should be called under rtnl + * lock and may fail under memory pressure for non-zero @prio. + */ +static int netprio_set_prio(struct cgroup *cgrp, struct net_device *dev, + u32 prio) { - int ret = 0; - u32 max_len; struct netprio_map *map; + int ret; - max_len = atomic_read(&max_prioidx) + 1; + /* avoid extending priomap for zero writes */ map = rtnl_dereference(dev->priomap); - if (!map || map->priomap_len < max_len) - ret = extend_netdev_table(dev, max_len); + if (!prio && (!map || map->priomap_len <= cgrp->id)) + return 0; - return ret; + ret = extend_netdev_table(dev, cgrp->id); + if (ret) + return ret; + + map = rtnl_dereference(dev->priomap); + map->priomap[cgrp->id] = prio; + return 0; } -static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) +static struct cgroup_subsys_state *cgrp_css_alloc(struct cgroup *cgrp) { struct cgroup_netprio_state *cs; - int ret = -EINVAL; cs = kzalloc(sizeof(*cs), GFP_KERNEL); if (!cs) return ERR_PTR(-ENOMEM); - if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) - goto out; - - ret = get_prioidx(&cs->prioidx); - if (ret < 0) { - pr_warn("No space in priority index array\n"); - goto out; - } - return &cs->css; -out: - kfree(cs); - return ERR_PTR(ret); } -static void cgrp_destroy(struct cgroup *cgrp) +static int cgrp_css_online(struct cgroup *cgrp) { - struct cgroup_netprio_state *cs; + struct cgroup *parent = cgrp->parent; struct net_device *dev; - struct netprio_map *map; + int ret = 0; + + if (!parent) + return 0; - cs = cgrp_netprio_state(cgrp); rtnl_lock(); + /* + * Inherit prios from the parent. As all prios are set during + * onlining, there is no need to clear them on offline. + */ for_each_netdev(&init_net, dev) { - map = rtnl_dereference(dev->priomap); - if (map && cs->prioidx < map->priomap_len) - map->priomap[cs->prioidx] = 0; + u32 prio = netprio_prio(parent, dev); + + ret = netprio_set_prio(cgrp, dev, prio); + if (ret) + break; } rtnl_unlock(); - put_prioidx(cs->prioidx); - kfree(cs); + return ret; +} + +static void cgrp_css_free(struct cgroup *cgrp) +{ + kfree(cgrp_netprio_state(cgrp)); } static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) { - return (u64)cgrp_netprio_state(cgrp)->prioidx; + return cgrp->id; } static int read_priomap(struct cgroup *cont, struct cftype *cft, struct cgroup_map_cb *cb) { struct net_device *dev; - u32 prioidx = cgrp_netprio_state(cont)->prioidx; - u32 priority; - struct netprio_map *map; rcu_read_lock(); - for_each_netdev_rcu(&init_net, dev) { - map = rcu_dereference(dev->priomap); - priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; - cb->fill(cb, dev->name, priority); - } + for_each_netdev_rcu(&init_net, dev) + cb->fill(cb, dev->name, netprio_prio(cont, dev)); rcu_read_unlock(); return 0; } @@ -176,66 +193,24 @@ static int read_priomap(struct cgroup *cont, struct cftype *cft, static int write_priomap(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { - char *devname = kstrdup(buffer, GFP_KERNEL); - int ret = -EINVAL; - u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; - unsigned long priority; - char *priostr; + char devname[IFNAMSIZ + 1]; struct net_device *dev; - struct netprio_map *map; - - if (!devname) - return -ENOMEM; - - /* - * Minimally sized valid priomap string - */ - if (strlen(devname) < 3) - goto out_free_devname; - - priostr = strstr(devname, " "); - if (!priostr) - goto out_free_devname; - - /* - *Separate the devname from the associated priority - *and advance the priostr pointer to the priority value - */ - *priostr = '\0'; - priostr++; - - /* - * If the priostr points to NULL, we're at the end of the passed - * in string, and its not a valid write - */ - if (*priostr == '\0') - goto out_free_devname; - - ret = kstrtoul(priostr, 10, &priority); - if (ret < 0) - goto out_free_devname; + u32 prio; + int ret; - ret = -ENODEV; + if (sscanf(buffer, "%"__stringify(IFNAMSIZ)"s %u", devname, &prio) != 2) + return -EINVAL; dev = dev_get_by_name(&init_net, devname); if (!dev) - goto out_free_devname; + return -ENODEV; rtnl_lock(); - ret = write_update_netdev_table(dev); - if (ret < 0) - goto out_put_dev; - map = rtnl_dereference(dev->priomap); - if (map) - map->priomap[prioidx] = priority; + ret = netprio_set_prio(cgrp, dev, prio); -out_put_dev: rtnl_unlock(); dev_put(dev); - -out_free_devname: - kfree(devname); return ret; } @@ -276,22 +251,13 @@ static struct cftype ss_files[] = { struct cgroup_subsys net_prio_subsys = { .name = "net_prio", - .create = cgrp_create, - .destroy = cgrp_destroy, + .css_alloc = cgrp_css_alloc, + .css_online = cgrp_css_online, + .css_free = cgrp_css_free, .attach = net_prio_attach, .subsys_id = net_prio_subsys_id, .base_cftypes = ss_files, .module = THIS_MODULE, - - /* - * net_prio has artificial limit on the number of cgroups and - * disallows nesting making it impossible to co-mount it with other - * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ - * limit and properly nest configuration such that children follow - * their parents' configurations by default and are allowed to - * override and remove the following. - */ - .broken_hierarchy = true, }; static int netprio_device_event(struct notifier_block *unused, |