diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 236 | ||||
-rw-r--r-- | net/core/ethtool.c | 2 | ||||
-rw-r--r-- | net/core/filter.c | 139 | ||||
-rw-r--r-- | net/core/flow.c | 4 | ||||
-rw-r--r-- | net/core/neighbour.c | 20 | ||||
-rw-r--r-- | net/core/net-sysfs.c | 17 | ||||
-rw-r--r-- | net/core/net_namespace.c | 23 | ||||
-rw-r--r-- | net/core/netpoll.c | 6 | ||||
-rw-r--r-- | net/core/netprio_cgroup.c | 2 | ||||
-rw-r--r-- | net/core/pktgen.c | 47 | ||||
-rw-r--r-- | net/core/rtnetlink.c | 230 | ||||
-rw-r--r-- | net/core/scm.c | 6 | ||||
-rw-r--r-- | net/core/skbuff.c | 34 | ||||
-rw-r--r-- | net/core/sock.c | 84 | ||||
-rw-r--r-- | net/core/sysctl_net_core.c | 5 |
15 files changed, 710 insertions, 145 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index e5942bf45a6d..d0cbc93fcf32 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -176,8 +176,10 @@ #define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1) static DEFINE_SPINLOCK(ptype_lock); +static DEFINE_SPINLOCK(offload_lock); static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly; static struct list_head ptype_all __read_mostly; /* Taps */ +static struct list_head offload_base __read_mostly; /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -201,6 +203,8 @@ static struct list_head ptype_all __read_mostly; /* Taps */ DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +DEFINE_SEQLOCK(devnet_rename_seq); + static inline void dev_base_seq_inc(struct net *net) { while (++net->dev_base_seq == 0); @@ -470,6 +474,82 @@ void dev_remove_pack(struct packet_type *pt) } EXPORT_SYMBOL(dev_remove_pack); + +/** + * dev_add_offload - register offload handlers + * @po: protocol offload declaration + * + * Add protocol offload handlers to the networking stack. The passed + * &proto_offload is linked into kernel lists and may not be freed until + * it has been removed from the kernel lists. + * + * This call does not sleep therefore it can not + * guarantee all CPU's that are in middle of receiving packets + * will see the new offload handlers (until the next received packet). + */ +void dev_add_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + + spin_lock(&offload_lock); + list_add_rcu(&po->list, head); + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(dev_add_offload); + +/** + * __dev_remove_offload - remove offload handler + * @po: packet offload declaration + * + * Remove a protocol offload handler that was previously added to the + * kernel offload handlers by dev_add_offload(). The passed &offload_type + * is removed from the kernel lists and can be freed or reused once this + * function returns. + * + * The packet type might still be in use by receivers + * and must not be freed until after all the CPU's have gone + * through a quiescent state. + */ +void __dev_remove_offload(struct packet_offload *po) +{ + struct list_head *head = &offload_base; + struct packet_offload *po1; + + spin_lock(&offload_lock); + + list_for_each_entry(po1, head, list) { + if (po == po1) { + list_del_rcu(&po->list); + goto out; + } + } + + pr_warn("dev_remove_offload: %p not found\n", po); +out: + spin_unlock(&offload_lock); +} +EXPORT_SYMBOL(__dev_remove_offload); + +/** + * dev_remove_offload - remove packet offload handler + * @po: packet offload declaration + * + * Remove a packet offload handler that was previously added to the kernel + * offload handlers by dev_add_offload(). The passed &offload_type is + * removed from the kernel lists and can be freed or reused once this + * function returns. + * + * This call sleeps to guarantee that no CPU is looking at the packet + * type after return. + */ +void dev_remove_offload(struct packet_offload *po) +{ + __dev_remove_offload(po); + + synchronize_net(); +} +EXPORT_SYMBOL(dev_remove_offload); + /****************************************************************************** Device Boot-time Settings Routines @@ -1013,22 +1093,31 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) + write_seqlock(&devnet_rename_seq); + + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { + write_sequnlock(&devnet_rename_seq); return 0; + } memcpy(oldname, dev->name, IFNAMSIZ); err = dev_get_valid_name(net, dev, newname); - if (err < 0) + if (err < 0) { + write_sequnlock(&devnet_rename_seq); return err; + } rollback: ret = device_rename(&dev->dev, dev->name); if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); + write_sequnlock(&devnet_rename_seq); return ret; } + write_sequnlock(&devnet_rename_seq); + write_lock_bh(&dev_base_lock); hlist_del_rcu(&dev->name_hlist); write_unlock_bh(&dev_base_lock); @@ -1046,6 +1135,7 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; + write_seqlock(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); goto rollback; } else { @@ -1075,10 +1165,8 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) return -EINVAL; if (!len) { - if (dev->ifalias) { - kfree(dev->ifalias); - dev->ifalias = NULL; - } + kfree(dev->ifalias); + dev->ifalias = NULL; return 0; } @@ -1994,7 +2082,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; int vlan_depth = ETH_HLEN; int err; @@ -2023,18 +2111,17 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, } rcu_read_lock(); - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + list_for_each_entry_rcu(ptype, &offload_base, list) { + if (ptype->type == type && ptype->callbacks.gso_segment) { if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { - err = ptype->gso_send_check(skb); + err = ptype->callbacks.gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) break; __skb_push(skb, (skb->data - skb_network_header(skb))); } - segs = ptype->gso_segment(skb, features); + segs = ptype->callbacks.gso_segment(skb, features); break; } } @@ -2237,6 +2324,13 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } + /* If encapsulation offload request, verify we are testing + * hardware encapsulation features instead of standard + * features for the netdev + */ + if (skb->encapsulation) + features &= dev->hw_enc_features; + if (netif_needs_gso(skb, features)) { if (unlikely(dev_gso_segment(skb, features))) goto out_kfree_skb; @@ -2252,8 +2346,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, * checksumming here. */ if (skb->ip_summed == CHECKSUM_PARTIAL) { - skb_set_transport_header(skb, - skb_checksum_start_offset(skb)); + if (skb->encapsulation) + skb_set_inner_transport_header(skb, + skb_checksum_start_offset(skb)); + else + skb_set_transport_header(skb, + skb_checksum_start_offset(skb)); if (!(features & NETIF_F_ALL_CSUM) && skb_checksum_help(skb)) goto out_kfree_skb; @@ -3446,9 +3544,9 @@ static void flush_backlog(void *arg) static int napi_gro_complete(struct sk_buff *skb) { - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int err = -ENOENT; BUILD_BUG_ON(sizeof(struct napi_gro_cb) > sizeof(skb->cb)); @@ -3460,10 +3558,10 @@ static int napi_gro_complete(struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_complete) + if (ptype->type != type || !ptype->callbacks.gro_complete) continue; - err = ptype->gro_complete(skb); + err = ptype->callbacks.gro_complete(skb); break; } rcu_read_unlock(); @@ -3507,12 +3605,34 @@ void napi_gro_flush(struct napi_struct *napi, bool flush_old) } EXPORT_SYMBOL(napi_gro_flush); -enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) +static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) +{ + struct sk_buff *p; + unsigned int maclen = skb->dev->hard_header_len; + + for (p = napi->gro_list; p; p = p->next) { + unsigned long diffs; + + diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; + diffs |= p->vlan_tci ^ skb->vlan_tci; + if (maclen == ETH_HLEN) + diffs |= compare_ether_header(skb_mac_header(p), + skb_gro_mac_header(skb)); + else if (!diffs) + diffs = memcmp(skb_mac_header(p), + skb_gro_mac_header(skb), + maclen); + NAPI_GRO_CB(p)->same_flow = !diffs; + NAPI_GRO_CB(p)->flush = 0; + } +} + +static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { struct sk_buff **pp = NULL; - struct packet_type *ptype; + struct packet_offload *ptype; __be16 type = skb->protocol; - struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK]; + struct list_head *head = &offload_base; int same_flow; int mac_len; enum gro_result ret; @@ -3523,9 +3643,11 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) if (skb_is_gso(skb) || skb_has_frag_list(skb)) goto normal; + gro_list_prepare(napi, skb); + rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { - if (ptype->type != type || ptype->dev || !ptype->gro_receive) + if (ptype->type != type || !ptype->callbacks.gro_receive) continue; skb_set_network_header(skb, skb_gro_offset(skb)); @@ -3535,7 +3657,7 @@ enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; - pp = ptype->gro_receive(&napi->gro_list, skb); + pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; } rcu_read_unlock(); @@ -3598,34 +3720,9 @@ normal: ret = GRO_NORMAL; goto pull; } -EXPORT_SYMBOL(dev_gro_receive); - -static inline gro_result_t -__napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) -{ - struct sk_buff *p; - unsigned int maclen = skb->dev->hard_header_len; - - for (p = napi->gro_list; p; p = p->next) { - unsigned long diffs; - - diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; - diffs |= p->vlan_tci ^ skb->vlan_tci; - if (maclen == ETH_HLEN) - diffs |= compare_ether_header(skb_mac_header(p), - skb_gro_mac_header(skb)); - else if (!diffs) - diffs = memcmp(skb_mac_header(p), - skb_gro_mac_header(skb), - maclen); - NAPI_GRO_CB(p)->same_flow = !diffs; - NAPI_GRO_CB(p)->flush = 0; - } - return dev_gro_receive(napi, skb); -} -gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) +static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) { switch (ret) { case GRO_NORMAL: @@ -3651,7 +3748,6 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb) return ret; } -EXPORT_SYMBOL(napi_skb_finish); static void skb_gro_reset_offset(struct sk_buff *skb) { @@ -3674,7 +3770,7 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { skb_gro_reset_offset(skb); - return napi_skb_finish(__napi_gro_receive(napi, skb), skb); + return napi_skb_finish(dev_gro_receive(napi, skb), skb); } EXPORT_SYMBOL(napi_gro_receive); @@ -3703,7 +3799,7 @@ struct sk_buff *napi_get_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_get_frags); -gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, +static gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, gro_result_t ret) { switch (ret) { @@ -3728,7 +3824,6 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, return ret; } -EXPORT_SYMBOL(napi_frags_finish); static struct sk_buff *napi_frags_skb(struct napi_struct *napi) { @@ -3773,7 +3868,7 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) if (!skb) return GRO_DROP; - return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb)); + return napi_frags_finish(napi, skb, dev_gro_receive(napi, skb)); } EXPORT_SYMBOL(napi_gro_frags); @@ -4075,6 +4170,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) { struct net_device *dev; struct ifreq ifr; + unsigned seq; /* * Fetch the caller's info block. @@ -4083,6 +4179,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) return -EFAULT; +retry: + seq = read_seqbegin(&devnet_rename_seq); rcu_read_lock(); dev = dev_get_by_index_rcu(net, ifr.ifr_ifindex); if (!dev) { @@ -4092,6 +4190,8 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg) strcpy(ifr.ifr_name, dev->name); rcu_read_unlock(); + if (read_seqretry(&devnet_rename_seq, seq)) + goto retry; if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) return -EFAULT; @@ -4884,7 +4984,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; - if (!err && dev->flags & IFF_UP) + if (!err) call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); return err; } @@ -5204,7 +5304,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSIFNAME: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; dev_load(net, ifr.ifr_name); rtnl_lock(); @@ -5225,16 +5325,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) * - require strict serialization. * - do not return a value */ + case SIOCSIFMAP: + case SIOCSIFTXQLEN: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + /* fall through */ + /* + * These ioctl calls: + * - require local superuser power. + * - require strict serialization. + * - do not return a value + */ case SIOCSIFFLAGS: case SIOCSIFMETRIC: case SIOCSIFMTU: - case SIOCSIFMAP: case SIOCSIFHWADDR: case SIOCSIFSLAVE: case SIOCADDMULTI: case SIOCDELMULTI: case SIOCSIFHWBROADCAST: - case SIOCSIFTXQLEN: case SIOCSMIIREG: case SIOCBONDENSLAVE: case SIOCBONDRELEASE: @@ -5243,7 +5352,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCBRADDIF: case SIOCBRDELIF: case SIOCSHWTSTAMP: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* fall through */ case SIOCBONDSLAVEINFOQUERY: @@ -6268,7 +6377,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char goto out; /* Ensure the device has been registrered */ - err = -EINVAL; if (dev->reg_state != NETREG_REGISTERED) goto out; @@ -6323,6 +6431,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev_uc_flush(dev); dev_mc_flush(dev); + /* Send a netdev-removed uevent to the old namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_REMOVE); + /* Actually switch the network namespace */ dev_net_set(dev, net); @@ -6334,6 +6445,9 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char dev->iflink = dev->ifindex; } + /* Send a netdev-add uevent to the new namespace */ + kobject_uevent(&dev->dev.kobj, KOBJ_ADD); + /* Fixup kobjects */ err = device_rename(&dev->dev, dev->name); WARN_ON(err); @@ -6666,6 +6780,8 @@ static int __init net_dev_init(void) for (i = 0; i < PTYPE_HASH_SIZE; i++) INIT_LIST_HEAD(&ptype_base[i]); + INIT_LIST_HEAD(&offload_base); + if (register_pernet_subsys(&netdev_net_ops)) goto out; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 4d64cc2e3fa9..a8705432e4b1 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1460,7 +1460,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GEEE: break; default: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; } diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbcf..c23543cba132 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include <linux/reciprocal_div.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> +#include <linux/if_vlan.h> /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; @@ -751,3 +760,133 @@ int sk_detach_filter(struct sock *sk) return ret; } EXPORT_SYMBOL_GPL(sk_detach_filter); + +static void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) +{ + static const u16 decodes[] = { + [BPF_S_ALU_ADD_K] = BPF_ALU|BPF_ADD|BPF_K, + [BPF_S_ALU_ADD_X] = BPF_ALU|BPF_ADD|BPF_X, + [BPF_S_ALU_SUB_K] = BPF_ALU|BPF_SUB|BPF_K, + [BPF_S_ALU_SUB_X] = BPF_ALU|BPF_SUB|BPF_X, + [BPF_S_ALU_MUL_K] = BPF_ALU|BPF_MUL|BPF_K, + [BPF_S_ALU_MUL_X] = BPF_ALU|BPF_MUL|BPF_X, + [BPF_S_ALU_DIV_X] = BPF_ALU|BPF_DIV|BPF_X, + [BPF_S_ALU_MOD_K] = BPF_ALU|BPF_MOD|BPF_K, + [BPF_S_ALU_MOD_X] = BPF_ALU|BPF_MOD|BPF_X, + [BPF_S_ALU_AND_K] = BPF_ALU|BPF_AND|BPF_K, + [BPF_S_ALU_AND_X] = BPF_ALU|BPF_AND|BPF_X, + [BPF_S_ALU_OR_K] = BPF_ALU|BPF_OR|BPF_K, + [BPF_S_ALU_OR_X] = BPF_ALU|BPF_OR|BPF_X, + [BPF_S_ALU_XOR_K] = BPF_ALU|BPF_XOR|BPF_K, + [BPF_S_ALU_XOR_X] = BPF_ALU|BPF_XOR|BPF_X, + [BPF_S_ALU_LSH_K] = BPF_ALU|BPF_LSH|BPF_K, + [BPF_S_ALU_LSH_X] = BPF_ALU|BPF_LSH|BPF_X, + [BPF_S_ALU_RSH_K] = BPF_ALU|BPF_RSH|BPF_K, + [BPF_S_ALU_RSH_X] = BPF_ALU|BPF_RSH|BPF_X, + [BPF_S_ALU_NEG] = BPF_ALU|BPF_NEG, + [BPF_S_LD_W_ABS] = BPF_LD|BPF_W|BPF_ABS, + [BPF_S_LD_H_ABS] = BPF_LD|BPF_H|BPF_ABS, + [BPF_S_LD_B_ABS] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PROTOCOL] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_PKTTYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_IFINDEX] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_NLATTR_NEST] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_MARK] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_QUEUE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_HATYPE] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_RXHASH] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_CPU] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_ALU_XOR_X] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_SECCOMP_LD_W] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_ANC_VLAN_TAG_PRESENT] = BPF_LD|BPF_B|BPF_ABS, + [BPF_S_LD_W_LEN] = BPF_LD|BPF_W|BPF_LEN, + [BPF_S_LD_W_IND] = BPF_LD|BPF_W|BPF_IND, + [BPF_S_LD_H_IND] = BPF_LD|BPF_H|BPF_IND, + [BPF_S_LD_B_IND] = BPF_LD|BPF_B|BPF_IND, + [BPF_S_LD_IMM] = BPF_LD|BPF_IMM, + [BPF_S_LDX_W_LEN] = BPF_LDX|BPF_W|BPF_LEN, + [BPF_S_LDX_B_MSH] = BPF_LDX|BPF_B|BPF_MSH, + [BPF_S_LDX_IMM] = BPF_LDX|BPF_IMM, + [BPF_S_MISC_TAX] = BPF_MISC|BPF_TAX, + [BPF_S_MISC_TXA] = BPF_MISC|BPF_TXA, + [BPF_S_RET_K] = BPF_RET|BPF_K, + [BPF_S_RET_A] = BPF_RET|BPF_A, + [BPF_S_ALU_DIV_K] = BPF_ALU|BPF_DIV|BPF_K, + [BPF_S_LD_MEM] = BPF_LD|BPF_MEM, + [BPF_S_LDX_MEM] = BPF_LDX|BPF_MEM, + [BPF_S_ST] = BPF_ST, + [BPF_S_STX] = BPF_STX, + [BPF_S_JMP_JA] = BPF_JMP|BPF_JA, + [BPF_S_JMP_JEQ_K] = BPF_JMP|BPF_JEQ|BPF_K, + [BPF_S_JMP_JEQ_X] = BPF_JMP|BPF_JEQ|BPF_X, + [BPF_S_JMP_JGE_K] = BPF_JMP|BPF_JGE|BPF_K, + [BPF_S_JMP_JGE_X] = BPF_JMP|BPF_JGE|BPF_X, + [BPF_S_JMP_JGT_K] = BPF_JMP|BPF_JGT|BPF_K, + [BPF_S_JMP_JGT_X] = BPF_JMP|BPF_JGT|BPF_X, + [BPF_S_JMP_JSET_K] = BPF_JMP|BPF_JSET|BPF_K, + [BPF_S_JMP_JSET_X] = BPF_JMP|BPF_JSET|BPF_X, + }; + u16 code; + + code = filt->code; + + to->code = decodes[code]; + to->jt = filt->jt; + to->jf = filt->jf; + + if (code == BPF_S_ALU_DIV_K) { + /* + * When loaded this rule user gave us X, which was + * translated into R = r(X). Now we calculate the + * RR = r(R) and report it back. If next time this + * value is loaded and RRR = r(RR) is calculated + * then the R == RRR will be true. + * + * One exception. X == 1 translates into R == 0 and + * we can't calculate RR out of it with r(). + */ + + if (filt->k == 0) + to->k = 1; + else + to->k = reciprocal_value(filt->k); + + BUG_ON(reciprocal_value(to->k) != filt->k); + } else + to->k = filt->k; +} + +int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) +{ + struct sk_filter *filter; + int i, ret; + + lock_sock(sk); + filter = rcu_dereference_protected(sk->sk_filter, + sock_owned_by_user(sk)); + ret = 0; + if (!filter) + goto out; + ret = filter->len; + if (!len) + goto out; + ret = -EINVAL; + if (len < filter->len) + goto out; + + ret = -EFAULT; + for (i = 0; i < filter->len; i++) { + struct sock_filter fb; + + sk_decode_filter(&filter->insns[i], &fb); + if (copy_to_user(&ubuf[i], &fb, sizeof(fb))) + goto out; + } + + ret = filter->len; +out: + release_sock(sk); + return ret; +} diff --git a/net/core/flow.c b/net/core/flow.c index e318c7e98042..b0901ee5a002 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -327,11 +327,9 @@ static void flow_cache_flush_tasklet(unsigned long data) static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; - int cpu; struct tasklet_struct *tasklet; - cpu = smp_processor_id(); - tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; + tasklet = this_cpu_ptr(&info->cache->percpu->flush_tasklet); tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 22571488730a..c815f285e5ab 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1787,8 +1787,7 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) nla_put_u32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes) || /* approximative value for deprecated QUEUE_LEN (in packets) */ nla_put_u32(skb, NDTPA_QUEUE_LEN, - DIV_ROUND_UP(parms->queue_len_bytes, - SKB_TRUESIZE(ETH_FRAME_LEN))) || + parms->queue_len_bytes / SKB_TRUESIZE(ETH_FRAME_LEN)) || nla_put_u32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen) || nla_put_u32(skb, NDTPA_APP_PROBES, parms->app_probes) || nla_put_u32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes) || @@ -2770,6 +2769,8 @@ EXPORT_SYMBOL(neigh_app_ns); #endif /* CONFIG_ARPD */ #ifdef CONFIG_SYSCTL +static int zero; +static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -2777,9 +2778,13 @@ static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer, int size, ret; ctl_table tmp = *ctl; + tmp.extra1 = &zero; + tmp.extra2 = &unres_qlen_max; tmp.data = &size; - size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN)); - ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); + + size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN); + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN); return ret; @@ -2865,7 +2870,8 @@ static struct neigh_sysctl_table { .procname = "unres_qlen_bytes", .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .extra1 = &zero, + .proc_handler = proc_dointvec_minmax, }, [NEIGH_VAR_PROXY_QLEN] = { .procname = "proxy_qlen", @@ -2987,6 +2993,10 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev; } + /* Don't export sysctls to unprivileged users */ + if (neigh_parms_net(p)->user_ns != &init_user_ns) + t->neigh_vars[0].procname = NULL; + snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s", p_name, dev_name_source); t->sysctl_header = diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 017a8bacfb27..334efd5d67a9 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -18,11 +18,9 @@ #include <net/sock.h> #include <net/net_namespace.h> #include <linux/rtnetlink.h> -#include <linux/wireless.h> #include <linux/vmalloc.h> #include <linux/export.h> #include <linux/jiffies.h> -#include <net/wext.h> #include "net-sysfs.h" @@ -73,11 +71,12 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len, int (*set)(struct net_device *, unsigned long)) { - struct net_device *net = to_net_dev(dev); + struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); unsigned long new; int ret = -EINVAL; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; ret = kstrtoul(buf, 0, &new); @@ -87,8 +86,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (!rtnl_trylock()) return restart_syscall(); - if (dev_isalive(net)) { - if ((ret = (*set)(net, new)) == 0) + if (dev_isalive(netdev)) { + if ((ret = (*set)(netdev, new)) == 0) ret = len; } rtnl_unlock(); @@ -264,6 +263,9 @@ static ssize_t store_tx_queue_len(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + return netdev_store(dev, attr, buf, len, change_tx_queue_len); } @@ -271,10 +273,11 @@ static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { struct net_device *netdev = to_net_dev(dev); + struct net *net = dev_net(netdev); size_t count = len; ssize_t ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; /* ignore trailing newline */ diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 42f1e1c7514f..6456439cbbd9 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -13,6 +13,7 @@ #include <linux/proc_fs.h> #include <linux/file.h> #include <linux/export.h> +#include <linux/user_namespace.h> #include <net/net_namespace.h> #include <net/netns/generic.h> @@ -145,7 +146,7 @@ static void ops_free_list(const struct pernet_operations *ops, /* * setup_net runs the initializers for the network namespace object. */ -static __net_init int setup_net(struct net *net) +static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) { /* Must be called with net_mutex held */ const struct pernet_operations *ops, *saved_ops; @@ -155,6 +156,7 @@ static __net_init int setup_net(struct net *net) atomic_set(&net->count, 1); atomic_set(&net->passive, 1); net->dev_base_seq = 1; + net->user_ns = user_ns; #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -232,7 +234,8 @@ void net_drop_ns(void *p) net_free(ns); } -struct net *copy_net_ns(unsigned long flags, struct net *old_net) +struct net *copy_net_ns(unsigned long flags, + struct user_namespace *user_ns, struct net *old_net) { struct net *net; int rv; @@ -243,8 +246,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) net = net_alloc(); if (!net) return ERR_PTR(-ENOMEM); + + get_user_ns(user_ns); + mutex_lock(&net_mutex); - rv = setup_net(net); + rv = setup_net(net, user_ns); if (rv == 0) { rtnl_lock(); list_add_tail_rcu(&net->list, &net_namespace_list); @@ -252,6 +258,7 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net) } mutex_unlock(&net_mutex); if (rv < 0) { + put_user_ns(user_ns); net_drop_ns(net); return ERR_PTR(rv); } @@ -308,6 +315,7 @@ static void cleanup_net(struct work_struct *work) /* Finally it is safe to free my network namespace structure */ list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { list_del_init(&net->exit_list); + put_user_ns(net->user_ns); net_drop_ns(net); } } @@ -347,13 +355,6 @@ struct net *get_net_ns_by_fd(int fd) } #else -struct net *copy_net_ns(unsigned long flags, struct net *old_net) -{ - if (flags & CLONE_NEWNET) - return ERR_PTR(-EINVAL); - return old_net; -} - struct net *get_net_ns_by_fd(int fd) { return ERR_PTR(-EINVAL); @@ -402,7 +403,7 @@ static int __init net_ns_init(void) rcu_assign_pointer(init_net.gen, ng); mutex_lock(&net_mutex); - if (setup_net(&init_net)) + if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); rtnl_lock(); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 77a0388fc3be..3151acf5ec13 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -674,7 +674,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) if ((delim = strchr(cur, '@')) == NULL) goto parse_failed; *delim = 0; - np->local_port = simple_strtol(cur, NULL, 10); + if (kstrtou16(cur, 10, &np->local_port)) + goto parse_failed; cur = delim; } cur++; @@ -705,7 +706,8 @@ int netpoll_parse_options(struct netpoll *np, char *opt) *delim = 0; if (*cur == ' ' || *cur == '\t') np_info(np, "warning: whitespace is not allowed\n"); - np->remote_port = simple_strtol(cur, NULL, 10); + if (kstrtou16(cur, 10, &np->remote_port)) + goto parse_failed; cur = delim; } cur++; diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index bde53da9cd86..5e67defe2cb0 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -223,7 +223,7 @@ static int update_netprio(const void *v, struct file *file, unsigned n) return 0; } -void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) { struct task_struct *p; void *v; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d1dc14c2aac4..b29dacf900f9 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -419,20 +419,6 @@ struct pktgen_thread { #define REMOVE 1 #define FIND 0 -static inline ktime_t ktime_now(void) -{ - struct timespec ts; - ktime_get_ts(&ts); - - return timespec_to_ktime(ts); -} - -/* This works even if 32 bit because of careful byte order choice */ -static inline int ktime_lt(const ktime_t cmp1, const ktime_t cmp2) -{ - return cmp1.tv64 < cmp2.tv64; -} - static const char version[] = "Packet Generator for packet performance testing. " "Version: " VERSION "\n"; @@ -675,7 +661,7 @@ static int pktgen_if_show(struct seq_file *seq, void *v) seq_puts(seq, "\n"); /* not really stopped, more like last-running-at */ - stopped = pkt_dev->running ? ktime_now() : pkt_dev->stopped_at; + stopped = pkt_dev->running ? ktime_get() : pkt_dev->stopped_at; idle = pkt_dev->idle_acc; do_div(idle, NSEC_PER_USEC); @@ -2141,12 +2127,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) return; } - start_time = ktime_now(); + start_time = ktime_get(); if (remaining < 100000) { /* for small delays (<100us), just loop until limit is reached */ do { - end_time = ktime_now(); - } while (ktime_lt(end_time, spin_until)); + end_time = ktime_get(); + } while (ktime_compare(end_time, spin_until) < 0); } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); @@ -2162,7 +2148,7 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); - end_time = ktime_now(); + end_time = ktime_get(); } pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); @@ -2427,11 +2413,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev) } } else { /* IPV6 * */ - if (pkt_dev->min_in6_daddr.s6_addr32[0] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[1] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[2] == 0 && - pkt_dev->min_in6_daddr.s6_addr32[3] == 0) ; - else { + if (!ipv6_addr_any(&pkt_dev->min_in6_daddr)) { int i; /* Only random destinations yet */ @@ -2916,8 +2898,7 @@ static void pktgen_run(struct pktgen_thread *t) pktgen_clear_counters(pkt_dev); pkt_dev->running = 1; /* Cranke yeself! */ pkt_dev->skb = NULL; - pkt_dev->started_at = - pkt_dev->next_tx = ktime_now(); + pkt_dev->started_at = pkt_dev->next_tx = ktime_get(); set_pkt_overhead(pkt_dev); @@ -3076,7 +3057,7 @@ static int pktgen_stop_device(struct pktgen_dev *pkt_dev) kfree_skb(pkt_dev->skb); pkt_dev->skb = NULL; - pkt_dev->stopped_at = ktime_now(); + pkt_dev->stopped_at = ktime_get(); pkt_dev->running = 0; show_results(pkt_dev, nr_frags); @@ -3095,7 +3076,7 @@ static struct pktgen_dev *next_to_run(struct pktgen_thread *t) continue; if (best == NULL) best = pkt_dev; - else if (ktime_lt(pkt_dev->next_tx, best->next_tx)) + else if (ktime_compare(pkt_dev->next_tx, best->next_tx) < 0) best = pkt_dev; } if_unlock(t); @@ -3180,14 +3161,14 @@ static void pktgen_rem_thread(struct pktgen_thread *t) static void pktgen_resched(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); schedule(); - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) { - ktime_t idle_start = ktime_now(); + ktime_t idle_start = ktime_get(); while (atomic_read(&(pkt_dev->skb->users)) != 1) { if (signal_pending(current)) @@ -3198,7 +3179,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) else cpu_relax(); } - pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_now(), idle_start)); + pkt_dev->idle_acc += ktime_to_ns(ktime_sub(ktime_get(), idle_start)); } static void pktgen_xmit(struct pktgen_dev *pkt_dev) @@ -3220,7 +3201,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) * "never transmit" */ if (unlikely(pkt_dev->delay == ULLONG_MAX)) { - pkt_dev->next_tx = ktime_add_ns(ktime_now(), ULONG_MAX); + pkt_dev->next_tx = ktime_add_ns(ktime_get(), ULONG_MAX); return; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fad649ae4dec..1868625af25e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -128,7 +128,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].doit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].doit : NULL; + return tab[msgindex].doit; } static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) @@ -143,7 +143,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].dumpit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].dumpit : NULL; + return tab[msgindex].dumpit; } static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) @@ -158,7 +158,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) if (tab == NULL || tab[msgindex].calcit == NULL) tab = rtnl_msg_handlers[PF_UNSPEC]; - return tab ? tab[msgindex].calcit : NULL; + return tab[msgindex].calcit; } /** @@ -1316,6 +1316,10 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, err = PTR_ERR(net); goto errout; } + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) { + err = -EPERM; + goto errout; + } err = dev_change_net_namespace(dev, net, ifname); put_net(net); if (err) @@ -1638,7 +1642,7 @@ int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm) } EXPORT_SYMBOL(rtnl_configure_link); -struct net_device *rtnl_create_link(struct net *src_net, struct net *net, +struct net_device *rtnl_create_link(struct net *net, char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[]) { int err; @@ -1836,7 +1840,7 @@ replay: if (IS_ERR(dest_net)) return PTR_ERR(dest_net); - dev = rtnl_create_link(net, dest_net, ifname, ops, tb); + dev = rtnl_create_link(dest_net, ifname, ops, tb); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out; @@ -2057,6 +2061,9 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) u8 *addr; int err; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); if (err < 0) return err; @@ -2123,6 +2130,9 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int err = -EINVAL; __u8 *addr; + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (nlmsg_len(nlh) < sizeof(*ndm)) return -EINVAL; @@ -2253,6 +2263,211 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u16 mode) +{ + struct nlmsghdr *nlh; + struct ifinfomsg *ifm; + struct nlattr *br_afspec; + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; + + nlh = nlmsg_put(skb, pid, seq, RTM_NEWLINK, sizeof(*ifm), NLM_F_MULTI); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifi_family = AF_BRIDGE; + ifm->__ifi_pad = 0; + ifm->ifi_type = dev->type; + ifm->ifi_index = dev->ifindex; + ifm->ifi_flags = dev_get_flags(dev); + ifm->ifi_change = 0; + + + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || + nla_put_u32(skb, IFLA_MTU, dev->mtu) || + nla_put_u8(skb, IFLA_OPERSTATE, operstate) || + (dev->master && + nla_put_u32(skb, IFLA_MASTER, dev->master->ifindex)) || + (dev->addr_len && + nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || + (dev->ifindex != dev->iflink && + nla_put_u32(skb, IFLA_LINK, dev->iflink))) + goto nla_put_failure; + + br_afspec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!br_afspec) + goto nla_put_failure; + + if (nla_put_u16(skb, IFLA_BRIDGE_FLAGS, BRIDGE_FLAGS_SELF) || + nla_put_u16(skb, IFLA_BRIDGE_MODE, mode)) { + nla_nest_cancel(skb, br_afspec); + goto nla_put_failure; + } + nla_nest_end(skb, br_afspec); + + return nlmsg_end(skb, nlh); +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} +EXPORT_SYMBOL(ndo_dflt_bridge_getlink); + +static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + struct net_device *dev; + int idx = 0; + u32 portid = NETLINK_CB(cb->skb).portid; + u32 seq = cb->nlh->nlmsg_seq; + + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { + const struct net_device_ops *ops = dev->netdev_ops; + struct net_device *master = dev->master; + + if (master && master->netdev_ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + master->netdev_ops->ndo_bridge_getlink( + skb, portid, seq, dev) < 0) + break; + idx++; + } + + if (ops->ndo_bridge_getlink) { + if (idx >= cb->args[0] && + ops->ndo_bridge_getlink(skb, portid, seq, dev) < 0) + break; + idx++; + } + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if ((!flags || (flags & BRIDGE_FLAGS_MASTER)) && + master && master->netdev_ops->ndo_bridge_getlink) { + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + if ((flags & BRIDGE_FLAGS_SELF) && + dev->netdev_ops->ndo_bridge_getlink) { + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + if (err < 0) + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + +static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, + void *arg) +{ + struct net *net = sock_net(skb->sk); + struct ifinfomsg *ifm; + struct net_device *dev; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 oflags, flags = 0; + bool have_flags = false; + + if (nlmsg_len(nlh) < sizeof(*ifm)) + return -EINVAL; + + ifm = nlmsg_data(nlh); + if (ifm->ifi_family != AF_BRIDGE) + return -EPFNOSUPPORT; + + dev = __dev_get_by_index(net, ifm->ifi_index); + if (!dev) { + pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + return -ENODEV; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + have_flags = true; + flags = nla_get_u16(attr); + break; + } + } + } + + oflags = flags; + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); + if (err) + goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; + } + + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + + if (have_flags) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, oflags); +out: + return err; +} + /* Protected by RTNL sempahore. */ static struct rtattr **rta_buf; static int rtattr_max; @@ -2283,7 +2498,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sz_idx = type>>2; kind = type&3; - if (kind != 2 && !capable(CAP_NET_ADMIN)) + if (kind != 2 && !ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { @@ -2434,5 +2649,8 @@ void __init rtnetlink_init(void) rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); } diff --git a/net/core/scm.c b/net/core/scm.c index ab570841a532..57fb1ee6649f 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -51,11 +51,11 @@ static __inline__ int scm_check_creds(struct ucred *creds) if (!uid_valid(uid) || !gid_valid(gid)) return -EINVAL; - if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && + if ((creds->pid == task_tgid_vnr(current) || nsown_capable(CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || - uid_eq(uid, cred->suid)) || capable(CAP_SETUID)) && + uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || - gid_eq(gid, cred->sgid)) || capable(CAP_SETGID))) { + gid_eq(gid, cred->sgid)) || nsown_capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3f0636cd76cd..3ab989b0de42 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -519,7 +519,7 @@ static void skb_release_data(struct sk_buff *skb) uarg = skb_shinfo(skb)->destructor_arg; if (uarg->callback) - uarg->callback(uarg); + uarg->callback(uarg, true); } if (skb_has_frag_list(skb)) @@ -635,6 +635,26 @@ void kfree_skb(struct sk_buff *skb) EXPORT_SYMBOL(kfree_skb); /** + * skb_tx_error - report an sk_buff xmit error + * @skb: buffer that triggered an error + * + * Report xmit error if a device callback is tracking this skb. + * skb must be freed afterwards. + */ +void skb_tx_error(struct sk_buff *skb) +{ + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { + struct ubuf_info *uarg; + + uarg = skb_shinfo(skb)->destructor_arg; + if (uarg->callback) + uarg->callback(uarg, false); + skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + } +} +EXPORT_SYMBOL(skb_tx_error); + +/** * consume_skb - free an skbuff * @skb: buffer to free * @@ -662,11 +682,14 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->transport_header = old->transport_header; new->network_header = old->network_header; new->mac_header = old->mac_header; + new->inner_transport_header = old->inner_transport_header; + new->inner_network_header = old->inner_transport_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; new->no_fcs = old->no_fcs; + new->encapsulation = old->encapsulation; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif @@ -797,7 +820,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg); + uarg->callback(uarg, false); /* skb frags point to kernel buffers */ for (i = num_frags - 1; i >= 0; i--) { @@ -872,6 +895,8 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->network_header += offset; if (skb_mac_header_was_set(new)) new->mac_header += offset; + new->inner_transport_header += offset; + new->inner_network_header += offset; #endif skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; @@ -1069,6 +1094,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->network_header += off; if (skb_mac_header_was_set(skb)) skb->mac_header += off; + skb->inner_transport_header += off; + skb->inner_network_header += off; /* Only adjust this if it actually is csum_start rather than csum */ if (skb->ip_summed == CHECKSUM_PARTIAL) skb->csum_start += nhead; @@ -1168,6 +1195,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, n->network_header += off; if (skb_mac_header_was_set(skb)) n->mac_header += off; + n->inner_transport_header += off; + n->inner_network_header += off; #endif return n; @@ -2999,7 +3028,6 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) memcpy(skb_mac_header(nskb), skb_mac_header(p), p->data - skb_mac_header(p)); - *NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p); skb_shinfo(nskb)->frag_list = p; skb_shinfo(nskb)->gso_size = pinfo->gso_size; pinfo->gso_size = 0; diff --git a/net/core/sock.c b/net/core/sock.c index 8a146cfcc366..a692ef49c9bb 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -505,7 +505,8 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); -static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +static int sock_setbindtodevice(struct sock *sk, char __user *optval, + int optlen) { int ret = -ENOPROTOOPT; #ifdef CONFIG_NETDEVICES @@ -515,7 +516,7 @@ static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) /* Sorry... */ ret = -EPERM; - if (!capable(CAP_NET_RAW)) + if (!ns_capable(net->user_ns, CAP_NET_RAW)) goto out; ret = -EINVAL; @@ -562,6 +563,59 @@ out: return ret; } +static int sock_getbindtodevice(struct sock *sk, char __user *optval, + int __user *optlen, int len) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + struct net *net = sock_net(sk); + struct net_device *dev; + char devname[IFNAMSIZ]; + unsigned seq; + + if (sk->sk_bound_dev_if == 0) { + len = 0; + goto zero; + } + + ret = -EINVAL; + if (len < IFNAMSIZ) + goto out; + +retry: + seq = read_seqbegin(&devnet_rename_seq); + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if); + ret = -ENODEV; + if (!dev) { + rcu_read_unlock(); + goto out; + } + + strcpy(devname, dev->name); + rcu_read_unlock(); + if (read_seqretry(&devnet_rename_seq, seq)) + goto retry; + + len = strlen(devname) + 1; + + ret = -EFAULT; + if (copy_to_user(optval, devname, len)) + goto out; + +zero: + ret = -EFAULT; + if (put_user(len, optlen)) + goto out; + + ret = 0; + +out: +#endif + + return ret; +} + static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) { if (valbool) @@ -589,7 +643,7 @@ int sock_setsockopt(struct socket *sock, int level, int optname, */ if (optname == SO_BINDTODEVICE) - return sock_bindtodevice(sk, optval, optlen); + return sock_setbindtodevice(sk, optval, optlen); if (optlen < sizeof(int)) return -EINVAL; @@ -696,7 +750,8 @@ set_rcvbuf: break; case SO_PRIORITY: - if ((val >= 0 && val <= 6) || capable(CAP_NET_ADMIN)) + if ((val >= 0 && val <= 6) || + ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) sk->sk_priority = val; else ret = -EPERM; @@ -813,7 +868,7 @@ set_rcvbuf: clear_bit(SOCK_PASSSEC, &sock->flags); break; case SO_MARK: - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) ret = -EPERM; else sk->sk_mark = val; @@ -1074,6 +1129,17 @@ int sock_getsockopt(struct socket *sock, int level, int optname, case SO_NOFCS: v.val = sock_flag(sk, SOCK_NOFCS); break; + + case SO_BINDTODEVICE: + return sock_getbindtodevice(sk, optval, optlen, len); + + case SO_GET_FILTER: + len = sk_get_filter(sk, (struct sock_filter __user *)optval, len); + if (len < 0) + return len; + + goto lenout; + default: return -ENOPROTOOPT; } @@ -1214,13 +1280,11 @@ static void sk_prot_free(struct proto *prot, struct sock *sk) #ifdef CONFIG_CGROUPS #if IS_ENABLED(CONFIG_NET_CLS_CGROUP) -void sock_update_classid(struct sock *sk) +void sock_update_classid(struct sock *sk, struct task_struct *task) { u32 classid; - rcu_read_lock(); /* doing current task, which cannot vanish. */ - classid = task_cls_classid(current); - rcu_read_unlock(); + classid = task_cls_classid(task); if (classid != sk->sk_classid) sk->sk_classid = classid; } @@ -1263,7 +1327,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, sock_net_set(sk, get_net(net)); atomic_set(&sk->sk_wmem_alloc, 1); - sock_update_classid(sk); + sock_update_classid(sk, current); sock_update_netprioidx(sk, current); } diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index a7c36845b123..d1b08045a9df 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -216,6 +216,11 @@ static __net_init int sysctl_core_net_init(struct net *net) goto err_dup; tbl[0].data = &net->core.sysctl_somaxconn; + + /* Don't export any sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) { + tbl[0].procname = NULL; + } } net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl); |