From ac5ccdba3a1659b3517e7e99ef7d35a6a2d77cf4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 19 Jun 2014 21:22:56 +0300 Subject: iovec: move memcpy_from/toiovecend to lib/iovec.c ERROR: "memcpy_fromiovecend" [drivers/vhost/vhost_scsi.ko] undefined! commit 9f977ef7b671f6169eca78bf40f230fe84b7c7e5 vhost-scsi: Include prot_bytes into expected data transfer length in target-pending makes drivers/vhost/scsi.c call memcpy_fromiovecend(). This function is not available when CONFIG_NET is not enabled. socket.h already includes uio.h, so no callers need updating. Reported-by: Randy Dunlap Cc: Stephen Rothwell Cc: "David S. Miller" Signed-off-by: David S. Miller Signed-off-by: Michael S. Tsirkin Signed-off-by: Nicholas Bellinger --- net/core/iovec.c | 55 ------------------------------------------------------- 1 file changed, 55 deletions(-) (limited to 'net/core') diff --git a/net/core/iovec.c b/net/core/iovec.c index b61869429f4c..827dd6beb49c 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -74,61 +74,6 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a return err; } -/* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); - /* * And now for the all-in-one: copy and checksum from a user iovec * directly to a datagram -- cgit v1.2.3 From 11ef7a8996d5d433c9cd75d80651297eccbf6d42 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 30 Jun 2014 09:50:40 -0700 Subject: net: Performance fix for process_backlog In process_backlog the input_pkt_queue is only checked once for new packets and quota is artificially reduced to reflect precisely the number of packets on the input_pkt_queue so that the loop exits appropriately. This patches changes the behavior to be more straightforward and less convoluted. Packets are processed until either the quota is met or there are no more packets to process. This patch seems to provide a small, but noticeable performance improvement. The performance improvement is a result of staying in the process_backlog loop longer which can reduce number of IPI's. Performance data using super_netperf TCP_RR with 200 flows: Before fix: 88.06% CPU utilization 125/190/309 90/95/99% latencies 1.46808e+06 tps 1145382 intrs.sec. With fix: 87.73% CPU utilization 122/183/296 90/95/99% latencies 1.4921e+06 tps 1021674.30 intrs./sec. Signed-off-by: Tom Herbert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 30eedf677913..77c19c7bb490 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4227,9 +4227,8 @@ static int process_backlog(struct napi_struct *napi, int quota) #endif napi->weight = weight_p; local_irq_disable(); - while (work < quota) { + while (1) { struct sk_buff *skb; - unsigned int qlen; while ((skb = __skb_dequeue(&sd->process_queue))) { local_irq_enable(); @@ -4243,24 +4242,24 @@ static int process_backlog(struct napi_struct *napi, int quota) } rps_lock(sd); - qlen = skb_queue_len(&sd->input_pkt_queue); - if (qlen) - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); - - if (qlen < quota - work) { + if (skb_queue_empty(&sd->input_pkt_queue)) { /* * Inline a custom version of __napi_complete(). * only current cpu owns and manipulates this napi, - * and NAPI_STATE_SCHED is the only possible flag set on backlog. - * we can use a plain write instead of clear_bit(), + * and NAPI_STATE_SCHED is the only possible flag set + * on backlog. + * We can use a plain write instead of clear_bit(), * and we dont need an smp_mb() memory barrier. */ list_del(&napi->poll_list); napi->state = 0; + rps_unlock(sd); - quota = work + qlen; + break; } + + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); rps_unlock(sd); } local_irq_enable(); -- cgit v1.2.3 From 54951194656e4853e441266fd095f880bc0398f3 Mon Sep 17 00:00:00 2001 From: Loic Prylli Date: Tue, 1 Jul 2014 21:39:43 -0700 Subject: net: Fix NETDEV_CHANGE notifier usage causing spurious arp flush A bug was introduced in NETDEV_CHANGE notifier sequence causing the arp table to be sometimes spuriously cleared (including manual arp entries marked permanent), upon network link carrier changes. The changed argument for the notifier was applied only to a single caller of NETDEV_CHANGE, missing among others netdev_state_change(). So upon net_carrier events induced by the network, which are triggering a call to netdev_state_change(), arp_netdev_event() would decide whether to clear or not arp cache based on random/junk stack values (a kind of read buffer overflow). Fixes: be9efd365328 ("net: pass changed flags along with NETDEV_CHANGE event") Fixes: 6c8b4e3ff81b ("arp: flush arp cache on IFF_NOARP change") Signed-off-by: Loic Prylli Signed-off-by: David S. Miller --- net/core/dev.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 77c19c7bb490..7990984ca364 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -148,6 +148,9 @@ struct list_head ptype_all __read_mostly; /* Taps */ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); +static int call_netdevice_notifiers_info(unsigned long val, + struct net_device *dev, + struct netdev_notifier_info *info); /* * The @dev_base_head list is protected by @dev_base_lock and the rtnl @@ -1207,7 +1210,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - call_netdevice_notifiers(NETDEV_CHANGE, dev); + struct netdev_notifier_change_info change_info; + + change_info.flags_changed = 0; + call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } } -- cgit v1.2.3 From 9ecf07a1d8f70f72ec99a0f102c8aa24609d84f4 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Sat, 12 Jul 2014 22:36:44 +0200 Subject: neigh: sysctl - simplify address calculation of gc_* variables The code in neigh_sysctl_register() relies on a specific layout of struct neigh_table, namely that the 'gc_*' variables are directly following the 'parms' member in a specific order. The code, though, expresses this in the most ugly way. Get rid of the ugly casts and use the 'tbl' pointer to get a handle to the table. This way we can refer to the 'gc_*' variables directly. Similarly seen in the grsecurity patch, written by Brad Spengler. Signed-off-by: Mathias Krause Cc: Brad Spengler Signed-off-by: David S. Miller --- include/net/neighbour.h | 1 - net/core/neighbour.c | 9 +++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7277caf3743d..47f425464f84 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -203,7 +203,6 @@ struct neigh_table { void (*proxy_redo)(struct sk_buff *skb); char *id; struct neigh_parms parms; - /* HACK. gc_* should follow parms without a gap! */ int gc_interval; int gc_thresh1; int gc_thresh2; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 32d872eec7f5..559890b0f0a2 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3059,11 +3059,12 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0, sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL])); } else { + struct neigh_table *tbl = p->tbl; dev_name_source = "default"; - t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1); - t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1; - t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2; - t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3; + t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval; + t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1; + t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2; + t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3; } if (handler) { -- cgit v1.2.3