From a9b3cd7f323b2e57593e7215362a7b02fc933e3a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 1 Aug 2011 16:19:00 +0000 Subject: rcu: convert uses of rcu_assign_pointer(x, NULL) to RCU_INIT_POINTER When assigning a NULL value to an RCU protected pointer, no barrier is needed. The rcu_assign_pointer, used to handle that but will soon change to not handle the special case. Convert all rcu_assign_pointer of NULL value. //smpl @@ expression P; @@ - rcu_assign_pointer(P, NULL) + RCU_INIT_POINTER(P, NULL) // Signed-off-by: Stephen Hemminger Acked-by: Paul E. McKenney Signed-off-by: David S. Miller --- net/core/dev.c | 4 ++-- net/core/fib_rules.c | 2 +- net/core/filter.c | 2 +- net/core/net-sysfs.c | 4 ++-- net/core/netpoll.c | 4 ++-- net/core/sock.c | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 17d67b579beb..9428766d0140 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3094,8 +3094,8 @@ void netdev_rx_handler_unregister(struct net_device *dev) { ASSERT_RTNL(); - rcu_assign_pointer(dev->rx_handler, NULL); - rcu_assign_pointer(dev->rx_handler_data, NULL); + RCU_INIT_POINTER(dev->rx_handler, NULL); + RCU_INIT_POINTER(dev->rx_handler_data, NULL); } EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index e7ab0c0285b5..0657b57df558 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -487,7 +487,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (ops->nr_goto_rules > 0) { list_for_each_entry(tmp, &ops->rules_list, list) { if (rtnl_dereference(tmp->ctarget) == rule) { - rcu_assign_pointer(tmp->ctarget, NULL); + RCU_INIT_POINTER(tmp->ctarget, NULL); ops->unresolved_rules++; } } diff --git a/net/core/filter.c b/net/core/filter.c index 36f975fa87cb..8fcc2d776e09 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -645,7 +645,7 @@ int sk_detach_filter(struct sock *sk) filter = rcu_dereference_protected(sk->sk_filter, sock_owned_by_user(sk)); if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); sk_filter_uncharge(sk, filter); ret = 0; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 1683e5db2f27..b1ab887520a8 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -987,10 +987,10 @@ static ssize_t store_xps_map(struct netdev_queue *queue, } if (nonempty) - rcu_assign_pointer(dev->xps_maps, new_dev_maps); + RCU_INIT_POINTER(dev->xps_maps, new_dev_maps); else { kfree(new_dev_maps); - rcu_assign_pointer(dev->xps_maps, NULL); + RCU_INIT_POINTER(dev->xps_maps, NULL); } if (dev_maps) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index adf84dd8c7b5..d676a561d983 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -760,7 +760,7 @@ int __netpoll_setup(struct netpoll *np) } /* last thing to do is link it to the net device structure */ - rcu_assign_pointer(ndev->npinfo, npinfo); + RCU_INIT_POINTER(ndev->npinfo, npinfo); return 0; @@ -901,7 +901,7 @@ void __netpoll_cleanup(struct netpoll *np) if (ops->ndo_netpoll_cleanup) ops->ndo_netpoll_cleanup(np->dev); - rcu_assign_pointer(np->dev->npinfo, NULL); + RCU_INIT_POINTER(np->dev->npinfo, NULL); /* avoid racing with NAPI reading npinfo */ synchronize_rcu_bh(); diff --git a/net/core/sock.c b/net/core/sock.c index bc745d00ea4d..9997026b44b2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -387,7 +387,7 @@ struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie) if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) { sk_tx_queue_clear(sk); - rcu_assign_pointer(sk->sk_dst_cache, NULL); + RCU_INIT_POINTER(sk->sk_dst_cache, NULL); dst_release(dst); return NULL; } @@ -1158,7 +1158,7 @@ static void __sk_free(struct sock *sk) atomic_read(&sk->sk_wmem_alloc) == 0); if (filter) { sk_filter_uncharge(sk, filter); - rcu_assign_pointer(sk->sk_filter, NULL); + RCU_INIT_POINTER(sk->sk_filter, NULL); } sock_disable_timestamp(sk, SOCK_TIMESTAMP); -- cgit v1.2.3 From 9de79c127cccecb11ae6a21ab1499e87aa222880 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Aug 2011 20:56:14 +0000 Subject: net: fix potential neighbour race in dst_ifdown() Followup of commit f2c31e32b378a (fix NULL dereferences in check_peer_redir()). We need to make sure dst neighbour doesnt change in dst_ifdown(). Fix some sparse errors. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dst.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dst.c b/net/core/dst.c index 14b33baf0733..d5e2c4c09107 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -171,7 +171,7 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev, dst_init_metrics(dst, dst_default_metrics, true); dst->expires = 0UL; dst->path = dst; - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); #ifdef CONFIG_XFRM dst->xfrm = NULL; #endif @@ -229,11 +229,11 @@ struct dst_entry *dst_destroy(struct dst_entry * dst) smp_rmb(); again: - neigh = dst->_neighbour; + neigh = rcu_dereference_protected(dst->_neighbour, 1); child = dst->child; if (neigh) { - dst->_neighbour = NULL; + RCU_INIT_POINTER(dst->_neighbour, NULL); neigh_release(neigh); } @@ -360,14 +360,19 @@ static void dst_ifdown(struct dst_entry *dst, struct net_device *dev, if (!unregister) { dst->input = dst->output = dst_discard; } else { + struct neighbour *neigh; + dst->dev = dev_net(dst->dev)->loopback_dev; dev_hold(dst->dev); dev_put(dev); - if (dst->_neighbour && dst->_neighbour->dev == dev) { - dst->_neighbour->dev = dst->dev; + rcu_read_lock(); + neigh = dst_get_neighbour(dst); + if (neigh && neigh->dev == dev) { + neigh->dev = dst->dev; dev_hold(dst->dev); dev_put(dev); } + rcu_read_unlock(); } } -- cgit v1.2.3 From e7c379d2a0dcb8c30cb580184a0df11805464703 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 10 Aug 2011 06:09:45 +0000 Subject: rtnetlink: remove initialization of dev->real_num_tx_queues dev->real_num_tx_queues is correctly set already in alloc_netdev_mqs. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/core') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 99d9e953fe39..39f8dd6a2821 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1604,7 +1604,6 @@ struct net_device *rtnl_create_link(struct net *src_net, struct net *net, dev_net_set(dev, net); dev->rtnl_link_ops = ops; dev->rtnl_link_state = RTNL_LINK_INITIALIZING; - dev->real_num_tx_queues = real_num_queues; if (tb[IFLA_MTU]) dev->mtu = nla_get_u32(tb[IFLA_MTU]); -- cgit v1.2.3 From cd28ca0a3dd17c68d24b839602a0e6268ad28b5d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 9 Aug 2011 08:15:58 +0000 Subject: neigh: reduce arp latency Remove the artificial HZ latency on arp resolution. Instead of firing a timer in one jiffy (up to 10 ms if HZ=100), lets send the ARP message immediately. Before patch : # arp -d 192.168.20.108 ; ping -c 3 192.168.20.108 PING 192.168.20.108 (192.168.20.108) 56(84) bytes of data. 64 bytes from 192.168.20.108: icmp_seq=1 ttl=64 time=9.91 ms 64 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.065 ms 64 bytes from 192.168.20.108: icmp_seq=3 ttl=64 time=0.061 ms After patch : $ arp -d 192.168.20.108 ; ping -c 3 192.168.20.108 PING 192.168.20.108 (192.168.20.108) 56(84) bytes of data. 64 bytes from 192.168.20.108: icmp_seq=1 ttl=64 time=0.152 ms 64 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.064 ms 64 bytes from 192.168.20.108: icmp_seq=3 ttl=64 time=0.074 ms Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8fab9b0bb203..4002261f20d1 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -844,6 +844,19 @@ static void neigh_invalidate(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); } +static void neigh_probe(struct neighbour *neigh) + __releases(neigh->lock) +{ + struct sk_buff *skb = skb_peek(&neigh->arp_queue); + /* keep skb alive even if arp_queue overflows */ + if (skb) + skb = skb_copy(skb, GFP_ATOMIC); + write_unlock(&neigh->lock); + neigh->ops->solicit(neigh, skb); + atomic_inc(&neigh->probes); + kfree_skb(skb); +} + /* Called when a timer expires for a neighbour entry. */ static void neigh_timer_handler(unsigned long arg) @@ -920,14 +933,7 @@ static void neigh_timer_handler(unsigned long arg) neigh_hold(neigh); } if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) { - struct sk_buff *skb = skb_peek(&neigh->arp_queue); - /* keep skb alive even if arp_queue overflows */ - if (skb) - skb = skb_copy(skb, GFP_ATOMIC); - write_unlock(&neigh->lock); - neigh->ops->solicit(neigh, skb); - atomic_inc(&neigh->probes); - kfree_skb(skb); + neigh_probe(neigh); } else { out: write_unlock(&neigh->lock); @@ -942,7 +948,7 @@ out: int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { int rc; - unsigned long now; + bool immediate_probe = false; write_lock_bh(&neigh->lock); @@ -950,14 +956,16 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE)) goto out_unlock_bh; - now = jiffies; - if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) { if (neigh->parms->mcast_probes + neigh->parms->app_probes) { + unsigned long next, now = jiffies; + atomic_set(&neigh->probes, neigh->parms->ucast_probes); neigh->nud_state = NUD_INCOMPLETE; - neigh->updated = jiffies; - neigh_add_timer(neigh, now + 1); + neigh->updated = now; + next = now + max(neigh->parms->retrans_time, HZ/2); + neigh_add_timer(neigh, next); + immediate_probe = true; } else { neigh->nud_state = NUD_FAILED; neigh->updated = jiffies; @@ -989,7 +997,11 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) rc = 1; } out_unlock_bh: - write_unlock_bh(&neigh->lock); + if (immediate_probe) + neigh_probe(neigh); + else + write_unlock(&neigh->lock); + local_bh_enable(); return rc; } EXPORT_SYMBOL(__neigh_event_send); -- cgit v1.2.3 From 33d480ce6d43326e2541fd79b3548858a174ec3c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 11 Aug 2011 19:30:52 +0000 Subject: net: cleanup some rcu_dereference_raw RCU api had been completed and rcu_access_pointer() or rcu_dereference_protected() are better than generic rcu_dereference_raw() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 10 +++++----- net/core/fib_rules.c | 2 +- net/core/net-sysfs.c | 4 ++-- net/ipv4/ipmr.c | 4 ++-- net/ipv4/route.c | 6 +++--- net/ipv4/udp.c | 7 +++---- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 2 +- net/mac80211/mesh_pathtbl.c | 4 ++-- net/netlink/af_netlink.c | 2 +- 10 files changed, 22 insertions(+), 23 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 9428766d0140..d22ffd722ee3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2673,13 +2673,13 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, map = rcu_dereference(rxqueue->rps_map); if (map) { if (map->len == 1 && - !rcu_dereference_raw(rxqueue->rps_flow_table)) { + !rcu_access_pointer(rxqueue->rps_flow_table)) { tcpu = map->cpus[0]; if (cpu_online(tcpu)) cpu = tcpu; goto done; } - } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) { + } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { goto done; } @@ -5727,8 +5727,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); - WARN_ON(rcu_dereference_raw(dev->ip_ptr)); - WARN_ON(rcu_dereference_raw(dev->ip6_ptr)); + WARN_ON(rcu_access_pointer(dev->ip_ptr)); + WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); if (dev->destructor) @@ -5932,7 +5932,7 @@ void free_netdev(struct net_device *dev) kfree(dev->_rx); #endif - kfree(rcu_dereference_raw(dev->ingress_queue)); + kfree(rcu_dereference_protected(dev->ingress_queue, 1)); /* Flush device addresses */ dev_addr_flush(dev); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 0657b57df558..67c5c288cd80 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -545,7 +545,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh->flags = rule->flags; if (rule->action == FR_ACT_GOTO && - rcu_dereference_raw(rule->ctarget) == NULL) + rcu_access_pointer(rule->ctarget) == NULL) frh->flags |= FIB_RULE_UNRESOLVED; if (rule->iifname[0]) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b1ab887520a8..56e42ab7cbc6 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -712,13 +712,13 @@ static void rx_queue_release(struct kobject *kobj) struct rps_dev_flow_table *flow_table; - map = rcu_dereference_raw(queue->rps_map); + map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); kfree_rcu(map, rcu); } - flow_table = rcu_dereference_raw(queue->rps_flow_table); + flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); if (flow_table) { RCU_INIT_POINTER(queue->rps_flow_table, NULL); call_rcu(&flow_table->rcu, rps_dev_flow_table_release); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index f550285c977b..6164e982e0ef 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1203,7 +1203,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi return -ENOENT; if (optname != MRT_INIT) { - if (sk != rcu_dereference_raw(mrt->mroute_sk) && + if (sk != rcu_access_pointer(mrt->mroute_sk) && !capable(CAP_NET_ADMIN)) return -EACCES; } @@ -1230,7 +1230,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi rtnl_unlock(); return ret; case MRT_DONE: - if (sk != rcu_dereference_raw(mrt->mroute_sk)) + if (sk != rcu_access_pointer(mrt->mroute_sk)) return -EACCES; return ip_ra_control(sk, 0, NULL); case MRT_ADD_VIF: diff --git a/net/ipv4/route.c b/net/ipv4/route.c index cb7efe0567f0..d6e32138f712 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -324,7 +324,7 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { - if (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)) + if (!rcu_access_pointer(rt_hash_table[st->bucket].chain)) continue; rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); @@ -350,7 +350,7 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, do { if (--st->bucket < 0) return NULL; - } while (!rcu_dereference_raw(rt_hash_table[st->bucket].chain)); + } while (!rcu_access_pointer(rt_hash_table[st->bucket].chain)); rcu_read_lock_bh(); r = rcu_dereference_bh(rt_hash_table[st->bucket].chain); } @@ -762,7 +762,7 @@ static void rt_do_flush(struct net *net, int process_context) if (process_context && need_resched()) cond_resched(); - rth = rcu_dereference_raw(rt_hash_table[i].chain); + rth = rcu_access_pointer(rt_hash_table[i].chain); if (!rth) continue; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b5a19340a95..c1d5facab7c9 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1461,10 +1461,9 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto drop; if (sk_rcvqueues_full(sk, skb)) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 4f45dc9e4f5e..f34902f1ba33 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -372,9 +372,9 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_unlock(&raw_v6_hashinfo.lock); } -static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) +static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb) { - if ((raw6_sk(sk)->checksum || rcu_dereference_raw(sk->sk_filter)) && + if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) && skb_checksum_complete(skb)) { atomic_inc(&sk->sk_drops); kfree_skb(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 29213b51c499..97e47f06e8b7 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -533,7 +533,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) } } - if (rcu_dereference_raw(sk->sk_filter)) { + if (rcu_access_pointer(sk->sk_filter)) { if (udp_lib_checksum_complete(skb)) goto drop; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 068ee6518254..dc7ae8d31aaf 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -843,6 +843,6 @@ void mesh_path_expire(struct ieee80211_sub_if_data *sdata) void mesh_pathtbl_unregister(void) { /* no need for locking during exit path */ - mesh_table_free(rcu_dereference_raw(mesh_paths), true); - mesh_table_free(rcu_dereference_raw(mpp_paths), true); + mesh_table_free(rcu_dereference_protected(mesh_paths, 1), true); + mesh_table_free(rcu_dereference_protected(mpp_paths, 1), true); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0a4db0211da0..4330db99fabf 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1578,7 +1578,7 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) new = kzalloc(sizeof(*new) + NLGRPSZ(groups), GFP_ATOMIC); if (!new) return -ENOMEM; - old = rcu_dereference_raw(tbl->listeners); + old = rcu_dereference_protected(tbl->listeners, 1); memcpy(new->masks, old->masks, NLGRPSZ(tbl->groups)); rcu_assign_pointer(tbl->listeners, new); -- cgit v1.2.3 From 792df22cd0499b4e662d4618b0008fdcfef8b04e Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 14 Aug 2011 19:45:04 +0000 Subject: rps: Some minor cleanup in get_rps_cpus Use some variables for clarity and extensibility. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index d22ffd722ee3..6578d9483043 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2528,15 +2528,17 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) const struct ipv6hdr *ip6; const struct iphdr *ip; u8 ip_proto; - u32 addr1, addr2, ihl; + u32 addr1, addr2; + u16 proto; union { u32 v32; u16 v16[2]; } ports; nhoff = skb_network_offset(skb); + proto = skb->protocol; - switch (skb->protocol) { + switch (proto) { case __constant_htons(ETH_P_IP): if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; @@ -2548,7 +2550,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip->protocol; addr1 = (__force u32) ip->saddr; addr2 = (__force u32) ip->daddr; - ihl = ip->ihl; + nhoff += ip->ihl * 4; break; case __constant_htons(ETH_P_IPV6): if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) @@ -2558,7 +2560,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ip_proto = ip6->nexthdr; addr1 = (__force u32) ip6->saddr.s6_addr32[3]; addr2 = (__force u32) ip6->daddr.s6_addr32[3]; - ihl = (40 >> 2); + nhoff += 40; break; default: goto done; @@ -2567,7 +2569,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ports.v32 = 0; poff = proto_ports_offset(ip_proto); if (poff >= 0) { - nhoff += ihl * 4 + poff; + nhoff += poff; if (pskb_may_pull(skb, nhoff + 4)) { ports.v32 = * (__force u32 *) (skb->data + nhoff); if (ports.v16[1] < ports.v16[0]) -- cgit v1.2.3 From bdeab991918663aed38757904219e8398214334c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 14 Aug 2011 19:45:55 +0000 Subject: rps: Add flag to skb to indicate rxhash is based on L4 tuple The l4_rxhash flag was added to the skb structure to indicate that the rxhash value was computed over the 4 tuple for the packet which includes the port information in the encapsulated transport packet. This is used by the stack to preserve the rxhash value in __skb_rx_tunnel. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++-- include/net/dst.h | 9 ++++++++- include/net/sock.h | 15 ++++++++++++--- net/core/dev.c | 10 ++++++---- net/core/skbuff.c | 1 + net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/udp.c | 4 ++-- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 2 +- 9 files changed, 39 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b996ed86d5b..f902c331217b 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -414,6 +414,7 @@ struct sk_buff { __u8 ndisc_nodetype:2; #endif __u8 ooo_okay:1; + __u8 l4_rxhash:1; kmemcheck_bitfield_end(flags2); /* 0/13 bit hole */ @@ -572,11 +573,11 @@ extern unsigned int skb_find_text(struct sk_buff *skb, unsigned int from, unsigned int to, struct ts_config *config, struct ts_state *state); -extern __u32 __skb_get_rxhash(struct sk_buff *skb); +extern void __skb_get_rxhash(struct sk_buff *skb); static inline __u32 skb_get_rxhash(struct sk_buff *skb) { if (!skb->rxhash) - skb->rxhash = __skb_get_rxhash(skb); + __skb_get_rxhash(skb); return skb->rxhash; } diff --git a/include/net/dst.h b/include/net/dst.h index 13d507d69ddb..4fb6c4381791 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -325,7 +325,14 @@ static inline void skb_dst_force(struct sk_buff *skb) static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; - skb->rxhash = 0; + + /* + * Clear rxhash so that we can recalulate the hash for the + * encapsulated packet, unless we have already determine the hash + * over the L4 4-tuple. + */ + if (!skb->l4_rxhash) + skb->rxhash = 0; skb_set_queue_mapping(skb, 0); skb_dst_drop(skb); nf_reset(skb); diff --git a/include/net/sock.h b/include/net/sock.h index 8e4062f165b8..5ac682f73d63 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -686,16 +686,25 @@ static inline void sock_rps_reset_flow(const struct sock *sk) #endif } -static inline void sock_rps_save_rxhash(struct sock *sk, u32 rxhash) +static inline void sock_rps_save_rxhash(struct sock *sk, + const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != rxhash)) { + if (unlikely(sk->sk_rxhash != skb->rxhash)) { sock_rps_reset_flow(sk); - sk->sk_rxhash = rxhash; + sk->sk_rxhash = skb->rxhash; } #endif } +static inline void sock_rps_reset_rxhash(struct sock *sk) +{ +#ifdef CONFIG_RPS + sock_rps_reset_flow(sk); + sk->sk_rxhash = 0; +#endif +} + #define sk_wait_event(__sk, __timeo, __condition) \ ({ int __rc; \ release_sock(__sk); \ diff --git a/net/core/dev.c b/net/core/dev.c index 6578d9483043..e485cb37228f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2519,10 +2519,11 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* * __skb_get_rxhash: calculate a flow hash based on src/dst addresses - * and src/dst port numbers. Returns a non-zero hash number on success - * and 0 on failure. + * and src/dst port numbers. Sets rxhash in skb to non-zero hash value + * on success, zero indicates no valid hash. Also, sets l4_rxhash in skb + * if hash is a canonical 4-tuple hash over transport ports. */ -__u32 __skb_get_rxhash(struct sk_buff *skb) +void __skb_get_rxhash(struct sk_buff *skb) { int nhoff, hash = 0, poff; const struct ipv6hdr *ip6; @@ -2574,6 +2575,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) ports.v32 = * (__force u32 *) (skb->data + nhoff); if (ports.v16[1] < ports.v16[0]) swap(ports.v16[0], ports.v16[1]); + skb->l4_rxhash = 1; } } @@ -2586,7 +2588,7 @@ __u32 __skb_get_rxhash(struct sk_buff *skb) hash = 1; done: - return hash; + skb->rxhash = hash; } EXPORT_SYMBOL(__skb_get_rxhash); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 27002dffe7ed..edb66f3e24f1 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -529,6 +529,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; + new->l4_rxhash = old->l4_rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); #endif diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1c12b8ec849d..b3f26114b03e 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1578,7 +1578,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) #endif if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1595,7 +1595,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) goto discard; if (nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) { rsk = nsk; goto reset; @@ -1603,7 +1603,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c1d5facab7c9..ebaa96bd3464 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1267,7 +1267,7 @@ int udp_disconnect(struct sock *sk, int flags) sk->sk_state = TCP_CLOSE; inet->inet_daddr = 0; inet->inet_dport = 0; - sock_rps_save_rxhash(sk, 0); + sock_rps_reset_rxhash(sk); sk->sk_bound_dev_if = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -1355,7 +1355,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) int rc; if (inet_sk(sk)->inet_daddr) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); rc = ip_queue_rcv_skb(sk, skb); if (rc < 0) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d1fb63f4aeb7..44a5859535b5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1628,7 +1628,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1650,7 +1650,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * the new socket.. */ if(nsk != sk) { - sock_rps_save_rxhash(nsk, skb->rxhash); + sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; if (opt_skb) @@ -1658,7 +1658,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } } else - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len)) goto reset; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 97e47f06e8b7..35bbdc42241e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -509,7 +509,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) int is_udplite = IS_UDPLITE(sk); if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) - sock_rps_save_rxhash(sk, skb->rxhash); + sock_rps_save_rxhash(sk, skb); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto drop; -- cgit v1.2.3 From e971b7225bcb1f318811ef04628c441497372999 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 14 Aug 2011 19:46:12 +0000 Subject: rps: Infrastructure in __skb_get_rxhash for deep inspection Basics for looking for ports in encapsulated packets in tunnels. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index e485cb37228f..4bee9a9aeef6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -133,6 +133,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2539,6 +2540,7 @@ void __skb_get_rxhash(struct sk_buff *skb) nhoff = skb_network_offset(skb); proto = skb->protocol; +again: switch (proto) { case __constant_htons(ETH_P_IP): if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) @@ -2567,6 +2569,11 @@ void __skb_get_rxhash(struct sk_buff *skb) goto done; } + switch (ip_proto) { + default: + break; + } + ports.v32 = 0; poff = proto_ports_offset(ip_proto); if (poff >= 0) { -- cgit v1.2.3 From c6865cb3cc6f3c2857fa4c6f5fda2945d70b1e84 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 14 Aug 2011 19:46:29 +0000 Subject: rps: Inspect GRE encapsulated packets to get flow hash Crack open GRE packets in __skb_get_rxhash to compute 4-tuple hash on in encapsulated packet. Note that this is used only when the __skb_get_rxhash is taken, in particular only when the device does not compute provide the rxhash (ie. feature is disabled). This was tested by creating a single GRE tunnel between two 16 core AMD machines. 200 netperf TCP_RR streams were ran with 1 byte request and response size. Without patch: 157497 tps, 50/90/99% latencies 1250/1292/1364 usecs With patch: 325896 tps, 50/90/99% latencies 603/848/1169 Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- net/core/dev.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 4bee9a9aeef6..a8d91a5dd909 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2570,6 +2570,28 @@ again: } switch (ip_proto) { + case IPPROTO_GRE: + if (pskb_may_pull(skb, nhoff + 16)) { + u8 *h = skb->data + nhoff; + __be16 flags = *(__be16 *)h; + + /* + * Only look inside GRE if version zero and no + * routing + */ + if (!(flags & (GRE_VERSION|GRE_ROUTING))) { + proto = *(__be16 *)(h + 2); + nhoff += 4; + if (flags & GRE_CSUM) + nhoff += 4; + if (flags & GRE_KEY) + nhoff += 4; + if (flags & GRE_SEQ) + nhoff += 4; + goto again; + } + } + break; default: break; } -- cgit v1.2.3 From 01789349ee52e4a3faf376f1485303d9723c4f1f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Aug 2011 06:29:00 +0000 Subject: net: introduce IFF_UNICAST_FLT private flag Use IFF_UNICAST_FTL to find out if driver handles unicast address filtering. In case it does not, promisc mode is entered. Patch also fixes following drivers: stmmac, niu: support uc filtering and yet it propagated ndo_set_multicast_list bna, benet, pxa168_eth, ks8851, ks8851_mll, ksz884x : has set ndo_set_rx_mode but do not support uc filtering Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2.c | 2 ++ drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c | 3 +++ drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 3 +++ drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 ++ drivers/net/ethernet/cisco/enic/enic_main.c | 3 +++ drivers/net/ethernet/intel/e1000/e1000_main.c | 2 ++ drivers/net/ethernet/intel/igb/igb_main.c | 3 +++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 +++ drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 3 +++ drivers/net/ethernet/marvell/mv643xx_eth.c | 2 ++ drivers/net/ethernet/octeon/octeon_mgmt.c | 3 +++ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 13 ++++++++----- drivers/net/ethernet/sun/niu.c | 5 ++++- drivers/net/virtio_net.c | 1 + include/linux/if.h | 1 + include/linux/netdevice.h | 2 ++ net/core/dev.c | 12 ++++++------ 17 files changed, 51 insertions(+), 12 deletions(-) (limited to 'net/core') diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 4b2b57018a02..4a9a8c8184d8 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -8370,6 +8371,7 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) dev->vlan_features = dev->hw_features; dev->hw_features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; dev->features |= dev->hw_features; + dev->priv_flags |= IFF_UNICAST_FLT; if ((rc = register_netdev(dev))) { dev_err(&pdev->dev, "Cannot register net device\n"); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index f90e3fa61ac2..f4ab90c20891 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -10266,6 +10267,8 @@ static int __devinit bnx2x_init_dev(struct pci_dev *pdev, dev->netdev_ops = &bnx2x_netdev_ops; bnx2x_set_ethtool_ops(dev); + dev->priv_flags |= IFF_UNICAST_FLT; + dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_HW_VLAN_TX; diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index c9957b7f17b5..90b4921cac9b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -3639,6 +3640,8 @@ static int __devinit init_one(struct pci_dev *pdev, netdev->features |= netdev->hw_features | highdma; netdev->vlan_features = netdev->features & VLAN_FEAT; + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netdev_ops = &cxgb4_netdev_ops; SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops); } diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index ec799139dfe2..da9072bfca8b 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2625,6 +2625,8 @@ static int __devinit cxgb4vf_pci_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netdev_ops = &cxgb4vf_netdev_ops; SET_ETHTOOL_OPS(netdev, &cxgb4vf_ethtool_ops); diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 67a27cd304dd..f342be0c51aa 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -2442,6 +2443,8 @@ static int __devinit enic_probe(struct pci_dev *pdev, if (using_dac) netdev->features |= NETIF_F_HIGHDMA; + netdev->priv_flags |= IFF_UNICAST_FLT; + err = register_netdev(netdev); if (err) { dev_err(dev, "Cannot register net device, aborting\n"); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index f97afda941d7..7c280e5832b2 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1080,6 +1080,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_HW_CSUM; netdev->vlan_features |= NETIF_F_SG; + netdev->priv_flags |= IFF_UNICAST_FLT; + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); /* initialize eeprom parameters */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 40d4c405fd7e..592b5c1827bc 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -1973,6 +1974,8 @@ static int __devinit igb_probe(struct pci_dev *pdev, netdev->features |= NETIF_F_SCTP_CSUM; } + netdev->priv_flags |= IFF_UNICAST_FLT; + adapter->en_mng_pt = igb_enable_mng_pass_thru(hw); /* before reading the NVM, reset the controller to put the device in a diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index e86297b32733..8c70273b01bc 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -7527,6 +7528,8 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev, netdev->vlan_features |= NETIF_F_IPV6_CSUM; netdev->vlan_features |= NETIF_F_SG; + netdev->priv_flags |= IFF_UNICAST_FLT; + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) adapter->flags &= ~(IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_DCB_ENABLED); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 3b880a27f8d1..45b007827024 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -3358,6 +3359,8 @@ static int __devinit ixgbevf_probe(struct pci_dev *pdev, if (pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; + netdev->priv_flags |= IFF_UNICAST_FLT; + /* The HW MAC address was set and/or determined in sw_init */ memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index 259699983ca5..1e2c9f072bfd 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -2923,6 +2923,8 @@ static int mv643xx_eth_probe(struct platform_device *pdev) dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM; dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM; + dev->priv_flags |= IFF_UNICAST_FLT; + SET_NETDEV_DEV(dev, &pdev->dev); if (mp->shared->win_protect) diff --git a/drivers/net/ethernet/octeon/octeon_mgmt.c b/drivers/net/ethernet/octeon/octeon_mgmt.c index 429e08c84e9b..d6f96e50e2f4 100644 --- a/drivers/net/ethernet/octeon/octeon_mgmt.c +++ b/drivers/net/ethernet/octeon/octeon_mgmt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -1102,6 +1103,8 @@ static int __devinit octeon_mgmt_probe(struct platform_device *pdev) tasklet_init(&p->tx_clean_tasklet, octeon_mgmt_clean_tx_tasklet, (unsigned long)p); + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->netdev_ops = &octeon_mgmt_ops; netdev->ethtool_ops = &octeon_mgmt_ethtool_ops; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c6e567e04eff..68fb5b0593a0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -1284,7 +1285,7 @@ static int stmmac_config(struct net_device *dev, struct ifmap *map) } /** - * stmmac_multicast_list - entry point for multicast addressing + * stmmac_set_rx_mode - entry point for multicast addressing * @dev : pointer to the device structure * Description: * This function is a driver entry point which gets called by the kernel @@ -1292,7 +1293,7 @@ static int stmmac_config(struct net_device *dev, struct ifmap *map) * Return value: * void. */ -static void stmmac_multicast_list(struct net_device *dev) +static void stmmac_set_rx_mode(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); @@ -1421,7 +1422,7 @@ static const struct net_device_ops stmmac_netdev_ops = { .ndo_stop = stmmac_release, .ndo_change_mtu = stmmac_change_mtu, .ndo_fix_features = stmmac_fix_features, - .ndo_set_multicast_list = stmmac_multicast_list, + .ndo_set_rx_mode = stmmac_set_rx_mode, .ndo_tx_timeout = stmmac_tx_timeout, .ndo_do_ioctl = stmmac_ioctl, .ndo_set_config = stmmac_config, @@ -1498,10 +1499,12 @@ static int stmmac_mac_device_setup(struct net_device *dev) struct mac_device_info *device; - if (priv->plat->has_gmac) + if (priv->plat->has_gmac) { + dev->priv_flags |= IFF_UNICAST_FLT; device = dwmac1000_setup(priv->ioaddr); - else + } else { device = dwmac100_setup(priv->ioaddr); + } if (!device) return -ENOMEM; diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index ed47585a6862..3c9ef1c196a9 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -9716,7 +9717,7 @@ static const struct net_device_ops niu_netdev_ops = { .ndo_stop = niu_close, .ndo_start_xmit = niu_start_xmit, .ndo_get_stats64 = niu_get_stats, - .ndo_set_multicast_list = niu_set_rx_mode, + .ndo_set_rx_mode = niu_set_rx_mode, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = niu_set_mac_addr, .ndo_do_ioctl = niu_ioctl, @@ -9852,6 +9853,8 @@ static int __devinit niu_pci_init_one(struct pci_dev *pdev, niu_set_basic_features(dev); + dev->priv_flags |= IFF_UNICAST_FLT; + np->regs = pci_ioremap_bar(pdev, 0); if (!np->regs) { dev_err(&pdev->dev, "Cannot map device registers, aborting\n"); diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0c7321c35ad4..4f09f88f1c28 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -949,6 +949,7 @@ static int virtnet_probe(struct virtio_device *vdev) return -ENOMEM; /* Set up network device as normal. */ + dev->priv_flags |= IFF_UNICAST_FLT; dev->netdev_ops = &virtnet_netdev; dev->features = NETIF_F_HIGHDMA; diff --git a/include/linux/if.h b/include/linux/if.h index 03489ca92ded..db20bd4fd16b 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -78,6 +78,7 @@ * datapath port */ #define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing * skbs on transmit */ +#define IFF_UNICAST_FLT 0x20000 /* Supports unicast filtering */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ddee79bb8f15..96e4f7e0ad68 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -723,6 +723,8 @@ struct netdev_tc_txq { * * void (*ndo_set_rx_mode)(struct net_device *dev); * This function is called device changes address list filtering. + * If driver handles unicast address filtering, it should set + * IFF_UNICAST_FLT to its priv_flags. * * void (*ndo_set_multicast_list)(struct net_device *dev); * This function is called when the multicast address list changes. diff --git a/net/core/dev.c b/net/core/dev.c index a8d91a5dd909..6eb03fdaf075 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4522,9 +4522,7 @@ void __dev_set_rx_mode(struct net_device *dev) if (!netif_device_present(dev)) return; - if (ops->ndo_set_rx_mode) - ops->ndo_set_rx_mode(dev); - else { + if (!(dev->priv_flags & IFF_UNICAST_FLT)) { /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ @@ -4535,10 +4533,12 @@ void __dev_set_rx_mode(struct net_device *dev) __dev_set_promiscuity(dev, -1); dev->uc_promisc = false; } - - if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } + + if (ops->ndo_set_rx_mode) + ops->ndo_set_rx_mode(dev); + else if (ops->ndo_set_multicast_list) + ops->ndo_set_multicast_list(dev); } void dev_set_rx_mode(struct net_device *dev) -- cgit v1.2.3 From b81693d9149c598302e8eb9c20cb20330d922c8e Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 16 Aug 2011 06:29:02 +0000 Subject: net: remove ndo_set_multicast_list callback Remove no longer used operation. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/netdevices.txt | 4 ++-- include/linux/netdevice.h | 4 ---- net/core/dev.c | 6 ++---- net/core/dev_addr_lists.c | 4 ++-- net/ipv4/igmp.c | 2 +- 5 files changed, 7 insertions(+), 13 deletions(-) (limited to 'net/core') diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 87b3d15f523a..89358341682a 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -73,7 +73,7 @@ dev->hard_start_xmit: has to lock by itself when needed. It is recommended to use a try lock for this and return NETDEV_TX_LOCKED when the spin lock fails. The locking there should also properly protect against - set_multicast_list. Note that the use of NETIF_F_LLTX is deprecated. + set_rx_mode. Note that the use of NETIF_F_LLTX is deprecated. Don't use it for new drivers. Context: Process with BHs disabled or BH (timer), @@ -92,7 +92,7 @@ dev->tx_timeout: Context: BHs disabled Notes: netif_queue_stopped() is guaranteed true -dev->set_multicast_list: +dev->set_rx_mode: Synchronization: netif_tx_lock spinlock. Context: BHs disabled diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 96e4f7e0ad68..125f9fb8ece4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -726,9 +726,6 @@ struct netdev_tc_txq { * If driver handles unicast address filtering, it should set * IFF_UNICAST_FLT to its priv_flags. * - * void (*ndo_set_multicast_list)(struct net_device *dev); - * This function is called when the multicast address list changes. - * * int (*ndo_set_mac_address)(struct net_device *dev, void *addr); * This function is called when the Media Access Control address * needs to be changed. If this interface is not defined, the @@ -870,7 +867,6 @@ struct net_device_ops { void (*ndo_change_rx_flags)(struct net_device *dev, int flags); void (*ndo_set_rx_mode)(struct net_device *dev); - void (*ndo_set_multicast_list)(struct net_device *dev); int (*ndo_set_mac_address)(struct net_device *dev, void *addr); int (*ndo_validate_addr)(struct net_device *dev); diff --git a/net/core/dev.c b/net/core/dev.c index 6eb03fdaf075..ead0366ee1e4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4537,8 +4537,6 @@ void __dev_set_rx_mode(struct net_device *dev) if (ops->ndo_set_rx_mode) ops->ndo_set_rx_mode(dev); - else if (ops->ndo_set_multicast_list) - ops->ndo_set_multicast_list(dev); } void dev_set_rx_mode(struct net_device *dev) @@ -4888,7 +4886,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return -EOPNOTSUPP; case SIOCADDMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) @@ -4896,7 +4894,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data); case SIOCDELMULTI: - if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) || + if (!ops->ndo_set_rx_mode || ifr->ifr_hwaddr.sa_family != AF_UNSPEC) return -EINVAL; if (!netif_device_present(dev)) diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c index e2e66939ed00..283d1b863876 100644 --- a/net/core/dev_addr_lists.c +++ b/net/core/dev_addr_lists.c @@ -591,8 +591,8 @@ EXPORT_SYMBOL(dev_mc_del_global); * addresses that have no users left. The source device must be * locked by netif_tx_lock_bh. * - * This function is intended to be called from the dev->set_multicast_list - * or dev->set_rx_mode function of layered software devices. + * This function is intended to be called from the ndo_set_rx_mode + * function of layered software devices. */ int dev_mc_sync(struct net_device *to, struct net_device *from) { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 70695221a10d..ce57bdee14cb 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1009,7 +1009,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr) /* Checking for IFF_MULTICAST here is WRONG-WRONG-WRONG. We will get multicast token leakage, when IFF_MULTICAST - is changed. This check should be done in dev->set_multicast_list + is changed. This check should be done in ndo_set_rx_mode routine. Something sort of: if (dev->mc_list && dev->flags&IFF_MULTICAST) { do it; } --ANK -- cgit v1.2.3 From 1ff1986fc94ee711df3cf19d45f2abf351436a6d Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 18 Aug 2011 22:07:54 -0700 Subject: net: rps: support 802.1Q For the 802.1Q packets, if the NIC doesn't support hw-accel-vlan-rx, RPS won't inspect the internal 4 tuples to generate skb->rxhash, so this kind of traffic can't get any benefit from RPS. This patch adds the support for 802.1Q to RPS. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index ead0366ee1e4..be7ee506f17a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2529,6 +2529,7 @@ void __skb_get_rxhash(struct sk_buff *skb) int nhoff, hash = 0, poff; const struct ipv6hdr *ip6; const struct iphdr *ip; + const struct vlan_hdr *vlan; u8 ip_proto; u32 addr1, addr2; u16 proto; @@ -2565,6 +2566,13 @@ again: addr2 = (__force u32) ip6->daddr.s6_addr32[3]; nhoff += 40; break; + case __constant_htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, sizeof(*vlan) + nhoff)) + goto done; + vlan = (const struct vlan_hdr *) (skb->data + nhoff); + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + goto again; default: goto done; } -- cgit v1.2.3 From ae1511bf769cafeae5ab61aaf9947a16a22cbd10 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Thu, 18 Aug 2011 23:23:47 -0700 Subject: net: rps: support PPPOE session messages Inspect the payload of PPPOE session messages for the 4 tuples to generate skb->rxhash. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index be7ee506f17a..c2442b46646e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -134,6 +134,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2573,6 +2574,13 @@ again: proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); goto again; + case __constant_htons(ETH_P_PPP_SES): + if (!pskb_may_pull(skb, PPPOE_SES_HLEN + nhoff)) + goto done; + proto = *((__be16 *) (skb->data + nhoff + + sizeof(struct pppoe_hdr))); + nhoff += PPPOE_SES_HLEN; + goto again; default: goto done; } -- cgit v1.2.3 From 6461be3a54f802e00d5dcba3537271f92a90eaf3 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Fri, 19 Aug 2011 04:44:18 +0000 Subject: net: Preserve ooo_okay when copying skb header Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/skbuff.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index edb66f3e24f1..e27334ec367a 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -529,6 +529,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) new->mac_header = old->mac_header; skb_dst_copy(new, old); new->rxhash = old->rxhash; + new->ooo_okay = old->ooo_okay; new->l4_rxhash = old->l4_rxhash; #ifdef CONFIG_XFRM new->sp = secpath_get(old->sp); -- cgit v1.2.3 From 0dfe178239453547d4297a4583ee7847948a481b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 22 Aug 2011 12:43:22 -0700 Subject: net: vlan: goto another_round instead of calling __netif_receive_skb Now, when vlan tag on untagged in non-accelerated path is stripped from skb, headers are reset right away. Benefit from that and avoid calling __netif_receive_skb recursivelly and just use another_round. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- net/core/dev.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index c2442b46646e..a4306f7e4d09 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3236,10 +3236,9 @@ ncls: ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; } - if (vlan_do_receive(&skb)) { - ret = __netif_receive_skb(skb); - goto out; - } else if (unlikely(!skb)) + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) goto out; } -- cgit v1.2.3 From ec5efe7946280d1e84603389a1030ccec0a767ae Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 24 Aug 2011 10:41:19 +0000 Subject: rps: support IPIP encapsulation Skip IPIP header to get proper layer-4 information. Like GRE tunnels, this only works if rxhash is not already provided by the device itself (ethtool -K ethX rxhash off), to allow kernel compute a software rxhash. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index a4306f7e4d09..b668a3d9a189 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2608,6 +2608,8 @@ again: } } break; + case IPPROTO_IPIP: + goto again; default: break; } -- cgit v1.2.3 From ea2ab69379a941c6f8884e290fdd28c93936a778 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Mon, 22 Aug 2011 23:44:58 +0000 Subject: net: convert core to skb paged frag APIs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Ian Campbell Cc: "David S. Miller" Cc: Eric Dumazet Cc: "Michał Mirosław" Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- net/core/datagram.c | 8 ++++---- net/core/dev.c | 16 +++++++++------- net/core/kmap_skb.h | 2 +- net/core/pktgen.c | 3 +-- net/core/skbuff.c | 29 +++++++++++++++-------------- net/core/sock.c | 12 +++++------- net/core/user_dma.c | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 7b0e1773f9cd..8d426281259d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1898,12 +1898,12 @@ static inline int skb_add_data(struct sk_buff *skb, } static inline int skb_can_coalesce(struct sk_buff *skb, int i, - struct page *page, int off) + const struct page *page, int off) { if (i) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; - return page == frag->page && + return page == skb_frag_page(frag) && off == frag->page_offset + frag->size; } return 0; diff --git a/net/core/datagram.c b/net/core/datagram.c index 18ac112ea7ae..6449bed457d4 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -332,7 +332,7 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -418,7 +418,7 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -508,7 +508,7 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, int err; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; @@ -594,7 +594,7 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, int err = 0; u8 *vaddr; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; diff --git a/net/core/dev.c b/net/core/dev.c index b668a3d9a189..b2e262ed3963 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1949,9 +1949,11 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #ifdef CONFIG_HIGHMEM int i; if (!(dev->features & NETIF_F_HIGHDMA)) { - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - if (PageHighMem(skb_shinfo(skb)->frags[i].page)) + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + if (PageHighMem(skb_frag_page(frag))) return 1; + } } if (PCI_DMA_BUS_IS_PHYS) { @@ -1960,7 +1962,8 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) if (!pdev) return 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - dma_addr_t addr = page_to_phys(skb_shinfo(skb)->frags[i].page); + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + dma_addr_t addr = page_to_phys(skb_frag_page(frag)); if (!pdev->dma_mask || addr + PAGE_SIZE - 1 > *pdev->dma_mask) return 1; } @@ -3474,7 +3477,7 @@ pull: skb_shinfo(skb)->frags[0].size -= grow; if (unlikely(!skb_shinfo(skb)->frags[0].size)) { - put_page(skb_shinfo(skb)->frags[0].page); + skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, --skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t)); @@ -3538,10 +3541,9 @@ void skb_gro_reset_offset(struct sk_buff *skb) NAPI_GRO_CB(skb)->frag0_len = 0; if (skb->mac_header == skb->tail && - !PageHighMem(skb_shinfo(skb)->frags[0].page)) { + !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = - page_address(skb_shinfo(skb)->frags[0].page) + - skb_shinfo(skb)->frags[0].page_offset; + skb_frag_address(&skb_shinfo(skb)->frags[0]); NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; } } diff --git a/net/core/kmap_skb.h b/net/core/kmap_skb.h index 283c2b993fb8..81e1ed7c8383 100644 --- a/net/core/kmap_skb.h +++ b/net/core/kmap_skb.h @@ -7,7 +7,7 @@ static inline void *kmap_skb_frag(const skb_frag_t *frag) local_bh_disable(); #endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); + return kmap_atomic(skb_frag_page(frag), KM_SKB_DATA_SOFTIRQ); } static inline void kunmap_skb_frag(void *vaddr) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index e35a6fbb8110..796044ac0bf3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2602,8 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } - skb_shinfo(skb)->frags[i].page = pkt_dev->page; - get_page(pkt_dev->page); + skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e27334ec367a..296afd0aa8d2 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -326,7 +326,7 @@ static void skb_release_data(struct sk_buff *skb) if (skb_shinfo(skb)->nr_frags) { int i; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); } /* @@ -809,7 +809,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask) } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); + skb_frag_ref(skb, i); } skb_shinfo(n)->nr_frags = i; } @@ -901,7 +901,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); if (skb_has_frag_list(skb)) skb_clone_fraglist(skb); @@ -1181,7 +1181,7 @@ drop_pages: skb_shinfo(skb)->nr_frags = i; for (; i < nfrags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); if (skb_has_frag_list(skb)) skb_drop_fraglist(skb); @@ -1350,7 +1350,7 @@ pull_pages: k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); eat -= skb_shinfo(skb)->frags[i].size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -1609,7 +1609,8 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; - if (__splice_segment(f->page, f->page_offset, f->size, + if (__splice_segment(skb_frag_page(f), + f->page_offset, f->size, offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -2154,7 +2155,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, * where splitting is expensive. * 2. Split is accurately. We make this. */ - get_page(skb_shinfo(skb)->frags[i].page); + skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; skb_shinfo(skb1)->frags[0].size -= len - pos; skb_shinfo(skb)->frags[i].size = len - pos; @@ -2229,7 +2230,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) * commit all, so that we don't have to undo partial changes */ if (!to || - !skb_can_coalesce(tgt, to, fragfrom->page, fragfrom->page_offset)) { + !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + fragfrom->page_offset)) { merge = -1; } else { merge = to - 1; @@ -2276,7 +2278,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) to++; } else { - get_page(fragfrom->page); + __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; fragto->size = todo; @@ -2298,7 +2300,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragto = &skb_shinfo(tgt)->frags[merge]; fragto->size += fragfrom->size; - put_page(fragfrom->page); + __skb_frag_unref(fragfrom); } /* Reposition in the original skb */ @@ -2543,8 +2545,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, (page_address(frag->page) + - frag->page_offset + frag->size), + ret = getfrag(from, skb_frag_address(frag) + frag->size, offset, copy, 0, skb); if (ret < 0) return -EFAULT; @@ -2696,7 +2697,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; - get_page(frag->page); + __skb_frag_ref(frag); size = frag->size; if (pos < offset) { @@ -2919,7 +2920,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) if (copy > len) copy = len; - sg_set_page(&sg[elt], frag->page, copy, + sg_set_page(&sg[elt], skb_frag_page(frag), copy, frag->page_offset+offset-start); elt++; if (!(len -= copy)) diff --git a/net/core/sock.c b/net/core/sock.c index 9997026b44b2..b29ab61b029c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1533,7 +1533,6 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, skb_shinfo(skb)->nr_frags = npages; for (i = 0; i < npages; i++) { struct page *page; - skb_frag_t *frag; page = alloc_pages(sk->sk_allocation, 0); if (!page) { @@ -1543,12 +1542,11 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len, goto failure; } - frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; - frag->page_offset = 0; - frag->size = (data_len >= PAGE_SIZE ? - PAGE_SIZE : - data_len); + __skb_fill_page_desc(skb, i, + page, 0, + (data_len >= PAGE_SIZE ? + PAGE_SIZE : + data_len)); data_len -= PAGE_SIZE; } diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 25d717ebc92e..34e9664cae3b 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -78,7 +78,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, copy = end - offset; if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - struct page *page = frag->page; + struct page *page = skb_frag_page(frag); if (copy > len) copy = len; -- cgit v1.2.3 From c37e0c993055d8c4fd82202331d06e6ef9bfec4b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 30 Aug 2011 23:31:58 +0000 Subject: net: linkwatch: allow vlans to get carrier changes faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is a time-lag of IFF_RUNNING flag consistency between vlan and real devices when the real devices are in problem such as link or cable broken. This leads to a degradation of Availability such as a delay of failover in HA systems using vlan since the detection of the problem at real device is delayed. We can avoid the linkwatch delay (~1 sec) for devices linked to another ones, since delay is already done for the realdev. Based on a previous patch from Mitsuo Hayasaka Reported-by: Mitsuo Hayasaka Signed-off-by: Eric Dumazet Cc: Herbert Xu Cc: Patrick McHardy Cc: "Michał Mirosław" Cc: Tom Herbert Cc: Stephen Hemminger Cc: Jesse Gross Tested-by: Mitsuo Hayasaka Signed-off-by: David S. Miller --- net/core/link_watch.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 357bd4ee4baa..c3519c6d1b16 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -78,8 +78,13 @@ static void rfc2863_policy(struct net_device *dev) static bool linkwatch_urgent_event(struct net_device *dev) { - return netif_running(dev) && netif_carrier_ok(dev) && - qdisc_tx_changing(dev); + if (!netif_running(dev)) + return false; + + if (dev->ifindex != dev->iflink) + return true; + + return netif_carrier_ok(dev) && qdisc_tx_changing(dev); } -- cgit v1.2.3 From 4bc71cb983fd2844e603bf633df2bb53385182d2 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 3 Sep 2011 03:34:30 +0000 Subject: net: consolidate and fix ethtool_ops->get_settings calling This patch does several things: - introduces __ethtool_get_settings which is called from ethtool code and from drivers as well. Put ASSERT_RTNL there. - dev_ethtool_get_settings() is replaced by __ethtool_get_settings() - changes calling in drivers so rtnl locking is respected. In iboe_get_rate was previously ->get_settings() called unlocked. This fixes it. Also prb_calc_retire_blk_tmo() in af_packet.c had the same problem. Also fixed by calling __dev_get_by_index() instead of dev_get_by_index() and holding rtnl_lock for both calls. - introduces rtnl_lock in bnx2fc_vport_create() and fcoe_vport_create() so bnx2fc_if_create() and fcoe_if_create() are called locked as they are from other places. - use __ethtool_get_settings() in bonding code Signed-off-by: Jiri Pirko v2->v3: -removed dev_ethtool_get_settings() -added ASSERT_RTNL into __ethtool_get_settings() -prb_calc_retire_blk_tmo - use __dev_get_by_index() and lock around it and __ethtool_get_settings() call v1->v2: add missing export_symbol Reviewed-by: Ben Hutchings [except FCoE bits] Acked-by: Ralf Baechle Signed-off-by: David S. Miller --- arch/mips/txx9/generic/setup_tx4939.c | 2 +- drivers/net/bonding/bond_main.c | 13 ++++----- drivers/net/macvlan.c | 3 +- drivers/scsi/bnx2fc/bnx2fc_fcoe.c | 4 ++- drivers/scsi/fcoe/fcoe.c | 4 ++- include/linux/ethtool.h | 3 ++ include/linux/netdevice.h | 3 -- include/rdma/ib_addr.h | 6 +++- net/8021q/vlan_dev.c | 3 +- net/bridge/br_if.c | 2 +- net/core/dev.c | 24 ---------------- net/core/ethtool.c | 20 ++++++++++---- net/core/net-sysfs.c | 4 +-- net/packet/af_packet.c | 52 ++++++++++++++++++----------------- 14 files changed, 69 insertions(+), 74 deletions(-) (limited to 'net/core') diff --git a/arch/mips/txx9/generic/setup_tx4939.c b/arch/mips/txx9/generic/setup_tx4939.c index e9f95dcde379..ba3cec3155df 100644 --- a/arch/mips/txx9/generic/setup_tx4939.c +++ b/arch/mips/txx9/generic/setup_tx4939.c @@ -321,7 +321,7 @@ void __init tx4939_sio_init(unsigned int sclk, unsigned int cts_mask) static u32 tx4939_get_eth_speed(struct net_device *dev) { struct ethtool_cmd cmd; - if (dev_ethtool_get_settings(dev, &cmd)) + if (__ethtool_get_settings(dev, &cmd)) return 100; /* default 100Mbps */ return ethtool_cmd_speed(&cmd); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 8cb75a6efec3..1dcb07ce5263 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -557,7 +557,7 @@ down: static int bond_update_speed_duplex(struct slave *slave) { struct net_device *slave_dev = slave->dev; - struct ethtool_cmd etool = { .cmd = ETHTOOL_GSET }; + struct ethtool_cmd ecmd; u32 slave_speed; int res; @@ -565,18 +565,15 @@ static int bond_update_speed_duplex(struct slave *slave) slave->speed = SPEED_100; slave->duplex = DUPLEX_FULL; - if (!slave_dev->ethtool_ops || !slave_dev->ethtool_ops->get_settings) - return -1; - - res = slave_dev->ethtool_ops->get_settings(slave_dev, &etool); + res = __ethtool_get_settings(slave_dev, &ecmd); if (res < 0) return -1; - slave_speed = ethtool_cmd_speed(&etool); + slave_speed = ethtool_cmd_speed(&ecmd); if (slave_speed == 0 || slave_speed == ((__u32) -1)) return -1; - switch (etool.duplex) { + switch (ecmd.duplex) { case DUPLEX_FULL: case DUPLEX_HALF: break; @@ -585,7 +582,7 @@ static int bond_update_speed_duplex(struct slave *slave) } slave->speed = slave_speed; - slave->duplex = etool.duplex; + slave->duplex = ecmd.duplex; return 0; } diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index 836e13fcb3ec..b100c90e8507 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -543,7 +543,8 @@ static int macvlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct macvlan_dev *vlan = netdev_priv(dev); - return dev_ethtool_get_settings(vlan->lowerdev, cmd); + + return __ethtool_get_settings(vlan->lowerdev, cmd); } static const struct ethtool_ops macvlan_ethtool_ops = { diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c index 2c780a78fcbd..820a1840c3f7 100644 --- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c +++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c @@ -673,7 +673,7 @@ static void bnx2fc_link_speed_update(struct fc_lport *lport) struct net_device *netdev = interface->netdev; struct ethtool_cmd ecmd; - if (!dev_ethtool_get_settings(netdev, &ecmd)) { + if (!__ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); if (ecmd.supported & (SUPPORTED_1000baseT_Half | @@ -1001,9 +1001,11 @@ static int bnx2fc_vport_create(struct fc_vport *vport, bool disabled) "this interface\n"); return -EIO; } + rtnl_lock(); mutex_lock(&bnx2fc_dev_lock); vn_port = bnx2fc_if_create(interface, &vport->dev, 1); mutex_unlock(&bnx2fc_dev_lock); + rtnl_unlock(); if (IS_ERR(vn_port)) { printk(KERN_ERR PFX "bnx2fc_vport_create (%s) failed\n", diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c index 3416ab673814..83aa3ac52c40 100644 --- a/drivers/scsi/fcoe/fcoe.c +++ b/drivers/scsi/fcoe/fcoe.c @@ -2043,7 +2043,7 @@ int fcoe_link_speed_update(struct fc_lport *lport) struct net_device *netdev = fcoe_netdev(lport); struct ethtool_cmd ecmd; - if (!dev_ethtool_get_settings(netdev, &ecmd)) { + if (!__ethtool_get_settings(netdev, &ecmd)) { lport->link_supported_speeds &= ~(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT); if (ecmd.supported & (SUPPORTED_1000baseT_Half | @@ -2452,7 +2452,9 @@ static int fcoe_vport_create(struct fc_vport *vport, bool disabled) } mutex_lock(&fcoe_config_mutex); + rtnl_lock(); vn_port = fcoe_if_create(fcoe, &vport->dev, 1); + rtnl_unlock(); mutex_unlock(&fcoe_config_mutex); if (IS_ERR(vn_port)) { diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 3829712ccc05..8571f18c38a6 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -728,6 +728,9 @@ enum ethtool_sfeatures_retval_bits { /* needed by dev_disable_lro() */ extern int __ethtool_set_flags(struct net_device *dev, u32 flags); +extern int __ethtool_get_settings(struct net_device *dev, + struct ethtool_cmd *cmd); + /** * enum ethtool_phys_id_state - indicator state for physical identification * @ETHTOOL_ID_INACTIVE: Physical ID indicator should be deactivated diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0a7f619f284e..43b32983ba10 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2589,9 +2589,6 @@ static inline int netif_is_bond_slave(struct net_device *dev) extern struct pernet_operations __net_initdata loopback_net_ops; -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd); - static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev) { if (dev->features & NETIF_F_RXCSUM) diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index ae8c68f30f1b..639a4491fc0d 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -218,8 +218,12 @@ static inline int iboe_get_rate(struct net_device *dev) { struct ethtool_cmd cmd; u32 speed; + int err; - if (dev_ethtool_get_settings(dev, &cmd)) + rtnl_lock(); + err = __ethtool_get_settings(dev, &cmd); + rtnl_unlock(); + if (err) return IB_RATE_PORT_CURRENT; speed = ethtool_cmd_speed(&cmd); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index eba705b92d6f..c8cf9391417e 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -610,7 +610,8 @@ static int vlan_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { const struct vlan_dev_info *vlan = vlan_dev_info(dev); - return dev_ethtool_get_settings(vlan->real_dev, cmd); + + return __ethtool_get_settings(vlan->real_dev, cmd); } static void vlan_ethtool_get_drvinfo(struct net_device *dev, diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index b365bba84d19..043a5eb8cafc 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -35,7 +35,7 @@ static int port_cost(struct net_device *dev) { struct ethtool_cmd ecmd; - if (!dev_ethtool_get_settings(dev, &ecmd)) { + if (!__ethtool_get_settings(dev, &ecmd)) { switch (ethtool_cmd_speed(&ecmd)) { case SPEED_10000: return 2; diff --git a/net/core/dev.c b/net/core/dev.c index b2e262ed3963..4b9981caf06f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4565,30 +4565,6 @@ void dev_set_rx_mode(struct net_device *dev) netif_addr_unlock_bh(dev); } -/** - * dev_ethtool_get_settings - call device's ethtool_ops::get_settings() - * @dev: device - * @cmd: memory area for ethtool_ops::get_settings() result - * - * The cmd arg is initialized properly (cleared and - * ethtool_cmd::cmd field set to ETHTOOL_GSET). - * - * Return device's ethtool_ops::get_settings() result value or - * -EOPNOTSUPP when device doesn't expose - * ethtool_ops::get_settings() operation. - */ -int dev_ethtool_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) - return -EOPNOTSUPP; - - memset(cmd, 0, sizeof(struct ethtool_cmd)); - cmd->cmd = ETHTOOL_GSET; - return dev->ethtool_ops->get_settings(dev, cmd); -} -EXPORT_SYMBOL(dev_ethtool_get_settings); - /** * dev_get_flags - get flags reported to userspace * @dev: device diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 6cdba5fc2bed..f44481707124 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -569,15 +569,25 @@ int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } -static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +int __ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) { - struct ethtool_cmd cmd = { .cmd = ETHTOOL_GSET }; - int err; + ASSERT_RTNL(); - if (!dev->ethtool_ops->get_settings) + if (!dev->ethtool_ops || !dev->ethtool_ops->get_settings) return -EOPNOTSUPP; - err = dev->ethtool_ops->get_settings(dev, &cmd); + memset(cmd, 0, sizeof(struct ethtool_cmd)); + cmd->cmd = ETHTOOL_GSET; + return dev->ethtool_ops->get_settings(dev, cmd); +} +EXPORT_SYMBOL(__ethtool_get_settings); + +static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) +{ + int err; + struct ethtool_cmd cmd; + + err = __ethtool_get_settings(dev, &cmd); if (err < 0) return err; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 56e42ab7cbc6..7604a635376b 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -147,7 +147,7 @@ static ssize_t show_speed(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, fmt_udec, ethtool_cmd_speed(&cmd)); } rtnl_unlock(); @@ -165,7 +165,7 @@ static ssize_t show_duplex(struct device *dev, if (netif_running(netdev)) { struct ethtool_cmd cmd; - if (!dev_ethtool_get_settings(netdev, &cmd)) + if (!__ethtool_get_settings(netdev, &cmd)) ret = sprintf(buf, "%s\n", cmd.duplex ? "full" : "half"); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 2ea3d63e1d4c..25e68f56b4ba 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -530,33 +530,35 @@ static int prb_calc_retire_blk_tmo(struct packet_sock *po, { struct net_device *dev; unsigned int mbits = 0, msec = 0, div = 0, tmo = 0; + struct ethtool_cmd ecmd; + int err; - dev = dev_get_by_index(sock_net(&po->sk), po->ifindex); - if (unlikely(dev == NULL)) + rtnl_lock(); + dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); + if (unlikely(!dev)) { + rtnl_unlock(); return DEFAULT_PRB_RETIRE_TOV; - - if (dev->ethtool_ops && dev->ethtool_ops->get_settings) { - struct ethtool_cmd ecmd = { .cmd = ETHTOOL_GSET, }; - - if (!dev->ethtool_ops->get_settings(dev, &ecmd)) { - switch (ecmd.speed) { - case SPEED_10000: - msec = 1; - div = 10000/1000; - break; - case SPEED_1000: - msec = 1; - div = 1000/1000; - break; - /* - * If the link speed is so slow you don't really - * need to worry about perf anyways - */ - case SPEED_100: - case SPEED_10: - default: - return DEFAULT_PRB_RETIRE_TOV; - } + } + err = __ethtool_get_settings(dev, &ecmd); + rtnl_unlock(); + if (!err) { + switch (ecmd.speed) { + case SPEED_10000: + msec = 1; + div = 10000/1000; + break; + case SPEED_1000: + msec = 1; + div = 1000/1000; + break; + /* + * If the link speed is so slow you don't really + * need to worry about perf anyways + */ + case SPEED_100: + case SPEED_10: + default: + return DEFAULT_PRB_RETIRE_TOV; } } -- cgit v1.2.3 From 16e5726269611b71c930054ffe9b858c1cea88eb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 19 Sep 2011 05:52:27 +0000 Subject: af_unix: dont send SCM_CREDENTIALS by default Since commit 7361c36c5224 (af_unix: Allow credentials to work across user and pid namespaces) af_unix performance dropped a lot. This is because we now take a reference on pid and cred in each write(), and release them in read(), usually done from another process, eventually from another cpu. This triggers false sharing. # Events: 154K cycles # # Overhead Command Shared Object Symbol # ........ ....... .................. ......................... # 10.40% hackbench [kernel.kallsyms] [k] put_pid 8.60% hackbench [kernel.kallsyms] [k] unix_stream_recvmsg 7.87% hackbench [kernel.kallsyms] [k] unix_stream_sendmsg 6.11% hackbench [kernel.kallsyms] [k] do_raw_spin_lock 4.95% hackbench [kernel.kallsyms] [k] unix_scm_to_skb 4.87% hackbench [kernel.kallsyms] [k] pid_nr_ns 4.34% hackbench [kernel.kallsyms] [k] cred_to_ucred 2.39% hackbench [kernel.kallsyms] [k] unix_destruct_scm 2.24% hackbench [kernel.kallsyms] [k] sub_preempt_count 1.75% hackbench [kernel.kallsyms] [k] fget_light 1.51% hackbench [kernel.kallsyms] [k] __mutex_lock_interruptible_slowpath 1.42% hackbench [kernel.kallsyms] [k] sock_alloc_send_pskb This patch includes SCM_CREDENTIALS information in a af_unix message/skb only if requested by the sender, [man 7 unix for details how to include ancillary data using sendmsg() system call] Note: This might break buggy applications that expected SCM_CREDENTIAL from an unaware write() system call, and receiver not using SO_PASSCRED socket option. If SOCK_PASSCRED is set on source or destination socket, we still include credentials for mere write() syscalls. Performance boost in hackbench : more than 50% gain on a 16 thread machine (2 quad-core cpus, 2 threads per core) hackbench 20 thread 2000 4.228 sec instead of 9.102 sec Signed-off-by: Eric Dumazet Acked-by: Tim Chen Signed-off-by: David S. Miller --- include/net/scm.h | 5 ++--- net/core/scm.c | 10 ++++++---- net/netlink/af_netlink.c | 5 ++--- net/unix/af_unix.c | 24 +++++++++++++++++++++++- 4 files changed, 33 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/include/net/scm.h b/include/net/scm.h index 745460fa2f02..d456f4c71a32 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -49,7 +49,7 @@ static __inline__ void scm_set_cred(struct scm_cookie *scm, struct pid *pid, const struct cred *cred) { scm->pid = get_pid(pid); - scm->cred = get_cred(cred); + scm->cred = cred ? get_cred(cred) : NULL; cred_to_ucred(pid, cred, &scm->creds); } @@ -73,8 +73,7 @@ static __inline__ void scm_destroy(struct scm_cookie *scm) static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { - scm_set_cred(scm, task_tgid(current), current_cred()); - scm->fp = NULL; + memset(scm, 0, sizeof(*scm)); unix_get_peersec_dgram(sock, scm); if (msg->msg_controllen <= 0) return 0; diff --git a/net/core/scm.c b/net/core/scm.c index 811b53fb330e..ff52ad0a5150 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -173,7 +173,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) if (err) goto error; - if (pid_vnr(p->pid) != p->creds.pid) { + if (!p->pid || pid_vnr(p->pid) != p->creds.pid) { struct pid *pid; err = -ESRCH; pid = find_get_pid(p->creds.pid); @@ -183,8 +183,9 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) p->pid = pid; } - if ((p->cred->euid != p->creds.uid) || - (p->cred->egid != p->creds.gid)) { + if (!p->cred || + (p->cred->euid != p->creds.uid) || + (p->cred->egid != p->creds.gid)) { struct cred *cred; err = -ENOMEM; cred = prepare_creds(); @@ -193,7 +194,8 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) cred->uid = cred->euid = p->creds.uid; cred->gid = cred->egid = p->creds.gid; - put_cred(p->cred); + if (p->cred) + put_cred(p->cred); p->cred = cred; } break; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4330db99fabf..1201b6d4183d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1324,10 +1324,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) { + if (NULL == siocb->scm) siocb->scm = &scm; - memset(&scm, 0, sizeof(scm)); - } + err = scm_send(sock, msg, siocb->scm); if (err < 0) return err; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index ec68e1c05b85..466fbcc5cf77 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1381,8 +1381,10 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) { int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); - UNIXCB(skb).cred = get_cred(scm->cred); + if (scm->cred) + UNIXCB(skb).cred = get_cred(scm->cred); UNIXCB(skb).fp = NULL; if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1391,6 +1393,24 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen return err; } +/* + * Some apps rely on write() giving SCM_CREDENTIALS + * We include credentials if source or destination socket + * asserted SOCK_PASSCRED. + */ +static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, + const struct sock *other) +{ + if (UNIXCB(skb).cred) + return; + if (test_bit(SOCK_PASSCRED, &sock->flags) || + !other->sk_socket || + test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) { + UNIXCB(skb).pid = get_pid(task_tgid(current)); + UNIXCB(skb).cred = get_current_cred(); + } +} + /* * Send AF_UNIX data. */ @@ -1538,6 +1558,7 @@ restart: if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; @@ -1652,6 +1673,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, (other->sk_shutdown & RCV_SHUTDOWN)) goto pipe_err_free; + maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); if (max_level > unix_sk(other)->recursion_level) unix_sk(other)->recursion_level = max_level; -- cgit v1.2.3 From 5dd17e08f333cde0fa11000792e33d8d39b5599f Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Tue, 20 Sep 2011 22:36:07 +0000 Subject: net: rps: fix the support for PPPOE The upper protocol numbers of PPPOE are different, and should be treated specially. Signed-off-by: Changli Gao Signed-off-by: David S. Miller --- net/core/dev.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index bf49a47ddfdb..7f4486e127e9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -135,6 +135,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -2556,6 +2557,7 @@ void __skb_get_rxhash(struct sk_buff *skb) again: switch (proto) { case __constant_htons(ETH_P_IP): +ip: if (!pskb_may_pull(skb, sizeof(*ip) + nhoff)) goto done; @@ -2569,6 +2571,7 @@ again: nhoff += ip->ihl * 4; break; case __constant_htons(ETH_P_IPV6): +ipv6: if (!pskb_may_pull(skb, sizeof(*ip6) + nhoff)) goto done; @@ -2591,7 +2594,14 @@ again: proto = *((__be16 *) (skb->data + nhoff + sizeof(struct pppoe_hdr))); nhoff += PPPOE_SES_HLEN; - goto again; + switch (proto) { + case __constant_htons(PPP_IP): + goto ip; + case __constant_htons(PPP_IPV6): + goto ipv6; + default: + goto done; + } default: goto done; } -- cgit v1.2.3 From 09994d1b09bd9b0046a4708fa50d2106610a4058 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 3 Oct 2011 04:42:46 +0000 Subject: RPS: Ensure that an expired hardware filter can be re-added later Amir Vadai wrote: > When a stream is paused, and its rule is expired while it is paused, > no new rule will be configured to the HW when traffic resume. [...] > - When stream was resumed, traffic was steered again by RSS, and > because current-cpu was equal to desired-cpu, ndo_rx_flow_steer > wasn't called and no rule was configured to the HW. Fix this by setting the flow's current CPU only in the table for the newly selected RX queue. Reported-and-tested-by: Amir Vadai Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 7f4486e127e9..70ecb86439ca 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2670,10 +2670,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - u16 tcpu; - - tcpu = rflow->cpu = next_cpu; - if (tcpu != RPS_NO_CPU) { + if (next_cpu != RPS_NO_CPU) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -2701,16 +2698,16 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; old_rflow = rflow; rflow = &flow_table->flows[flow_id]; - rflow->cpu = next_cpu; rflow->filter = rc; if (old_rflow->filter == rflow->filter) old_rflow->filter = RPS_NO_FILTER; out: #endif rflow->last_qtail = - per_cpu(softnet_data, tcpu).input_queue_head; + per_cpu(softnet_data, next_cpu).input_queue_head; } + rflow->cpu = next_cpu; return rflow; } -- cgit v1.2.3 From 8083f0fc969d9b5353061a7a6f963405057e26b1 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 7 Oct 2011 03:30:20 +0000 Subject: net: use sock_valbool_flag to set/clear SOCK_RXQ_OVFL There's no point in open-coding sock_valbool_flag(). Signed-off-by: Johannes Berg Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/core/sock.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/sock.c b/net/core/sock.c index b29ab61b029c..83c462d3f451 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -738,10 +738,7 @@ set_rcvbuf: /* We implement the SO_SNDLOWAT etc to not be settable (1003.1g 5.3) */ case SO_RXQ_OVFL: - if (valbool) - sock_set_flag(sk, SOCK_RXQ_OVFL); - else - sock_reset_flag(sk, SOCK_RXQ_OVFL); + sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool); break; default: ret = -ENOPROTOOPT; -- cgit v1.2.3 From 87fb4b7b533073eeeaed0b6bf7c2328995f6c075 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 13 Oct 2011 07:28:54 +0000 Subject: net: more accurate skb truesize skb truesize currently accounts for sk_buff struct and part of skb head. kmalloc() roundings are also ignored. Considering that skb_shared_info is larger than sk_buff, its time to take it into account for better memory accounting. This patch introduces SKB_TRUESIZE(X) macro to centralize various assumptions into a single place. At skb alloc phase, we put skb_shared_info struct at the exact end of skb head, to allow a better use of memory (lowering number of reallocations), since kmalloc() gives us power-of-two memory blocks. Unless SLUB/SLUB debug is active, both skb->head and skb_shared_info are aligned to cache lines, as before. Note: This patch might trigger performance regressions because of misconfigured protocol stacks, hitting per socket or global memory limits that were previously not reached. But its a necessary step for a more accurate memory accounting. Signed-off-by: Eric Dumazet CC: Andi Kleen CC: Ben Hutchings Signed-off-by: David S. Miller --- include/linux/skbuff.h | 5 +++++ net/core/skbuff.c | 18 ++++++++++++++---- net/core/sock.c | 2 +- net/ipv4/icmp.c | 5 ++--- net/ipv4/tcp_input.c | 14 +++++++------- net/ipv6/icmp.c | 3 +-- net/iucv/af_iucv.c | 2 +- net/sctp/protocol.c | 2 +- 8 files changed, 32 insertions(+), 19 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ac6b05a325cc..64f86951ef74 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -46,6 +46,11 @@ #define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) #define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) +/* return minimum truesize of one skb containing X bytes of data */ +#define SKB_TRUESIZE(X) ((X) + \ + SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + /* A. Checksumming of received packets by device. * * NONE: device failed to checksum this packet. diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5b2c5f1d4dba..a7f855dca922 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -184,11 +184,20 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, goto out; prefetchw(skb); - size = SKB_DATA_ALIGN(size); - data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info), - gfp_mask, node); + /* We do our best to align skb_shared_info on a separate cache + * line. It usually works because kmalloc(X > SMP_CACHE_BYTES) gives + * aligned memory blocks, unless SLUB/SLAB debug is enabled. + * Both skb->head and skb_shared_info are cache line aligned. + */ + size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + data = kmalloc_node_track_caller(size, gfp_mask, node); if (!data) goto nodata; + /* kmalloc(size) might give us more room than requested. + * Put skb_shared_info exactly at the end of allocated zone, + * to allow max possible filling before reallocation. + */ + size = SKB_WITH_OVERHEAD(ksize(data)); prefetchw(data + size); /* @@ -197,7 +206,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, * the tail pointer in struct sk_buff! */ memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->truesize = size + sizeof(struct sk_buff); + /* Account for allocated memory : skb + skb->head */ + skb->truesize = SKB_TRUESIZE(size); atomic_set(&skb->users, 1); skb->head = data; skb->data = data; diff --git a/net/core/sock.c b/net/core/sock.c index 83c462d3f451..5a087626bb3a 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -207,7 +207,7 @@ static struct lock_class_key af_callback_keys[AF_MAX]; * not depend upon such differences. */ #define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD (sizeof(struct sk_buff) + 256) +#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) #define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 23ef31baa1af..ab188ae12fd9 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1152,10 +1152,9 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.icmp_sk[i] = sk; /* Enough space for 2 64K ICMP packets, including - * sk_buff struct overhead. + * sk_buff/skb_shared_info struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); /* * Speedup sock_wfree() diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 81cae641c9a9..c1653fe47255 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -265,8 +265,7 @@ static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) static void tcp_fixup_sndbuf(struct sock *sk) { - int sndmem = tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER + 16 + - sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(tcp_sk(sk)->rx_opt.mss_clamp + MAX_TCP_HEADER); if (sk->sk_sndbuf < 3 * sndmem) { sk->sk_sndbuf = 3 * sndmem; @@ -349,7 +348,7 @@ static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) static void tcp_fixup_rcvbuf(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - int rcvmem = tp->advmss + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); /* Try to select rcvbuf so that 4 mss-sized segments * will fit to window and corresponding skbs will fit to our rcvbuf. @@ -540,8 +539,7 @@ void tcp_rcv_space_adjust(struct sock *sk) space /= tp->advmss; if (!space) space = 1; - rcvmem = (tp->advmss + MAX_TCP_HEADER + - 16 + sizeof(struct sk_buff)); + rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); while (tcp_win_from_space(rcvmem) < tp->advmss) rcvmem += 128; space *= rcvmem; @@ -4950,8 +4948,10 @@ static void tcp_new_space(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tcp_should_expand_sndbuf(sk)) { - int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int sndmem = SKB_TRUESIZE(max_t(u32, + tp->rx_opt.mss_clamp, + tp->mss_cache) + + MAX_TCP_HEADER); int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 2b59154c65d3..90868fb42757 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -835,8 +835,7 @@ static int __net_init icmpv6_sk_init(struct net *net) /* Enough space for 2 64K ICMP packets, including * sk_buff struct overhead. */ - sk->sk_sndbuf = - (2 * ((64 * 1024) + sizeof(struct sk_buff))); + sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024); } return 0; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c39f3a43cd80..274d150320c0 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1819,7 +1819,7 @@ static void iucv_callback_rx(struct iucv_path *path, struct iucv_message *msg) goto save_message; len = atomic_read(&sk->sk_rmem_alloc); - len += iucv_msg_length(msg) + sizeof(struct sk_buff); + len += SKB_TRUESIZE(iucv_msg_length(msg)); if (len > sk->sk_rcvbuf) goto save_message; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 91784f44a2e2..61b9fca5a173 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1299,7 +1299,7 @@ SCTP_STATIC __init int sctp_init(void) max_share = min(4UL*1024*1024, limit); sysctl_sctp_rmem[0] = SK_MEM_QUANTUM; /* give each asoc 1 page min */ - sysctl_sctp_rmem[1] = (1500 *(sizeof(struct sk_buff) + 1)); + sysctl_sctp_rmem[1] = 1500 * SKB_TRUESIZE(1); sysctl_sctp_rmem[2] = max(sysctl_sctp_rmem[1], max_share); sysctl_sctp_wmem[0] = SK_MEM_QUANTUM; -- cgit v1.2.3 From 5f8444a3fa617076f8da51a3e8ecce01a5d7f738 Mon Sep 17 00:00:00 2001 From: Greg Rose Date: Sat, 8 Oct 2011 03:05:24 +0000 Subject: if_link: Add additional parameter to IFLA_VF_INFO for spoof checking Add configuration setting for drivers to turn spoof checking on or off for discrete VFs. v2 - Fix indentation problem, wrap the ifla_vf_info structure in #ifdef __KERNEL__ to prevent user space from accessing and change function paramater for the spoof check setting netdev op from u8 to bool. v3 - Preset spoof check setting to -1 so that user space tools such as ip can detect that the driver didn't report a spoofcheck setting. Prevents incorrect display of spoof check settings for drivers that don't report it. Signed-off-by: Greg Rose Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 10 ++++++++++ include/linux/netdevice.h | 3 +++ net/core/rtnetlink.c | 33 ++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 3 deletions(-) (limited to 'net/core') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index 0ee969a5593d..c52d4b5f872a 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -279,6 +279,7 @@ enum { IFLA_VF_MAC, /* Hardware queue specific attributes */ IFLA_VF_VLAN, IFLA_VF_TX_RATE, /* TX Bandwidth Allocation */ + IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ __IFLA_VF_MAX, }; @@ -300,13 +301,22 @@ struct ifla_vf_tx_rate { __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ }; +struct ifla_vf_spoofchk { + __u32 vf; + __u32 setting; +}; +#ifdef __KERNEL__ + +/* We don't want this structure exposed to user space */ struct ifla_vf_info { __u32 vf; __u8 mac[32]; __u32 vlan; __u32 qos; __u32 tx_rate; + __u32 spoofchk; }; +#endif /* VF ports management section * diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 43b32983ba10..0db1f5f6d4a8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -781,6 +781,7 @@ struct netdev_tc_txq { * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); + * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_port)(struct net_device *dev, int vf, @@ -900,6 +901,8 @@ struct net_device_ops { int queue, u16 vlan, u8 qos); int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate); + int (*ndo_set_vf_spoofchk)(struct net_device *dev, + int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 39f8dd6a2821..9083e82bdae5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -731,7 +731,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev) size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + nla_total_size(sizeof(struct ifla_vf_vlan)) + - nla_total_size(sizeof(struct ifla_vf_tx_rate))); + nla_total_size(sizeof(struct ifla_vf_tx_rate)) + + nla_total_size(sizeof(struct ifla_vf_spoofchk))); return size; } else return 0; @@ -954,13 +955,27 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, struct ifla_vf_mac vf_mac; struct ifla_vf_vlan vf_vlan; struct ifla_vf_tx_rate vf_tx_rate; + struct ifla_vf_spoofchk vf_spoofchk; + + /* + * Not all SR-IOV capable drivers support the + * spoofcheck query. Preset to -1 so the user + * space tool can detect that the driver didn't + * report anything. + */ + ivi.spoofchk = -1; if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi)) break; - vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf; + vf_mac.vf = + vf_vlan.vf = + vf_tx_rate.vf = + vf_spoofchk.vf = ivi.vf; + memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; vf_tx_rate.rate = ivi.tx_rate; + vf_spoofchk.setting = ivi.spoofchk; vf = nla_nest_start(skb, IFLA_VF_INFO); if (!vf) { nla_nest_cancel(skb, vfinfo); @@ -968,7 +983,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, } NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac); NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan); - NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate); + NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), + &vf_tx_rate); + NLA_PUT(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk), + &vf_spoofchk); nla_nest_end(skb, vf); } nla_nest_end(skb, vfinfo); @@ -1202,6 +1220,15 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr) ivt->rate); break; } + case IFLA_VF_SPOOFCHK: { + struct ifla_vf_spoofchk *ivs; + ivs = nla_data(vf); + err = -EOPNOTSUPP; + if (ops->ndo_set_vf_spoofchk) + err = ops->ndo_set_vf_spoofchk(dev, ivs->vf, + ivs->setting); + break; + } default: err = -EINVAL; break; -- cgit v1.2.3 From 6ccc3abdc97e07e6895323fdab89f84e58b40ece Mon Sep 17 00:00:00 2001 From: huajun li Date: Tue, 27 Sep 2011 22:51:39 +0000 Subject: net/flow: Fix potential memory leak While preparing net flow caches, once a fail may cause potential memory leak , fix it. Signed-off-by: Huajun Li Signed-off-by: David S. Miller --- net/core/flow.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/flow.c b/net/core/flow.c index 555a456efb07..8ae42de9c79e 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -413,7 +413,7 @@ static int __init flow_cache_init(struct flow_cache *fc) for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) - return -ENOMEM; + goto err; } fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, @@ -426,6 +426,18 @@ static int __init flow_cache_init(struct flow_cache *fc) add_timer(&fc->rnd_timer); return 0; + +err: + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; + + return -ENOMEM; } static int __init flow_cache_init_global(void) -- cgit v1.2.3 From 2425717b27eb92b175335ca4ff0bb218cbe0cb64 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Mon, 10 Oct 2011 09:16:41 +0000 Subject: net: allow vlan traffic to be received under bond The following configuration used to work as I expected. At least we could use the fcoe interfaces to do MPIO and the bond0 iface to do load balancing or failover. ---eth2.228-fcoe | eth2 -----| | |---- bond0 | eth3 -----| | ---eth3.228-fcoe This worked because of a change we added to allow inactive slaves to rx 'exact' matches. This functionality was kept intact with the rx_handler mechanism. However now the vlan interface attached to the active slave never receives traffic because the bonding rx_handler updates the skb->dev and goto's another_round. Previously, the vlan_do_receive() logic was called before the bonding rx_handler. Now by the time vlan_do_receive calls vlan_find_dev() the skb->dev is set to bond0 and it is clear no vlan is attached to this iface. The vlan lookup fails. This patch moves the VLAN check above the rx_handler. A VLAN tagged frame is now routed to the eth2.228-fcoe iface in the above schematic. Untagged frames continue to the bond0 as normal. This case also remains intact, eth2 --> bond0 --> vlan.228 Here the skb is VLAN tagged but the vlan lookup fails on eth2 causing the bonding rx_handler to be called. On the second pass the vlan lookup is on the bond0 iface and completes as expected. Putting a VLAN.228 on both the bond0 and eth2 device will result in eth2.228 receiving the skb. I don't think this is completely unexpected and was the result prior to the rx_handler result. Note, the same setup is also used for other storage traffic that MPIO is used with eg. iSCSI and similar setups can be contrived without storage protocols. Signed-off-by: John Fastabend Acked-by: Jesse Gross Reviewed-by: Jiri Pirko Tested-by: Hans Schillstrom Signed-off-by: David S. Miller --- net/core/dev.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 70ecb86439ca..8b6118a16b87 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3231,6 +3231,17 @@ another_round: ncls: #endif + if (vlan_tx_tag_present(skb)) { + if (pt_prev) { + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = NULL; + } + if (vlan_do_receive(&skb)) + goto another_round; + else if (unlikely(!skb)) + goto out; + } + rx_handler = rcu_dereference(skb->dev->rx_handler); if (rx_handler) { if (pt_prev) { @@ -3251,17 +3262,6 @@ ncls: } } - if (vlan_tx_tag_present(skb)) { - if (pt_prev) { - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = NULL; - } - if (vlan_do_receive(&skb)) - goto another_round; - else if (unlikely(!skb)) - goto out; - } - /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; -- cgit v1.2.3 From 9e903e085262ffbf1fc44a17ac06058aca03524a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 18 Oct 2011 21:00:24 +0000 Subject: net: add skb frag size accessors To ease skb->truesize sanitization, its better to be able to localize all references to skb frags size. Define accessors : skb_frag_size() to fetch frag size, and skb_frag_size_{set|add|sub}() to manipulate it. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/atm/eni.c | 2 +- drivers/infiniband/hw/amso1100/c2.c | 4 +- drivers/infiniband/hw/nes/nes_nic.c | 10 +-- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ib.c | 18 +++--- drivers/net/ethernet/3com/3c59x.c | 6 +- drivers/net/ethernet/3com/typhoon.c | 6 +- drivers/net/ethernet/adaptec/starfire.c | 8 +-- drivers/net/ethernet/aeroflex/greth.c | 8 +-- drivers/net/ethernet/alteon/acenic.c | 10 +-- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 6 +- drivers/net/ethernet/atheros/atlx/atl1.c | 12 ++-- drivers/net/ethernet/broadcom/bnx2.c | 12 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 14 ++--- drivers/net/ethernet/broadcom/tg3.c | 8 +-- drivers/net/ethernet/brocade/bna/bnad.c | 6 +- drivers/net/ethernet/chelsio/cxgb/sge.c | 10 +-- drivers/net/ethernet/chelsio/cxgb3/sge.c | 12 ++-- drivers/net/ethernet/chelsio/cxgb4/sge.c | 26 ++++---- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 26 ++++---- drivers/net/ethernet/cisco/enic/enic_main.c | 12 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 18 +++--- drivers/net/ethernet/ibm/ehea/ehea_main.c | 4 +- drivers/net/ethernet/ibm/emac/core.c | 2 +- drivers/net/ethernet/ibm/ibmveth.c | 6 +- drivers/net/ethernet/intel/e1000/e1000_main.c | 6 +- drivers/net/ethernet/intel/e1000e/netdev.c | 6 +- drivers/net/ethernet/intel/igb/igb_main.c | 2 +- drivers/net/ethernet/intel/igbvf/netdev.c | 4 +- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 4 +- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 +- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 6 +- drivers/net/ethernet/jme.c | 4 +- drivers/net/ethernet/marvell/mv643xx_eth.c | 9 +-- drivers/net/ethernet/marvell/skge.c | 8 +-- drivers/net/ethernet/marvell/sky2.c | 16 ++--- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 14 ++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 ++-- drivers/net/ethernet/micrel/ksz884x.c | 2 +- drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 14 ++--- drivers/net/ethernet/natsemi/ns83820.c | 4 +- drivers/net/ethernet/neterion/s2io.c | 12 ++-- drivers/net/ethernet/neterion/vxge/vxge-main.c | 12 ++-- drivers/net/ethernet/nvidia/forcedeth.c | 18 +++--- drivers/net/ethernet/pasemi/pasemi_mac.c | 8 +-- .../net/ethernet/qlogic/netxen/netxen_nic_main.c | 6 +- drivers/net/ethernet/qlogic/qla3xxx.c | 6 +- drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 6 +- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 6 +- drivers/net/ethernet/realtek/8139cp.c | 4 +- drivers/net/ethernet/realtek/r8169.c | 4 +- drivers/net/ethernet/sfc/rx.c | 2 +- drivers/net/ethernet/sfc/tx.c | 8 +-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +- drivers/net/ethernet/sun/cassini.c | 8 +-- drivers/net/ethernet/sun/niu.c | 6 +- drivers/net/ethernet/sun/sungem.c | 4 +- drivers/net/ethernet/sun/sunhme.c | 4 +- drivers/net/ethernet/tehuti/tehuti.c | 6 +- drivers/net/ethernet/tile/tilepro.c | 2 +- drivers/net/ethernet/tundra/tsi108_eth.c | 6 +- drivers/net/ethernet/via/via-velocity.c | 6 +- drivers/net/ethernet/xilinx/ll_temac_main.c | 4 +- drivers/net/virtio_net.c | 8 +-- drivers/net/vmxnet3/vmxnet3_drv.c | 12 ++-- drivers/net/xen-netback/netback.c | 4 +- drivers/net/xen-netfront.c | 4 +- drivers/scsi/cxgbi/libcxgbi.c | 10 +-- drivers/scsi/fcoe/fcoe_transport.c | 2 +- drivers/staging/hv/netvsc_drv.c | 4 +- include/linux/skbuff.h | 28 +++++++-- net/appletalk/ddp.c | 5 +- net/core/datagram.c | 16 ++--- net/core/dev.c | 6 +- net/core/pktgen.c | 12 ++-- net/core/skbuff.c | 72 ++++++++++++---------- net/core/user_dma.c | 4 +- net/ipv4/inet_lro.c | 8 +-- net/ipv4/ip_fragment.c | 4 +- net/ipv4/ip_output.c | 6 +- net/ipv4/tcp.c | 9 ++- net/ipv4/tcp_output.c | 8 ++- net/ipv6/ip6_output.c | 5 +- net/ipv6/netfilter/nf_conntrack_reasm.c | 4 +- net/ipv6/reassembly.c | 4 +- net/xfrm/xfrm_ipcomp.c | 2 +- 87 files changed, 387 insertions(+), 357 deletions(-) (limited to 'net/core') diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index f7ca4c13d61d..956e9accb051 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1136,7 +1136,7 @@ DPRINTK("doing direct send\n"); /* @@@ well, this doesn't work anyway */ put_dma(tx->index,eni_dev->dma,&j,(unsigned long) skb_frag_page(&skb_shinfo(skb)->frags[i]) + skb_shinfo(skb)->frags[i].page_offset, - skb_shinfo(skb)->frags[i].size); + skb_frag_size(&skb_shinfo(skb)->frags[i])); } if (skb->len & 3) put_dma(tx->index,eni_dev->dma,&j,zeroes,4-(skb->len & 3)); diff --git a/drivers/infiniband/hw/amso1100/c2.c b/drivers/infiniband/hw/amso1100/c2.c index 6e85a75289e8..5ce7b9e8bff6 100644 --- a/drivers/infiniband/hw/amso1100/c2.c +++ b/drivers/infiniband/hw/amso1100/c2.c @@ -800,8 +800,8 @@ static int c2_xmit_frame(struct sk_buff *skb, struct net_device *netdev) /* Loop thru additional data fragments and queue them */ if (skb_shinfo(skb)->nr_frags) { for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - maplen = frag->size; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + maplen = skb_frag_size(frag); mapaddr = skb_frag_dma_map(&c2dev->pcidev->dev, frag, 0, maplen, DMA_TO_DEVICE); elem = elem->next; diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 7cb7f292dfd1..47b2ee4c01e2 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -444,10 +444,10 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) skb_frag_t *frag = &skb_shinfo(skb)->frags[skb_fragment_index]; bus_address = skb_frag_dma_map(&nesdev->pcidev->dev, - frag, 0, frag->size, + frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_shinfo(skb)->frags[skb_fragment_index].size); + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[skb_fragment_index])); set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), bus_address); wqe_fragment_index++; @@ -565,7 +565,7 @@ tso_sq_no_longer_full: &skb_shinfo(skb)->frags[tso_frag_count]; tso_bus_address[tso_frag_count] = skb_frag_dma_map(&nesdev->pcidev->dev, - frag, 0, frag->size, + frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); } @@ -637,11 +637,11 @@ tso_sq_no_longer_full: } while (wqe_fragment_index < 5) { wqe_fragment_length[wqe_fragment_index] = - cpu_to_le16(skb_shinfo(skb)->frags[tso_frag_index].size); + cpu_to_le16(skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index])); set_wqe_64bit_value(nic_sqe->wqe_words, NES_NIC_SQ_WQE_FRAG0_LOW_IDX+(2*wqe_fragment_index), (u64)tso_bus_address[tso_frag_index]); wqe_fragment_index++; - tso_wqe_length += skb_shinfo(skb)->frags[tso_frag_index++].size; + tso_wqe_length += skb_frag_size(&skb_shinfo(skb)->frags[tso_frag_index++]); if (wqe_fragment_index < 5) wqe_fragment_length[wqe_fragment_index] = 0; if (tso_frag_index == tso_frag_count) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 67a477be237e..c74548a586ea 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -543,7 +543,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, } else { size = min(length, (unsigned) PAGE_SIZE); - frag->size = size; + skb_frag_size_set(frag, size); skb->data_len += size; skb->truesize += size; skb->len += size; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index 00435be4a44b..2b060f45bec3 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -117,7 +117,7 @@ static void ipoib_ud_skb_put_frags(struct ipoib_dev_priv *priv, size = length - IPOIB_UD_HEAD_SIZE; - frag->size = size; + skb_frag_size_set(frag, size); skb->data_len += size; skb->truesize += size; } else @@ -322,10 +322,10 @@ static int ipoib_dma_map_tx(struct ib_device *ca, off = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; mapping[i + off] = ib_dma_map_page(ca, skb_frag_page(frag), - frag->page_offset, frag->size, + frag->page_offset, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(ib_dma_mapping_error(ca, mapping[i + off]))) goto partial_error; @@ -334,8 +334,9 @@ static int ipoib_dma_map_tx(struct ib_device *ca, partial_error: for (; i > 0; --i) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - ib_dma_unmap_page(ca, mapping[i - !off], frag->size, DMA_TO_DEVICE); + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; + + ib_dma_unmap_page(ca, mapping[i - !off], skb_frag_size(frag), DMA_TO_DEVICE); } if (off) @@ -359,8 +360,9 @@ static void ipoib_dma_unmap_tx(struct ib_device *ca, off = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; ++i) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - ib_dma_unmap_page(ca, mapping[i + off], frag->size, + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + ib_dma_unmap_page(ca, mapping[i + off], skb_frag_size(frag), DMA_TO_DEVICE); } } @@ -510,7 +512,7 @@ static inline int post_send(struct ipoib_dev_priv *priv, for (i = 0; i < nr_frags; ++i) { priv->tx_sge[i + off].addr = mapping[i + off]; - priv->tx_sge[i + off].length = frags[i].size; + priv->tx_sge[i + off].length = skb_frag_size(&frags[i]); } priv->tx_wr.num_sge = nr_frags + off; priv->tx_wr.wr_id = wr_id; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 9ca45dcba755..b42c06baba89 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -2182,12 +2182,12 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) cpu_to_le32(pci_map_single( VORTEX_PCI(vp), (void *)skb_frag_address(frag), - frag->size, PCI_DMA_TODEVICE)); + skb_frag_size(frag), PCI_DMA_TODEVICE)); if (i == skb_shinfo(skb)->nr_frags-1) - vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size|LAST_FRAG); + vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(skb_frag_size(frag)|LAST_FRAG); else - vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(frag->size); + vp->tx_ring[entry].frag[i+1].length = cpu_to_le32(skb_frag_size(frag)); } } #else diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index 11f8858c786d..20ea07508ac7 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -810,15 +810,15 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) txd->frag.addrHi = 0; first_txd->numDesc++; - for(i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; void *frag_addr; txd = (struct tx_desc *) (txRing->ringBase + txRing->lastWrite); typhoon_inc_tx_index(&txRing->lastWrite, 1); - len = frag->size; + len = skb_frag_size(frag); frag_addr = skb_frag_address(frag); skb_dma = pci_map_single(tp->tx_pdev, frag_addr, len, PCI_DMA_TODEVICE); diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index d6b015598569..6d9f6911000f 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -1256,12 +1256,12 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev) np->tx_info[entry].mapping = pci_map_single(np->pci_dev, skb->data, skb_first_frag_len(skb), PCI_DMA_TODEVICE); } else { - skb_frag_t *this_frag = &skb_shinfo(skb)->frags[i - 1]; - status |= this_frag->size; + const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[i - 1]; + status |= skb_frag_size(this_frag); np->tx_info[entry].mapping = pci_map_single(np->pci_dev, skb_frag_address(this_frag), - this_frag->size, + skb_frag_size(this_frag), PCI_DMA_TODEVICE); } @@ -1378,7 +1378,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { pci_unmap_single(np->pci_dev, np->tx_info[entry].mapping, - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_TODEVICE); np->dirty_tx++; entry++; diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index 6715bf54f04e..442fefa4f2ca 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -198,7 +198,7 @@ static void greth_clean_rings(struct greth_private *greth) dma_unmap_page(greth->dev, greth_read_bd(&tx_bdp->addr), - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); greth->tx_last = NEXT_TX(greth->tx_last); @@ -517,7 +517,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) status = GRETH_BD_EN; if (skb->ip_summed == CHECKSUM_PARTIAL) status |= GRETH_TXBD_CSALL; - status |= frag->size & GRETH_BD_LEN; + status |= skb_frag_size(frag) & GRETH_BD_LEN; /* Wrap around descriptor ring */ if (curr_tx == GRETH_TXBD_NUM_MASK) @@ -531,7 +531,7 @@ greth_start_xmit_gbit(struct sk_buff *skb, struct net_device *dev) greth_write_bd(&bdp->stat, status); - dma_addr = skb_frag_dma_map(greth->dev, frag, 0, frag->size, + dma_addr = skb_frag_dma_map(greth->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(greth->dev, dma_addr))) @@ -713,7 +713,7 @@ static void greth_clean_tx_gbit(struct net_device *dev) dma_unmap_page(greth->dev, greth_read_bd(&bdp->addr), - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); greth->tx_last = NEXT_TX(greth->tx_last); diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index b1a4e8204437..f872748ab4e6 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2478,18 +2478,18 @@ restart: idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct tx_ring_info *info; - len += frag->size; + len += skb_frag_size(frag); info = ap->skb->tx_skbuff + idx; desc = ap->tx_ring + idx; mapping = skb_frag_dma_map(&ap->pdev->dev, frag, 0, - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); - flagsize = (frag->size << 16); + flagsize = skb_frag_size(frag) << 16; if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); @@ -2508,7 +2508,7 @@ restart: info->skb = NULL; } dma_unmap_addr_set(info, mapping, mapping); - dma_unmap_len_set(info, maplen, frag->size); + dma_unmap_len_set(info, maplen, skb_frag_size(frag)); ace_load_tx_bd(ap, desc, mapping, flagsize, vlan_tag); } } diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index 12a0b30319db..02c7ed8d9eca 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2179,7 +2179,7 @@ static void atl1c_tx_map(struct atl1c_adapter *adapter, memcpy(use_tpd, tpd, sizeof(struct atl1c_tpd_desc)); buffer_info = atl1c_get_tx_buffer(adapter, use_tpd); - buffer_info->length = frag->size; + buffer_info->length = skb_frag_size(frag); buffer_info->dma = skb_frag_dma_map(&adapter->pdev->dev, frag, 0, buffer_info->length, diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index 97c45a4b855a..95483bcac1d0 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1593,7 +1593,7 @@ static u16 atl1e_cal_tdp_req(const struct sk_buff *skb) u16 proto_hdr_len = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - fg_size = skb_shinfo(skb)->frags[i].size; + fg_size = skb_frag_size(&skb_shinfo(skb)->frags[i]); tpd_req += ((fg_size + MAX_TX_BUF_LEN - 1) >> MAX_TX_BUF_SHIFT); } @@ -1744,12 +1744,12 @@ static void atl1e_tx_map(struct atl1e_adapter *adapter, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; u16 i; u16 seg_num; frag = &skb_shinfo(skb)->frags[f]; - buf_len = frag->size; + buf_len = skb_frag_size(frag); seg_num = (buf_len + MAX_TX_BUF_LEN - 1) / MAX_TX_BUF_LEN; for (i = 0; i < seg_num; i++) { diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 7381a49fefb4..0405261efb5c 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2267,11 +2267,11 @@ static void atl1_tx_map(struct atl1_adapter *adapter, struct sk_buff *skb, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; u16 i, nseg; frag = &skb_shinfo(skb)->frags[f]; - buf_len = frag->size; + buf_len = skb_frag_size(frag); nseg = (buf_len + ATL1_MAX_TX_BUF_LEN - 1) / ATL1_MAX_TX_BUF_LEN; @@ -2356,7 +2356,6 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, int count = 1; int ret_val; struct tx_packet_desc *ptpd; - u16 frag_size; u16 vlan_tag; unsigned int nr_frags = 0; unsigned int mss = 0; @@ -2372,10 +2371,9 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) { - frag_size = skb_shinfo(skb)->frags[f].size; - if (frag_size) - count += (frag_size + ATL1_MAX_TX_BUF_LEN - 1) / - ATL1_MAX_TX_BUF_LEN; + unsigned int f_size = skb_frag_size(&skb_shinfo(skb)->frags[f]); + count += (f_size + ATL1_MAX_TX_BUF_LEN - 1) / + ATL1_MAX_TX_BUF_LEN; } mss = skb_shinfo(skb)->gso_size; diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 6ff7636e73a2..965c7235804d 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -2871,7 +2871,7 @@ bnx2_tx_int(struct bnx2 *bp, struct bnx2_napi *bnapi, int budget) dma_unmap_addr( &txr->tx_buf_ring[TX_RING_IDX(sw_cons)], mapping), - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_TODEVICE); } @@ -3049,7 +3049,7 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb, } else { skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; - frag->size -= tail; + skb_frag_size_sub(frag, tail); skb->data_len -= tail; } return 0; @@ -5395,7 +5395,7 @@ bnx2_free_tx_skbs(struct bnx2 *bp) tx_buf = &txr->tx_buf_ring[TX_RING_IDX(j)]; dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping), - skb_shinfo(skb)->frags[k].size, + skb_frag_size(&skb_shinfo(skb)->frags[k]), PCI_DMA_TODEVICE); } dev_kfree_skb(skb); @@ -6530,13 +6530,13 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_buf->is_gso = skb_is_gso(skb); for (i = 0; i < last_frag; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; prod = NEXT_TX_BD(prod); ring_prod = TX_RING_IDX(prod); txbd = &txr->tx_desc_ring[ring_prod]; - len = frag->size; + len = skb_frag_size(frag); mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0, len, DMA_TO_DEVICE); if (dma_mapping_error(&bp->pdev->dev, mapping)) @@ -6594,7 +6594,7 @@ dma_error: ring_prod = TX_RING_IDX(prod); tx_buf = &txr->tx_buf_ring[ring_prod]; dma_unmap_page(&bp->pdev->dev, dma_unmap_addr(tx_buf, mapping), - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_TODEVICE); } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index e575e89c7d46..dd8ee56396b2 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -2363,7 +2363,7 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, /* Calculate the first sum - it's special */ for (frag_idx = 0; frag_idx < wnd_size - 1; frag_idx++) wnd_sum += - skb_shinfo(skb)->frags[frag_idx].size; + skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]); /* If there was data on linear skb data - check it */ if (first_bd_sz > 0) { @@ -2379,14 +2379,14 @@ static int bnx2x_pkt_req_lin(struct bnx2x *bp, struct sk_buff *skb, check all windows */ for (wnd_idx = 0; wnd_idx <= num_wnds; wnd_idx++) { wnd_sum += - skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1].size; + skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx + wnd_size - 1]); if (unlikely(wnd_sum < lso_mss)) { to_copy = 1; break; } wnd_sum -= - skb_shinfo(skb)->frags[wnd_idx].size; + skb_frag_size(&skb_shinfo(skb)->frags[wnd_idx]); } } else { /* in non-LSO too fragmented packet should always @@ -2796,8 +2796,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0, frag->size, - DMA_TO_DEVICE); + mapping = skb_frag_dma_map(&bp->pdev->dev, frag, 0, + skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&bp->pdev->dev, mapping))) { DP(NETIF_MSG_TX_QUEUED, "Unable to map page - " @@ -2821,8 +2821,8 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_data_bd->addr_hi = cpu_to_le32(U64_HI(mapping)); tx_data_bd->addr_lo = cpu_to_le32(U64_LO(mapping)); - tx_data_bd->nbytes = cpu_to_le16(frag->size); - le16_add_cpu(&pkt_size, frag->size); + tx_data_bd->nbytes = cpu_to_le16(skb_frag_size(frag)); + le16_add_cpu(&pkt_size, skb_frag_size(frag)); nbd++; DP(NETIF_MSG_TX_QUEUED, diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index fe712f955110..b89027c61937 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -5356,7 +5356,7 @@ static void tg3_tx(struct tg3_napi *tnapi) pci_unmap_page(tp->pdev, dma_unmap_addr(ri, mapping), - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_TODEVICE); while (ri->fragmented) { @@ -6510,14 +6510,14 @@ static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last) } for (i = 0; i < last; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; entry = NEXT_TX(entry); txb = &tnapi->tx_buffers[entry]; pci_unmap_page(tnapi->tp->pdev, dma_unmap_addr(txb, mapping), - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); while (txb->fragmented) { txb->fragmented = false; @@ -6777,7 +6777,7 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i <= last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - len = frag->size; + len = skb_frag_size(frag); mapping = skb_frag_dma_map(&tp->pdev->dev, frag, 0, len, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 2f4ced66612a..5d7872ecff52 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -116,7 +116,7 @@ bnad_pci_unmap_skb(struct device *pdev, struct bnad_skb_unmap *array, for (j = 0; j < frag; j++) { dma_unmap_page(pdev, dma_unmap_addr(&array[index], dma_addr), - skb_shinfo(skb)->frags[j].size, DMA_TO_DEVICE); + skb_frag_size(&skb_shinfo(skb)->frags[j]), DMA_TO_DEVICE); dma_unmap_addr_set(&array[index], dma_addr, 0); BNA_QE_INDX_ADD(index, 1, depth); } @@ -2741,8 +2741,8 @@ bnad_start_xmit(struct sk_buff *skb, struct net_device *netdev) wis_used = 1; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; - u16 size = frag->size; + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + u16 size = skb_frag_size(frag); if (unlikely(size == 0)) { unmap_prod = unmap_q->producer_index; diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index 0a511c4a0472..f9b602300040 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1135,8 +1135,8 @@ static inline unsigned int compute_large_page_tx_descs(struct sk_buff *skb) len -= SGE_TX_DESC_MAX_PLEN; } for (i = 0; nfrags--; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - len = frag->size; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + len = skb_frag_size(frag); while (len > SGE_TX_DESC_MAX_PLEN) { count++; len -= SGE_TX_DESC_MAX_PLEN; @@ -1278,9 +1278,9 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, } mapping = skb_frag_dma_map(&adapter->pdev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); desc_mapping = mapping; - desc_len = frag->size; + desc_len = skb_frag_size(frag); pidx = write_large_page_tx_descs(pidx, &e1, &ce, &gen, &desc_mapping, &desc_len, @@ -1290,7 +1290,7 @@ static inline void write_tx_descs(struct adapter *adapter, struct sk_buff *skb, nfrags == 0); ce->skb = NULL; dma_unmap_addr_set(ce, dma_addr, mapping); - dma_unmap_len_set(ce, dma_len, frag->size); + dma_unmap_len_set(ce, dma_len, skb_frag_size(frag)); } ce->skb = skb; wmb(); diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 2f46b37e5d16..cfb60e1f51da 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -254,7 +254,7 @@ static inline void unmap_skb(struct sk_buff *skb, struct sge_txq *q, while (frag_idx < nfrags && curflit < WR_FLITS) { pci_unmap_page(pdev, be64_to_cpu(sgp->addr[j]), - skb_shinfo(skb)->frags[frag_idx].size, + skb_frag_size(&skb_shinfo(skb)->frags[frag_idx]), PCI_DMA_TODEVICE); j ^= 1; if (j == 0) { @@ -977,11 +977,11 @@ static inline unsigned int make_sgl(const struct sk_buff *skb, nfrags = skb_shinfo(skb)->nr_frags; for (i = 0; i < nfrags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - mapping = skb_frag_dma_map(&pdev->dev, frag, 0, frag->size, + mapping = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); - sgp->len[j] = cpu_to_be32(frag->size); + sgp->len[j] = cpu_to_be32(skb_frag_size(frag)); sgp->addr[j] = cpu_to_be64(mapping); j ^= 1; if (j == 0) @@ -1544,7 +1544,7 @@ static void deferred_unmap_destructor(struct sk_buff *skb) si = skb_shinfo(skb); for (i = 0; i < si->nr_frags; i++) - pci_unmap_page(dui->pdev, *p++, si->frags[i].size, + pci_unmap_page(dui->pdev, *p++, skb_frag_size(&si->frags[i]), PCI_DMA_TODEVICE); } @@ -2118,7 +2118,7 @@ static void lro_add_page(struct adapter *adap, struct sge_qset *qs, rx_frag += nr_frags; __skb_frag_set_page(rx_frag, sd->pg_chunk.page); rx_frag->page_offset = sd->pg_chunk.offset + offset; - rx_frag->size = len; + skb_frag_size_set(rx_frag, len); skb->len += len; skb->data_len += len; diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index 56adf448b9fe..14f31d3a18d7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -215,8 +215,8 @@ static int map_skb(struct device *dev, const struct sk_buff *skb, end = &si->frags[si->nr_frags]; for (fp = si->frags; fp < end; fp++) { - *++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size, - DMA_TO_DEVICE); + *++addr = dma_map_page(dev, fp->page, fp->page_offset, + skb_frag_size(fp), DMA_TO_DEVICE); if (dma_mapping_error(dev, *addr)) goto unwind; } @@ -224,7 +224,7 @@ static int map_skb(struct device *dev, const struct sk_buff *skb, unwind: while (fp-- > si->frags) - dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE); + dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE); out_err: @@ -243,7 +243,7 @@ static void unmap_skb(struct device *dev, const struct sk_buff *skb, si = skb_shinfo(skb); end = &si->frags[si->nr_frags]; for (fp = si->frags; fp < end; fp++) - dma_unmap_page(dev, *addr++, fp->size, DMA_TO_DEVICE); + dma_unmap_page(dev, *addr++, skb_frag_size(fp), DMA_TO_DEVICE); } /** @@ -717,7 +717,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, sgl->addr0 = cpu_to_be64(addr[0] + start); nfrags++; } else { - sgl->len0 = htonl(si->frags[0].size); + sgl->len0 = htonl(skb_frag_size(&si->frags[0])); sgl->addr0 = cpu_to_be64(addr[1]); } @@ -732,13 +732,13 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q, to = (u8 *)end > (u8 *)q->stat ? buf : sgl->sge; for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) { - to->len[0] = cpu_to_be32(si->frags[i].size); - to->len[1] = cpu_to_be32(si->frags[++i].size); + to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); + to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i])); to->addr[0] = cpu_to_be64(addr[i]); to->addr[1] = cpu_to_be64(addr[++i]); } if (nfrags) { - to->len[0] = cpu_to_be32(si->frags[i].size); + to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); to->len[1] = cpu_to_be32(0); to->addr[0] = cpu_to_be64(addr[i + 1]); } @@ -1417,7 +1417,7 @@ static inline void copy_frags(struct skb_shared_info *ssi, /* usually there's just one frag */ ssi->frags[0].page = gl->frags[0].page; ssi->frags[0].page_offset = gl->frags[0].page_offset + offset; - ssi->frags[0].size = gl->frags[0].size - offset; + skb_frag_size_set(&ssi->frags[0], skb_frag_size(&gl->frags[0]) - offset); ssi->nr_frags = gl->nfrags; n = gl->nfrags - 1; if (n) @@ -1718,8 +1718,8 @@ static int process_responses(struct sge_rspq *q, int budget) bufsz = get_buf_size(rsd); fp->page = rsd->page; fp->page_offset = q->offset; - fp->size = min(bufsz, len); - len -= fp->size; + skb_frag_size_set(fp, min(bufsz, len)); + len -= skb_frag_size(fp); if (!len) break; unmap_rx_buf(q->adap, &rxq->fl); @@ -1731,7 +1731,7 @@ static int process_responses(struct sge_rspq *q, int budget) */ dma_sync_single_for_cpu(q->adap->pdev_dev, get_buf_addr(rsd), - fp->size, DMA_FROM_DEVICE); + skb_frag_size(fp), DMA_FROM_DEVICE); si.va = page_address(si.frags[0].page) + si.frags[0].page_offset; @@ -1740,7 +1740,7 @@ static int process_responses(struct sge_rspq *q, int budget) si.nfrags = frags + 1; ret = q->handler(q, q->cur_desc, &si); if (likely(ret == 0)) - q->offset += ALIGN(fp->size, FL_ALIGN); + q->offset += ALIGN(skb_frag_size(fp), FL_ALIGN); else restore_rx_bufs(&si, &rxq->fl, frags); } else if (likely(rsp_type == RSP_TYPE_CPL)) { diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index cffb328c46c3..c2d456d90c00 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -296,8 +296,8 @@ static int map_skb(struct device *dev, const struct sk_buff *skb, si = skb_shinfo(skb); end = &si->frags[si->nr_frags]; for (fp = si->frags; fp < end; fp++) { - *++addr = dma_map_page(dev, fp->page, fp->page_offset, fp->size, - DMA_TO_DEVICE); + *++addr = dma_map_page(dev, fp->page, fp->page_offset, + skb_frag_size(fp), DMA_TO_DEVICE); if (dma_mapping_error(dev, *addr)) goto unwind; } @@ -305,7 +305,7 @@ static int map_skb(struct device *dev, const struct sk_buff *skb, unwind: while (fp-- > si->frags) - dma_unmap_page(dev, *--addr, fp->size, DMA_TO_DEVICE); + dma_unmap_page(dev, *--addr, skb_frag_size(fp), DMA_TO_DEVICE); dma_unmap_single(dev, addr[-1], skb_headlen(skb), DMA_TO_DEVICE); out_err: @@ -899,7 +899,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq, sgl->addr0 = cpu_to_be64(addr[0] + start); nfrags++; } else { - sgl->len0 = htonl(si->frags[0].size); + sgl->len0 = htonl(skb_frag_size(&si->frags[0])); sgl->addr0 = cpu_to_be64(addr[1]); } @@ -915,13 +915,13 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *tq, to = (u8 *)end > (u8 *)tq->stat ? buf : sgl->sge; for (i = (nfrags != si->nr_frags); nfrags >= 2; nfrags -= 2, to++) { - to->len[0] = cpu_to_be32(si->frags[i].size); - to->len[1] = cpu_to_be32(si->frags[++i].size); + to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); + to->len[1] = cpu_to_be32(skb_frag_size(&si->frags[++i])); to->addr[0] = cpu_to_be64(addr[i]); to->addr[1] = cpu_to_be64(addr[++i]); } if (nfrags) { - to->len[0] = cpu_to_be32(si->frags[i].size); + to->len[0] = cpu_to_be32(skb_frag_size(&si->frags[i])); to->len[1] = cpu_to_be32(0); to->addr[0] = cpu_to_be64(addr[i + 1]); } @@ -1399,7 +1399,7 @@ struct sk_buff *t4vf_pktgl_to_skb(const struct pkt_gl *gl, ssi = skb_shinfo(skb); ssi->frags[0].page = gl->frags[0].page; ssi->frags[0].page_offset = gl->frags[0].page_offset + pull_len; - ssi->frags[0].size = gl->frags[0].size - pull_len; + skb_frag_size_set(&ssi->frags[0], skb_frag_size(&gl->frags[0]) - pull_len); if (gl->nfrags > 1) memcpy(&ssi->frags[1], &gl->frags[1], (gl->nfrags-1) * sizeof(skb_frag_t)); @@ -1451,7 +1451,7 @@ static inline void copy_frags(struct skb_shared_info *si, /* usually there's just one frag */ si->frags[0].page = gl->frags[0].page; si->frags[0].page_offset = gl->frags[0].page_offset + offset; - si->frags[0].size = gl->frags[0].size - offset; + skb_frag_size_set(&si->frags[0], skb_frag_size(&gl->frags[0]) - offset); si->nr_frags = gl->nfrags; n = gl->nfrags - 1; @@ -1702,8 +1702,8 @@ int process_responses(struct sge_rspq *rspq, int budget) bufsz = get_buf_size(sdesc); fp->page = sdesc->page; fp->page_offset = rspq->offset; - fp->size = min(bufsz, len); - len -= fp->size; + skb_frag_size_set(fp, min(bufsz, len)); + len -= skb_frag_size(fp); if (!len) break; unmap_rx_buf(rspq->adapter, &rxq->fl); @@ -1717,7 +1717,7 @@ int process_responses(struct sge_rspq *rspq, int budget) */ dma_sync_single_for_cpu(rspq->adapter->pdev_dev, get_buf_addr(sdesc), - fp->size, DMA_FROM_DEVICE); + skb_frag_size(fp), DMA_FROM_DEVICE); gl.va = (page_address(gl.frags[0].page) + gl.frags[0].page_offset); prefetch(gl.va); @@ -1728,7 +1728,7 @@ int process_responses(struct sge_rspq *rspq, int budget) */ ret = rspq->handler(rspq, rspq->cur_desc, &gl); if (likely(ret == 0)) - rspq->offset += ALIGN(fp->size, FL_ALIGN); + rspq->offset += ALIGN(skb_frag_size(fp), FL_ALIGN); else restore_rx_bufs(&gl, &rxq->fl, frag); } else if (likely(rsp_type == RSP_TYPE_CPL)) { diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 1bc908f595de..c3786fda11db 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -599,16 +599,16 @@ static inline void enic_queue_wq_skb_cont(struct enic *enic, struct vnic_wq *wq, struct sk_buff *skb, unsigned int len_left, int loopback) { - skb_frag_t *frag; + const skb_frag_t *frag; /* Queue additional data fragments */ for (frag = skb_shinfo(skb)->frags; len_left; frag++) { - len_left -= frag->size; + len_left -= skb_frag_size(frag); enic_queue_wq_desc_cont(wq, skb, skb_frag_dma_map(&enic->pdev->dev, - frag, 0, frag->size, + frag, 0, skb_frag_size(frag), DMA_TO_DEVICE), - frag->size, + skb_frag_size(frag), (len_left == 0), /* EOP? */ loopback); } @@ -717,8 +717,8 @@ static inline void enic_queue_wq_skb_tso(struct enic *enic, * for additional data fragments */ for (frag = skb_shinfo(skb)->frags; len_left; frag++) { - len_left -= frag->size; - frag_len_left = frag->size; + len_left -= skb_frag_size(frag); + frag_len_left = skb_frag_size(frag); offset = 0; while (frag_len_left) { diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 679b8041e43a..706fc5989939 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -636,17 +636,17 @@ static int make_tx_wrbs(struct be_adapter *adapter, struct be_queue_info *txq, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - struct skb_frag_struct *frag = + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; busaddr = skb_frag_dma_map(dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(dev, busaddr)) goto dma_err; wrb = queue_head_node(txq); - wrb_fill(wrb, busaddr, frag->size); + wrb_fill(wrb, busaddr, skb_frag_size(frag)); be_dws_cpu_to_le(wrb, sizeof(*wrb)); queue_head_inc(txq); - copied += frag->size; + copied += skb_frag_size(frag); } if (dummy_wrb) { @@ -1069,7 +1069,7 @@ static void skb_fill_rx_data(struct be_adapter *adapter, struct be_rx_obj *rxo, skb_frag_set_page(skb, 0, page_info->page); skb_shinfo(skb)->frags[0].page_offset = page_info->page_offset + hdr_len; - skb_shinfo(skb)->frags[0].size = curr_frag_len - hdr_len; + skb_frag_size_set(&skb_shinfo(skb)->frags[0], curr_frag_len - hdr_len); skb->data_len = curr_frag_len - hdr_len; skb->truesize += rx_frag_size; skb->tail += hdr_len; @@ -1095,13 +1095,13 @@ static void skb_fill_rx_data(struct be_adapter *adapter, struct be_rx_obj *rxo, skb_frag_set_page(skb, j, page_info->page); skb_shinfo(skb)->frags[j].page_offset = page_info->page_offset; - skb_shinfo(skb)->frags[j].size = 0; + skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); skb_shinfo(skb)->nr_frags++; } else { put_page(page_info->page); } - skb_shinfo(skb)->frags[j].size += curr_frag_len; + skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); skb->len += curr_frag_len; skb->data_len += curr_frag_len; skb->truesize += rx_frag_size; @@ -1176,11 +1176,11 @@ static void be_rx_compl_process_gro(struct be_adapter *adapter, skb_frag_set_page(skb, j, page_info->page); skb_shinfo(skb)->frags[j].page_offset = page_info->page_offset; - skb_shinfo(skb)->frags[j].size = 0; + skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0); } else { put_page(page_info->page); } - skb_shinfo(skb)->frags[j].size += curr_frag_len; + skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len); skb->truesize += rx_frag_size; remaining -= curr_frag_len; index_inc(&rxcp->rxq_idx, rxq->len); diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index adb462d0b8d3..0d4d4f68d4ed 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -1676,7 +1676,7 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, /* copy sg1entry data */ sg1entry->l_key = lkey; - sg1entry->len = frag->size; + sg1entry->len = skb_frag_size(frag); sg1entry->vaddr = ehea_map_vaddr(skb_frag_address(frag)); swqe->descriptors++; @@ -1689,7 +1689,7 @@ static inline void write_swqe2_data(struct sk_buff *skb, struct net_device *dev, sgentry = &sg_list[i - sg1entry_contains_frag_data]; sgentry->l_key = lkey; - sgentry->len = frag->size; + sgentry->len = frag_size(frag); sgentry->vaddr = ehea_map_vaddr(skb_frag_address(frag)); swqe->descriptors++; } diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 6b3a033d9de5..ed79b2d3ad3e 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -1453,7 +1453,7 @@ static int emac_start_xmit_sg(struct sk_buff *skb, struct net_device *ndev) /* skb fragments */ for (i = 0; i < nr_frags; ++i) { struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; - len = frag->size; + len = skb_frag_size(frag); if (unlikely(dev->tx_cnt + mal_tx_chunks(len) >= NUM_TX_BUFF)) goto undo_frame; diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c index 4da972eaabb4..b1cd41b9c61c 100644 --- a/drivers/net/ethernet/ibm/ibmveth.c +++ b/drivers/net/ethernet/ibm/ibmveth.c @@ -1014,15 +1014,15 @@ retry_bounce: /* Map the frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; dma_addr = skb_frag_dma_map(&adapter->vdev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(&adapter->vdev->dev, dma_addr)) goto map_failed_frags; - descs[i+1].fields.flags_len = desc_flags | frag->size; + descs[i+1].fields.flags_len = desc_flags | skb_frag_size(frag); descs[i+1].fields.address = dma_addr; } diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 7b54d7246150..cf480b554622 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -2894,10 +2894,10 @@ static int e1000_tx_map(struct e1000_adapter *adapter, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; - len = frag->size; + len = skb_frag_size(frag); offset = 0; while (len) { @@ -3183,7 +3183,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, + count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]), max_txd_pwr); if (adapter->pcix_82544) count += nr_frags; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 035ce73c388e..680312710a78 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -4673,10 +4673,10 @@ static int e1000_tx_map(struct e1000_adapter *adapter, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; - len = frag->size; + len = skb_frag_size(frag); offset = 0; while (len) { @@ -4943,7 +4943,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, nr_frags = skb_shinfo(skb)->nr_frags; for (f = 0; f < nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size, + count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f]), max_txd_pwr); if (adapter->hw.mac.tx_pkt_filtering) diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 837adbbce772..f9b818267de8 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4268,7 +4268,7 @@ static void igb_tx_map(struct igb_ring *tx_ring, i = 0; } - size = frag->size; + size = skb_frag_size(frag); data_len -= size; dma = skb_frag_dma_map(tx_ring->dev, frag, 0, diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 23cc40f22d6f..1bd9abddcc59 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2045,7 +2045,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; count++; i++; @@ -2053,7 +2053,7 @@ static inline int igbvf_tx_map_adv(struct igbvf_adapter *adapter, i = 0; frag = &skb_shinfo(skb)->frags[f]; - len = frag->size; + len = skb_frag_size(frag); buffer_info = &tx_ring->buffer_info[i]; BUG_ON(len >= IGBVF_MAX_DATA_PER_TXD); diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 88558b1aac07..e21148f8b160 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1383,10 +1383,10 @@ ixgb_tx_map(struct ixgb_adapter *adapter, struct sk_buff *skb, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; - len = frag->size; + len = skb_frag_size(frag); offset = 0; while (len) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 8075d11b4cde..09b8e88b2999 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -6545,9 +6545,9 @@ static void ixgbe_tx_map(struct ixgbe_ring *tx_ring, frag = &skb_shinfo(skb)->frags[f]; #ifdef IXGBE_FCOE - size = min_t(unsigned int, data_len, frag->size); + size = min_t(unsigned int, data_len, skb_frag_size(frag)); #else - size = frag->size; + size = skb_frag_size(frag); #endif data_len -= size; f++; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 4930c4605493..5e92cc2079bd 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2912,10 +2912,10 @@ static int ixgbevf_tx_map(struct ixgbevf_adapter *adapter, } for (f = 0; f < nr_frags; f++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[f]; - len = min((unsigned int)frag->size, total); + len = min((unsigned int)skb_frag_size(frag), total); offset = 0; while (len) { @@ -3096,7 +3096,7 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) count += TXD_USE_COUNT(skb_headlen(skb)); for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) - count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); + count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->frags[f])); if (ixgbevf_maybe_stop_tx(netdev, tx_ring, count)) { adapter->tx_busy++; diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 48a0a23f342f..7a0c746f2749 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -1920,7 +1920,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) u8 hidma = jme->dev->features & NETIF_F_HIGHDMA; int i, nr_frags = skb_shinfo(skb)->nr_frags; int mask = jme->tx_ring_mask; - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; u32 len; for (i = 0 ; i < nr_frags ; ++i) { @@ -1930,7 +1930,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx) jme_fill_tx_map(jme->pdev, ctxdesc, ctxbi, skb_frag_page(frag), - frag->page_offset, frag->size, hidma); + frag->page_offset, skb_frag_size(frag), hidma); } len = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c index f6821aa5ffbf..194a03113802 100644 --- a/drivers/net/ethernet/marvell/mv643xx_eth.c +++ b/drivers/net/ethernet/marvell/mv643xx_eth.c @@ -713,8 +713,9 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb) int frag; for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { - skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; - if (fragp->size <= 8 && fragp->page_offset & 7) + const skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; + + if (skb_frag_size(fragp) <= 8 && fragp->page_offset & 7) return 1; } @@ -751,10 +752,10 @@ static void txq_submit_frag_skb(struct tx_queue *txq, struct sk_buff *skb) } desc->l4i_chk = 0; - desc->byte_cnt = this_frag->size; + desc->byte_cnt = skb_frag_size(this_frag); desc->buf_ptr = skb_frag_dma_map(mp->dev->dev.parent, this_frag, 0, - this_frag->size, + skb_frag_size(this_frag), DMA_TO_DEVICE); } } diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c index 297730359b79..c7b60839ac99 100644 --- a/drivers/net/ethernet/marvell/skge.c +++ b/drivers/net/ethernet/marvell/skge.c @@ -2770,10 +2770,10 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, control |= BMU_STFWD; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; map = skb_frag_dma_map(&hw->pdev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); e = e->next; e->skb = skb; @@ -2783,9 +2783,9 @@ static netdev_tx_t skge_xmit_frame(struct sk_buff *skb, tf->dma_lo = map; tf->dma_hi = (u64) map >> 32; dma_unmap_addr_set(e, mapaddr, map); - dma_unmap_len_set(e, maplen, frag->size); + dma_unmap_len_set(e, maplen, skb_frag_size(frag)); - tf->control = BMU_OWN | BMU_SW | control | frag->size; + tf->control = BMU_OWN | BMU_SW | control | skb_frag_size(frag); } tf->control |= BMU_EOF | BMU_IRQ_EOF; } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 92634907bf8d..7b083c438a14 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1225,10 +1225,10 @@ static int sky2_rx_map_skb(struct pci_dev *pdev, struct rx_ring_info *re, dma_unmap_len_set(re, data_size, size); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; re->frag_addr[i] = skb_frag_dma_map(&pdev->dev, frag, 0, - frag->size, + skb_frag_size(frag), DMA_FROM_DEVICE); if (dma_mapping_error(&pdev->dev, re->frag_addr[i])) @@ -1239,7 +1239,7 @@ static int sky2_rx_map_skb(struct pci_dev *pdev, struct rx_ring_info *re, map_page_error: while (--i >= 0) { pci_unmap_page(pdev, re->frag_addr[i], - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_FROMDEVICE); } @@ -1263,7 +1263,7 @@ static void sky2_rx_unmap_skb(struct pci_dev *pdev, struct rx_ring_info *re) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) pci_unmap_page(pdev, re->frag_addr[i], - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), PCI_DMA_FROMDEVICE); } @@ -1936,7 +1936,7 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; mapping = skb_frag_dma_map(&hw->pdev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(&hw->pdev->dev, mapping)) goto mapping_unwind; @@ -1952,11 +1952,11 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, re = sky2->tx_ring + slot; re->flags = TX_MAP_PAGE; dma_unmap_addr_set(re, mapaddr, mapping); - dma_unmap_len_set(re, maplen, frag->size); + dma_unmap_len_set(re, maplen, skb_frag_size(frag)); le = get_tx_le(sky2, &slot); le->addr = cpu_to_le32(lower_32_bits(mapping)); - le->length = cpu_to_le16(frag->size); + le->length = cpu_to_le16(skb_frag_size(frag)); le->ctrl = ctrl; le->opcode = OP_BUFFER | HW_OWNER; } @@ -2484,7 +2484,7 @@ static void skb_put_frags(struct sk_buff *skb, unsigned int hdr_space, } else { size = min(length, (unsigned) PAGE_SIZE); - frag->size = size; + skb_frag_size_set(frag, size); skb->data_len += size; skb->truesize += PAGE_SIZE; skb->len += size; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 37cc9e5c56be..46a0df9afc3c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -135,7 +135,7 @@ static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv, /* Set size and memtype fields */ for (i = 0; i < priv->num_frags; i++) { - skb_frags[i].size = priv->frag_info[i].frag_size; + skb_frag_size_set(&skb_frags[i], priv->frag_info[i].frag_size); rx_desc->data[i].byte_count = cpu_to_be32(priv->frag_info[i].frag_size); rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key); @@ -194,7 +194,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv, dma = be64_to_cpu(rx_desc->data[nr].addr); en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma); - pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size, + pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags[nr]), PCI_DMA_FROMDEVICE); put_page(skb_frags[nr].page); } @@ -421,7 +421,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, /* Save page reference in skb */ skb_frags_rx[nr].page = skb_frags[nr].page; - skb_frags_rx[nr].size = skb_frags[nr].size; + skb_frag_size_set(&skb_frags_rx[nr], skb_frag_size(&skb_frags[nr])); skb_frags_rx[nr].page_offset = skb_frags[nr].page_offset; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -430,13 +430,13 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, goto fail; /* Unmap buffer */ - pci_unmap_single(mdev->pdev, dma, skb_frags_rx[nr].size, + pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]), PCI_DMA_FROMDEVICE); } /* Adjust size of last fragment to match actual length */ if (nr > 0) - skb_frags_rx[nr - 1].size = length - - priv->frag_info[nr - 1].frag_prefix_size; + skb_frag_size_set(&skb_frags_rx[nr - 1], + length - priv->frag_info[nr - 1].frag_prefix_size); return nr; fail: @@ -506,7 +506,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, skb_shinfo(skb)->frags[0].page_offset += HEADER_COPY_SIZE; /* Adjust size of first fragment */ - skb_shinfo(skb)->frags[0].size -= HEADER_COPY_SIZE; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], HEADER_COPY_SIZE); skb->data_len = length - HEADER_COPY_SIZE; } return skb; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 6e03de034ac7..2a192c2f207d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -226,7 +226,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, frag = &skb_shinfo(skb)->frags[i]; pci_unmap_page(mdev->pdev, (dma_addr_t) be64_to_cpu(data[i].addr), - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); } } /* Stamp the freed descriptor */ @@ -256,7 +256,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, frag = &skb_shinfo(skb)->frags[i]; pci_unmap_page(mdev->pdev, (dma_addr_t) be64_to_cpu(data->addr), - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); ++data; } } @@ -550,7 +550,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk skb_copy_from_linear_data(skb, inl + 1, skb_headlen(skb)); if (skb_shinfo(skb)->nr_frags) memcpy(((void *)(inl + 1)) + skb_headlen(skb), fragptr, - skb_shinfo(skb)->frags[0].size); + skb_frag_size(&skb_shinfo(skb)->frags[0])); } else { inl->byte_count = cpu_to_be32(1 << 31 | spc); @@ -570,7 +570,7 @@ static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, struct sk_buff *sk skb_headlen(skb) - spc); if (skb_shinfo(skb)->nr_frags) memcpy(((void *)(inl + 1)) + skb_headlen(skb) - spc, - fragptr, skb_shinfo(skb)->frags[0].size); + fragptr, skb_frag_size(&skb_shinfo(skb)->frags[0])); } wmb(); @@ -757,11 +757,11 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) { frag = &skb_shinfo(skb)->frags[i]; dma = pci_map_page(mdev->dev->pdev, frag->page, frag->page_offset, - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); data->addr = cpu_to_be64(dma); data->lkey = cpu_to_be32(mdev->mr.key); wmb(); - data->byte_count = cpu_to_be32(frag->size); + data->byte_count = cpu_to_be32(skb_frag_size(frag)); --data; } diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c index 710c4aead146..7ece990381c8 100644 --- a/drivers/net/ethernet/micrel/ksz884x.c +++ b/drivers/net/ethernet/micrel/ksz884x.c @@ -4700,7 +4700,7 @@ static void send_packet(struct sk_buff *skb, struct net_device *dev) ++hw->tx_int_cnt; dma_buf = DMA_BUFFER(desc); - dma_buf->len = this_frag->size; + dma_buf->len = skb_frag_size(this_frag); dma_buf->dma = pci_map_single( hw_priv->pdev, diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c index 26637279cd67..c970a48436dc 100644 --- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c +++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c @@ -1216,7 +1216,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va, skb_frags = skb_shinfo(skb)->frags; while (len > 0) { memcpy(skb_frags, rx_frags, sizeof(*skb_frags)); - len -= rx_frags->size; + len -= skb_frag_size(rx_frags); skb_frags++; rx_frags++; skb_shinfo(skb)->nr_frags++; @@ -1228,7 +1228,7 @@ myri10ge_rx_skb_build(struct sk_buff *skb, u8 * va, * manually */ skb_copy_to_linear_data(skb, va, hlen); skb_shinfo(skb)->frags[0].page_offset += hlen; - skb_shinfo(skb)->frags[0].size -= hlen; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hlen); skb->data_len -= hlen; skb->tail += hlen; skb_pull(skb, MXGEFW_PAD); @@ -1345,9 +1345,9 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, __skb_frag_set_page(&rx_frags[i], rx->info[idx].page); rx_frags[i].page_offset = rx->info[idx].page_offset; if (remainder < MYRI10GE_ALLOC_SIZE) - rx_frags[i].size = remainder; + skb_frag_size_set(&rx_frags[i], remainder); else - rx_frags[i].size = MYRI10GE_ALLOC_SIZE; + skb_frag_size_set(&rx_frags[i], MYRI10GE_ALLOC_SIZE); rx->cnt++; idx = rx->cnt & rx->mask; remainder -= MYRI10GE_ALLOC_SIZE; @@ -1355,7 +1355,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, if (lro_enabled) { rx_frags[0].page_offset += MXGEFW_PAD; - rx_frags[0].size -= MXGEFW_PAD; + skb_frag_size_sub(&rx_frags[0], MXGEFW_PAD); len -= MXGEFW_PAD; lro_receive_frags(&ss->rx_done.lro_mgr, rx_frags, /* opaque, will come back in get_frag_header */ @@ -1382,7 +1382,7 @@ myri10ge_rx_done(struct myri10ge_slice_state *ss, int len, __wsum csum, /* Attach the pages to the skb, and trim off any padding */ myri10ge_rx_skb_build(skb, va, rx_frags, len, hlen); - if (skb_shinfo(skb)->frags[0].size <= 0) { + if (skb_frag_size(&skb_shinfo(skb)->frags[0]) <= 0) { skb_frag_unref(skb, 0); skb_shinfo(skb)->nr_frags = 0; } @@ -2926,7 +2926,7 @@ again: idx = (count + tx->req) & tx->mask; frag = &skb_shinfo(skb)->frags[frag_idx]; frag_idx++; - len = frag->size; + len = skb_frag_size(frag); bus = skb_frag_dma_map(&mgp->pdev->dev, frag, 0, len, DMA_TO_DEVICE); dma_unmap_addr_set(&tx->info[idx], bus, bus); diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 73616b911327..2b8f64ddfb55 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1161,11 +1161,11 @@ again: break; buf = skb_frag_dma_map(&dev->pci_dev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); dprintk("frag: buf=%08Lx page=%08lx offset=%08lx\n", (long long)buf, (long) page_to_pfn(frag->page), frag->page_offset); - len = frag->size; + len = skb_frag_size(frag); frag++; nr_frags--; } diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index bdd3e6a330cd..c27fb3dda9f4 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -2350,12 +2350,12 @@ static struct sk_buff *s2io_txdl_getskb(struct fifo_info *fifo_data, if (frg_cnt) { txds++; for (j = 0; j < frg_cnt; j++, txds++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[j]; if (!txds->Buffer_Pointer) break; pci_unmap_page(nic->pdev, (dma_addr_t)txds->Buffer_Pointer, - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); } } memset(txdlp, 0, (sizeof(struct TxD) * fifo_data->max_txds)); @@ -4185,16 +4185,16 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) frg_cnt = skb_shinfo(skb)->nr_frags; /* For fragmented SKB. */ for (i = 0; i < frg_cnt; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; /* A '0' length fragment will be ignored */ - if (!frag->size) + if (!skb_frag_size(frag)) continue; txdp++; txdp->Buffer_Pointer = (u64)skb_frag_dma_map(&sp->pdev->dev, frag, 0, - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); - txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size); + txdp->Control_1 = TXD_BUFFER0_SIZE(skb_frag_size(frag)); if (offload_type == SKB_GSO_UDP) txdp->Control_1 |= TXD_UFO_EN; } diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index a66f8fc0401e..671e166b5af1 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -585,7 +585,7 @@ vxge_xmit_compl(struct __vxge_hw_fifo *fifo_hw, void *dtr, for (j = 0; j < frg_cnt; j++) { pci_unmap_page(fifo->pdev, txd_priv->dma_buffers[i++], - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); frag += 1; } @@ -920,11 +920,11 @@ vxge_xmit(struct sk_buff *skb, struct net_device *dev) frag = &skb_shinfo(skb)->frags[0]; for (i = 0; i < frg_cnt; i++) { /* ignore 0 length fragment */ - if (!frag->size) + if (!skb_frag_size(frag)) continue; dma_pointer = (u64)skb_frag_dma_map(&fifo->pdev->dev, frag, - 0, frag->size, + 0, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&fifo->pdev->dev, dma_pointer))) @@ -936,7 +936,7 @@ vxge_xmit(struct sk_buff *skb, struct net_device *dev) txdl_priv->dma_buffers[j] = dma_pointer; vxge_hw_fifo_txdl_buffer_set(fifo_hw, dtr, j++, dma_pointer, - frag->size); + skb_frag_size(frag)); frag += 1; } @@ -979,7 +979,7 @@ _exit1: for (; j < i; j++) { pci_unmap_page(fifo->pdev, txdl_priv->dma_buffers[j], - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); frag += 1; } @@ -1050,7 +1050,7 @@ vxge_tx_term(void *dtrh, enum vxge_hw_txdl_state state, void *userdata) for (j = 0; j < frg_cnt; j++) { pci_unmap_page(fifo->pdev, txd_priv->dma_buffers[i++], - frag->size, PCI_DMA_TODEVICE); + skb_frag_size(frag), PCI_DMA_TODEVICE); frag += 1; } diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index d7763ab841d8..1e37eb98c4e2 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2099,8 +2099,10 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* add fragments to entries count */ for (i = 0; i < fragments; i++) { - entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) + - ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + u32 size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + entries += (size >> NV_TX2_TSO_MAX_SHIFT) + + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); } spin_lock_irqsave(&np->lock, flags); @@ -2138,8 +2140,8 @@ static netdev_tx_t nv_start_xmit(struct sk_buff *skb, struct net_device *dev) /* setup the fragments */ for (i = 0; i < fragments; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - u32 size = frag->size; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 size = skb_frag_size(frag); offset = 0; do { @@ -2211,8 +2213,10 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* add fragments to entries count */ for (i = 0; i < fragments; i++) { - entries += (skb_shinfo(skb)->frags[i].size >> NV_TX2_TSO_MAX_SHIFT) + - ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); + u32 size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + entries += (size >> NV_TX2_TSO_MAX_SHIFT) + + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0); } spin_lock_irqsave(&np->lock, flags); @@ -2253,7 +2257,7 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, /* setup the fragments */ for (i = 0; i < fragments; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - u32 size = frag->size; + u32 size = skb_frag_size(frag); offset = 0; do { diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c index c6f005684677..49b549ff2c78 100644 --- a/drivers/net/ethernet/pasemi/pasemi_mac.c +++ b/drivers/net/ethernet/pasemi/pasemi_mac.c @@ -300,9 +300,9 @@ static int pasemi_mac_unmap_tx_skb(struct pasemi_mac *mac, pci_unmap_single(pdev, dmas[0], skb_headlen(skb), PCI_DMA_TODEVICE); for (f = 0; f < nfrags; f++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[f]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[f]; - pci_unmap_page(pdev, dmas[f+1], frag->size, PCI_DMA_TODEVICE); + pci_unmap_page(pdev, dmas[f+1], skb_frag_size(frag), PCI_DMA_TODEVICE); } dev_kfree_skb_irq(skb); @@ -1506,8 +1506,8 @@ static int pasemi_mac_start_tx(struct sk_buff *skb, struct net_device *dev) skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; map[i + 1] = skb_frag_dma_map(&mac->dma_pdev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); - map_size[i+1] = frag->size; + skb_frag_size(frag), DMA_TO_DEVICE); + map_size[i+1] = skb_frag_size(frag); if (dma_mapping_error(&mac->dma_pdev->dev, map[i + 1])) { nfrags = i; goto out_err_nolock; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index e2ba78be1c2a..8cf3173ba488 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1905,13 +1905,13 @@ netxen_map_tx_skb(struct pci_dev *pdev, frag = &skb_shinfo(skb)->frags[i]; nf = &pbuf->frag_array[i+1]; - map = skb_frag_dma_map(&pdev->dev, frag, 0, frag->size, + map = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(&pdev->dev, map)) goto unwind; nf->dma = map; - nf->length = frag->size; + nf->length = skb_frag_size(frag); } return 0; @@ -1962,7 +1962,7 @@ netxen_nic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) for (i = 0; i < (frag_count - NETXEN_MAX_FRAGS_PER_TX); i++) { frag = &skb_shinfo(skb)->frags[i]; - delta += frag->size; + delta += skb_frag_size(frag); } if (!__pskb_pull_tail(skb, delta)) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 46f9b6499f9b..a4bdff438a5e 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -2388,7 +2388,7 @@ static int ql_send_map(struct ql3_adapter *qdev, seg++; } - map = skb_frag_dma_map(&qdev->pdev->dev, frag, 0, frag->size, + map = skb_frag_dma_map(&qdev->pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); err = dma_mapping_error(&qdev->pdev->dev, map); @@ -2401,9 +2401,9 @@ static int ql_send_map(struct ql3_adapter *qdev, oal_entry->dma_lo = cpu_to_le32(LS_64BITS(map)); oal_entry->dma_hi = cpu_to_le32(MS_64BITS(map)); - oal_entry->len = cpu_to_le32(frag->size); + oal_entry->len = cpu_to_le32(skb_frag_size(frag)); dma_unmap_addr_set(&tx_cb->map[seg], mapaddr, map); - dma_unmap_len_set(&tx_cb->map[seg], maplen, frag->size); + dma_unmap_len_set(&tx_cb->map[seg], maplen, skb_frag_size(frag)); } /* Terminate the last segment. */ oal_entry->len |= cpu_to_le32(OAL_LAST_ENTRY); diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index eac19e7d2761..106503f118f6 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -2135,13 +2135,13 @@ qlcnic_map_tx_skb(struct pci_dev *pdev, frag = &skb_shinfo(skb)->frags[i]; nf = &pbuf->frag_array[i+1]; - map = skb_frag_dma_map(&pdev->dev, frag, 0, frag->size, + map = skb_frag_dma_map(&pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); if (dma_mapping_error(&pdev->dev, map)) goto unwind; nf->dma = map; - nf->length = frag->size; + nf->length = skb_frag_size(frag); } return 0; @@ -2221,7 +2221,7 @@ qlcnic_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if (!skb_is_gso(skb) && frag_count > QLCNIC_MAX_FRAGS_PER_TX) { for (i = 0; i < (frag_count - QLCNIC_MAX_FRAGS_PER_TX); i++) - delta += skb_shinfo(skb)->frags[i].size; + delta += skb_frag_size(&skb_shinfo(skb)->frags[i]); if (!__pskb_pull_tail(skb, delta)) goto drop_packet; diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index f2d9bb78ec7f..c92afcd912e2 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -1431,7 +1431,7 @@ static int ql_map_send(struct ql_adapter *qdev, map_idx++; } - map = skb_frag_dma_map(&qdev->pdev->dev, frag, 0, frag->size, + map = skb_frag_dma_map(&qdev->pdev->dev, frag, 0, skb_frag_size(frag), DMA_TO_DEVICE); err = dma_mapping_error(&qdev->pdev->dev, map); @@ -1443,10 +1443,10 @@ static int ql_map_send(struct ql_adapter *qdev, } tbd->addr = cpu_to_le64(map); - tbd->len = cpu_to_le32(frag->size); + tbd->len = cpu_to_le32(skb_frag_size(frag)); dma_unmap_addr_set(&tx_ring_desc->map[map_idx], mapaddr, map); dma_unmap_len_set(&tx_ring_desc->map[map_idx], maplen, - frag->size); + skb_frag_size(frag)); } /* Save the number of segments we've mapped. */ diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 5dcd5be03f31..ee5da9293ce0 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -777,12 +777,12 @@ static netdev_tx_t cp_start_xmit (struct sk_buff *skb, entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { - skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; + const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; u32 len; u32 ctrl; dma_addr_t mapping; - len = this_frag->size; + len = skb_frag_size(this_frag); mapping = dma_map_single(&cp->pdev->dev, skb_frag_address(this_frag), len, PCI_DMA_TODEVICE); diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 2ce60709a455..aa39e771175c 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5413,7 +5413,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb, entry = tp->cur_tx; for (cur_frag = 0; cur_frag < info->nr_frags; cur_frag++) { - skb_frag_t *frag = info->frags + cur_frag; + const skb_frag_t *frag = info->frags + cur_frag; dma_addr_t mapping; u32 status, len; void *addr; @@ -5421,7 +5421,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb, entry = (entry + 1) % NUM_TX_DESC; txd = tp->TxDescArray + entry; - len = frag->size; + len = skb_frag_size(frag); addr = skb_frag_address(frag); mapping = dma_map_single(d, addr, len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(d, mapping))) { diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c index 91a6b7123539..adbda182f159 100644 --- a/drivers/net/ethernet/sfc/rx.c +++ b/drivers/net/ethernet/sfc/rx.c @@ -481,7 +481,7 @@ static void efx_rx_packet_gro(struct efx_channel *channel, skb_frag_set_page(skb, 0, page); skb_shinfo(skb)->frags[0].page_offset = efx_rx_buf_offset(efx, rx_buf); - skb_shinfo(skb)->frags[0].size = rx_buf->len; + skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx_buf->len); skb_shinfo(skb)->nr_frags = 1; skb->len = rx_buf->len; diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c index 3964a62dde8b..df88c5430f95 100644 --- a/drivers/net/ethernet/sfc/tx.c +++ b/drivers/net/ethernet/sfc/tx.c @@ -238,7 +238,7 @@ netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb) if (i >= skb_shinfo(skb)->nr_frags) break; fragment = &skb_shinfo(skb)->frags[i]; - len = fragment->size; + len = skb_frag_size(fragment); i++; /* Map for DMA */ unmap_single = false; @@ -926,11 +926,11 @@ static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx, skb_frag_t *frag) { st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0, - frag->size, DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) { st->unmap_single = false; - st->unmap_len = frag->size; - st->in_len = frag->size; + st->unmap_len = skb_frag_size(frag); + st->in_len = skb_frag_size(frag); st->dma_addr = st->unmap_addr; return 0; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index c0ee6b6b0198..87a6b2e59e04 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1106,8 +1106,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev) } for (i = 0; i < nfrags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - int len = frag->size; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + int len = skb_frag_size(frag); entry = (++priv->cur_tx) % txsize; desc = priv->dma_tx + entry; diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c index d9460d81a137..fd40988c19a6 100644 --- a/drivers/net/ethernet/sun/cassini.c +++ b/drivers/net/ethernet/sun/cassini.c @@ -2051,7 +2051,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, __skb_frag_set_page(frag, page->buffer); __skb_frag_ref(frag); frag->page_offset = off; - frag->size = hlen - swivel; + skb_frag_size_set(frag, hlen - swivel); /* any more data? */ if ((words[0] & RX_COMP1_SPLIT_PKT) && ((dlen -= hlen) > 0)) { @@ -2075,7 +2075,7 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc, __skb_frag_set_page(frag, page->buffer); __skb_frag_ref(frag); frag->page_offset = 0; - frag->size = hlen; + skb_frag_size_set(frag, hlen); RX_USED_ADD(page, hlen + cp->crc_size); } @@ -2826,9 +2826,9 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, entry = TX_DESC_NEXT(ring, entry); for (frag = 0; frag < nr_frags; frag++) { - skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; + const skb_frag_t *fragp = &skb_shinfo(skb)->frags[frag]; - len = fragp->size; + len = skb_frag_size(fragp); mapping = skb_frag_dma_map(&cp->pdev->dev, fragp, 0, len, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c index 23740e848ac9..73c708107a37 100644 --- a/drivers/net/ethernet/sun/niu.c +++ b/drivers/net/ethernet/sun/niu.c @@ -3594,7 +3594,7 @@ static int release_tx_packet(struct niu *np, struct tx_ring_info *rp, int idx) tb = &rp->tx_buffs[idx]; BUG_ON(tb->skb != NULL); np->ops->unmap_page(np->device, tb->mapping, - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), DMA_TO_DEVICE); idx = NEXT_TX(rp, idx); } @@ -6727,9 +6727,9 @@ static netdev_tx_t niu_start_xmit(struct sk_buff *skb, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - len = frag->size; + len = skb_frag_size(frag); mapping = np->ops->map_page(np->device, skb_frag_page(frag), frag->page_offset, len, DMA_TO_DEVICE); diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c index 6b62a73227c2..ceab215bb4a3 100644 --- a/drivers/net/ethernet/sun/sungem.c +++ b/drivers/net/ethernet/sun/sungem.c @@ -1065,12 +1065,12 @@ static netdev_tx_t gem_start_xmit(struct sk_buff *skb, entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { - skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; + const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; u32 len; dma_addr_t mapping; u64 this_ctrl; - len = this_frag->size; + len = skb_frag_size(this_frag); mapping = skb_frag_dma_map(&gp->pdev->dev, this_frag, 0, len, DMA_TO_DEVICE); this_ctrl = ctrl; diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c index 869d47be54b4..c517dac02ae1 100644 --- a/drivers/net/ethernet/sun/sunhme.c +++ b/drivers/net/ethernet/sun/sunhme.c @@ -2305,10 +2305,10 @@ static netdev_tx_t happy_meal_start_xmit(struct sk_buff *skb, entry = NEXT_TX(entry); for (frag = 0; frag < skb_shinfo(skb)->nr_frags; frag++) { - skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; + const skb_frag_t *this_frag = &skb_shinfo(skb)->frags[frag]; u32 len, mapping, this_txflags; - len = this_frag->size; + len = skb_frag_size(this_frag); mapping = skb_frag_dma_map(hp->dma_dev, this_frag, 0, len, DMA_TO_DEVICE); this_txflags = tx_flags; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index c77e3bf4750a..3a90af6d111c 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1493,12 +1493,12 @@ bdx_tx_map_skb(struct bdx_priv *priv, struct sk_buff *skb, bdx_tx_db_inc_wptr(db); for (i = 0; i < nr_frags; i++) { - struct skb_frag_struct *frag; + const struct skb_frag_struct *frag; frag = &skb_shinfo(skb)->frags[i]; - db->wptr->len = frag->size; + db->wptr->len = skb_frag_size(frag); db->wptr->addr.dma = skb_frag_dma_map(&priv->pdev->dev, frag, - 0, frag->size, + 0, skb_frag_size(frag), DMA_TO_DEVICE); pbl++; diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c index 1e2af96fc29c..78e3fb226cce 100644 --- a/drivers/net/ethernet/tile/tilepro.c +++ b/drivers/net/ethernet/tile/tilepro.c @@ -1713,7 +1713,7 @@ static unsigned int tile_net_tx_frags(lepp_frag_t *frags, cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; frags[n].cpa_lo = cpa; frags[n].cpa_hi = cpa >> 32; - frags[n].length = f->size; + frags[n].length = skb_frag_size(f); frags[n].hash_for_home = hash_for_home; n++; } diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c index a03996cf88ed..a8df7eca0956 100644 --- a/drivers/net/ethernet/tundra/tsi108_eth.c +++ b/drivers/net/ethernet/tundra/tsi108_eth.c @@ -709,13 +709,13 @@ static int tsi108_send_packet(struct sk_buff * skb, struct net_device *dev) data->txring[tx].len = skb_headlen(skb); misc |= TSI108_TX_SOF; } else { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i - 1]; data->txring[tx].buf0 = skb_frag_dma_map(NULL, frag, 0, - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); - data->txring[tx].len = frag->size; + data->txring[tx].len = skb_frag_size(frag); } if (i == frags - 1) diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index b47bce1a2e2a..4535d7cc848e 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2554,16 +2554,16 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb, /* Handle fragments */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; tdinfo->skb_dma[i + 1] = skb_frag_dma_map(&vptr->pdev->dev, frag, 0, - frag->size, + skb_frag_size(frag), DMA_TO_DEVICE); td_ptr->td_buf[i + 1].pa_low = cpu_to_le32(tdinfo->skb_dma[i + 1]); td_ptr->td_buf[i + 1].pa_high = 0; - td_ptr->td_buf[i + 1].size = cpu_to_le16(frag->size); + td_ptr->td_buf[i + 1].size = cpu_to_le16(skb_frag_size(frag)); } tdinfo->nskb_dma = i + 1; diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 66e3c36c3733..85ba4d9ac170 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -716,8 +716,8 @@ static int temac_start_xmit(struct sk_buff *skb, struct net_device *ndev) cur_p = &lp->tx_bd_v[lp->tx_bd_tail]; cur_p->phys = dma_map_single(ndev->dev.parent, skb_frag_address(frag), - frag->size, DMA_TO_DEVICE); - cur_p->len = frag->size; + frag_size(frag), DMA_TO_DEVICE); + cur_p->len = frag_size(frag); cur_p->app0 = 0; frag++; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b8225f3b31d1..0d4841bed0f9 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -147,14 +147,14 @@ static void set_skb_frag(struct sk_buff *skb, struct page *page, skb_frag_t *f; f = &skb_shinfo(skb)->frags[i]; - f->size = min((unsigned)PAGE_SIZE - offset, *len); + skb_frag_size_set(f, min((unsigned)PAGE_SIZE - offset, *len)); f->page_offset = offset; __skb_frag_set_page(f, page); - skb->data_len += f->size; - skb->len += f->size; + skb->data_len += skb_frag_size(f); + skb->len += skb_frag_size(f); skb_shinfo(skb)->nr_frags++; - *len -= f->size; + *len -= skb_frag_size(f); } static struct sk_buff *page_to_skb(struct virtnet_info *vi, diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 902f284fd054..b771ebac0f01 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -656,8 +656,8 @@ vmxnet3_append_frag(struct sk_buff *skb, struct Vmxnet3_RxCompDesc *rcd, __skb_frag_set_page(frag, rbi->page); frag->page_offset = 0; - frag->size = rcd->len; - skb->data_len += frag->size; + skb_frag_size_set(frag, rcd->len); + skb->data_len += rcd->len; skb->truesize += PAGE_SIZE; skb_shinfo(skb)->nr_frags++; } @@ -745,21 +745,21 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx, } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i]; tbi = tq->buf_info + tq->tx_ring.next2fill; tbi->map_type = VMXNET3_MAP_PAGE; tbi->dma_addr = skb_frag_dma_map(&adapter->pdev->dev, frag, - 0, frag->size, + 0, skb_frag_size(frag), DMA_TO_DEVICE); - tbi->len = frag->size; + tbi->len = skb_frag_size(frag); gdesc = tq->tx_ring.base + tq->tx_ring.next2fill; BUG_ON(gdesc->txd.gen == tq->tx_ring.gen); gdesc->txd.addr = cpu_to_le64(tbi->dma_addr); - gdesc->dword[2] = cpu_to_le32(dw2 | frag->size); + gdesc->dword[2] = cpu_to_le32(dw2 | skb_frag_size(frag)); gdesc->dword[3] = 0; dev_dbg(&adapter->netdev->dev, diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index 8d70b44fcd8a..d5508957200e 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -334,7 +334,7 @@ unsigned int xen_netbk_count_skb_slots(struct xenvif *vif, struct sk_buff *skb) count++; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - unsigned long size = skb_shinfo(skb)->frags[i].size; + unsigned long size = skb_frag_size(&skb_shinfo(skb)->frags[i]); unsigned long bytes; while (size > 0) { BUG_ON(copy_off > MAX_BUFFER_OFFSET); @@ -526,7 +526,7 @@ static int netbk_gop_skb(struct sk_buff *skb, for (i = 0; i < nr_frags; i++) { netbk_gop_frag_copy(vif, skb, npo, skb_frag_page(&skb_shinfo(skb)->frags[i]), - skb_shinfo(skb)->frags[i].size, + skb_frag_size(&skb_shinfo(skb)->frags[i]), skb_shinfo(skb)->frags[i].page_offset, &head); } diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 6e5d4c09e5d7..226faab23603 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -467,7 +467,7 @@ static void xennet_make_frags(struct sk_buff *skb, struct net_device *dev, tx->gref = np->grant_tx_ref[id] = ref; tx->offset = frag->page_offset; - tx->size = frag->size; + tx->size = skb_frag_size(frag); tx->flags = 0; } @@ -965,7 +965,7 @@ err: if (rx->status > len) { skb_shinfo(skb)->frags[0].page_offset = rx->offset + len; - skb_shinfo(skb)->frags[0].size = rx->status - len; + skb_frag_size_set(&skb_shinfo(skb)->frags[0], rx->status - len); skb->data_len = rx->status - len; } else { __skb_fill_page_desc(skb, 0, NULL, 0, 0); diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c index 77ac217ad5ce..be69da38ccaa 100644 --- a/drivers/scsi/cxgbi/libcxgbi.c +++ b/drivers/scsi/cxgbi/libcxgbi.c @@ -1814,8 +1814,8 @@ static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset, copy = min(datalen, sglen); if (i && page == frags[i - 1].page && sgoffset + sg->offset == - frags[i - 1].page_offset + frags[i - 1].size) { - frags[i - 1].size += copy; + frags[i - 1].page_offset + skb_frag_size(&frags[i - 1])) { + skb_frag_size_add(&frags[i - 1], copy); } else { if (i >= frag_max) { pr_warn("too many pages %u, dlen %u.\n", @@ -1825,7 +1825,7 @@ static int sgl_read_to_frags(struct scatterlist *sg, unsigned int sgoffset, frags[i].page = page; frags[i].page_offset = sg->offset + sgoffset; - frags[i].size = copy; + skb_frag_size_set(&frags[i], copy); i++; } datalen -= copy; @@ -1951,8 +1951,8 @@ int cxgbi_conn_init_pdu(struct iscsi_task *task, unsigned int offset, char *src = kmap_atomic(frag->page, KM_SOFTIRQ0); - memcpy(dst, src+frag->page_offset, frag->size); - dst += frag->size; + memcpy(dst, src+frag->page_offset, skb_frag_size(frag)); + dst += skb_frag_size(frag); kunmap_atomic(src, KM_SOFTIRQ0); } if (padlen) { diff --git a/drivers/scsi/fcoe/fcoe_transport.c b/drivers/scsi/fcoe/fcoe_transport.c index f6613f9f1bdb..dac8e39a5188 100644 --- a/drivers/scsi/fcoe/fcoe_transport.c +++ b/drivers/scsi/fcoe/fcoe_transport.c @@ -105,7 +105,7 @@ u32 fcoe_fc_crc(struct fc_frame *fp) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { frag = &skb_shinfo(skb)->frags[i]; off = frag->page_offset; - len = frag->size; + len = skb_frag_size(frag); while (len > 0) { clen = min(len, PAGE_SIZE - (off & ~PAGE_MASK)); data = kmap_atomic( diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c index 58792aefc8d3..4c7739f929ef 100644 --- a/drivers/staging/hv/netvsc_drv.c +++ b/drivers/staging/hv/netvsc_drv.c @@ -169,11 +169,11 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) /* Additional fragments are after SKB data */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + const skb_frag_t *f = &skb_shinfo(skb)->frags[i]; packet->page_buf[i+2].pfn = page_to_pfn(skb_frag_page(f)); packet->page_buf[i+2].offset = f->page_offset; - packet->page_buf[i+2].len = f->size; + packet->page_buf[i+2].len = skb_frag_size(f); } /* Set the completion routine */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 64f86951ef74..6fcbbbd12ceb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -150,6 +150,26 @@ struct skb_frag_struct { #endif }; +static inline unsigned int skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} + +static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) +{ + frag->size = size; +} + +static inline void skb_frag_size_add(skb_frag_t *frag, int delta) +{ + frag->size += delta; +} + +static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} + #define HAVE_HW_TIME_STAMP /** @@ -1132,7 +1152,7 @@ static inline int skb_pagelen(const struct sk_buff *skb) int i, len = 0; for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) - len += skb_shinfo(skb)->frags[i].size; + len += skb_frag_size(&skb_shinfo(skb)->frags[i]); return len + skb_headlen(skb); } @@ -1156,7 +1176,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, frag->page = page; frag->page_offset = off; - frag->size = size; + skb_frag_size_set(frag, size); } /** @@ -1907,10 +1927,10 @@ static inline int skb_can_coalesce(struct sk_buff *skb, int i, const struct page *page, int off) { if (i) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; + const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; return page == skb_frag_page(frag) && - off == frag->page_offset + frag->size; + off == frag->page_offset + skb_frag_size(frag); } return 0; } diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index b1fe7c35e8d1..bfa9ab93eda5 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -951,13 +951,12 @@ static unsigned long atalk_sum_skb(const struct sk_buff *skb, int offset, /* checksum stuff in frags */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; - + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; diff --git a/net/core/datagram.c b/net/core/datagram.c index 6449bed457d4..68bbf9f65cb0 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -324,14 +324,14 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -410,14 +410,14 @@ int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -500,14 +500,14 @@ int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { int err; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) @@ -585,15 +585,15 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { __wsum csum2; int err = 0; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) diff --git a/net/core/dev.c b/net/core/dev.c index 8b6118a16b87..cbb5918e4fc5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3489,9 +3489,9 @@ pull: skb->data_len -= grow; skb_shinfo(skb)->frags[0].page_offset += grow; - skb_shinfo(skb)->frags[0].size -= grow; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], grow); - if (unlikely(!skb_shinfo(skb)->frags[0].size)) { + if (unlikely(!skb_frag_size(&skb_shinfo(skb)->frags[0]))) { skb_frag_unref(skb, 0); memmove(skb_shinfo(skb)->frags, skb_shinfo(skb)->frags + 1, @@ -3559,7 +3559,7 @@ void skb_gro_reset_offset(struct sk_buff *skb) !PageHighMem(skb_frag_page(&skb_shinfo(skb)->frags[0]))) { NAPI_GRO_CB(skb)->frag0 = skb_frag_address(&skb_shinfo(skb)->frags[0]); - NAPI_GRO_CB(skb)->frag0_len = skb_shinfo(skb)->frags[0].size; + NAPI_GRO_CB(skb)->frag0_len = skb_frag_size(&skb_shinfo(skb)->frags[0]); } } EXPORT_SYMBOL(skb_gro_reset_offset); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 796044ac0bf3..38d657737498 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2606,13 +2606,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ if (i == (frags - 1)) - skb_shinfo(skb)->frags[i].size = - (datalen < PAGE_SIZE ? datalen : PAGE_SIZE); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], + (datalen < PAGE_SIZE ? datalen : PAGE_SIZE)); else - skb_shinfo(skb)->frags[i].size = frag_len; - datalen -= skb_shinfo(skb)->frags[i].size; - skb->len += skb_shinfo(skb)->frags[i].size; - skb->data_len += skb_shinfo(skb)->frags[i].size; + skb_frag_size_set(&skb_shinfo(skb)->frags[i], frag_len); + datalen -= skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->len += skb_frag_size(&skb_shinfo(skb)->frags[i]); + skb->data_len += skb_frag_size(&skb_shinfo(skb)->frags[i]); i++; skb_shinfo(skb)->nr_frags = i; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a7f855dca922..ce357d986251 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -659,7 +659,7 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); memcpy(page_address(page), - vaddr + f->page_offset, f->size); + vaddr + f->page_offset, skb_frag_size(f)); kunmap_skb_frag(vaddr); page->private = (unsigned long)head; head = page; @@ -1190,14 +1190,14 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len) goto drop_pages; for (; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; + int end = offset + skb_frag_size(&skb_shinfo(skb)->frags[i]); if (end < len) { offset = end; continue; } - skb_shinfo(skb)->frags[i++].size = len - offset; + skb_frag_size_set(&skb_shinfo(skb)->frags[i++], len - offset); drop_pages: skb_shinfo(skb)->nr_frags = i; @@ -1306,9 +1306,11 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Estimate size of pulled pages. */ eat = delta; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size >= eat) goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } /* If we need update frag list, we are in troubles. @@ -1371,14 +1373,16 @@ pull_pages: eat = delta; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { skb_frag_unref(skb, i); - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; @@ -1433,7 +1437,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1632,7 +1636,7 @@ static int __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; if (__splice_segment(skb_frag_page(f), - f->page_offset, f->size, + f->page_offset, skb_frag_size(f), offset, len, skb, spd, 0, sk, pipe)) return 1; } @@ -1742,7 +1746,7 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) WARN_ON(start > offset + len); - end = start + frag->size; + end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { u8 *vaddr; @@ -1815,7 +1819,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -1890,7 +1894,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { __wsum csum2; u8 *vaddr; @@ -2163,7 +2167,7 @@ static inline void skb_split_no_header(struct sk_buff *skb, skb->data_len = len - pos; for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); if (pos + size > len) { skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; @@ -2179,8 +2183,8 @@ static inline void skb_split_no_header(struct sk_buff *skb, */ skb_frag_ref(skb, i); skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; + skb_frag_size_sub(&skb_shinfo(skb1)->frags[0], len - pos); + skb_frag_size_set(&skb_shinfo(skb)->frags[i], len - pos); skb_shinfo(skb)->nr_frags++; } k++; @@ -2258,7 +2262,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) } else { merge = to - 1; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); if (todo < 0) { if (skb_prepare_for_shift(skb) || skb_prepare_for_shift(tgt)) @@ -2268,8 +2272,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += shiftlen; - fragfrom->size -= shiftlen; + skb_frag_size_add(fragto, shiftlen); + skb_frag_size_sub(fragfrom, shiftlen); fragfrom->page_offset += shiftlen; goto onlymerged; @@ -2293,9 +2297,9 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[from]; fragto = &skb_shinfo(tgt)->frags[to]; - if (todo >= fragfrom->size) { + if (todo >= skb_frag_size(fragfrom)) { *fragto = *fragfrom; - todo -= fragfrom->size; + todo -= skb_frag_size(fragfrom); from++; to++; @@ -2303,10 +2307,10 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) __skb_frag_ref(fragfrom); fragto->page = fragfrom->page; fragto->page_offset = fragfrom->page_offset; - fragto->size = todo; + skb_frag_size_set(fragto, todo); fragfrom->page_offset += todo; - fragfrom->size -= todo; + skb_frag_size_sub(fragfrom, todo); todo = 0; to++; @@ -2321,7 +2325,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) fragfrom = &skb_shinfo(skb)->frags[0]; fragto = &skb_shinfo(tgt)->frags[merge]; - fragto->size += fragfrom->size; + skb_frag_size_add(fragto, skb_frag_size(fragfrom)); __skb_frag_unref(fragfrom); } @@ -2419,7 +2423,7 @@ next_skb: while (st->frag_idx < skb_shinfo(st->cur_skb)->nr_frags) { frag = &skb_shinfo(st->cur_skb)->frags[st->frag_idx]; - block_limit = frag->size + st->stepped_offset; + block_limit = skb_frag_size(frag) + st->stepped_offset; if (abs_offset < block_limit) { if (!st->frag_data) @@ -2437,7 +2441,7 @@ next_skb: } st->frag_idx++; - st->stepped_offset += frag->size; + st->stepped_offset += skb_frag_size(frag); } if (st->frag_data) { @@ -2567,13 +2571,13 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, left = PAGE_SIZE - frag->page_offset; copy = (length > left)? left : length; - ret = getfrag(from, skb_frag_address(frag) + frag->size, + ret = getfrag(from, skb_frag_address(frag) + skb_frag_size(frag), offset, copy, 0, skb); if (ret < 0) return -EFAULT; /* copy was successful so update the size parameters */ - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; offset += copy; @@ -2720,11 +2724,11 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) while (pos < offset + len && i < nfrags) { *frag = skb_shinfo(skb)->frags[i]; __skb_frag_ref(frag); - size = frag->size; + size = skb_frag_size(frag); if (pos < offset) { frag->page_offset += offset - pos; - frag->size -= offset - pos; + skb_frag_size_sub(frag, offset - pos); } skb_shinfo(nskb)->nr_frags++; @@ -2733,7 +2737,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, u32 features) i++; pos += size; } else { - frag->size -= pos + size - (offset + len); + skb_frag_size_sub(frag, pos + size - (offset + len)); goto skip_fraglist; } @@ -2813,7 +2817,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb) } while (--i); frag->page_offset += offset; - frag->size -= offset; + skb_frag_size_sub(frag, offset); skb->truesize -= skb->data_len; skb->len -= skb->data_len; @@ -2865,7 +2869,7 @@ merge: unsigned int eat = offset - headlen; skbinfo->frags[0].page_offset += eat; - skbinfo->frags[0].size -= eat; + skb_frag_size_sub(&skbinfo->frags[0], eat); skb->data_len -= eat; skb->len -= eat; offset = headlen; @@ -2936,7 +2940,7 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len) WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 34e9664cae3b..2d7cf3d52b4c 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -71,13 +71,13 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, /* Copy paged appendix. Hmm... why does this look so complicated? */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; WARN_ON(start > offset + len); - end = start + skb_shinfo(skb)->frags[i].size; + end = start + skb_frag_size(frag); copy = end - offset; if (copy > 0) { - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = skb_frag_page(frag); if (copy > len) diff --git a/net/ipv4/inet_lro.c b/net/ipv4/inet_lro.c index 8e6be5aad115..cc280a3f4f96 100644 --- a/net/ipv4/inet_lro.c +++ b/net/ipv4/inet_lro.c @@ -244,11 +244,11 @@ static void lro_add_frags(struct net_lro_desc *lro_desc, skb->truesize += truesize; skb_frags[0].page_offset += hlen; - skb_frags[0].size -= hlen; + skb_frag_size_sub(&skb_frags[0], hlen); while (tcp_data_len > 0) { *(lro_desc->next_frag) = *skb_frags; - tcp_data_len -= skb_frags->size; + tcp_data_len -= skb_frag_size(skb_frags); lro_desc->next_frag++; skb_frags++; skb_shinfo(skb)->nr_frags++; @@ -400,14 +400,14 @@ static struct sk_buff *lro_gen_skb(struct net_lro_mgr *lro_mgr, skb_frags = skb_shinfo(skb)->frags; while (data_len > 0) { *skb_frags = *frags; - data_len -= frags->size; + data_len -= skb_frag_size(frags); skb_frags++; frags++; skb_shinfo(skb)->nr_frags++; } skb_shinfo(skb)->frags[0].page_offset += hdr_len; - skb_shinfo(skb)->frags[0].size -= hdr_len; + skb_frag_size_sub(&skb_shinfo(skb)->frags[0], hdr_len); skb->ip_summed = ip_summed; skb->csum = sum; diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 763589ad673d..fdaabf2f2b68 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -599,8 +599,8 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; inr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index ae3bb147affd..e1374ab034bb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1015,13 +1015,13 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, skb_frag_address(frag)+frag->size, + if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } cork->off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1230,7 +1230,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (len > size) len = size; if (skb_can_coalesce(skb, i, page, offset)) { - skb_shinfo(skb)->frags[i-1].size += len; + skb_frag_size_add(&skb_shinfo(skb)->frags[i-1], len); } else if (i < MAX_SKB_FRAGS) { get_page(page); skb_fill_page_desc(skb, i, page, offset, len); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 4c0da24fb649..132be081cd00 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -813,7 +813,7 @@ new_segment: goto wait_for_memory; if (can_coalesce) { - skb_shinfo(skb)->frags[i - 1].size += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); @@ -1058,8 +1058,7 @@ new_segment: /* Update the skb. */ if (merge) { - skb_shinfo(skb)->frags[i - 1].size += - copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); if (TCP_PAGE(sk)) { @@ -3031,8 +3030,8 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; struct page *page = skb_frag_page(f); - sg_set_page(&sg, page, f->size, f->page_offset); - if (crypto_hash_update(desc, &sg, f->size)) + sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dde6b5768316..ed96c543f1cf 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1094,14 +1094,16 @@ static void __pskb_trim_head(struct sk_buff *skb, int len) eat = len; k = 0; for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { + int size = skb_frag_size(&skb_shinfo(skb)->frags[i]); + + if (size <= eat) { skb_frag_unref(skb, i); - eat -= skb_shinfo(skb)->frags[i].size; + eat -= size; } else { skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; + skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); eat = 0; } k++; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1e20b64e646c..1c9bf8b5c30a 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1512,13 +1512,14 @@ alloc_new_skb: err = -EMSGSIZE; goto error; } - if (getfrag(from, skb_frag_address(frag)+frag->size, + if (getfrag(from, + skb_frag_address(frag) + skb_frag_size(frag), offset, copy, skb->len, skb) < 0) { err = -EFAULT; goto error; } sk->sk_sndmsg_off += copy; - frag->size += copy; + skb_frag_size_add(frag, copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 085727263812..e8762c73b170 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -378,8 +378,8 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; inr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 7b954e2539d0..cc22099ac8b6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -464,8 +464,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; inr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index f781b9ab8a54..e5246fbe36c4 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -90,7 +90,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) len = dlen; frag->page_offset = 0; - frag->size = len; + skb_frag_size_set(frag, len); memcpy(skb_frag_address(frag), scratch, len); skb->truesize += len; -- cgit v1.2.3 From 3d153a7c8b23031df35e61377c0600a6c96a8b0b Mon Sep 17 00:00:00 2001 From: Andy Fleming Date: Thu, 13 Oct 2011 04:33:54 +0000 Subject: net: Allow skb_recycle_check to be done in stages skb_recycle_check resets the skb if it's eligible for recycling. However, there are times when a driver might want to optionally manipulate the skb data with the skb before resetting the skb, but after it has determined eligibility. We do this by splitting the eligibility check from the skb reset, creating two inline functions to accomplish that task. Signed-off-by: Andy Fleming Acked-by: David Daney Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++++++ net/core/skbuff.c | 51 +++++++++++++++++++++++++------------------------- 2 files changed, 47 insertions(+), 25 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 6fcbbbd12ceb..77ddf2de712f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -550,6 +550,7 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, NUMA_NO_NODE); } +extern void skb_recycle(struct sk_buff *skb); extern bool skb_recycle_check(struct sk_buff *skb, int skb_size); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); @@ -2484,5 +2485,25 @@ static inline void skb_checksum_none_assert(struct sk_buff *skb) bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); +static inline bool skb_is_recycleable(struct sk_buff *skb, int skb_size) +{ + if (irqs_disabled()) + return false; + + if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) + return false; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return false; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return false; + + if (skb_shared(skb) || skb_cloned(skb)) + return false; + + return true; +} #endif /* __KERNEL__ */ #endif /* _LINUX_SKBUFF_H */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ce357d986251..e27104039a39 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -484,6 +484,30 @@ void consume_skb(struct sk_buff *skb) } EXPORT_SYMBOL(consume_skb); +/** + * skb_recycle - clean up an skb for reuse + * @skb: buffer + * + * Recycles the skb to be reused as a receive buffer. This + * function does any necessary reference count dropping, and + * cleans up the skbuff as if it just came from __alloc_skb(). + */ +void skb_recycle(struct sk_buff *skb) +{ + struct skb_shared_info *shinfo; + + skb_release_head_state(skb); + + shinfo = skb_shinfo(skb); + memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); + atomic_set(&shinfo->dataref, 1); + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); +} +EXPORT_SYMBOL(skb_recycle); + /** * skb_recycle_check - check if skb can be reused for receive * @skb: buffer @@ -498,33 +522,10 @@ EXPORT_SYMBOL(consume_skb); */ bool skb_recycle_check(struct sk_buff *skb, int skb_size) { - struct skb_shared_info *shinfo; - - if (irqs_disabled()) - return false; - - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) - return false; - - if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) - return false; - - skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); - if (skb_end_pointer(skb) - skb->head < skb_size) - return false; - - if (skb_shared(skb) || skb_cloned(skb)) + if (!skb_is_recycleable(skb, skb_size)) return false; - skb_release_head_state(skb); - - shinfo = skb_shinfo(skb); - memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); - atomic_set(&shinfo->dataref, 1); - - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->data = skb->head + NET_SKB_PAD; - skb_reset_tail_pointer(skb); + skb_recycle(skb); return true; } -- cgit v1.2.3 From 850a545bd8a41648445bfc5541afe36ca105b0b8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 13 Oct 2011 22:25:23 +0000 Subject: net: Move rcu_barrier from rollback_registered_many to netdev_run_todo. This patch moves the rcu_barrier from rollback_registered_many (inside the rtnl_lock) into netdev_run_todo (just outside the rtnl_lock). This allows us to gain the full benefit of sychronize_net calling synchronize_rcu_expedited when the rtnl_lock is held. The rcu_barrier in rollback_registered_many was originally a synchronize_net but was promoted to be a rcu_barrier() when it was found that people were unnecessarily hitting the 250ms wait in netdev_wait_allrefs(). Changing the rcu_barrier back to a synchronize_net is therefore safe. Since we only care about waiting for the rcu callbacks before we get to netdev_wait_allrefs() it is also safe to move the wait into netdev_run_todo. This was tested by creating and destroying 1000 tap devices and observing /proc/lock_stat. /proc/lock_stat reports this change reduces the hold times of the rtnl_lock by a factor of 10. There was no observable difference in the amount of time it takes to destroy a network device. Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index cbb5918e4fc5..09aef266d4d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5235,7 +5235,7 @@ static void rollback_registered_many(struct list_head *head) dev = list_first_entry(head, struct net_device, unreg_list); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - rcu_barrier(); + synchronize_net(); list_for_each_entry(dev, head, unreg_list) dev_put(dev); @@ -5748,6 +5748,12 @@ void netdev_run_todo(void) __rtnl_unlock(); + /* Wait for rcu callbacks to finish before attempting to drain + * the device list. This usually avoids a 250ms wait. + */ + if (!list_empty(&list)) + rcu_barrier(); + while (!list_empty(&list)) { struct net_device *dev = list_first_entry(&list, struct net_device, todo_list); -- cgit v1.2.3 From 4dc360c5e7e155373bffbb3c1f7ea0022dee650c Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 19 Oct 2011 17:00:35 -0400 Subject: net: validate HWTSTAMP ioctl parameters This patch adds a sanity check on the values provided by user space for the hardware time stamping configuration. If the values lie outside of the absolute limits, then the ioctl request will be denied. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/net_tstamp.h | 4 ++-- net/core/dev.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index 3df0984cd0d5..ae5df122e42f 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -45,7 +45,7 @@ struct hwtstamp_config { }; /* possible values for hwtstamp_config->tx_type */ -enum { +enum hwtstamp_tx_types { /* * No outgoing packet will need hardware time stamping; * should a packet arrive which asks for it, no hardware @@ -72,7 +72,7 @@ enum { }; /* possible values for hwtstamp_config->rx_filter */ -enum { +enum hwtstamp_rx_filters { /* time stamp no incoming packet at all */ HWTSTAMP_FILTER_NONE, diff --git a/net/core/dev.c b/net/core/dev.c index 09aef266d4d1..40d439524f49 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -136,6 +136,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -1477,6 +1478,57 @@ static inline void net_timestamp_check(struct sk_buff *skb) __net_timestamp(skb); } +static int net_hwtstamp_validate(struct ifreq *ifr) +{ + struct hwtstamp_config cfg; + enum hwtstamp_tx_types tx_type; + enum hwtstamp_rx_filters rx_filter; + int tx_type_valid = 0; + int rx_filter_valid = 0; + + if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) + return -EFAULT; + + if (cfg.flags) /* reserved for future extensions */ + return -EINVAL; + + tx_type = cfg.tx_type; + rx_filter = cfg.rx_filter; + + switch (tx_type) { + case HWTSTAMP_TX_OFF: + case HWTSTAMP_TX_ON: + case HWTSTAMP_TX_ONESTEP_SYNC: + tx_type_valid = 1; + break; + } + + switch (rx_filter) { + case HWTSTAMP_FILTER_NONE: + case HWTSTAMP_FILTER_ALL: + case HWTSTAMP_FILTER_SOME: + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + rx_filter_valid = 1; + break; + } + + if (!tx_type_valid || !rx_filter_valid) + return -ERANGE; + + return 0; +} + static inline bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb) { @@ -4921,6 +4973,12 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) ifr->ifr_newname[IFNAMSIZ-1] = '\0'; return dev_change_name(dev, ifr->ifr_newname); + case SIOCSHWTSTAMP: + err = net_hwtstamp_validate(ifr); + if (err) + return err; + /* fall through */ + /* * Unknown or private ioctl */ -- cgit v1.2.3 From 4f25af27827080c3163e59c7af1ca84a05ce121c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Oct 2011 21:04:20 +0000 Subject: filter: use unsigned int to silence static checker warning This is just a cleanup. My testing version of Smatch warns about this: net/core/filter.c +380 check_load_and_stores(6) warn: check 'flen' for negative values flen comes from the user. We try to clamp the values here between 1 and BPF_MAXINSNS but the clamp doesn't work because it could be negative. This is a bug, but it's not exploitable. Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- net/core/filter.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net/core') diff --git a/include/linux/filter.h b/include/linux/filter.h index 741956fa5bfd..8eeb205f298b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -155,7 +155,7 @@ extern unsigned int sk_run_filter(const struct sk_buff *skb, const struct sock_filter *filter); extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); extern int sk_detach_filter(struct sock *sk); -extern int sk_chk_filter(struct sock_filter *filter, int flen); +extern int sk_chk_filter(struct sock_filter *filter, unsigned int flen); #ifdef CONFIG_BPF_JIT extern void bpf_jit_compile(struct sk_filter *fp); diff --git a/net/core/filter.c b/net/core/filter.c index 8fcc2d776e09..5dea45279215 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -436,7 +436,7 @@ error: * * Returns 0 if the rule set is legal or -EINVAL if not. */ -int sk_chk_filter(struct sock_filter *filter, int flen) +int sk_chk_filter(struct sock_filter *filter, unsigned int flen) { /* * Valid instructions are initialized to non-0. -- cgit v1.2.3 From e049f28883126c689cf95859480d9ee4ab23b7fa Mon Sep 17 00:00:00 2001 From: "roy.qing.li@gmail.com" Date: Mon, 17 Oct 2011 22:32:42 +0000 Subject: neigh: fix rcu splat in neigh_update() when use dst_get_neighbour to get neighbour, we need rcu_read_lock to protect, since dst_get_neighbour uses rcu_dereference. The bug was reported by Ari Savolainen [ 105.612095] [ 105.612096] =================================================== [ 105.612100] [ INFO: suspicious rcu_dereference_check() usage. ] [ 105.612101] --------------------------------------------------- [ 105.612103] include/net/dst.h:91 invoked rcu_dereference_check() without protection! [ 105.612105] [ 105.612106] other info that might help us debug this: [ 105.612106] [ 105.612108] [ 105.612108] rcu_scheduler_active = 1, debug_locks = 0 [ 105.612110] 1 lock held by dnsmasq/2618: [ 105.612111] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 105.612120] [ 105.612121] stack backtrace: [ 105.612123] Pid: 2618, comm: dnsmasq Not tainted 3.1.0-rc1 #41 [ 105.612125] Call Trace: [ 105.612129] [] lockdep_rcu_dereference+0xbb/0xc0 [ 105.612132] [] neigh_update+0x4f9/0x5f0 [ 105.612135] [] ? neigh_lookup+0xe1/0x220 [ 105.612139] [] arp_req_set+0xb8/0x230 [ 105.612142] [] arp_ioctl+0x1bf/0x310 [ 105.612146] [] ? lock_hrtimer_base.isra.26+0x30/0x60 [ 105.612150] [] inet_ioctl+0x85/0x90 [ 105.612154] [] sock_do_ioctl+0x30/0x70 [ 105.612157] [] sock_ioctl+0x73/0x280 [ 105.612162] [] do_vfs_ioctl+0x98/0x570 [ 105.612165] [] ? fget_light+0x340/0x3a0 [ 105.612168] [] sys_ioctl+0x4f/0x80 [ 105.612172] [] system_call_fastpath+0x16/0x1b Reported-by: Ari Savolainen Signed-off-by: RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/neighbour.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/core') diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 43449649cf73..909ecb3c2a33 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1168,10 +1168,14 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, struct dst_entry *dst = skb_dst(skb); struct neighbour *n2, *n1 = neigh; write_unlock_bh(&neigh->lock); + + rcu_read_lock(); /* On shaper/eql skb->dst->neighbour != neigh :( */ if (dst && (n2 = dst_get_neighbour(dst)) != NULL) n1 = n2; n1->output(n1, skb); + rcu_read_unlock(); + write_lock_bh(&neigh->lock); } skb_queue_purge(&neigh->arp_queue); -- cgit v1.2.3 From a0bec1cd8f7ac966f2885f7e56ec44df87bab738 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 18 Oct 2011 22:55:11 +0000 Subject: net: do not take an additional reference in skb_frag_set_page I audited all of the callers in the tree and only one of them (pktgen) expects it to do so. Taking this reference is pretty obviously confusing and error prone. In particular I looked at the following commits which switched callers of (__)skb_frag_set_page to the skb paged fragment api: 6a930b9f163d7e6d9ef692e05616c4ede65038ec cxgb3: convert to SKB paged frag API. 5dc3e196ea21e833128d51eb5b788a070fea1f28 myri10ge: convert to SKB paged frag API. 0e0634d20dd670a89af19af2a686a6cce943ac14 vmxnet3: convert to SKB paged frag API. 86ee8130a46769f73f8f423f99dbf782a09f9233 virtionet: convert to SKB paged frag API. 4a22c4c919c201c2a7f4ee09e672435a3072d875 sfc: convert to SKB paged frag API. 18324d690d6a5028e3c174fc1921447aedead2b8 cassini: convert to SKB paged frag API. b061b39e3ae18ad75466258cf2116e18fa5bbd80 benet: convert to SKB paged frag API. b7b6a688d217936459ff5cf1087b2361db952509 bnx2: convert to SKB paged frag API. 804cf14ea5ceca46554d5801e2817bba8116b7e5 net: xfrm: convert to SKB frag APIs ea2ab69379a941c6f8884e290fdd28c93936a778 net: convert core to skb paged frag APIs Signed-off-by: Ian Campbell Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - net/core/pktgen.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 77ddf2de712f..1ebf1ea29d60 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1786,7 +1786,6 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { frag->page = page; - __skb_frag_ref(frag); } /** diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 38d657737498..6bbf00801f61 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2602,6 +2602,7 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, if (!pkt_dev->page) break; } + get_page(pkt_dev->page); skb_frag_set_page(skb, i, pkt_dev->page); skb_shinfo(skb)->frags[i].page_offset = 0; /*last fragment, fill rest of data*/ -- cgit v1.2.3 From 33136d12be00596a8320d4fe88f95c44f67afbdb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 20 Oct 2011 17:00:21 -0400 Subject: pktgen: remove ndelay() call Daniel Turull reported inaccuracies in pktgen when using low packet rates, because we call ndelay(val) with values bigger than 20000. Instead of calling ndelay() for delays < 100us, we can instead loop calling ktime_now() only. Reported-by: Daniel Turull Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/pktgen.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6bbf00801f61..0001c243b35c 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2145,9 +2145,12 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) } start_time = ktime_now(); - if (remaining < 100000) - ndelay(remaining); /* really small just spin */ - else { + if (remaining < 100000) { + /* for small delays (<100us), just loop until limit is reached */ + do { + end_time = ktime_now(); + } while (ktime_lt(end_time, spin_until)); + } else { /* see do_nanosleep */ hrtimer_init_sleeper(&t, current); do { @@ -2162,8 +2165,8 @@ static void spin(struct pktgen_dev *pkt_dev, ktime_t spin_until) hrtimer_cancel(&t.timer); } while (t.task && pkt_dev->running && !signal_pending(current)); __set_current_state(TASK_RUNNING); + end_time = ktime_now(); } - end_time = ktime_now(); pkt_dev->idle_acc += ktime_to_ns(ktime_sub(end_time, start_time)); pkt_dev->next_tx = ktime_add_ns(spin_until, pkt_dev->delay); -- cgit v1.2.3 From a8605c6063f785858c1bc431d0bfe66c41e71cfa Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Wed, 19 Oct 2011 23:01:49 +0000 Subject: net: add opaque struct around skb frag page I've split this bit out of the skb frag destructor patch since it helps enforce the use of the fragment API. Signed-off-by: Ian Campbell Signed-off-by: David S. Miller --- include/linux/skbuff.h | 10 ++++++---- net/core/skbuff.c | 6 +++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'net/core') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3411f22e7d16..94a23ffcc073 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -140,7 +140,9 @@ struct sk_buff; typedef struct skb_frag_struct skb_frag_t; struct skb_frag_struct { - struct page *page; + struct { + struct page *p; + } page; #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) __u32 page_offset; __u32 size; @@ -1175,7 +1177,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - frag->page = page; + frag->page.p = page; frag->page_offset = off; skb_frag_size_set(frag, size); } @@ -1699,7 +1701,7 @@ static inline void netdev_free_page(struct net_device *dev, struct page *page) */ static inline struct page *skb_frag_page(const skb_frag_t *frag) { - return frag->page; + return frag->page.p; } /** @@ -1785,7 +1787,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) */ static inline void __skb_frag_set_page(skb_frag_t *frag, struct page *page) { - frag->page = page; + frag->page.p = page; } /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e27104039a39..ca4db40e75b8 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -668,14 +668,14 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) /* skb frags release userspace buffers */ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); + skb_frag_unref(skb, i); uarg->callback(uarg); /* skb frags point to kernel buffers */ for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) { - skb_shinfo(skb)->frags[i - 1].page_offset = 0; - skb_shinfo(skb)->frags[i - 1].page = head; + __skb_fill_page_desc(skb, i-1, head, 0, + skb_shinfo(skb)->frags[i - 1].size); head = (struct page *)head->private; } -- cgit v1.2.3 From f04565ddf52e401880f8ba51de0dff8ba51c99fd Mon Sep 17 00:00:00 2001 From: Mihai Maruseac Date: Thu, 20 Oct 2011 20:45:10 +0000 Subject: dev: use name hash for dev_seq_ops Instead of using the dev->next chain and trying to resync at each call to dev_seq_start, use the name hash, keeping the bucket and the offset in seq->private field. Tests revealed the following results for ifconfig > /dev/null * 1000 interfaces: * 0.114s without patch * 0.089s with patch * 3000 interfaces: * 0.489s without patch * 0.110s with patch * 5000 interfaces: * 1.363s without patch * 0.250s with patch * 128000 interfaces (other setup): * ~100s without patch * ~30s with patch Signed-off-by: Mihai Maruseac Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/dev.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 69 insertions(+), 15 deletions(-) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index 40d439524f49..ad5d7027c545 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4093,6 +4093,60 @@ static int dev_ifconf(struct net *net, char __user *arg) } #ifdef CONFIG_PROC_FS + +#define BUCKET_SPACE (32 - NETDEV_HASHBITS) + +struct dev_iter_state { + struct seq_net_private p; + unsigned int pos; /* bucket << BUCKET_SPACE + offset */ +}; + +#define get_bucket(x) ((x) >> BUCKET_SPACE) +#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1)) +#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o)) + +static inline struct net_device *dev_from_same_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net *net = seq_file_net(seq); + struct net_device *dev; + struct hlist_node *p; + struct hlist_head *h; + unsigned int count, bucket, offset; + + bucket = get_bucket(state->pos); + offset = get_offset(state->pos); + h = &net->dev_name_head[bucket]; + count = 0; + hlist_for_each_entry_rcu(dev, p, h, name_hlist) { + if (count++ == offset) { + state->pos = set_bucket_offset(bucket, count); + return dev; + } + } + + return NULL; +} + +static inline struct net_device *dev_from_new_bucket(struct seq_file *seq) +{ + struct dev_iter_state *state = seq->private; + struct net_device *dev; + unsigned int bucket; + + bucket = get_bucket(state->pos); + do { + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + bucket++; + state->pos = set_bucket_offset(bucket, 0); + } while (bucket < NETDEV_HASHENTRIES); + + return NULL; +} + /* * This is invoked by the /proc filesystem handler to display a device * in detail. @@ -4100,33 +4154,33 @@ static int dev_ifconf(struct net *net, char __user *arg) void *dev_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { - struct net *net = seq_file_net(seq); - loff_t off; - struct net_device *dev; + struct dev_iter_state *state = seq->private; rcu_read_lock(); if (!*pos) return SEQ_START_TOKEN; - off = 1; - for_each_netdev_rcu(net, dev) - if (off++ == *pos) - return dev; + /* check for end of the hash */ + if (state->pos == 0 && *pos > 1) + return NULL; - return NULL; + return dev_from_new_bucket(seq); } void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct net_device *dev = v; + struct net_device *dev; + + ++*pos; if (v == SEQ_START_TOKEN) - dev = first_net_device_rcu(seq_file_net(seq)); - else - dev = next_net_device_rcu(dev); + return dev_from_new_bucket(seq); - ++*pos; - return dev; + dev = dev_from_same_bucket(seq); + if (dev) + return dev; + + return dev_from_new_bucket(seq); } void dev_seq_stop(struct seq_file *seq, void *v) @@ -4225,7 +4279,7 @@ static const struct seq_operations dev_seq_ops = { static int dev_seq_open(struct inode *inode, struct file *file) { return seq_open_net(inode, file, &dev_seq_ops, - sizeof(struct seq_net_private)); + sizeof(struct dev_iter_state)); } static const struct file_operations dev_seq_fops = { -- cgit v1.2.3 From cf533ea53ebfae41be15b103d78e7ebec30b9969 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 21 Oct 2011 05:22:42 -0400 Subject: tcp: add const qualifiers where possible Adding const qualifiers to pointers can ease code review, and spot some bugs. It might allow compiler to optimize code further. For example, is it legal to temporary write a null cksum into tcphdr in tcp_md5_hash_header() ? I am afraid a sniffer could catch the temporary null value... Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/secure_seq.h | 2 +- include/net/tcp.h | 43 +++++++++--------- net/core/secure_seq.c | 2 +- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 18 ++++---- net/ipv4/tcp_input.c | 110 ++++++++++++++++++++++++----------------------- net/ipv4/tcp_ipv4.c | 26 +++++------ net/ipv4/tcp_minisocks.c | 4 +- net/ipv4/tcp_output.c | 72 ++++++++++++++++--------------- net/ipv6/syncookies.c | 6 +-- net/ipv6/tcp_ipv6.c | 27 ++++++------ 11 files changed, 160 insertions(+), 152 deletions(-) (limited to 'net/core') diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h index d97f6892c019..c2e542b27a5a 100644 --- a/include/net/secure_seq.h +++ b/include/net/secure_seq.h @@ -10,7 +10,7 @@ extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, __be16 dport); extern __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); -extern __u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, +extern __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport); extern u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, __be16 sport, __be16 dport); diff --git a/include/net/tcp.h b/include/net/tcp.h index 0113d306fcb0..3edef0bebdd1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -327,9 +327,9 @@ extern int tcp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags); extern int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len); + const struct tcphdr *th, unsigned int len); extern int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len); + const struct tcphdr *th, unsigned int len); extern void tcp_rcv_space_adjust(struct sock *sk); extern void tcp_cleanup_rbuf(struct sock *sk, int copied); extern int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp); @@ -401,10 +401,10 @@ extern void tcp_set_keepalive(struct sock *sk, int val); extern void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -extern void tcp_parse_options(struct sk_buff *skb, - struct tcp_options_received *opt_rx, u8 **hvpp, +extern void tcp_parse_options(const struct sk_buff *skb, + struct tcp_options_received *opt_rx, const u8 **hvpp, int estab); -extern u8 *tcp_parse_md5sig_option(struct tcphdr *th); +extern const u8 *tcp_parse_md5sig_option(const struct tcphdr *th); /* * TCP v4 functions exported for the inet6 API @@ -450,7 +450,7 @@ extern bool cookie_check_timestamp(struct tcp_options_received *opt, bool *); /* From net/ipv6/syncookies.c */ extern struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb); #ifdef CONFIG_SYN_COOKIES -extern __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, +extern __u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mss); #else static inline __u32 cookie_v6_init_sequence(struct sock *sk, @@ -522,7 +522,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) } /* tcp.c */ -extern void tcp_get_info(struct sock *, struct tcp_info *); +extern void tcp_get_info(const struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, @@ -532,8 +532,8 @@ extern int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, extern void tcp_initialize_rcv_mss(struct sock *sk); -extern int tcp_mtu_to_mss(struct sock *sk, int pmtu); -extern int tcp_mss_to_mtu(struct sock *sk, int mss); +extern int tcp_mtu_to_mss(const struct sock *sk, int pmtu); +extern int tcp_mss_to_mtu(const struct sock *sk, int mss); extern void tcp_mtup_init(struct sock *sk); extern void tcp_valid_rtt_meas(struct sock *sk, u32 seq_rtt); @@ -574,7 +574,7 @@ static inline void tcp_fast_path_check(struct sock *sk) /* Compute the actual rto_min value */ static inline u32 tcp_rto_min(struct sock *sk) { - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); u32 rto_min = TCP_RTO_MIN; if (dst && dst_metric_locked(dst, RTAX_RTO_MIN)) @@ -820,6 +820,7 @@ static inline bool tcp_in_initial_slowstart(const struct tcp_sock *tp) static inline __u32 tcp_current_ssthresh(const struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + if ((1 << inet_csk(sk)->icsk_ca_state) & (TCPF_CA_CWR | TCPF_CA_Recovery)) return tp->snd_ssthresh; else @@ -832,7 +833,7 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); +extern __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst); /* Slow start with delack produces 3 packets of burst, so that * it is safe "de facto". This will be the default - same as @@ -861,7 +862,7 @@ static inline void tcp_minshall_update(struct tcp_sock *tp, unsigned int mss, static inline void tcp_check_probe_timer(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); if (!tp->packets_out && !icsk->icsk_pending) @@ -1209,10 +1210,10 @@ extern struct tcp_md5sig_pool *tcp_get_md5sig_pool(void); extern void tcp_put_md5sig_pool(void); extern int tcp_md5_hash_header(struct tcp_md5sig_pool *, struct tcphdr *); -extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, struct sk_buff *, +extern int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *, const struct sk_buff *, unsigned header_len); extern int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, - struct tcp_md5sig_key *key); + const struct tcp_md5sig_key *key); /* write queue abstraction */ static inline void tcp_write_queue_purge(struct sock *sk) @@ -1225,22 +1226,24 @@ static inline void tcp_write_queue_purge(struct sock *sk) tcp_clear_all_retrans_hints(tcp_sk(sk)); } -static inline struct sk_buff *tcp_write_queue_head(struct sock *sk) +static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk) { return skb_peek(&sk->sk_write_queue); } -static inline struct sk_buff *tcp_write_queue_tail(struct sock *sk) +static inline struct sk_buff *tcp_write_queue_tail(const struct sock *sk) { return skb_peek_tail(&sk->sk_write_queue); } -static inline struct sk_buff *tcp_write_queue_next(struct sock *sk, struct sk_buff *skb) +static inline struct sk_buff *tcp_write_queue_next(const struct sock *sk, + const struct sk_buff *skb) { return skb_queue_next(&sk->sk_write_queue, skb); } -static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_buff *skb) +static inline struct sk_buff *tcp_write_queue_prev(const struct sock *sk, + const struct sk_buff *skb) { return skb_queue_prev(&sk->sk_write_queue, skb); } @@ -1254,7 +1257,7 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) -static inline struct sk_buff *tcp_send_head(struct sock *sk) +static inline struct sk_buff *tcp_send_head(const struct sock *sk) { return sk->sk_send_head; } @@ -1265,7 +1268,7 @@ static inline bool tcp_skb_is_last(const struct sock *sk, return skb_queue_is_last(&sk->sk_write_queue, skb); } -static inline void tcp_advance_send_head(struct sock *sk, struct sk_buff *skb) +static inline void tcp_advance_send_head(struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_is_last(sk, skb)) sk->sk_send_head = NULL; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 45329d7c9dd9..025233de25f9 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -35,7 +35,7 @@ static u32 seq_scale(u32 seq) } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -__u32 secure_tcpv6_sequence_number(__be32 *saddr, __be32 *daddr, +__u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, __be16 sport, __be16 dport) { u32 secret[MD5_MESSAGE_BYTES / 4]; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 3bc5c8f7c71b..d7b89b12f6d8 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -265,7 +265,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct tcp_request_sock *treq; struct tcp_sock *tp = tcp_sk(sk); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 132be081cd00..704adad8f07f 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -374,7 +374,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) @@ -528,7 +528,7 @@ static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) tp->pushed_seq = tp->write_seq; } -static inline int forced_push(struct tcp_sock *tp) +static inline int forced_push(const struct tcp_sock *tp) { return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -891,9 +891,9 @@ EXPORT_SYMBOL(tcp_sendpage); #define TCP_PAGE(sk) (sk->sk_sndmsg_page) #define TCP_OFF(sk) (sk->sk_sndmsg_off) -static inline int select_size(struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, int sg) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { @@ -2408,7 +2408,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, @@ -2430,9 +2430,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(struct sock *sk, struct tcp_info *info) +void tcp_get_info(const struct sock *sk, struct tcp_info *info) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; @@ -3010,7 +3010,7 @@ int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - struct sk_buff *skb, unsigned header_len) + const struct sk_buff *skb, unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); @@ -3043,7 +3043,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { struct scatterlist sg; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 69a90b839984..52b5c2d0ecd0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -206,7 +206,7 @@ static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp) tp->ecn_flags |= TCP_ECN_QUEUE_CWR; } -static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_accept_cwr(struct tcp_sock *tp, const struct sk_buff *skb) { if (tcp_hdr(skb)->cwr) tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR; @@ -239,19 +239,19 @@ static inline void TCP_ECN_check_ce(struct tcp_sock *tp, const struct sk_buff *s } } -static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th) +static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, const struct tcphdr *th) { if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr)) tp->ecn_flags &= ~TCP_ECN_OK; } -static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th) +static inline int TCP_ECN_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr *th) { if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK)) return 1; @@ -315,7 +315,7 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) return 0; } -static void tcp_grow_window(struct sock *sk, struct sk_buff *skb) +static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -429,7 +429,7 @@ static void tcp_clamp_window(struct sock *sk) */ void tcp_initialize_rcv_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache); hint = min(hint, tp->rcv_wnd / 2); @@ -824,7 +824,7 @@ void tcp_update_metrics(struct sock *sk) } } -__u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) +__u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); @@ -1216,7 +1216,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) tp->lost_retrans_low = new_low_seq; } -static int tcp_check_dsack(struct sock *sk, struct sk_buff *ack_skb, +static int tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, struct tcp_sack_block_wire *sp, int num_sacks, u32 prior_snd_una) { @@ -1310,7 +1310,7 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, return in_sack; } -static u8 tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, +static u8 tcp_sacktag_one(const struct sk_buff *skb, struct sock *sk, struct tcp_sacktag_state *state, int dup_sack, int pcount) { @@ -1465,13 +1465,13 @@ static int tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, /* I wish gso_size would have a bit more sane initialization than * something-or-zero which complicates things */ -static int tcp_skb_seglen(struct sk_buff *skb) +static int tcp_skb_seglen(const struct sk_buff *skb) { return tcp_skb_pcount(skb) == 1 ? skb->len : tcp_skb_mss(skb); } /* Shifting pages past head area doesn't work */ -static int skb_can_shift(struct sk_buff *skb) +static int skb_can_shift(const struct sk_buff *skb) { return !skb_headlen(skb) && skb_is_nonlinear(skb); } @@ -1720,19 +1720,19 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, return skb; } -static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache) +static int tcp_sack_cache_ok(const struct tcp_sock *tp, const struct tcp_sack_block *cache) { return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache); } static int -tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, +tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, u32 prior_snd_una) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - unsigned char *ptr = (skb_transport_header(ack_skb) + - TCP_SKB_CB(ack_skb)->sacked); + const unsigned char *ptr = (skb_transport_header(ack_skb) + + TCP_SKB_CB(ack_skb)->sacked); struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2); struct tcp_sack_block sp[TCP_NUM_SACKS]; struct tcp_sack_block *cache; @@ -2296,7 +2296,7 @@ static int tcp_check_sack_reneging(struct sock *sk, int flag) return 0; } -static inline int tcp_fackets_out(struct tcp_sock *tp) +static inline int tcp_fackets_out(const struct tcp_sock *tp) { return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; } @@ -2316,19 +2316,20 @@ static inline int tcp_fackets_out(struct tcp_sock *tp) * they differ. Since neither occurs due to loss, TCP should really * ignore them. */ -static inline int tcp_dupack_heuristics(struct tcp_sock *tp) +static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; } -static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) +static inline int tcp_skb_timedout(const struct sock *sk, + const struct sk_buff *skb) { return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; } -static inline int tcp_head_timedout(struct sock *sk) +static inline int tcp_head_timedout(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); return tp->packets_out && tcp_skb_timedout(sk, tcp_write_queue_head(sk)); @@ -2639,7 +2640,7 @@ static void tcp_cwnd_down(struct sock *sk, int flag) /* Nothing was retransmitted or returned timestamp is less * than timestamp of the first retransmission. */ -static inline int tcp_packet_delayed(struct tcp_sock *tp) +static inline int tcp_packet_delayed(const struct tcp_sock *tp) { return !tp->retrans_stamp || (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && @@ -2700,7 +2701,7 @@ static void tcp_undo_cwr(struct sock *sk, const bool undo_ssthresh) tp->snd_cwnd_stamp = tcp_time_stamp; } -static inline int tcp_may_undo(struct tcp_sock *tp) +static inline int tcp_may_undo(const struct tcp_sock *tp) { return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } @@ -2764,9 +2765,9 @@ static void tcp_try_undo_dsack(struct sock *sk) * that successive retransmissions of a segment must not advance * retrans_stamp under any conditions. */ -static int tcp_any_retrans_done(struct sock *sk) +static int tcp_any_retrans_done(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; if (tp->retrans_out) @@ -3245,7 +3246,7 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) */ static void tcp_rearm_rto(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!tp->packets_out) { inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); @@ -3497,7 +3498,7 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2 * and in FreeBSD. NetBSD's one is even worse.) is wrong. */ -static int tcp_ack_update_window(struct sock *sk, struct sk_buff *skb, u32 ack, +static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 ack, u32 ack_seq) { struct tcp_sock *tp = tcp_sk(sk); @@ -3673,7 +3674,7 @@ static int tcp_process_frto(struct sock *sk, int flag) } /* This routine deals with incoming acks, but not outgoing ones. */ -static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) +static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); @@ -3810,14 +3811,14 @@ old_ack: * But, this can also be called on packets in the established flow when * the fast version below fails. */ -void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, - u8 **hvpp, int estab) +void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx, + const u8 **hvpp, int estab) { - unsigned char *ptr; - struct tcphdr *th = tcp_hdr(skb); + const unsigned char *ptr; + const struct tcphdr *th = tcp_hdr(skb); int length = (th->doff * 4) - sizeof(struct tcphdr); - ptr = (unsigned char *)(th + 1); + ptr = (const unsigned char *)(th + 1); opt_rx->saw_tstamp = 0; while (length > 0) { @@ -3928,9 +3929,9 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } EXPORT_SYMBOL(tcp_parse_options); -static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr *th) { - __be32 *ptr = (__be32 *)(th + 1); + const __be32 *ptr = (const __be32 *)(th + 1); if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { @@ -3947,8 +3948,9 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ -static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, - struct tcp_sock *tp, u8 **hvpp) +static int tcp_fast_parse_options(const struct sk_buff *skb, + const struct tcphdr *th, + struct tcp_sock *tp, const u8 **hvpp) { /* In the spirit of fast parsing, compare doff directly to constant * values. Because equality is used, short doff can be ignored here. @@ -3969,10 +3971,10 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, /* * Parse MD5 Signature option */ -u8 *tcp_parse_md5sig_option(struct tcphdr *th) +const u8 *tcp_parse_md5sig_option(const struct tcphdr *th) { - int length = (th->doff << 2) - sizeof (*th); - u8 *ptr = (u8*)(th + 1); + int length = (th->doff << 2) - sizeof(*th); + const u8 *ptr = (const u8 *)(th + 1); /* If the TCP option is too short, we can short cut */ if (length < TCPOLEN_MD5SIG) @@ -4049,8 +4051,8 @@ static inline void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) static int tcp_disordered_ack(const struct sock *sk, const struct sk_buff *skb) { - struct tcp_sock *tp = tcp_sk(sk); - struct tcphdr *th = tcp_hdr(skb); + const struct tcp_sock *tp = tcp_sk(sk); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; @@ -4089,7 +4091,7 @@ static inline int tcp_paws_discard(const struct sock *sk, * (borrowed from freebsd) */ -static inline int tcp_sequence(struct tcp_sock *tp, u32 seq, u32 end_seq) +static inline int tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) { return !before(end_seq, tp->rcv_wup) && !after(seq, tp->rcv_nxt + tcp_receive_window(tp)); @@ -4246,7 +4248,7 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq) tcp_sack_extend(tp->duplicate_sack, seq, end_seq); } -static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb) +static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -4433,7 +4435,7 @@ static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size) static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct tcp_sock *tp = tcp_sk(sk); int eaten = -1; @@ -4917,9 +4919,9 @@ void tcp_cwnd_application_limited(struct sock *sk) tp->snd_cwnd_stamp = tcp_time_stamp; } -static int tcp_should_expand_sndbuf(struct sock *sk) +static int tcp_should_expand_sndbuf(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* If the user specified a specific send buffer setting, do * not modify it. @@ -5028,7 +5030,7 @@ static inline void tcp_ack_snd_check(struct sock *sk) * either form (or just set the sysctl tcp_stdurg). */ -static void tcp_check_urg(struct sock *sk, struct tcphdr *th) +static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); @@ -5094,7 +5096,7 @@ static void tcp_check_urg(struct sock *sk, struct tcphdr *th) } /* This is the 'fast' part of urgent handling. */ -static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th) +static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_sock *tp = tcp_sk(sk); @@ -5215,9 +5217,9 @@ out: * play significant role here. */ static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, int syn_inerr) + const struct tcphdr *th, int syn_inerr) { - u8 *hash_location; + const u8 *hash_location; struct tcp_sock *tp = tcp_sk(sk); /* RFC1323: H1. Apply PAWS check first. */ @@ -5298,7 +5300,7 @@ discard: * tcp_data_queue when everything is OK. */ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); int res; @@ -5509,9 +5511,9 @@ discard: EXPORT_SYMBOL(tcp_rcv_established); static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { - u8 *hash_location; + const u8 *hash_location; struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; @@ -5786,7 +5788,7 @@ reset_and_undo: */ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, - struct tcphdr *th, unsigned len) + const struct tcphdr *th, unsigned int len) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 48da7cc41e23..955c9255cd98 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -104,7 +104,7 @@ struct tcp_md5sig_key *tcp_v4_md5_do_lookup(struct sock *sk, __be32 addr) struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); -static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb) +static inline __u32 tcp_v4_init_sequence(const struct sk_buff *skb) { return secure_tcp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, @@ -552,7 +552,7 @@ static void __tcp_v4_send_check(struct sk_buff *skb, /* This routine computes an IPv4 TCP checksum. */ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) { - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } @@ -590,7 +590,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; #ifdef CONFIG_TCP_MD5SIG @@ -668,7 +668,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, struct tcp_md5sig_key *key, int reply_flags) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); struct { struct tcphdr th; __be32 opt[(TCPOLEN_TSTAMP_ALIGNED >> 2) @@ -1182,10 +1182,10 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) * o MD5 hash and we're not expecting one. * o MD5 hash and its wrong. */ - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct iphdr *iph = ip_hdr(skb); - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); int genhash; unsigned char newhash[16]; @@ -1248,7 +1248,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet_request_sock *ireq; struct tcp_sock *tp = tcp_sk(sk); @@ -1645,7 +1645,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_rcv(struct sk_buff *skb) { const struct iphdr *iph; - struct tcphdr *th; + const struct tcphdr *th; struct sock *sk; int ret; struct net *net = dev_net(skb->dev); @@ -1809,7 +1809,7 @@ EXPORT_SYMBOL(tcp_v4_get_peer); void *tcp_v4_tw_get_peer(struct sock *sk) { - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); return inet_getpeer_v4(tw->tw_daddr, 1); } @@ -2381,7 +2381,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) } EXPORT_SYMBOL(tcp_proc_unregister); -static void get_openreq4(struct sock *sk, struct request_sock *req, +static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -2411,9 +2411,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) { int timer_active; unsigned long timer_expires; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); - struct inet_sock *inet = inet_sk(sk); + const struct inet_sock *inet = inet_sk(sk); __be32 dest = inet->inet_daddr; __be32 src = inet->inet_rcv_saddr; __u16 destp = ntohs(inet->inet_dport); @@ -2462,7 +2462,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) len); } -static void get_timewait4_sock(struct inet_timewait_sock *tw, +static void get_timewait4_sock(const struct inet_timewait_sock *tw, struct seq_file *f, int i, int *len) { __be32 dest, src; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d2fe4e06b472..b767a951d47c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -141,7 +141,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, const struct tcphdr *th) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); int paws_reject = 0; @@ -566,7 +566,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock **prev) { struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct sock *child; const struct tcphdr *th = tcp_hdr(skb); __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ed96c543f1cf..980b98f6288c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(sysctl_tcp_cookie_size); /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) +static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; @@ -89,9 +89,9 @@ static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) * Anything in between SND.UNA...SND.UNA+SND.WND also can be already * invalid. OK, let's make this for now: */ -static inline __u32 tcp_acceptable_seq(struct sock *sk) +static inline __u32 tcp_acceptable_seq(const struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); if (!before(tcp_wnd_end(tp), tp->snd_nxt)) return tp->snd_nxt; @@ -116,7 +116,7 @@ static inline __u32 tcp_acceptable_seq(struct sock *sk) static __u16 tcp_advertise_mss(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); int mss = tp->advmss; if (dst) { @@ -133,7 +133,7 @@ static __u16 tcp_advertise_mss(struct sock *sk) /* RFC2861. Reset CWND after idle period longer RTO to "restart window". * This is the first part of cwnd validation mechanism. */ -static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) +static void tcp_cwnd_restart(struct sock *sk, const struct dst_entry *dst) { struct tcp_sock *tp = tcp_sk(sk); s32 delta = tcp_time_stamp - tp->lsndtime; @@ -154,7 +154,7 @@ static void tcp_cwnd_restart(struct sock *sk, struct dst_entry *dst) /* Congestion state accounting after a packet has been sent. */ static void tcp_event_data_sent(struct tcp_sock *tp, - struct sk_buff *skb, struct sock *sk) + struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); const u32 now = tcp_time_stamp; @@ -295,7 +295,7 @@ static u16 tcp_select_window(struct sock *sk) } /* Packet ECN state for a SYN-ACK */ -static inline void TCP_ECN_send_synack(struct tcp_sock *tp, struct sk_buff *skb) +static inline void TCP_ECN_send_synack(const struct tcp_sock *tp, struct sk_buff *skb) { TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR; if (!(tp->ecn_flags & TCP_ECN_OK)) @@ -315,7 +315,7 @@ static inline void TCP_ECN_send_syn(struct sock *sk, struct sk_buff *skb) } static __inline__ void -TCP_ECN_make_synack(struct request_sock *req, struct tcphdr *th) +TCP_ECN_make_synack(const struct request_sock *req, struct tcphdr *th) { if (inet_rsk(req)->ecn_ok) th->ece = 1; @@ -565,7 +565,8 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, */ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_sock *tp = tcp_sk(sk); struct tcp_cookie_values *cvp = tp->cookie_values; unsigned remaining = MAX_TCP_OPTION_SPACE; @@ -743,7 +744,8 @@ static unsigned tcp_synack_options(struct sock *sk, */ static unsigned tcp_established_options(struct sock *sk, struct sk_buff *skb, struct tcp_out_options *opts, - struct tcp_md5sig_key **md5) { + struct tcp_md5sig_key **md5) +{ struct tcp_skb_cb *tcb = skb ? TCP_SKB_CB(skb) : NULL; struct tcp_sock *tp = tcp_sk(sk); unsigned size = 0; @@ -893,7 +895,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, tcp_event_ack_sent(sk, tcp_skb_pcount(skb)); if (skb->len != tcp_header_size) - tcp_event_data_sent(tp, skb, sk); + tcp_event_data_sent(tp, sk); if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS, @@ -926,7 +928,7 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) } /* Initialize TSO segments for a packet. */ -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, +static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { if (skb->len <= mss_now || !sk_can_gso(sk) || @@ -947,7 +949,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, /* When a modification to fackets out becomes necessary, we need to check * skb is counted to fackets_out or not. */ -static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, +static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -962,7 +964,7 @@ static void tcp_adjust_fackets_out(struct sock *sk, struct sk_buff *skb, /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ -static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr) +static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int decr) { struct tcp_sock *tp = tcp_sk(sk); @@ -1146,10 +1148,10 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) } /* Calculate MSS. Not accounting for SACKs here. */ -int tcp_mtu_to_mss(struct sock *sk, int pmtu) +int tcp_mtu_to_mss(const struct sock *sk, int pmtu) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mss_now; /* Calculate base mss without TCP options: @@ -1175,10 +1177,10 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) } /* Inverse of above */ -int tcp_mss_to_mtu(struct sock *sk, int mss) +int tcp_mss_to_mtu(const struct sock *sk, int mss) { - struct tcp_sock *tp = tcp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); int mtu; mtu = mss + @@ -1252,8 +1254,8 @@ EXPORT_SYMBOL(tcp_sync_mss); */ unsigned int tcp_current_mss(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); - struct dst_entry *dst = __sk_dst_get(sk); + const struct tcp_sock *tp = tcp_sk(sk); + const struct dst_entry *dst = __sk_dst_get(sk); u32 mss_now; unsigned header_len; struct tcp_out_options opts; @@ -1313,10 +1315,10 @@ static void tcp_cwnd_validate(struct sock *sk) * modulo only when the receiver window alone is the limiting factor or * when we would be allowed to send the split-due-to-Nagle skb fully. */ -static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_mss_split_point(const struct sock *sk, const struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); u32 needed, window, cwnd_len; window = tcp_wnd_end(tp) - TCP_SKB_CB(skb)->seq; @@ -1336,8 +1338,8 @@ static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb, /* Can at least one segment of SKB be sent right now, according to the * congestion window rules? If so, return how many segments are allowed. */ -static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, - struct sk_buff *skb) +static inline unsigned int tcp_cwnd_test(const struct tcp_sock *tp, + const struct sk_buff *skb) { u32 in_flight, cwnd; @@ -1358,7 +1360,7 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, * This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, +static int tcp_init_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); @@ -1396,7 +1398,7 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp, /* Return non-zero if the Nagle test allows this packet to be * sent now. */ -static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_nagle_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss, int nonagle) { /* Nagle rule does not apply to frames, which sit in the middle of the @@ -1422,7 +1424,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, } /* Does at least the first segment of SKB fit into the send window? */ -static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, +static inline int tcp_snd_wnd_test(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned int cur_mss) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; @@ -1437,10 +1439,10 @@ static inline int tcp_snd_wnd_test(struct tcp_sock *tp, struct sk_buff *skb, * should be put on the wire right now. If so, it returns the number of * packets allowed by the congestion window. */ -static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, +static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, unsigned int cur_mss, int nonagle) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); unsigned int cwnd_quota; tcp_init_tso_segs(sk, skb, cur_mss); @@ -1458,7 +1460,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, /* Test if sending is allowed right now. */ int tcp_may_send_now(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); return skb && @@ -2008,7 +2010,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) } /* Check if coalescing SKBs is legal. */ -static int tcp_can_collapse(struct sock *sk, struct sk_buff *skb) +static int tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) { if (tcp_skb_pcount(skb) > 1) return 0; @@ -2184,7 +2186,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) @@ -2550,7 +2552,7 @@ EXPORT_SYMBOL(tcp_make_synack); /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { - struct dst_entry *dst = __sk_dst_get(sk); + const struct dst_entry *dst = __sk_dst_get(sk); struct tcp_sock *tp = tcp_sk(sk); __u8 rcv_wscale; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ac838965ff34..5a0d6648bbbc 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -115,7 +115,7 @@ static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr, & COOKIEMASK; } -__u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) +__u32 cookie_v6_init_sequence(struct sock *sk, const struct sk_buff *skb, __u16 *mssp) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -137,7 +137,7 @@ __u32 cookie_v6_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp) jiffies / (HZ * 60), mssind); } -static inline int cookie_check(struct sk_buff *skb, __u32 cookie) +static inline int cookie_check(const struct sk_buff *skb, __u32 cookie) { const struct ipv6hdr *iph = ipv6_hdr(skb); const struct tcphdr *th = tcp_hdr(skb); @@ -152,7 +152,7 @@ static inline int cookie_check(struct sk_buff *skb, __u32 cookie) struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) { struct tcp_options_received tcp_opt; - u8 *hash_location; + const u8 *hash_location; struct inet_request_sock *ireq; struct inet6_request_sock *ireq6; struct tcp_request_sock *treq; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5357902c7978..da2ada881cfa 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -114,7 +114,7 @@ static __inline__ __sum16 tcp_v6_check(int len, return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base); } -static __u32 tcp_v6_init_sequence(struct sk_buff *skb) +static __u32 tcp_v6_init_sequence(const struct sk_buff *skb) { return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32, ipv6_hdr(skb)->saddr.s6_addr32, @@ -844,7 +844,7 @@ clear_hash_noput: static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) { - __u8 *hash_location = NULL; + const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); @@ -980,7 +980,8 @@ static int tcp6_gro_complete(struct sk_buff *skb) static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, struct tcp_md5sig_key *key, int rst) { - struct tcphdr *th = tcp_hdr(skb), *t1; + const struct tcphdr *th = tcp_hdr(skb); + struct tcphdr *t1; struct sk_buff *buff; struct flowi6 fl6; struct net *net = dev_net(skb_dst(skb)->dev); @@ -1070,7 +1071,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) { - struct tcphdr *th = tcp_hdr(skb); + const struct tcphdr *th = tcp_hdr(skb); u32 seq = 0, ack_seq = 0; struct tcp_md5sig_key *key = NULL; @@ -1160,7 +1161,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) { struct tcp_extend_values tmp_ext; struct tcp_options_received tmp_opt; - u8 *hash_location; + const u8 *hash_location; struct request_sock *req; struct inet6_request_sock *treq; struct ipv6_pinfo *np = inet6_sk(sk); @@ -1688,7 +1689,7 @@ ipv6_pktoptions: static int tcp_v6_rcv(struct sk_buff *skb) { - struct tcphdr *th; + const struct tcphdr *th; const struct ipv6hdr *hdr; struct sock *sk; int ret; @@ -1856,8 +1857,8 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it) static void *tcp_v6_tw_get_peer(struct sock *sk) { - struct inet6_timewait_sock *tw6 = inet6_twsk(sk); - struct inet_timewait_sock *tw = inet_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); + const struct inet_timewait_sock *tw = inet_twsk(sk); if (tw->tw_family == AF_INET) return tcp_v4_tw_get_peer(sk); @@ -2012,7 +2013,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, int uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -2048,10 +2049,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) __u16 destp, srcp; int timer_active; unsigned long timer_expires; - struct inet_sock *inet = inet_sk(sp); - struct tcp_sock *tp = tcp_sk(sp); + const struct inet_sock *inet = inet_sk(sp); + const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); - struct ipv6_pinfo *np = inet6_sk(sp); + const struct ipv6_pinfo *np = inet6_sk(sp); dest = &np->daddr; src = &np->rcv_saddr; @@ -2103,7 +2104,7 @@ static void get_timewait6_sock(struct seq_file *seq, { const struct in6_addr *dest, *src; __u16 destp, srcp; - struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); + const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); int ttd = tw->tw_ttd - jiffies; if (ttd < 0) -- cgit v1.2.3 From da92b194cc36b5dc1fbd85206aeeffd80bee0c39 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 21 Oct 2011 00:49:15 +0000 Subject: net: hold sock reference while processing tx timestamps The pair of functions, * skb_clone_tx_timestamp() * skb_complete_tx_timestamp() were designed to allow timestamping in PHY devices. The first function, called during the MAC driver's hard_xmit method, identifies PTP protocol packets, clones them, and gives them to the PHY device driver. The PHY driver may hold onto the packet and deliver it at a later time using the second function, which adds the packet to the socket's error queue. As pointed out by Johannes, nothing prevents the socket from disappearing while the cloned packet is sitting in the PHY driver awaiting a timestamp. This patch fixes the issue by taking a reference on the socket for each such packet. In addition, the comments regarding the usage of these function are expanded to highlight the rule that PHY drivers must use skb_complete_tx_timestamp() to release the packet, in order to release the socket reference, too. These functions first appeared in v2.6.36. Reported-by: Johannes Berg Signed-off-by: Richard Cochran Cc: Signed-off-by: Eric Dumazet Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/phy.h | 2 +- include/linux/skbuff.h | 7 ++++++- net/core/timestamping.c | 12 ++++++++++-- 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'net/core') diff --git a/include/linux/phy.h b/include/linux/phy.h index 54fc4138955f..79f337c47388 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -420,7 +420,7 @@ struct phy_driver { /* * Requests a Tx timestamp for 'skb'. The phy driver promises - * to deliver it to the socket's error queue as soon as a + * to deliver it using skb_complete_tx_timestamp() as soon as a * timestamp becomes available. One of the PTP_CLASS_ values * is passed in 'type'. */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 8bd383caa363..0f966460a345 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2020,8 +2020,13 @@ static inline bool skb_defer_rx_timestamp(struct sk_buff *skb) /** * skb_complete_tx_timestamp() - deliver cloned skb with tx timestamps * + * PHY drivers may accept clones of transmitted packets for + * timestamping via their phy_driver.txtstamp method. These drivers + * must call this function to return the skb back to the stack, with + * or without a timestamp. + * * @skb: clone of the the original outgoing packet - * @hwtstamps: hardware time stamps + * @hwtstamps: hardware time stamps, may be NULL if not available * */ void skb_complete_tx_timestamp(struct sk_buff *skb, diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 98a52640e7cd..82fb28857b64 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -57,9 +57,13 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) case PTP_CLASS_V2_VLAN: phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { + if (!atomic_inc_not_zero(&sk->sk_refcnt)) + return; clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) + if (!clone) { + sock_put(sk); return; + } clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } @@ -77,8 +81,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, struct sock_exterr_skb *serr; int err; - if (!hwtstamps) + if (!hwtstamps) { + sock_put(sk); + kfree_skb(skb); return; + } *skb_hwtstamps(skb) = *hwtstamps; serr = SKB_EXT_ERR(skb); @@ -87,6 +94,7 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING; skb->sk = NULL; err = sock_queue_err_skb(sk, skb); + sock_put(sk); if (err) kfree_skb(skb); } -- cgit v1.2.3 From d2237d35748e7f448a9c2d9dc6a85ef637466e24 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 21 Oct 2011 06:24:20 +0000 Subject: rtnetlink: Add missing manual netlink notification in dev_change_net_namespaces Renato Westphal noticed that since commit a2835763e130c343ace5320c20d33c281e7097b7 "rtnetlink: handle rtnl_link netlink notifications manually" was merged we no longer send a netlink message when a networking device is moved from one network namespace to another. Fix this by adding the missing manual notification in dev_change_net_namespaces. Since all network devices that are processed by dev_change_net_namspaces are in the initialized state the complicated tests that guard the manual rtmsg_ifinfo calls in rollback_registered and register_netdevice are unnecessary and we can just perform a plain notification. Cc: stable@kernel.org Tested-by: Renato Westphal Signed-off-by: Eric W. Biederman Signed-off-by: David S. Miller --- net/core/dev.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/core') diff --git a/net/core/dev.c b/net/core/dev.c index b10ff0a71855..ae5cf2d630eb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6115,6 +6115,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); + rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* * Flush the unicast and multicast chains -- cgit v1.2.3