diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-09-16 21:14:54 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-09-16 21:14:54 -0700 |
commit | edb1e9671a990e6eb9f593636deed7ac43ba9084 (patch) | |
tree | 1b8b592411d9d7e4321479f57cb6d1f38ec483e3 | |
parent | fa890d586cc127ce72597ba0a909bfecf784e10c (diff) | |
parent | d9f30ec0b0d129b9cbf2b041a6a3159aa24592f6 (diff) | |
download | linux-edb1e9671a990e6eb9f593636deed7ac43ba9084.tar.bz2 |
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
[VLAN]: Fix net_device leak.
[PPP] generic: Fix receive path data clobbering & non-linear handling
[PPP] generic: Call skb_cow_head before scribbling over skb
[NET] skbuff: Add skb_cow_head
[BRIDGE]: Kill clone argument to br_flood_*
[PPP] pppoe: Fill in header directly in __pppoe_xmit
[PPP] pppoe: Fix data clobbering in __pppoe_xmit and return value
[PPP] pppoe: Fix skb_unshare_check call position
[SCTP]: Convert bind_addr_list locking to RCU
[SCTP]: Add RCU synchronization around sctp_localaddr_list
[PKT_SCHED]: sch_cbq.c: Shut up uninitialized variable warning
[PKTGEN]: srcmac fix
[IPV6]: Fix source address selection.
[IPV4]: Just increment OutDatagrams once per a datagram.
[IPV6]: Just increment OutDatagrams once per a datagram.
[IPV6]: Fix unbalanced socket reference with MSG_CONFIRM.
[NET_SCHED] protect action config/dump from irqs
[NET]: Fix two issues wrt. SO_BINDTODEVICE.
-rw-r--r-- | drivers/net/ppp_generic.c | 58 | ||||
-rw-r--r-- | drivers/net/pppoe.c | 70 | ||||
-rw-r--r-- | include/linux/skbuff.h | 40 | ||||
-rw-r--r-- | include/net/sctp/sctp.h | 1 | ||||
-rw-r--r-- | include/net/sctp/structs.h | 13 | ||||
-rw-r--r-- | net/8021q/vlan.c | 2 | ||||
-rw-r--r-- | net/bridge/br_device.c | 4 | ||||
-rw-r--r-- | net/bridge/br_forward.c | 21 | ||||
-rw-r--r-- | net/bridge/br_input.c | 48 | ||||
-rw-r--r-- | net/bridge/br_netfilter.c | 2 | ||||
-rw-r--r-- | net/bridge/br_private.h | 8 | ||||
-rw-r--r-- | net/core/pktgen.c | 10 | ||||
-rw-r--r-- | net/core/sock.c | 106 | ||||
-rw-r--r-- | net/ipv4/udp.c | 6 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 2 | ||||
-rw-r--r-- | net/ipv6/raw.c | 3 | ||||
-rw-r--r-- | net/ipv6/udp.c | 6 | ||||
-rw-r--r-- | net/sched/act_api.c | 8 | ||||
-rw-r--r-- | net/sched/act_police.c | 4 | ||||
-rw-r--r-- | net/sched/sch_cbq.c | 2 | ||||
-rw-r--r-- | net/sctp/associola.c | 14 | ||||
-rw-r--r-- | net/sctp/bind_addr.c | 70 | ||||
-rw-r--r-- | net/sctp/endpointola.c | 27 | ||||
-rw-r--r-- | net/sctp/ipv6.c | 46 | ||||
-rw-r--r-- | net/sctp/protocol.c | 79 | ||||
-rw-r--r-- | net/sctp/sm_make_chunk.c | 18 | ||||
-rw-r--r-- | net/sctp/socket.c | 134 |
27 files changed, 398 insertions, 404 deletions
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c index 9293c82ef2af..4b49d0e8c7eb 100644 --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -899,17 +899,9 @@ ppp_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Put the 2-byte PPP protocol number on the front, making sure there is room for the address and control fields. */ - if (skb_headroom(skb) < PPP_HDRLEN) { - struct sk_buff *ns; - - ns = alloc_skb(skb->len + dev->hard_header_len, GFP_ATOMIC); - if (ns == 0) - goto outf; - skb_reserve(ns, dev->hard_header_len); - skb_copy_bits(skb, 0, skb_put(ns, skb->len), skb->len); - kfree_skb(skb); - skb = ns; - } + if (skb_cow_head(skb, PPP_HDRLEN)) + goto outf; + pp = skb_push(skb, 2); proto = npindex_to_proto[npi]; pp[0] = proto >> 8; @@ -1533,7 +1525,7 @@ ppp_input_error(struct ppp_channel *chan, int code) static void ppp_receive_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) { - if (skb->len >= 2) { + if (pskb_may_pull(skb, 2)) { #ifdef CONFIG_PPP_MULTILINK /* XXX do channel-level decompression here */ if (PPP_PROTO(skb) == PPP_MP) @@ -1585,7 +1577,7 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) if (ppp->vj == 0 || (ppp->flags & SC_REJ_COMP_TCP)) goto err; - if (skb_tailroom(skb) < 124) { + if (skb_tailroom(skb) < 124 || skb_cloned(skb)) { /* copy to a new sk_buff with more tailroom */ ns = dev_alloc_skb(skb->len + 128); if (ns == 0) { @@ -1656,23 +1648,29 @@ ppp_receive_nonmp_frame(struct ppp *ppp, struct sk_buff *skb) /* check if the packet passes the pass and active filters */ /* the filter instructions are constructed assuming a four-byte PPP header on each packet */ - *skb_push(skb, 2) = 0; - if (ppp->pass_filter - && sk_run_filter(skb, ppp->pass_filter, - ppp->pass_len) == 0) { - if (ppp->debug & 1) - printk(KERN_DEBUG "PPP: inbound frame not passed\n"); - kfree_skb(skb); - return; - } - if (!(ppp->active_filter - && sk_run_filter(skb, ppp->active_filter, - ppp->active_len) == 0)) - ppp->last_recv = jiffies; - skb_pull(skb, 2); -#else - ppp->last_recv = jiffies; + if (ppp->pass_filter || ppp->active_filter) { + if (skb_cloned(skb) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto err; + + *skb_push(skb, 2) = 0; + if (ppp->pass_filter + && sk_run_filter(skb, ppp->pass_filter, + ppp->pass_len) == 0) { + if (ppp->debug & 1) + printk(KERN_DEBUG "PPP: inbound frame " + "not passed\n"); + kfree_skb(skb); + return; + } + if (!(ppp->active_filter + && sk_run_filter(skb, ppp->active_filter, + ppp->active_len) == 0)) + ppp->last_recv = jiffies; + __skb_pull(skb, 2); + } else #endif /* CONFIG_PPP_FILTER */ + ppp->last_recv = jiffies; if ((ppp->dev->flags & IFF_UP) == 0 || ppp->npmode[npi] != NPMODE_PASS) { @@ -1770,7 +1768,7 @@ ppp_receive_mp_frame(struct ppp *ppp, struct sk_buff *skb, struct channel *pch) struct channel *ch; int mphdrlen = (ppp->flags & SC_MP_SHORTSEQ)? MPHDRLEN_SSN: MPHDRLEN; - if (!pskb_may_pull(skb, mphdrlen) || ppp->mrru == 0) + if (!pskb_may_pull(skb, mphdrlen + 1) || ppp->mrru == 0) goto err; /* no good, throw it away */ /* Decode sequence number and begin/end bits */ diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c index 68631a5721ac..0d7f570b9a54 100644 --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -385,12 +385,12 @@ static int pppoe_rcv(struct sk_buff *skb, struct pppoe_hdr *ph; struct pppox_sock *po; - if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) - goto drop; - if (!(skb = skb_share_check(skb, GFP_ATOMIC))) goto out; + if (!pskb_may_pull(skb, sizeof(struct pppoe_hdr))) + goto drop; + ph = pppoe_hdr(skb); po = get_item((unsigned long) ph->sid, eth_hdr(skb)->h_source, dev->ifindex); @@ -848,71 +848,45 @@ static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb) { struct pppox_sock *po = pppox_sk(sk); struct net_device *dev = po->pppoe_dev; - struct pppoe_hdr hdr; struct pppoe_hdr *ph; - int headroom = skb_headroom(skb); int data_len = skb->len; - struct sk_buff *skb2; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) goto abort; - hdr.ver = 1; - hdr.type = 1; - hdr.code = 0; - hdr.sid = po->num; - hdr.length = htons(skb->len); - if (!dev) goto abort; - /* Copy the skb if there is no space for the header. */ - if (headroom < (sizeof(struct pppoe_hdr) + dev->hard_header_len)) { - skb2 = dev_alloc_skb(32+skb->len + - sizeof(struct pppoe_hdr) + - dev->hard_header_len); - - if (skb2 == NULL) - goto abort; - - skb_reserve(skb2, dev->hard_header_len + sizeof(struct pppoe_hdr)); - skb_copy_from_linear_data(skb, skb_put(skb2, skb->len), - skb->len); - } else { - /* Make a clone so as to not disturb the original skb, - * give dev_queue_xmit something it can free. - */ - skb2 = skb_clone(skb, GFP_ATOMIC); - - if (skb2 == NULL) - goto abort; - } + /* Copy the data if there is no space for the header or if it's + * read-only. + */ + if (skb_cow_head(skb, sizeof(*ph) + dev->hard_header_len)) + goto abort; - ph = (struct pppoe_hdr *) skb_push(skb2, sizeof(struct pppoe_hdr)); - memcpy(ph, &hdr, sizeof(struct pppoe_hdr)); - skb2->protocol = __constant_htons(ETH_P_PPP_SES); + __skb_push(skb, sizeof(*ph)); + skb_reset_network_header(skb); - skb_reset_network_header(skb2); + ph = pppoe_hdr(skb); + ph->ver = 1; + ph->type = 1; + ph->code = 0; + ph->sid = po->num; + ph->length = htons(data_len); - skb2->dev = dev; + skb->protocol = __constant_htons(ETH_P_PPP_SES); + skb->dev = dev; - dev->hard_header(skb2, dev, ETH_P_PPP_SES, + dev->hard_header(skb, dev, ETH_P_PPP_SES, po->pppoe_pa.remote, NULL, data_len); - /* We're transmitting skb2, and assuming that dev_queue_xmit - * will free it. The generic ppp layer however, is expecting - * that we give back 'skb' (not 'skb2') in case of failure, - * but free it in case of success. - */ - - if (dev_queue_xmit(skb2) < 0) + if (dev_queue_xmit(skb) < 0) goto abort; - kfree_skb(skb); return 1; abort: - return 0; + kfree_skb(skb); + return 1; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 93c27f71122a..a656cecd373c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1352,6 +1352,22 @@ static inline int skb_clone_writable(struct sk_buff *skb, int len) skb_headroom(skb) + len <= skb->hdr_len; } +static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom, + int cloned) +{ + int delta = 0; + + if (headroom < NET_SKB_PAD) + headroom = NET_SKB_PAD; + if (headroom > skb_headroom(skb)) + delta = headroom - skb_headroom(skb); + + if (delta || cloned) + return pskb_expand_head(skb, ALIGN(delta, NET_SKB_PAD), 0, + GFP_ATOMIC); + return 0; +} + /** * skb_cow - copy header of skb when it is required * @skb: buffer to cow @@ -1366,16 +1382,22 @@ static inline int skb_clone_writable(struct sk_buff *skb, int len) */ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) { - int delta = (headroom > NET_SKB_PAD ? headroom : NET_SKB_PAD) - - skb_headroom(skb); - - if (delta < 0) - delta = 0; + return __skb_cow(skb, headroom, skb_cloned(skb)); +} - if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + (NET_SKB_PAD-1)) & - ~(NET_SKB_PAD-1), 0, GFP_ATOMIC); - return 0; +/** + * skb_cow_head - skb_cow but only making the head writable + * @skb: buffer to cow + * @headroom: needed headroom + * + * This function is identical to skb_cow except that we replace the + * skb_cloned check by skb_header_cloned. It should be used when + * you only need to push on some header and do not need to modify + * the data. + */ +static inline int skb_cow_head(struct sk_buff *skb, unsigned int headroom) +{ + return __skb_cow(skb, headroom, skb_header_cloned(skb)); } /** diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index d529045c1679..c9cc00c85782 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -123,6 +123,7 @@ * sctp/protocol.c */ extern struct sock *sctp_get_ctl_sock(void); +extern void sctp_local_addr_free(struct rcu_head *head); extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, sctp_scope_t, gfp_t gfp, int flags); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index c0d5848c33dc..c2fe2dcc9afc 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -207,6 +207,9 @@ extern struct sctp_globals { * It is a list of sctp_sockaddr_entry. */ struct list_head local_addr_list; + + /* Lock that protects the local_addr_list writers */ + spinlock_t addr_list_lock; /* Flag to indicate if addip is enabled. */ int addip_enable; @@ -242,6 +245,7 @@ extern struct sctp_globals { #define sctp_port_alloc_lock (sctp_globals.port_alloc_lock) #define sctp_port_hashtable (sctp_globals.port_hashtable) #define sctp_local_addr_list (sctp_globals.local_addr_list) +#define sctp_local_addr_lock (sctp_globals.addr_list_lock) #define sctp_addip_enable (sctp_globals.addip_enable) #define sctp_prsctp_enable (sctp_globals.prsctp_enable) @@ -737,8 +741,10 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk); /* This is a structure for holding either an IPv6 or an IPv4 address. */ struct sctp_sockaddr_entry { struct list_head list; + struct rcu_head rcu; union sctp_addr a; __u8 use_as_src; + __u8 valid; }; typedef struct sctp_chunk *(sctp_packet_phandler_t)(struct sctp_association *); @@ -1149,7 +1155,9 @@ int sctp_bind_addr_copy(struct sctp_bind_addr *dest, int flags); int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, __u8 use_as_src, gfp_t gfp); -int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); +int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *, + void (*rcu_call)(struct rcu_head *, + void (*func)(struct rcu_head *))); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, struct sctp_sock *); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, @@ -1220,9 +1228,6 @@ struct sctp_ep_common { * bind_addr.address_list is our set of local IP addresses. */ struct sctp_bind_addr bind_addr; - - /* Protection during address list comparisons. */ - rwlock_t addr_lock; }; diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 1583c5ef963f..2a546919d6fb 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -562,8 +562,6 @@ static int register_vlan_device(struct net_device *real_dev, if (err < 0) goto out_free_newdev; - /* Account for reference in struct vlan_dev_info */ - dev_hold(real_dev); #ifdef VLAN_DEBUG printk(VLAN_DBG "Allocated new device successfully, returning.\n"); #endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 0eded176ce99..99292e8e1d0f 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -41,11 +41,11 @@ int br_dev_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(skb, ETH_HLEN); if (dest[0] & 1) - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); else if ((dst = __br_fdb_get(br, dest)) != NULL) br_deliver(dst->dst, skb); else - br_flood_deliver(br, skb, 0); + br_flood_deliver(br, skb); return 0; } diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index ada7f495445c..bdd7c35c3c7b 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -100,24 +100,13 @@ void br_forward(const struct net_bridge_port *to, struct sk_buff *skb) } /* called under bridge lock */ -static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, +static void br_flood(struct net_bridge *br, struct sk_buff *skb, void (*__packet_hook)(const struct net_bridge_port *p, struct sk_buff *skb)) { struct net_bridge_port *p; struct net_bridge_port *prev; - if (clone) { - struct sk_buff *skb2; - - if ((skb2 = skb_clone(skb, GFP_ATOMIC)) == NULL) { - br->statistics.tx_dropped++; - return; - } - - skb = skb2; - } - prev = NULL; list_for_each_entry_rcu(p, &br->port_list, list) { @@ -148,13 +137,13 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb, int clone, /* called with rcu_read_lock */ -void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_deliver); + br_flood(br, skb, __br_deliver); } /* called under bridge lock */ -void br_flood_forward(struct net_bridge *br, struct sk_buff *skb, int clone) +void br_flood_forward(struct net_bridge *br, struct sk_buff *skb) { - br_flood(br, skb, clone, __br_forward); + br_flood(br, skb, __br_forward); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 6f468fc3357a..3a8a015c92e0 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -43,7 +43,7 @@ int br_handle_frame_finish(struct sk_buff *skb) struct net_bridge_port *p = rcu_dereference(skb->dev->br_port); struct net_bridge *br; struct net_bridge_fdb_entry *dst; - int passedup = 0; + struct sk_buff *skb2; if (!p || p->state == BR_STATE_DISABLED) goto drop; @@ -55,39 +55,35 @@ int br_handle_frame_finish(struct sk_buff *skb) if (p->state == BR_STATE_LEARNING) goto drop; - if (br->dev->flags & IFF_PROMISC) { - struct sk_buff *skb2; + /* The packet skb2 goes to the local host (NULL to skip). */ + skb2 = NULL; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 != NULL) { - passedup = 1; - br_pass_frame_up(br, skb2); - } - } + if (br->dev->flags & IFF_PROMISC) + skb2 = skb; + + dst = NULL; if (is_multicast_ether_addr(dest)) { br->statistics.multicast++; - br_flood_forward(br, skb, !passedup); - if (!passedup) - br_pass_frame_up(br, skb); - goto out; + skb2 = skb; + } else if ((dst = __br_fdb_get(br, dest)) && dst->is_local) { + skb2 = skb; + /* Do not forward the packet since it's local. */ + skb = NULL; } - dst = __br_fdb_get(br, dest); - if (dst != NULL && dst->is_local) { - if (!passedup) - br_pass_frame_up(br, skb); - else - kfree_skb(skb); - goto out; - } + if (skb2 == skb) + skb2 = skb_clone(skb, GFP_ATOMIC); - if (dst != NULL) { - br_forward(dst->dst, skb); - goto out; - } + if (skb2) + br_pass_frame_up(br, skb2); - br_flood_forward(br, skb, 0); + if (skb) { + if (dst) + br_forward(dst->dst, skb); + else + br_flood_forward(br, skb); + } out: return 0; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3ee2022928e3..fc13130035e7 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -183,7 +183,7 @@ int nf_bridge_copy_header(struct sk_buff *skb) int err; int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb); - err = skb_cow(skb, header_size); + err = skb_cow_head(skb, header_size); if (err) return err; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 21bf3a9a03fd..e6dc6f52990d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -170,12 +170,8 @@ extern int br_dev_queue_push_xmit(struct sk_buff *skb); extern void br_forward(const struct net_bridge_port *to, struct sk_buff *skb); extern int br_forward_finish(struct sk_buff *skb); -extern void br_flood_deliver(struct net_bridge *br, - struct sk_buff *skb, - int clone); -extern void br_flood_forward(struct net_bridge *br, - struct sk_buff *skb, - int clone); +extern void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb); +extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 36fdea71d742..803d0c8826af 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -111,6 +111,9 @@ * * 802.1Q/Q-in-Q support by Francesco Fondelli (FF) <francesco.fondelli@gmail.com> * + * Fixed src_mac command to set source mac of packet to value specified in + * command by Adit Ranadive <adit.262@gmail.com> + * */ #include <linux/sys.h> #include <linux/types.h> @@ -1451,8 +1454,11 @@ static ssize_t pktgen_if_write(struct file *file, } if (!strcmp(name, "src_mac")) { char *v = valstr; + unsigned char old_smac[ETH_ALEN]; unsigned char *m = pkt_dev->src_mac; + memcpy(old_smac, pkt_dev->src_mac, ETH_ALEN); + len = strn_len(&user_buffer[i], sizeof(valstr) - 1); if (len < 0) { return len; @@ -1481,6 +1487,10 @@ static ssize_t pktgen_if_write(struct file *file, } } + /* Set up Src MAC */ + if (compare_ether_addr(old_smac, pkt_dev->src_mac)) + memcpy(&(pkt_dev->hh[6]), pkt_dev->src_mac, ETH_ALEN); + sprintf(pg_result, "OK: srcmac"); return count; } diff --git a/net/core/sock.c b/net/core/sock.c index cfed7d42c485..190de61cd648 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -362,6 +362,61 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie) } EXPORT_SYMBOL(sk_dst_check); +static int sock_bindtodevice(struct sock *sk, char __user *optval, int optlen) +{ + int ret = -ENOPROTOOPT; +#ifdef CONFIG_NETDEVICES + char devname[IFNAMSIZ]; + int index; + + /* Sorry... */ + ret = -EPERM; + if (!capable(CAP_NET_RAW)) + goto out; + + ret = -EINVAL; + if (optlen < 0) + goto out; + + /* Bind this socket to a particular device like "eth0", + * as specified in the passed interface name. If the + * name is "" or the option length is zero the socket + * is not bound. + */ + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + memset(devname, 0, sizeof(devname)); + + ret = -EFAULT; + if (copy_from_user(devname, optval, optlen)) + goto out; + + if (devname[0] == '\0') { + index = 0; + } else { + struct net_device *dev = dev_get_by_name(devname); + + ret = -ENODEV; + if (!dev) + goto out; + + index = dev->ifindex; + dev_put(dev); + } + + lock_sock(sk); + sk->sk_bound_dev_if = index; + sk_dst_reset(sk); + release_sock(sk); + + ret = 0; + +out: +#endif + + return ret; +} + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. @@ -390,6 +445,9 @@ int sock_setsockopt(struct socket *sock, int level, int optname, } #endif + if (optname == SO_BINDTODEVICE) + return sock_bindtodevice(sk, optval, optlen); + if (optlen < sizeof(int)) return -EINVAL; @@ -578,54 +636,6 @@ set_rcvbuf: ret = sock_set_timeout(&sk->sk_sndtimeo, optval, optlen); break; -#ifdef CONFIG_NETDEVICES - case SO_BINDTODEVICE: - { - char devname[IFNAMSIZ]; - - /* Sorry... */ - if (!capable(CAP_NET_RAW)) { - ret = -EPERM; - break; - } - - /* Bind this socket to a particular device like "eth0", - * as specified in the passed interface name. If the - * name is "" or the option length is zero the socket - * is not bound. - */ - - if (!valbool) { - sk->sk_bound_dev_if = 0; - } else { - if (optlen > IFNAMSIZ - 1) - optlen = IFNAMSIZ - 1; - memset(devname, 0, sizeof(devname)); - if (copy_from_user(devname, optval, optlen)) { - ret = -EFAULT; - break; - } - - /* Remove any cached route for this socket. */ - sk_dst_reset(sk); - - if (devname[0] == '\0') { - sk->sk_bound_dev_if = 0; - } else { - struct net_device *dev = dev_get_by_name(devname); - if (!dev) { - ret = -ENODEV; - break; - } - sk->sk_bound_dev_if = dev->ifindex; - dev_put(dev); - } - } - break; - } -#endif - - case SO_ATTACH_FILTER: ret = -EINVAL; if (optlen == sizeof(struct sock_fprog)) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 28355350fb62..69d4bd10f9c6 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -505,6 +505,8 @@ send: out: up->len = 0; up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -693,10 +695,8 @@ out: ip_rt_put(rt); if (free) kfree(ipc.opt); - if (!err) { - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 91ef3be5abad..45b4c82148a0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1021,7 +1021,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, hiscore.rule++; } if (ipv6_saddr_preferred(score.addr_type) || - (((ifa_result->flags & + (((ifa->flags & (IFA_F_DEPRECATED|IFA_F_OPTIMISTIC)) == 0))) { score.attrs |= IPV6_SADDR_SCORE_PREFERRED; if (!(hiscore.attrs & IPV6_SADDR_SCORE_PREFERRED)) { diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e27383d855de..77167afa3455 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -882,11 +882,10 @@ back_from_confirm: ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) err = rawv6_push_pending_frames(sk, &fl, rp); + release_sock(sk); } done: dst_release(dst); - if (!inet->hdrincl) - release_sock(sk); out: fl6_sock_release(flowlabel); return err<0?err:len; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4210951edb6e..c347f3e30e2e 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -555,6 +555,8 @@ static int udp_v6_push_pending_frames(struct sock *sk) out: up->len = 0; up->pending = 0; + if (!err) + UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, up->pcflag); return err; } @@ -823,10 +825,8 @@ do_append_data: release_sock(sk); out: fl6_sock_release(flowlabel); - if (!err) { - UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + if (!err) return len; - } /* * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting * ENOBUFS might not be good (it's not tunable per se), but otherwise diff --git a/net/sched/act_api.c b/net/sched/act_api.c index feef366cad5d..72cdb0fade20 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -68,7 +68,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; struct rtattr *r ; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); s_i = cb->args[0]; @@ -96,7 +96,7 @@ static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, } } done: - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); if (n_i) cb->args[0] += n_i; return n_i; @@ -156,13 +156,13 @@ struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) { struct tcf_common *p; - read_lock(hinfo->lock); + read_lock_bh(hinfo->lock); for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; p = p->tcfc_next) { if (p->tcfc_index == index) break; } - read_unlock(hinfo->lock); + read_unlock_bh(hinfo->lock); return p; } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 6085be578459..17f6f27e28a2 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -56,7 +56,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; - read_lock(&police_lock); + read_lock_bh(&police_lock); s_i = cb->args[0]; @@ -85,7 +85,7 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c } } done: - read_unlock(&police_lock); + read_unlock_bh(&police_lock); if (n_i) cb->args[0] += n_i; return n_i; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index e38c2839b25c..cbef3bbfc20f 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -380,7 +380,7 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); int len = skb->len; - int ret; + int uninitialized_var(ret); struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_ACT diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 2ad1caf1ea42..9bad8ba0feda 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -99,7 +99,6 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initialize the bind addr area. */ sctp_bind_addr_init(&asoc->base.bind_addr, ep->base.bind_addr.port); - rwlock_init(&asoc->base.addr_lock); asoc->state = SCTP_STATE_CLOSED; @@ -937,8 +936,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, { struct sctp_transport *transport; - sctp_read_lock(&asoc->base.addr_lock); - if ((htons(asoc->base.bind_addr.port) == laddr->v4.sin_port) && (htons(asoc->peer.port) == paddr->v4.sin_port)) { transport = sctp_assoc_lookup_paddr(asoc, paddr); @@ -952,7 +949,6 @@ struct sctp_transport *sctp_assoc_is_match(struct sctp_association *asoc, transport = NULL; out: - sctp_read_unlock(&asoc->base.addr_lock); return transport; } @@ -1376,19 +1372,13 @@ int sctp_assoc_set_bind_addr_from_cookie(struct sctp_association *asoc, int sctp_assoc_lookup_laddr(struct sctp_association *asoc, const union sctp_addr *laddr) { - int found; + int found = 0; - sctp_read_lock(&asoc->base.addr_lock); if ((asoc->base.bind_addr.port == ntohs(laddr->v4.sin_port)) && sctp_bind_addr_match(&asoc->base.bind_addr, laddr, - sctp_sk(asoc->base.sk))) { + sctp_sk(asoc->base.sk))) found = 1; - goto out; - } - found = 0; -out: - sctp_read_unlock(&asoc->base.addr_lock); return found; } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index fdb287a9e2e2..d35cbf5aae33 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -163,9 +163,15 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, addr->a.v4.sin_port = htons(bp->port); addr->use_as_src = use_as_src; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); - list_add_tail(&addr->list, &bp->address_list); + INIT_RCU_HEAD(&addr->rcu); + + /* We always hold a socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_add_tail_rcu(&addr->list, &bp->address_list); SCTP_DBG_OBJCNT_INC(addr); return 0; @@ -174,23 +180,35 @@ int sctp_add_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *new, /* Delete an address from the bind address list in the SCTP_bind_addr * structure. */ -int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr) +int sctp_del_bind_addr(struct sctp_bind_addr *bp, union sctp_addr *del_addr, + void (*rcu_call)(struct rcu_head *head, + void (*func)(struct rcu_head *head))) { - struct list_head *pos, *temp; - struct sctp_sockaddr_entry *addr; + struct sctp_sockaddr_entry *addr, *temp; - list_for_each_safe(pos, temp, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* We hold the socket lock when calling this function, + * and that acts as a writer synchronizing lock. + */ + list_for_each_entry_safe(addr, temp, &bp->address_list, list) { if (sctp_cmp_addr_exact(&addr->a, del_addr)) { /* Found the exact match. */ - list_del(pos); - kfree(addr); - SCTP_DBG_OBJCNT_DEC(addr); - - return 0; + addr->valid = 0; + list_del_rcu(&addr->list); + break; } } + /* Call the rcu callback provided in the args. This function is + * called by both BH packet processing and user side socket option + * processing, but it works on different lists in those 2 contexts. + * Each context provides it's own callback, whether call_rcu_bh() + * or call_rcu(), to make sure that we wait for an appropriate time. + */ + if (addr && !addr->valid) { + rcu_call(&addr->rcu, sctp_local_addr_free); + SCTP_DBG_OBJCNT_DEC(addr); + } + return -EINVAL; } @@ -300,15 +318,20 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, struct sctp_sock *opt) { struct sctp_sockaddr_entry *laddr; - struct list_head *pos; - - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (opt->pf->cmp_addr(&laddr->a, addr, opt)) - return 1; + int match = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; + if (opt->pf->cmp_addr(&laddr->a, addr, opt)) { + match = 1; + break; + } } + rcu_read_unlock(); - return 0; + return match; } /* Find the first address in the bind address list that is not present in @@ -323,18 +346,19 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, union sctp_addr *addr; void *addr_buf; struct sctp_af *af; - struct list_head *pos; int i; - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + /* This is only called sctp_send_asconf_del_ip() and we hold + * the socket lock in that code patch, so that address list + * can't change. + */ + list_for_each_entry(laddr, &bp->address_list, list) { addr_buf = (union sctp_addr *)addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(addr->v4.sin_family); if (!af) - return NULL; + break; if (opt->pf->cmp_addr(&laddr->a, addr, opt)) break; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 1404a9e2e78f..8f485a0d14bd 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -92,7 +92,6 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, /* Initialize the bind addr area */ sctp_bind_addr_init(&ep->base.bind_addr, 0); - rwlock_init(&ep->base.addr_lock); /* Remember who we are attached to. */ ep->base.sk = sk; @@ -225,21 +224,14 @@ void sctp_endpoint_put(struct sctp_endpoint *ep) struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, const union sctp_addr *laddr) { - struct sctp_endpoint *retval; + struct sctp_endpoint *retval = NULL; - sctp_read_lock(&ep->base.addr_lock); if (htons(ep->base.bind_addr.port) == laddr->v4.sin_port) { if (sctp_bind_addr_match(&ep->base.bind_addr, laddr, - sctp_sk(ep->base.sk))) { + sctp_sk(ep->base.sk))) retval = ep; - goto out; - } } - retval = NULL; - -out: - sctp_read_unlock(&ep->base.addr_lock); return retval; } @@ -261,9 +253,7 @@ static struct sctp_association *__sctp_endpoint_lookup_assoc( list_for_each(pos, &ep->asocs) { asoc = list_entry(pos, struct sctp_association, asocs); if (rport == asoc->peer.port) { - sctp_read_lock(&asoc->base.addr_lock); *transport = sctp_assoc_lookup_paddr(asoc, paddr); - sctp_read_unlock(&asoc->base.addr_lock); if (*transport) return asoc; @@ -295,20 +285,17 @@ struct sctp_association *sctp_endpoint_lookup_assoc( int sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr) { - struct list_head *pos; struct sctp_sockaddr_entry *addr; struct sctp_bind_addr *bp; - sctp_read_lock(&ep->base.addr_lock); bp = &ep->base.bind_addr; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (sctp_has_association(&addr->a, paddr)) { - sctp_read_unlock(&ep->base.addr_lock); + /* This function is called with the socket lock held, + * so the address_list can not change. + */ + list_for_each_entry(addr, &bp->address_list, list) { + if (sctp_has_association(&addr->a, paddr)) return 1; - } } - sctp_read_unlock(&ep->base.addr_lock); return 0; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f8aa23dda1c1..670fd2740b89 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -77,13 +77,18 @@ #include <asm/uaccess.h> -/* Event handler for inet6 address addition/deletion events. */ +/* Event handler for inet6 address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct inet6_ifaddr *ifa = (struct inet6_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -94,19 +99,26 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev, memcpy(&addr->a.v6.sin6_addr, &ifa->addr, sizeof(struct in6_addr)); addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); - if (ipv6_addr_equal(&addr->a.v6.sin6_addr, &ifa->addr)) { - list_del(pos); - kfree(addr); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { + if (ipv6_addr_equal(&addr->a.v6.sin6_addr, + &ifa->addr)) { + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -290,9 +302,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, union sctp_addr *saddr) { struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; sctp_scope_t scope; union sctp_addr *baddr = NULL; __u8 matchlen = 0; @@ -312,14 +322,14 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, scope = sctp_scope(daddr); bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; /* Go through the bind address list and find the best source address * that matches the scope of the destination address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (laddr->a.sa.sa_family == AF_INET6) && (scope <= sctp_scope(&laddr->a))) { @@ -341,7 +351,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr)); } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); } /* Make a copy of all potential local addresses. */ @@ -367,7 +377,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, addr->a.v6.sin6_port = 0; addr->a.v6.sin6_addr = ifp->addr; addr->a.v6.sin6_scope_id = dev->ifindex; + addr->valid = 1; INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index e98579b788b8..3d036cdfae41 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -153,6 +153,9 @@ static void sctp_v4_copy_addrlist(struct list_head *addrlist, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; + addr->valid = 1; + INIT_LIST_HEAD(&addr->list); + INIT_RCU_HEAD(&addr->rcu); list_add_tail(&addr->list, addrlist); } } @@ -192,16 +195,24 @@ static void sctp_free_local_addr_list(void) } } +void sctp_local_addr_free(struct rcu_head *head) +{ + struct sctp_sockaddr_entry *e = container_of(head, + struct sctp_sockaddr_entry, rcu); + kfree(e); +} + /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; int error = 0; - struct list_head *pos, *temp; - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; if (sctp_in_scope(&addr->a, scope)) { /* Now that the address is in scope, check to see if * the address type is really supported by the local @@ -213,7 +224,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, (copy_flags & SCTP_ADDR6_ALLOWED) && (copy_flags & SCTP_ADDR6_PEERSUPP)))) { error = sctp_add_bind_addr(bp, &addr->a, 1, - GFP_ATOMIC); + GFP_ATOMIC); if (error) goto end_copy; } @@ -221,6 +232,7 @@ int sctp_copy_local_addr_list(struct sctp_bind_addr *bp, sctp_scope_t scope, } end_copy: + rcu_read_unlock(); return error; } @@ -416,9 +428,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, struct rtable *rt; struct flowi fl; struct sctp_bind_addr *bp; - rwlock_t *addr_lock; struct sctp_sockaddr_entry *laddr; - struct list_head *pos; struct dst_entry *dst = NULL; union sctp_addr dst_saddr; @@ -447,23 +457,20 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, goto out; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; if (dst) { /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, - list); - if (!laddr->use_as_src) + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid || !laddr->use_as_src) continue; sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } - sctp_read_unlock(addr_lock); + rcu_read_unlock(); /* None of the bound addresses match the source address of the * dst. So release it. @@ -475,10 +482,10 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and try to get a dst that * matches a bind address as the source address. */ - sctp_read_lock(addr_lock); - list_for_each(pos, &bp->address_list) { - laddr = list_entry(pos, struct sctp_sockaddr_entry, list); - + rcu_read_lock(); + list_for_each_entry_rcu(laddr, &bp->address_list, list) { + if (!laddr->valid) + continue; if ((laddr->use_as_src) && (AF_INET == laddr->a.sa.sa_family)) { fl.fl4_src = laddr->a.v4.sin_addr.s_addr; @@ -490,7 +497,7 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, } out_unlock: - sctp_read_unlock(addr_lock); + rcu_read_unlock(); out: if (dst) SCTP_DEBUG_PRINTK("rt_dst:%u.%u.%u.%u, rt_src:%u.%u.%u.%u\n", @@ -600,13 +607,18 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } -/* Event handler for inet address addition/deletion events. */ +/* Event handler for inet address addition/deletion events. + * The sctp_local_addr_list needs to be protocted by a spin lock since + * multiple notifiers (say IPv4 and IPv6) may be running at the same + * time and thus corrupt the list. + * The reader side is protected with RCU. + */ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, void *ptr) { struct in_ifaddr *ifa = (struct in_ifaddr *)ptr; - struct sctp_sockaddr_entry *addr; - struct list_head *pos, *temp; + struct sctp_sockaddr_entry *addr = NULL; + struct sctp_sockaddr_entry *temp; switch (ev) { case NETDEV_UP: @@ -615,19 +627,25 @@ static int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, addr->a.v4.sin_family = AF_INET; addr->a.v4.sin_port = 0; addr->a.v4.sin_addr.s_addr = ifa->ifa_local; - list_add_tail(&addr->list, &sctp_local_addr_list); + addr->valid = 1; + spin_lock_bh(&sctp_local_addr_lock); + list_add_tail_rcu(&addr->list, &sctp_local_addr_list); + spin_unlock_bh(&sctp_local_addr_lock); } break; case NETDEV_DOWN: - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + spin_lock_bh(&sctp_local_addr_lock); + list_for_each_entry_safe(addr, temp, + &sctp_local_addr_list, list) { if (addr->a.v4.sin_addr.s_addr == ifa->ifa_local) { - list_del(pos); - kfree(addr); + addr->valid = 0; + list_del_rcu(&addr->list); break; } } - + spin_unlock_bh(&sctp_local_addr_lock); + if (addr && !addr->valid) + call_rcu(&addr->rcu, sctp_local_addr_free); break; } @@ -1160,6 +1178,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the local address list. */ INIT_LIST_HEAD(&sctp_local_addr_list); + spin_lock_init(&sctp_local_addr_lock); sctp_get_local_addr_list(); /* Register notifier for inet address additions/deletions. */ @@ -1227,6 +1246,9 @@ SCTP_STATIC __exit void sctp_exit(void) sctp_v6_del_protocol(); inet_del_protocol(&sctp_protocol, IPPROTO_SCTP); + /* Unregister notifier for inet address additions/deletions. */ + unregister_inetaddr_notifier(&sctp_inetaddr_notifier); + /* Free the local address list. */ sctp_free_local_addr_list(); @@ -1240,9 +1262,6 @@ SCTP_STATIC __exit void sctp_exit(void) inet_unregister_protosw(&sctp_stream_protosw); inet_unregister_protosw(&sctp_seqpacket_protosw); - /* Unregister notifier for inet address additions/deletions. */ - unregister_inetaddr_notifier(&sctp_inetaddr_notifier); - sctp_sysctl_unregister(); list_del(&sctp_ipv4_specific.list); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 79856c924525..2e34220d94cd 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1531,7 +1531,7 @@ no_hmac: /* Also, add the destination address. */ if (list_empty(&retval->base.bind_addr.address_list)) { sctp_add_bind_addr(&retval->base.bind_addr, &chunk->dest, 1, - GFP_ATOMIC); + GFP_ATOMIC); } retval->next_tsn = retval->c.initial_tsn; @@ -2613,22 +2613,16 @@ static int sctp_asconf_param_success(struct sctp_association *asoc, switch (asconf_param->param_hdr.type) { case SCTP_PARAM_ADD_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - list_for_each(pos, &bp->address_list) { - saddr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* This is always done in BH context with a socket lock + * held, so the list can not change. + */ + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, &addr)) saddr->use_as_src = 1; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); break; case SCTP_PARAM_DEL_IP: - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); - retval = sctp_del_bind_addr(bp, &addr); - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, &addr, call_rcu_bh); list_for_each(pos, &asoc->peer.transport_addr_list) { transport = list_entry(pos, struct sctp_transport, transports); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 33354602ae86..772fbfb4bfda 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -367,14 +367,10 @@ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) if (!bp->port) bp->port = inet_sk(sk)->num; - /* Add the address to the bind address list. */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - /* Use GFP_ATOMIC since BHs are disabled. */ + /* Add the address to the bind address list. + * Use GFP_ATOMIC since BHs will be disabled. + */ ret = sctp_add_bind_addr(bp, addr, 1, GFP_ATOMIC); - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); /* Copy back into socket for getsockname() use. */ if (!ret) { @@ -544,15 +540,12 @@ static int sctp_send_asconf_add_ip(struct sock *sk, if (i < addrcnt) continue; - /* Use the first address in bind addr list of association as - * Address Parameter of ASCONF CHUNK. + /* Use the first valid address in bind addr list of + * association as Address Parameter of ASCONF CHUNK. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; p = bp->address_list.next; laddr = list_entry(p, struct sctp_sockaddr_entry, list); - sctp_read_unlock(&asoc->base.addr_lock); - chunk = sctp_make_asconf_update_ip(asoc, &laddr->a, addrs, addrcnt, SCTP_PARAM_ADD_IP); if (!chunk) { @@ -567,8 +560,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, /* Add the new addresses to the bind address list with * use_as_src set to 0. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { addr = (union sctp_addr *)addr_buf; @@ -578,8 +569,6 @@ static int sctp_send_asconf_add_ip(struct sock *sk, GFP_ATOMIC); addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); } out: @@ -651,13 +640,7 @@ static int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) * socket routing and failover schemes. Refer to comments in * sctp_do_bind(). -daisy */ - sctp_local_bh_disable(); - sctp_write_lock(&ep->base.addr_lock); - - retval = sctp_del_bind_addr(bp, sa_addr); - - sctp_write_unlock(&ep->base.addr_lock); - sctp_local_bh_enable(); + retval = sctp_del_bind_addr(bp, sa_addr, call_rcu); addr_buf += af->sockaddr_len; err_bindx_rem: @@ -748,14 +731,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, * make sure that we do not delete all the addresses in the * association. */ - sctp_read_lock(&asoc->base.addr_lock); bp = &asoc->base.bind_addr; laddr = sctp_find_unmatch_addr(bp, (union sctp_addr *)addrs, addrcnt, sp); - sctp_read_unlock(&asoc->base.addr_lock); if (!laddr) continue; + /* We do not need RCU protection throughout this loop + * because this is done under a socket lock from the + * setsockopt call. + */ chunk = sctp_make_asconf_update_ip(asoc, laddr, addrs, addrcnt, SCTP_PARAM_DEL_IP); if (!chunk) { @@ -766,23 +751,16 @@ static int sctp_send_asconf_del_ip(struct sock *sk, /* Reset use_as_src flag for the addresses in the bind address * list that are to be deleted. */ - sctp_local_bh_disable(); - sctp_write_lock(&asoc->base.addr_lock); addr_buf = addrs; for (i = 0; i < addrcnt; i++) { laddr = (union sctp_addr *)addr_buf; af = sctp_get_af_specific(laddr->v4.sin_family); - list_for_each(pos1, &bp->address_list) { - saddr = list_entry(pos1, - struct sctp_sockaddr_entry, - list); + list_for_each_entry(saddr, &bp->address_list, list) { if (sctp_cmp_addr_exact(&saddr->a, laddr)) saddr->use_as_src = 0; } addr_buf += af->sockaddr_len; } - sctp_write_unlock(&asoc->base.addr_lock); - sctp_local_bh_enable(); /* Update the route and saddr entries for all the transports * as some of the addresses in the bind address list are @@ -4059,9 +4037,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, sctp_assoc_t id; struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos, *temp; struct sctp_sockaddr_entry *addr; - rwlock_t *addr_lock; int cnt = 0; if (len < sizeof(sctp_assoc_t)) @@ -4078,17 +4054,13 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, */ if (0 == id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, count the valid * addresses from the global local address list. */ @@ -4096,27 +4068,33 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); if (sctp_is_any(&addr->a)) { - list_for_each_safe(pos, temp, &sctp_local_addr_list) { - addr = list_entry(pos, - struct sctp_sockaddr_entry, - list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, + &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; + cnt++; } + rcu_read_unlock(); } else { cnt = 1; } goto done; } - list_for_each(pos, &bp->address_list) { + /* Protection on the bound address list is not needed, + * since in the socket option context we hold the socket lock, + * so there is no way that the bound address list can change. + */ + list_for_each_entry(addr, &bp->address_list, list) { cnt ++; } - done: - sctp_read_unlock(addr_lock); return cnt; } @@ -4127,14 +4105,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, int max_addrs, void *to, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4149,6 +4129,7 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, cnt ++; if (cnt >= max_addrs) break; } + rcu_read_unlock(); return cnt; } @@ -4156,14 +4137,16 @@ static int sctp_copy_laddrs_old(struct sock *sk, __u16 port, static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, size_t space_left, int *bytes_copied) { - struct list_head *pos, *next; struct sctp_sockaddr_entry *addr; union sctp_addr temp; int cnt = 0; int addrlen; - list_for_each_safe(pos, next, &sctp_local_addr_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + rcu_read_lock(); + list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { + if (!addr->valid) + continue; + if ((PF_INET == sk->sk_family) && (AF_INET6 == addr->a.sa.sa_family)) continue; @@ -4171,8 +4154,10 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, sctp_get_pf_specific(sk->sk_family)->addr_v4map(sctp_sk(sk), &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; - if (space_left < addrlen) - return -ENOMEM; + if (space_left < addrlen) { + cnt = -ENOMEM; + break; + } memcpy(to, &temp, addrlen); to += addrlen; @@ -4180,6 +4165,7 @@ static int sctp_copy_laddrs(struct sock *sk, __u16 port, void *to, space_left -= addrlen; *bytes_copied += addrlen; } + rcu_read_unlock(); return cnt; } @@ -4192,7 +4178,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs_old getaddrs; struct sctp_sockaddr_entry *addr; @@ -4200,7 +4185,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; void *addrs; void *buf; @@ -4222,13 +4206,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = getaddrs.addrs; @@ -4242,8 +4224,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4259,8 +4239,11 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; @@ -4272,8 +4255,6 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - /* copy the entire address list into the user provided space */ if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; @@ -4295,7 +4276,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, { struct sctp_bind_addr *bp; struct sctp_association *asoc; - struct list_head *pos; int cnt = 0; struct sctp_getaddrs getaddrs; struct sctp_sockaddr_entry *addr; @@ -4303,7 +4283,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, union sctp_addr temp; struct sctp_sock *sp = sctp_sk(sk); int addrlen; - rwlock_t *addr_lock; int err = 0; size_t space_left; int bytes_copied = 0; @@ -4324,13 +4303,11 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, */ if (0 == getaddrs.assoc_id) { bp = &sctp_sk(sk)->ep->base.bind_addr; - addr_lock = &sctp_sk(sk)->ep->base.addr_lock; } else { asoc = sctp_id2assoc(sk, getaddrs.assoc_id); if (!asoc) return -EINVAL; bp = &asoc->base.bind_addr; - addr_lock = &asoc->base.addr_lock; } to = optval + offsetof(struct sctp_getaddrs,addrs); @@ -4340,8 +4317,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (!addrs) return -ENOMEM; - sctp_read_lock(addr_lock); - /* If the endpoint is bound to 0.0.0.0 or ::0, get the valid * addresses from the global local address list. */ @@ -4353,21 +4328,24 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, space_left, &bytes_copied); if (cnt < 0) { err = cnt; - goto error_lock; + goto out; } goto copy_getaddrs; } } buf = addrs; - list_for_each(pos, &bp->address_list) { - addr = list_entry(pos, struct sctp_sockaddr_entry, list); + /* Protection on the bound address list is not needed since + * in the socket option context we hold a socket lock and + * thus the bound address list can't change. + */ + list_for_each_entry(addr, &bp->address_list, list) { memcpy(&temp, &addr->a, sizeof(temp)); sctp_get_pf_specific(sk->sk_family)->addr_v4map(sp, &temp); addrlen = sctp_get_af_specific(temp.sa.sa_family)->sockaddr_len; if (space_left < addrlen) { err = -ENOMEM; /*fixme: right error?*/ - goto error_lock; + goto out; } memcpy(buf, &temp, addrlen); buf += addrlen; @@ -4377,8 +4355,6 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, } copy_getaddrs: - sctp_read_unlock(addr_lock); - if (copy_to_user(to, addrs, bytes_copied)) { err = -EFAULT; goto out; @@ -4389,12 +4365,6 @@ copy_getaddrs: } if (put_user(bytes_copied, optlen)) err = -EFAULT; - - goto out; - -error_lock: - sctp_read_unlock(addr_lock); - out: kfree(addrs); return err; |