diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/dev.c | 12 | ||||
-rw-r--r-- | net/core/skbuff.c | 30 | ||||
-rw-r--r-- | net/dccp/ipv4.c | 3 | ||||
-rw-r--r-- | net/dccp/ipv6.c | 3 | ||||
-rw-r--r-- | net/dccp/minisocks.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 34 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 64 | ||||
-rw-r--r-- | net/ipv6/ip6_gre.c | 9 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 3 | ||||
-rw-r--r-- | net/mpls/af_mpls.c | 125 | ||||
-rw-r--r-- | net/mpls/internal.h | 6 | ||||
-rw-r--r-- | net/netfilter/nft_reject.c | 2 | ||||
-rw-r--r-- | net/netfilter/nft_reject_inet.c | 2 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 6 | ||||
-rw-r--r-- | net/tipc/link.c | 1 | ||||
-rw-r--r-- | net/tipc/server.c | 9 | ||||
-rw-r--r-- | net/tipc/socket.c | 3 | ||||
-rw-r--r-- | net/unix/garbage.c | 70 |
20 files changed, 290 insertions, 105 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 1796cef55ab5..c7ba0388f1be 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3079,7 +3079,7 @@ static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) { - if (next_cpu != RPS_NO_CPU) { + if (next_cpu < nr_cpu_ids) { #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; @@ -3184,7 +3184,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow * table entry), switch if one of the following holds: - * - Current CPU is unset (equal to RPS_NO_CPU). + * - Current CPU is unset (>= nr_cpu_ids). * - Current CPU is offline. * - The current CPU's queue tail has advanced beyond the * last packet that was enqueued using this table entry. @@ -3192,14 +3192,14 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, * have been dequeued, thus preserving in order delivery. */ if (unlikely(tcpu != next_cpu) && - (tcpu == RPS_NO_CPU || !cpu_online(tcpu) || + (tcpu >= nr_cpu_ids || !cpu_online(tcpu) || ((int)(per_cpu(softnet_data, tcpu).input_queue_head - rflow->last_qtail)) >= 0)) { tcpu = next_cpu; rflow = set_rps_cpu(dev, skb, rflow, next_cpu); } - if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) { + if (tcpu < nr_cpu_ids && cpu_online(tcpu)) { *rflowp = rflow; cpu = tcpu; goto done; @@ -3240,14 +3240,14 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - int cpu; + unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; cpu = ACCESS_ONCE(rflow->cpu); - if (rflow->filter == filter_id && cpu != RPS_NO_CPU && + if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < (int)(10 * flow_table->mask))) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index d1967dab9cc6..3cfff2a3d651 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -280,13 +280,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -297,7 +298,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -311,7 +312,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -328,6 +328,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { @@ -348,7 +365,8 @@ static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp = gfp_mask; if (order) { - gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY; + gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY | + __GFP_NOMEMALLOC; page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); nc->frag.size = PAGE_SIZE << (page ? order : 0); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 2b4f21d34df6..ccf4c5629b3c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -453,7 +453,8 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) iph->saddr, iph->daddr); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9d0551092c6c..5165571f397a 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -301,7 +301,8 @@ static struct sock *dccp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &iph->daddr, inet6_iif(skb)); if (req) { nsk = dccp_check_req(sk, skb, req); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &dccp_hashinfo, diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index 5f566663e47f..30addee2dd03 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -186,8 +186,7 @@ struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb, if (child == NULL) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); out: return child; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5c3dd6267ed3..8976ca423a07 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -564,6 +564,40 @@ int inet_rtx_syn_ack(struct sock *parent, struct request_sock *req) } EXPORT_SYMBOL(inet_rtx_syn_ack); +/* return true if req was found in the syn_table[] */ +static bool reqsk_queue_unlink(struct request_sock_queue *queue, + struct request_sock *req) +{ + struct listen_sock *lopt = queue->listen_opt; + struct request_sock **prev; + bool found = false; + + spin_lock(&queue->syn_wait_lock); + + for (prev = &lopt->syn_table[req->rsk_hash]; *prev != NULL; + prev = &(*prev)->dl_next) { + if (*prev == req) { + *prev = req->dl_next; + found = true; + break; + } + } + + spin_unlock(&queue->syn_wait_lock); + if (del_timer(&req->rsk_timer)) + reqsk_put(req); + return found; +} + +void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req) +{ + if (reqsk_queue_unlink(&inet_csk(sk)->icsk_accept_queue, req)) { + reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req); + reqsk_put(req); + } +} +EXPORT_SYMBOL(inet_csk_reqsk_queue_drop); + static void reqsk_timer_handler(unsigned long data) { struct request_sock *req = (struct request_sock *)data; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3571f2be4470..fc1c658ec6c1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1348,7 +1348,8 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet_csk_search_req(sk, th->source, iph->saddr, iph->daddr); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d6311b5365..e5d7649136fc 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -755,10 +755,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!child) goto listen_overflow; - inet_csk_reqsk_queue_unlink(sk, req); - inet_csk_reqsk_queue_removed(sk, req); - + inet_csk_reqsk_queue_drop(sk, req); inet_csk_reqsk_queue_add(sk, req, child); + /* Warning: caller must not call reqsk_put(req); + * child stole last reference on it. + */ return child; listen_overflow: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8c8d7e06b72f..a369e8a70b2c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2812,39 +2812,65 @@ begin_fwd: } } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. + * Otherwise tcp_send_fin() could be tempted to either delay FIN + * or even be forced to close flow without any FIN. + */ +static void sk_forced_wmem_schedule(struct sock *sk, int size) +{ + int amt, status; + + if (size <= sk->sk_forward_alloc) + return; + amt = sk_mem_pages(size); + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk_memory_allocated_add(sk, amt, &status); +} + +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk)) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b5e6cc1d4a73..a38d3ac0f18f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1260,16 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tunnel_stats; - ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tunnel_stats->syncp); - } - return 0; } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index ad51df85aa00..b6575d665568 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -946,7 +946,8 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); if (req) { nsk = tcp_check_req(sk, skb, req, false); - reqsk_put(req); + if (!nsk) + reqsk_put(req); return nsk; } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index db8a2ea6d4de..954810c76a86 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -53,6 +53,11 @@ static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index) return rt; } +static inline struct mpls_dev *mpls_dev_get(const struct net_device *dev) +{ + return rcu_dereference_rtnl(dev->mpls_ptr); +} + static bool mpls_output_possible(const struct net_device *dev) { return dev && (dev->flags & IFF_UP) && netif_carrier_ok(dev); @@ -136,6 +141,7 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, struct mpls_route *rt; struct mpls_entry_decoded dec; struct net_device *out_dev; + struct mpls_dev *mdev; unsigned int hh_len; unsigned int new_header_size; unsigned int mtu; @@ -143,6 +149,10 @@ static int mpls_forward(struct sk_buff *skb, struct net_device *dev, /* Careful this entire function runs inside of an rcu critical section */ + mdev = mpls_dev_get(dev); + if (!mdev || !mdev->input_enabled) + goto drop; + if (skb->pkt_type != PACKET_HOST) goto drop; @@ -352,9 +362,9 @@ static int mpls_route_add(struct mpls_route_config *cfg) if (!dev) goto errout; - /* For now just support ethernet devices */ + /* Ensure this is a supported device */ err = -EINVAL; - if ((dev->type != ARPHRD_ETHER) && (dev->type != ARPHRD_LOOPBACK)) + if (!mpls_dev_get(dev)) goto errout; err = -EINVAL; @@ -428,10 +438,89 @@ errout: return err; } +#define MPLS_PERDEV_SYSCTL_OFFSET(field) \ + (&((struct mpls_dev *)0)->field) + +static const struct ctl_table mpls_dev_table[] = { + { + .procname = "input", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + .data = MPLS_PERDEV_SYSCTL_OFFSET(input_enabled), + }, + { } +}; + +static int mpls_dev_sysctl_register(struct net_device *dev, + struct mpls_dev *mdev) +{ + char path[sizeof("net/mpls/conf/") + IFNAMSIZ]; + struct ctl_table *table; + int i; + + table = kmemdup(&mpls_dev_table, sizeof(mpls_dev_table), GFP_KERNEL); + if (!table) + goto out; + + /* Table data contains only offsets relative to the base of + * the mdev at this point, so make them absolute. + */ + for (i = 0; i < ARRAY_SIZE(mpls_dev_table); i++) + table[i].data = (char *)mdev + (uintptr_t)table[i].data; + + snprintf(path, sizeof(path), "net/mpls/conf/%s", dev->name); + + mdev->sysctl = register_net_sysctl(dev_net(dev), path, table); + if (!mdev->sysctl) + goto free; + + return 0; + +free: + kfree(table); +out: + return -ENOBUFS; +} + +static void mpls_dev_sysctl_unregister(struct mpls_dev *mdev) +{ + struct ctl_table *table; + + table = mdev->sysctl->ctl_table_arg; + unregister_net_sysctl_table(mdev->sysctl); + kfree(table); +} + +static struct mpls_dev *mpls_add_dev(struct net_device *dev) +{ + struct mpls_dev *mdev; + int err = -ENOMEM; + + ASSERT_RTNL(); + + mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); + if (!mdev) + return ERR_PTR(err); + + err = mpls_dev_sysctl_register(dev, mdev); + if (err) + goto free; + + rcu_assign_pointer(dev->mpls_ptr, mdev); + + return mdev; + +free: + kfree(mdev); + return ERR_PTR(err); +} + static void mpls_ifdown(struct net_device *dev) { struct mpls_route __rcu **platform_label; struct net *net = dev_net(dev); + struct mpls_dev *mdev; unsigned index; platform_label = rtnl_dereference(net->mpls.platform_label); @@ -443,14 +532,35 @@ static void mpls_ifdown(struct net_device *dev) continue; rt->rt_dev = NULL; } + + mdev = mpls_dev_get(dev); + if (!mdev) + return; + + mpls_dev_sysctl_unregister(mdev); + + RCU_INIT_POINTER(dev->mpls_ptr, NULL); + + kfree(mdev); } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct mpls_dev *mdev; switch(event) { + case NETDEV_REGISTER: + /* For now just support ethernet devices */ + if ((dev->type == ARPHRD_ETHER) || + (dev->type == ARPHRD_LOOPBACK)) { + mdev = mpls_add_dev(dev); + if (IS_ERR(mdev)) + return notifier_from_errno(PTR_ERR(mdev)); + } + break; + case NETDEV_UNREGISTER: mpls_ifdown(dev); break; @@ -536,6 +646,15 @@ int nla_get_labels(const struct nlattr *nla, if ((dec.bos != bos) || dec.ttl || dec.tc) return -EINVAL; + switch (dec.label) { + case LABEL_IMPLICIT_NULL: + /* RFC3032: This is a label that an LSR may + * assign and distribute, but which never + * actually appears in the encapsulation. + */ + return -EINVAL; + } + label[i] = dec.label; } *labels = nla_labels; @@ -912,7 +1031,7 @@ static int mpls_platform_labels(struct ctl_table *table, int write, return ret; } -static struct ctl_table mpls_table[] = { +static const struct ctl_table mpls_table[] = { { .procname = "platform_labels", .data = NULL, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index fb6de92052c4..693877d69606 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -22,6 +22,12 @@ struct mpls_entry_decoded { u8 bos; }; +struct mpls_dev { + int input_enabled; + + struct ctl_table_header *sysctl; +}; + struct sk_buff; static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c index 57d3e1af5630..0522fc9bfb0a 100644 --- a/net/netfilter/nft_reject.c +++ b/net/netfilter/nft_reject.c @@ -63,6 +63,8 @@ int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 62cabee42fbe..635dbba93d01 100644 --- a/net/netfilter/nft_reject_inet.c +++ b/net/netfilter/nft_reject_inet.c @@ -108,6 +108,8 @@ static int nft_reject_inet_dump(struct sk_buff *skb, if (nla_put_u8(skb, NFTA_REJECT_ICMP_CODE, priv->icmp_code)) goto nla_put_failure; break; + default: + break; } return 0; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 19909d0786a2..ec4adbdcb9b4 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1629,13 +1629,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, if (data == NULL) return NULL; - skb = build_skb(data, size); + skb = __build_skb(data, size); if (skb == NULL) vfree(data); - else { - skb->head_frag = 0; + else skb->destructor = netlink_skb_destructor; - } return skb; } diff --git a/net/tipc/link.c b/net/tipc/link.c index a6b30df6ec02..57be6e6aff99 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2143,7 +2143,6 @@ int tipc_nl_link_dump(struct sk_buff *skb, struct netlink_callback *cb) err = __tipc_nl_add_node_links(net, &msg, node, &prev_link); tipc_node_unlock(node); - tipc_node_put(node); if (err) goto out; diff --git a/net/tipc/server.c b/net/tipc/server.c index ab6183cdb121..77ff03ed1e18 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -102,7 +102,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - sk_release_kernel(sk); + sock_release(sock); con->sock = NULL; } @@ -321,12 +321,9 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) struct socket *sock = NULL; int ret; - ret = sock_create_kern(AF_TIPC, SOCK_SEQPACKET, 0, &sock); + ret = __sock_create(s->net, AF_TIPC, SOCK_SEQPACKET, 0, &sock, 1); if (ret < 0) return NULL; - - sk_change_net(sock->sk, s->net); - ret = kernel_setsockopt(sock, SOL_TIPC, TIPC_IMPORTANCE, (char *)&s->imp, sizeof(s->imp)); if (ret < 0) @@ -376,7 +373,7 @@ static struct socket *tipc_create_listen_sock(struct tipc_conn *con) create_err: kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); return NULL; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ee90d74d7516..9074b5cede38 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1764,13 +1764,14 @@ static int tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, int tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { u32 dnode, dport = 0; - int err = -TIPC_ERR_NO_PORT; + int err; struct sk_buff *skb; struct tipc_sock *tsk; struct tipc_net *tn; struct sock *sk; while (skb_queue_len(inputq)) { + err = -TIPC_ERR_NO_PORT; skb = NULL; dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 99f7012b23b9..a73a226f2d33 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -95,39 +95,36 @@ static DECLARE_WAIT_QUEUE_HEAD(unix_gc_wait); unsigned int unix_tot_inflight; - struct sock *unix_get_socket(struct file *filp) { struct sock *u_sock = NULL; struct inode *inode = file_inode(filp); - /* - * Socket ? - */ + /* Socket ? */ if (S_ISSOCK(inode->i_mode) && !(filp->f_mode & FMODE_PATH)) { struct socket *sock = SOCKET_I(inode); struct sock *s = sock->sk; - /* - * PF_UNIX ? - */ + /* PF_UNIX ? */ if (s && sock->ops && sock->ops->family == PF_UNIX) u_sock = s; } return u_sock; } -/* - * Keep the number of times in flight count for the file - * descriptor if it is for an AF_UNIX socket. +/* Keep the number of times in flight count for the file + * descriptor if it is for an AF_UNIX socket. */ void unix_inflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); + if (atomic_long_inc_return(&u->inflight) == 1) { BUG_ON(!list_empty(&u->link)); list_add_tail(&u->link, &gc_inflight_list); @@ -142,10 +139,13 @@ void unix_inflight(struct file *fp) void unix_notinflight(struct file *fp) { struct sock *s = unix_get_socket(fp); + if (s) { struct unix_sock *u = unix_sk(s); + spin_lock(&unix_gc_lock); BUG_ON(list_empty(&u->link)); + if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); unix_tot_inflight--; @@ -161,32 +161,27 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { - /* - * Do we have file descriptors ? - */ + /* Do we have file descriptors ? */ if (UNIXCB(skb).fp) { bool hit = false; - /* - * Process the descriptors of this socket - */ + /* Process the descriptors of this socket */ int nfd = UNIXCB(skb).fp->count; struct file **fp = UNIXCB(skb).fp->fp; + while (nfd--) { - /* - * Get the socket the fd matches - * if it indeed does so - */ + /* Get the socket the fd matches if it indeed does so */ struct sock *sk = unix_get_socket(*fp++); + if (sk) { struct unix_sock *u = unix_sk(sk); - /* - * Ignore non-candidates, they could + /* Ignore non-candidates, they could * have been added to the queues after * starting the garbage collection */ if (test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) { hit = true; + func(u); } } @@ -203,24 +198,22 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), static void scan_children(struct sock *x, void (*func)(struct unix_sock *), struct sk_buff_head *hitlist) { - if (x->sk_state != TCP_LISTEN) + if (x->sk_state != TCP_LISTEN) { scan_inflight(x, func, hitlist); - else { + } else { struct sk_buff *skb; struct sk_buff *next; struct unix_sock *u; LIST_HEAD(embryos); - /* - * For a listening socket collect the queued embryos + /* For a listening socket collect the queued embryos * and perform a scan on them as well. */ spin_lock(&x->sk_receive_queue.lock); skb_queue_walk_safe(&x->sk_receive_queue, skb, next) { u = unix_sk(skb->sk); - /* - * An embryo cannot be in-flight, so it's safe + /* An embryo cannot be in-flight, so it's safe * to use the list link. */ BUG_ON(!list_empty(&u->link)); @@ -249,8 +242,7 @@ static void inc_inflight(struct unix_sock *usk) static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); - /* - * If this still might be part of a cycle, move it to the end + /* If this still might be part of a cycle, move it to the end * of the list, so that it's checked even if it was already * passed over */ @@ -263,8 +255,7 @@ static bool gc_in_progress; void wait_for_unix_gc(void) { - /* - * If number of inflight sockets is insane, + /* If number of inflight sockets is insane, * force a garbage collect right now. */ if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) @@ -288,8 +279,7 @@ void unix_gc(void) goto out; gc_in_progress = true; - /* - * First, select candidates for garbage collection. Only + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. * @@ -320,15 +310,13 @@ void unix_gc(void) } } - /* - * Now remove all internal in-flight reference to children of + /* Now remove all internal in-flight reference to children of * the candidates. */ list_for_each_entry(u, &gc_candidates, link) scan_children(&u->sk, dec_inflight, NULL); - /* - * Restore the references for children of all candidates, + /* Restore the references for children of all candidates, * which have remaining references. Do this recursively, so * only those remain, which form cyclic references. * @@ -350,8 +338,7 @@ void unix_gc(void) } list_del(&cursor); - /* - * not_cycle_list contains those sockets which do not make up a + /* not_cycle_list contains those sockets which do not make up a * cycle. Restore these to the inflight list. */ while (!list_empty(¬_cycle_list)) { @@ -360,8 +347,7 @@ void unix_gc(void) list_move_tail(&u->link, &gc_inflight_list); } - /* - * Now gc_candidates contains only garbage. Restore original + /* Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs * which are creating the cycle(s). */ |