From d1fe19444d82e399e38c1594c71b850eca8e9de0 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Jul 2015 12:05:37 +0200 Subject: inet: frag: don't re-use chainlist for evictor commit 65ba1f1ec0eff ("inet: frags: fix a race between inet_evict_bucket and inet_frag_kill") describes the bug, but the fix doesn't work reliably. Problem is that ->flags member can be set on other cpu without chainlock being held by that task, i.e. the RMW-Cycle can clear INET_FRAG_EVICTED bit after we put the element on the evictor private list. We can crash when walking the 'private' evictor list since an element can be deleted from list underneath the evictor. Join work with Nikolay Alexandrov. Fixes: b13d3cbfb8e8 ("inet: frag: move eviction of queues to work queue") Reported-by: Johan Schuijt Tested-by: Frank Schreuder Signed-off-by: Nikolay Alexandrov Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net/inet_frag.h') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e1300b3dd597..56a3a5685f76 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -45,6 +45,7 @@ enum { * @flags: fragment queue flags * @max_size: maximum received fragment size * @net: namespace that this frag belongs to + * @list_evictor: list of queues to forcefully evict (e.g. due to low memory) */ struct inet_frag_queue { spinlock_t lock; @@ -59,6 +60,7 @@ struct inet_frag_queue { __u8 flags; u16 max_size; struct netns_frags *net; + struct hlist_node list_evictor; }; #define INETFRAGS_HASHSZ 1024 -- cgit v1.2.3 From 0e60d245a0be7fdbb723607f1d6621007916b252 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 23 Jul 2015 12:05:38 +0200 Subject: inet: frag: change *_frag_mem_limit functions to take netns_frags as argument Followup patch will call it after inet_frag_queue was freed, so q->net doesn't work anymore (but netf = q->net; free(q); mem_limit(netf) would). Tested-by: Frank Schreuder Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 8 ++++---- net/ieee802154/6lowpan/reassembly.c | 6 +++--- net/ipv4/inet_fragment.c | 4 ++-- net/ipv4/ip_fragment.c | 10 +++++----- net/ipv6/netfilter/nf_conntrack_reasm.c | 6 +++--- net/ipv6/reassembly.c | 6 +++--- 6 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/net/inet_frag.h') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 56a3a5685f76..e71ca17024f2 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -141,14 +141,14 @@ static inline int frag_mem_limit(struct netns_frags *nf) return percpu_counter_read(&nf->mem); } -static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void sub_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, -i, frag_percpu_counter_batch); } -static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) +static inline void add_frag_mem_limit(struct netns_frags *nf, int i) { - __percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch); + __percpu_counter_add(&nf->mem, i, frag_percpu_counter_batch); } static inline void init_frag_mem_limit(struct netns_frags *nf) diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index f46e4d1306f2..214d44aef35b 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -207,7 +207,7 @@ found: } else { fq->q.meat += skb->len; } - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && fq->q.meat == fq->q.len) { @@ -287,7 +287,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, clone->data_len = clone->len; head->data_len -= clone->len; head->len -= clone->len; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } WARN_ON(head == NULL); @@ -310,7 +310,7 @@ static int lowpan_frag_reasm(struct lowpan_frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 172234864fec..4473232e4e88 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -328,7 +328,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f) fp = xp; } sum = sum_truesize + f->qsize; - sub_frag_mem_limit(q, sum); + sub_frag_mem_limit(q->net, sum); if (f->destructor) f->destructor(q); @@ -388,7 +388,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, q->net = nf; f->constructor(q, arg); - add_frag_mem_limit(q, f->qsize); + add_frag_mem_limit(nf, f->qsize); setup_timer(&q->timer, f->frag_expire, (unsigned long)q); spin_lock_init(&q->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 31f71b15cfba..b4a77d021c0d 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -309,7 +309,7 @@ static int ip_frag_reinit(struct ipq *qp) kfree_skb(fp); fp = xp; } while (fp); - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); qp->q.flags = 0; qp->q.len = 0; @@ -455,7 +455,7 @@ found: qp->q.fragments = next; qp->q.meat -= free_it->len; - sub_frag_mem_limit(&qp->q, free_it->truesize); + sub_frag_mem_limit(qp->q.net, free_it->truesize); kfree_skb(free_it); } } @@ -479,7 +479,7 @@ found: qp->q.stamp = skb->tstamp; qp->q.meat += skb->len; qp->ecn |= ecn; - add_frag_mem_limit(&qp->q, skb->truesize); + add_frag_mem_limit(qp->q.net, skb->truesize); if (offset == 0) qp->q.flags |= INET_FRAG_FIRST_IN; @@ -587,7 +587,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&qp->q, clone->truesize); + add_frag_mem_limit(qp->q.net, clone->truesize); } skb_push(head, head->data - skb_network_header(head)); @@ -615,7 +615,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&qp->q, sum_truesize); + sub_frag_mem_limit(qp->q.net, sum_truesize); head->next = NULL; head->dev = dev; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 8ffa2c8cce77..5c3bbca6a150 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; -- cgit v1.2.3 From caaecdd3d3f8ec0ea9906c54b1dd8ec8316d26b9 Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Thu, 23 Jul 2015 12:05:40 +0200 Subject: inet: frags: remove INET_FRAG_EVICTED and use list_evictor for the test We can simply remove the INET_FRAG_EVICTED flag to avoid all the flags race conditions with the evictor and use a participation test for the evictor list, when we're at that point (after inet_frag_kill) in the timer there're 2 possible cases: 1. The evictor added the entry to its evictor list while the timer was waiting for the chainlock or 2. The timer unchained the entry and the evictor won't see it In both cases we should be able to see list_evictor correctly due to the sync on the chainlock. Joint work with Florian Westphal. Tested-by: Frank Schreuder Signed-off-by: Nikolay Aleksandrov Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/inet_frag.h | 7 +++++-- net/ipv4/inet_fragment.c | 1 - net/ipv4/ip_fragment.c | 2 +- net/ipv6/reassembly.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net/inet_frag.h') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index e71ca17024f2..53eead2da743 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -21,13 +21,11 @@ struct netns_frags { * @INET_FRAG_FIRST_IN: first fragment has arrived * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction - * @INET_FRAG_EVICTED: frag queue is being evicted */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), - INET_FRAG_EVICTED = BIT(3) }; /** @@ -127,6 +125,11 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f inet_frag_destroy(q, f); } +static inline bool inet_frag_evicting(struct inet_frag_queue *q) +{ + return !hlist_unhashed(&q->list_evictor); +} + /* Memory Tracking Functions. */ /* The default percpu_counter batch size is not big enough to scale to diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index a00ca4c00c35..d0a7c0319e3d 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -140,7 +140,6 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) if (!del_timer(&fq->timer)) continue; - fq->flags |= INET_FRAG_EVICTED; hlist_add_head(&fq->list_evictor, &expired); ++evicted; } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index b4a77d021c0d..921138f6c97c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -202,7 +202,7 @@ static void ip_expire(unsigned long arg) ipq_kill(qp); IP_INC_STATS_BH(net, IPSTATS_MIB_REASMFAILS); - if (!(qp->q.flags & INET_FRAG_EVICTED)) { + if (!inet_frag_evicting(&qp->q)) { struct sk_buff *head = qp->q.fragments; const struct iphdr *iph; int err; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 5c3bbca6a150..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); -- cgit v1.2.3